Open
Description
For feature parity it would be good to support atomic `max` on `Float` types.
Currently the MWE below fails:
using AMDGPU
using ROCKernels
using KernelAbstractions
using KernelAbstractions: @atomic
# Scatter-max kernel: for each global work-item i, atomically update
# target[indices[i]] with max(target[indices[i]], source[i]).
# Multiple i may map to the same idx, hence the need for @atomic.
@kernel function f(target, source, indices)
# Global linear index of this work-item over the launch ndrange.
i = @index(Global)
# Destination slot in `target`; may collide across work-items.
idx = indices[i]
v = source[i]
# Atomic read-modify-write; this is the line that fails to compile
# for Float32 targets (see error trace below).
@atomic max(target[idx], v)
end
# Reproducer driver: computes the scatter-max on the CPU as a reference,
# then launches the GPU kernel and compares results.
function main()
source = rand(Float32, 1024)
# Random destination indices; collisions are intended (32 slots, 1024 sources).
indices = rand(1:32, 1024)
target = zeros(Float32, 32)
# Sanity check that index collisions actually occur, so the atomic path is exercised.
@assert length(unique(indices)) < length(indices)
dsource, dindices, dtarget = ROCArray.((source, indices, target))
# CPU reference: sequential scatter-max into `target`.
for i in 1:1024
idx = indices[i]
target[idx] = max(target[idx], source[i])
end
# Launch the kernel over all 1024 sources and wait for completion.
wait(f(AMDGPU.default_device())(dtarget, dsource, dindices; ndrange=1024))
# Exact equality is valid here: max only selects existing Float32 values,
# so CPU and GPU results should be bitwise identical.
@assert Array(dtarget) == target
end
Error
ERROR: InvalidIRError: compiling kernel gpu_f(KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, AMDGPU.Device.ROCDeviceVector{Float32, 1}, AMDGPU.Device.ROCDeviceVector{Float32, 1}, AMDGPU.Device.ROCDeviceVector{Int64, 1}) resulted in invalid LLVM IR
Reason: unsupported call to an unknown function (call to ijl_get_nth_field_checked)
Stacktrace:
[1] indexed_iterate
@ ./namedtuple.jl:140
[2] multiple call sites
@ unknown:0
Reason: unsupported call to an unknown function (call to jl_f_tuple)
Stacktrace:
[1] indexed_iterate
@ ./namedtuple.jl:140
[2] multiple call sites
@ unknown:0
Reason: unsupported call to an unknown function (call to ijl_get_nth_field_checked)
Stacktrace:
[1] atomic_pointermodify
@ ~/.julia/packages/LLVM/9gCXO/src/interop/atomics.jl:395
[2] modify!
@ ~/.julia/packages/UnsafeAtomicsLLVM/i4GMj/src/internal.jl:18
[3] modify!
@ ~/.julia/packages/Atomix/F9VIX/src/core.jl:33
[4] macro expansion
@ ~/code/Nerf.jl/src/Nerf.jl:155
[5] gpu_f
@ ~/.julia/packages/KernelAbstractions/C8flJ/src/macros.jl:81
[6] gpu_f
@ ./none:0
Reason: unsupported dynamic function invocation (call to atomic_pointerreplace)
Stacktrace:
[1] atomic_pointermodify
@ ~/.julia/packages/LLVM/9gCXO/src/interop/atomics.jl:395
[2] modify!
@ ~/.julia/packages/UnsafeAtomicsLLVM/i4GMj/src/internal.jl:18
[3] modify!
@ ~/.julia/packages/Atomix/F9VIX/src/core.jl:33
[4] macro expansion
@ ~/code/Nerf.jl/src/Nerf.jl:155
[5] gpu_f
@ ~/.julia/packages/KernelAbstractions/C8flJ/src/macros.jl:81
[6] gpu_f
@ ./none:0
As a temporary workaround, we can reinterpret `Float32` as `UInt32`:
# Workaround launch: integer atomic max is supported, and for non-negative
# IEEE-754 floats the UInt32 bit-pattern ordering matches the float ordering,
# so reinterpreting the buffers gives the correct result here.
# NOTE(review): `DEVICE` presumably stands for `AMDGPU.default_device()` as
# used in `main` above — confirm.
wait(f(DEVICE)(
reinterpret(UInt32, dtarget),
reinterpret(UInt32, dsource), dindices; ndrange=1024))