Open
Description
The following code works on Julia 1.8, but errors on Julia 1.9:
julia> using CUDA, StaticArrays
julia> x = CUDA.zeros(SVector{2, Float32}, 16, 16);
julia> xraw = reinterpret(Float32, @view(x[:, end:-1:1]));
julia> Array(xraw)
Swapping the order of `@view` and `reinterpret` (i.e. applying `reinterpret` first and then taking the `@view`) solves the issue.
Error:
ERROR: InvalidIRError: compiling kernel #linear_copy_kernel!(CUDA.CuKernelContext, CuDeviceVector{SVector{2, Float32}, 1}, Int64, Base.ReinterpretArray{Float32, 2, SVector{2, Float32}, SubArray{SVector{2, Float32}, 2, CuDeviceMatrix{SVector{2, Float32}, 1}, Tuple{Base.Slice{Base.OneTo{Int64}}, StepRange{Int64, Int64}}, false}, false}, Int64, Int64) resulted in invalid LLVM IR
Reason: unsupported dynamic function invocation (call to array_subpadding(S, T) @ Base reinterpretarray.jl:726)
Stacktrace:
[1] check_readable
@ ./reinterpretarray.jl:205
[2] getindex
@ ./reinterpretarray.jl:371
[3] linear_copy_kernel!
@ ~/.julia/packages/GPUArrays/fqD8z/src/host/abstractarray.jl:88
Reason: unsupported dynamic function invocation (call to convert)
Stacktrace:
[1] setindex!
@ ~/.julia/packages/CUDA/DfvRa/src/device/array.jl:194
[2] linear_copy_kernel!
@ ~/.julia/packages/GPUArrays/fqD8z/src/host/abstractarray.jl:88
Hint: catch this exception as `err` and call `code_typed(err; interactive = true)` to introspect the erronous code with Cthulhu.jl
Stacktrace:
[1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{typeof(GPUArrays.linear_copy_kernel!), Tuple{CUDA.CuKernelContext, CuDeviceVector{SVector{2, Float32}, 1}, Int64, Base.ReinterpretArray{Float32, 2, SVector{2, Float32}, SubArray{SVector{2, Float32}, 2, CuDeviceMatrix{SVector{2, Float32}, 1}, Tuple{Base.Slice{Base.OneTo{Int64}}, StepRange{Int64, Int64}}, false}, false}, Int64, Int64}}}, args::LLVM.Module)
@ GPUCompiler ~/.julia/packages/GPUCompiler/2N9V9/src/validation.jl:141
[2] macro expansion
@ ~/.julia/packages/GPUCompiler/2N9V9/src/driver.jl:418 [inlined]
[3] macro expansion
@ ~/.julia/packages/TimerOutputs/LHjFw/src/TimerOutput.jl:253 [inlined]
[4] macro expansion
@ ~/.julia/packages/GPUCompiler/2N9V9/src/driver.jl:416 [inlined]
[5] emit_asm(job::GPUCompiler.CompilerJob, ir::LLVM.Module; strip::Bool, validate::Bool, format::LLVM.API.LLVMCodeGenFileType)
@ GPUCompiler ~/.julia/packages/GPUCompiler/2N9V9/src/utils.jl:68
[6] emit_asm
@ ~/.julia/packages/GPUCompiler/2N9V9/src/utils.jl:62 [inlined]
[7] cufunction_compile(job::GPUCompiler.CompilerJob, ctx::LLVM.ThreadSafeContext)
@ CUDA ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:354
[8] #224
@ ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:347 [inlined]
[9] LLVM.ThreadSafeContext(f::CUDA.var"#224#225"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{typeof(GPUArrays.linear_copy_kernel!), Tuple{CUDA.CuKernelContext, CuDeviceVector{SVector{2, Float32}, 1}, Int64, Base.ReinterpretArray{Float32, 2, SVector{2, Float32}, SubArray{SVector{2, Float32}, 2, CuDeviceMatrix{SVector{2, Float32}, 1}, Tuple{Base.Slice{Base.OneTo{Int64}}, StepRange{Int64, Int64}}, false}, false}, Int64, Int64}}}})
@ LLVM ~/.julia/packages/LLVM/WjSQG/src/executionengine/ts_module.jl:14
[10] JuliaContext(f::CUDA.var"#224#225"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{typeof(GPUArrays.linear_copy_kernel!), Tuple{CUDA.CuKernelContext, CuDeviceVector{SVector{2, Float32}, 1}, Int64, Base.ReinterpretArray{Float32, 2, SVector{2, Float32}, SubArray{SVector{2, Float32}, 2, CuDeviceMatrix{SVector{2, Float32}, 1}, Tuple{Base.Slice{Base.OneTo{Int64}}, StepRange{Int64, Int64}}, false}, false}, Int64, Int64}}}})
@ GPUCompiler ~/.julia/packages/GPUCompiler/2N9V9/src/driver.jl:74
[11] cufunction_compile(job::GPUCompiler.CompilerJob)
@ CUDA ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:346
[12] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(CUDA.cufunction_compile), linker::typeof(CUDA.cufunction_link))
@ GPUCompiler ~/.julia/packages/GPUCompiler/2N9V9/src/cache.jl:90
[13] cufunction(f::typeof(GPUArrays.linear_copy_kernel!), tt::Type{Tuple{CUDA.CuKernelContext, CuDeviceVector{SVector{2, Float32}, 1}, Int64, Base.ReinterpretArray{Float32, 2, SVector{2, Float32}, SubArray{SVector{2, Float32}, 2, CuDeviceMatrix{SVector{2, Float32}, 1}, Tuple{Base.Slice{Base.OneTo{Int64}}, StepRange{Int64, Int64}}, false}, false}, Int64, Int64}}; name::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ CUDA ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:299
[14] cufunction
@ ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:292 [inlined]
[15] macro expansion
@ ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:102 [inlined]
[16] #launch_heuristic#248
@ ~/.julia/packages/CUDA/DfvRa/src/gpuarrays.jl:17 [inlined]
[17] launch_heuristic
@ ~/.julia/packages/CUDA/DfvRa/src/gpuarrays.jl:15 [inlined]
[18] gpu_call(::typeof(GPUArrays.linear_copy_kernel!), ::CuArray{SVector{2, Float32}, 1, CUDA.Mem.DeviceBuffer}, ::Int64, ::Base.ReinterpretArray{Float32, 2, SVector{2, Float32}, SubArray{SVector{2, Float32}, 2, CuArray{SVector{2, Float32}, 2, CUDA.Mem.DeviceBuffer}, Tuple{Base.Slice{Base.OneTo{Int64}}, StepRange{Int64, Int64}}, false}, false}, ::Int64, ::Int64; target::CuArray{SVector{2, Float32}, 1, CUDA.Mem.DeviceBuffer}, elements::Int64, threads::Nothing, blocks::Nothing, name::Nothing)
@ GPUArrays ~/.julia/packages/GPUArrays/fqD8z/src/device/execution.jl:61
[19] gpu_call
@ ~/.julia/packages/GPUArrays/fqD8z/src/device/execution.jl:34 [inlined]
[20] copyto!(dest::CuArray{SVector{2, Float32}, 1, CUDA.Mem.DeviceBuffer}, dstart::Int64, src::Base.ReinterpretArray{Float32, 2, SVector{2, Float32}, SubArray{SVector{2, Float32}, 2, CuArray{SVector{2, Float32}, 2, CUDA.Mem.DeviceBuffer}, Tuple{Base.Slice{Base.OneTo{Int64}}, StepRange{Int64, Int64}}, false}, false}, sstart::Int64, n::Int64)
@ GPUArrays ~/.julia/packages/GPUArrays/fqD8z/src/host/abstractarray.jl:101
[21] copyto!
@ ~/.julia/packages/GPUArrays/fqD8z/src/host/abstractarray.jl:113 [inlined]
[22] copyto!(dest::Matrix{Float32}, src::Base.ReinterpretArray{Float32, 2, SVector{2, Float32}, SubArray{SVector{2, Float32}, 2, CuArray{SVector{2, Float32}, 2, CUDA.Mem.DeviceBuffer}, Tuple{Base.Slice{Base.OneTo{Int64}}, StepRange{Int64, Int64}}, false}, false})
@ GPUArrays ~/.julia/packages/GPUArrays/fqD8z/src/host/abstractarray.jl:79
[23] copyto_axcheck!(dest::Matrix{Float32}, src::Base.ReinterpretArray{Float32, 2, SVector{2, Float32}, SubArray{SVector{2, Float32}, 2, CuArray{SVector{2, Float32}, 2, CUDA.Mem.DeviceBuffer}, Tuple{Base.Slice{Base.OneTo{Int64}}, StepRange{Int64, Int64}}, false}, false})
@ Base ./abstractarray.jl:1180
[24] Matrix{Float32}(x::Base.ReinterpretArray{Float32, 2, SVector{2, Float32}, SubArray{SVector{2, Float32}, 2, CuArray{SVector{2, Float32}, 2, CUDA.Mem.DeviceBuffer}, Tuple{Base.Slice{Base.OneTo{Int64}}, StepRange{Int64, Int64}}, false}, false})
@ Base ./array.jl:621
[25] Array(A::Base.ReinterpretArray{Float32, 2, SVector{2, Float32}, SubArray{SVector{2, Float32}, 2, CuArray{SVector{2, Float32}, 2, CUDA.Mem.DeviceBuffer}, Tuple{Base.Slice{Base.OneTo{Int64}}, StepRange{Int64, Int64}}, false}, false})
@ Core ./boot.jl:500
[26] top-level scope
@ REPL[22]:1
[27] top-level scope
@ ~/.julia/packages/CUDA/DfvRa/src/initialization.jl:52