Open
Description
To reproduce
The Minimal Working Example (MWE) for this bug:
using CUDA
using Adapt
using SIMD
using KernelAbstractions
"""
    vload(::Type{SIMD.Vec{N, T}}, ptr::Core.LLVMPtr{T, AS}) -> SIMD.Vec{N, T}

Reinterpret `ptr` as a pointer to `SIMD.Vec{N, T}` in the same address space `AS` and
load the first vector through it, passing `N * sizeof(T)` as the alignment of the access.
"""
@inline function vload(::Type{SIMD.Vec{N, T}}, ptr::Core.LLVMPtr{T, AS}) where {N, T, AS}
    # Alignment handed to the load: the byte size of N elements of T.
    # NOTE(review): presumably the caller must guarantee `ptr` really is aligned to this
    # many bytes — confirm against the call sites.
    nbytes = N * sizeof(T)
    wide_ptr = Base.bitcast(Core.LLVMPtr{SIMD.Vec{N, T}, AS}, ptr)
    return unsafe_load(wide_ptr, 1, Val(nbytes))
end
"""
    vstore!(ptr::Core.LLVMPtr{T, AS}, x::SIMD.Vec{N, T})

Reinterpret `ptr` as a pointer to `SIMD.Vec{N, T}` in the same address space `AS` and
store `x` as the first vector behind it, passing `N * sizeof(T)` as the alignment of the
access. Returns `nothing`.
"""
@inline function vstore!(ptr::Core.LLVMPtr{T, AS}, x::SIMD.Vec{N, T}) where {N, T, AS}
    # Alignment handed to the store: the byte size of N elements of T.
    # NOTE(review): presumably the caller must guarantee `ptr` really is aligned to this
    # many bytes — confirm against the call sites.
    nbytes = N * sizeof(T)
    wide_ptr = Base.bitcast(Core.LLVMPtr{SIMD.Vec{N, T}, AS}, ptr)
    unsafe_store!(wide_ptr, x, 1, Val(nbytes))
    return nothing
end
# Kernel of the reproducer: loads one `SIMD.Vec{4, Float32}` through `pointer(x)`, applies
# `exp` to the whole vector, and stores the result through `pointer(y)`.
@kernel function ker!(y, x)
# NOTE(review): `i` is never read below, so the load/store addresses do not depend on the
# work-item index.
i = @index(Global)
v = vload(SIMD.Vec{4, Float32}, pointer(x))
# NOTE(review): presumably this vector-valued `exp` is what ends up as the `expf` symbol
# named in the reported LLVM error — confirm.
v = exp(v)
vstore!(pointer(y), v)
end
# Driver for the reproducer: builds two 4-element Float32 arrays of ones, adapts them to
# the backend `kab`, launches `ker!` over a single work-item, and prints `y`.
function tt(kab)
    x = Adapt.adapt(kab, fill(1.0f0, 4))
    y = Adapt.adapt(kab, fill(1.0f0, 4))

    # Instantiate the kernel for this backend, then launch it with ndrange=1.
    kernel! = ker!(kab)
    kernel!(y, x; ndrange=1)

    @show y
    return nothing
end
# Entry point of the reproducer: run `tt` with the CUDA backend.
tt(CUDABackend())
Error:
ERROR: LLVM error: Undefined external symbol "expf"
Stacktrace:
[1] handle_error(reason::Cstring)
@ LLVM ~/.julia/packages/LLVM/b3kFs/src/core/context.jl:194
[2] LLVMTargetMachineEmitToMemoryBuffer(T::LLVM.TargetMachine, M::LLVM.Module, codegen::LLVM.API.LLVMCodeGenFileType, ErrorMessage::Base.RefValue{Cstring}, OutMemBuf::Base.RefValue{Ptr{…}})
@ LLVM.API ~/.julia/packages/LLVM/b3kFs/lib/16/libLLVM.jl:11138
[3] emit(tm::LLVM.TargetMachine, mod::LLVM.Module, filetype::LLVM.API.LLVMCodeGenFileType)
@ LLVM ~/.julia/packages/LLVM/b3kFs/src/targetmachine.jl:118
[4] mcgen(job::GPUCompiler.CompilerJob, mod::LLVM.Module, format::LLVM.API.LLVMCodeGenFileType)
@ GPUCompiler ~/.julia/packages/GPUCompiler/OGnEB/src/mcgen.jl:75
[5] mcgen(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, mod::LLVM.Module, format::LLVM.API.LLVMCodeGenFileType)
@ CUDA ~/.julia/packages/CUDA/1kIOw/src/compiler/compilation.jl:127
[6] macro expansion
@ ~/.julia/packages/GPUCompiler/OGnEB/src/driver.jl:400 [inlined]
[7] emit_asm(job::GPUCompiler.CompilerJob, ir::LLVM.Module; strip::Bool, validate::Bool, format::LLVM.API.LLVMCodeGenFileType)
@ GPUCompiler ~/.julia/packages/GPUCompiler/OGnEB/src/utils.jl:110
[8] emit_asm
@ ~/.julia/packages/GPUCompiler/OGnEB/src/utils.jl:108 [inlined]
[9] codegen(output::Symbol, job::GPUCompiler.CompilerJob; toplevel::Bool, libraries::Bool, optimize::Bool, cleanup::Bool, validate::Bool, strip::Bool, only_entry::Bool, parent_job::Nothing)
@ GPUCompiler ~/.julia/packages/GPUCompiler/OGnEB/src/driver.jl:120
...