-
Notifications
You must be signed in to change notification settings - Fork 145
Open
Description
Hi,
I have a kernel that depends on NVIDIA's sincos intrinsic, which I am not able to differentiate with Enzyme.
MWE in Julia:
using Enzyme, CUDA, KernelAbstractions, Adapt
# Host-side entry point that is handed to `autodiff`.
# Instantiates `sincos_kernel!` for `backend`, launches it on `x` with
# `ndrange = length(x)`, and then synchronizes the backend.
# `x` is modified in place by the kernel.
function sincos_caller!(x, backend)
sincos_kernel!(backend)(x; ndrange=length(x))
# Synchronize the backend after the launch before returning.
KernelAbstractions.synchronize(backend)
end
# KernelAbstractions kernel: overwrites `x[i]` with `sin(x[i]) + cos(x[i])`,
# where `i` is the global linear index obtained from `@index`.
# NOTE(review): per the report, on the CUDA backend this `sincos` call is what
# ends up as NVIDIA's sincos intrinsic, which Enzyme fails to differentiate.
@kernel function sincos_kernel!(x)
i = @index(Global, Linear)
# `sincos` returns the sine and cosine of its argument as a tuple.
s, c = sincos(x[i])
x[i] = s + c
end
# Reproducer inputs: run on the CUDA backend.
backend = CUDABackend()
# Primal input: 100 random Float32 values, adapted to `backend`.
x = rand(Float32, 100) |> adapt(backend)
# Shadow of `x` (all ones), passed together with `x` as `Duplicated(x, ∂x)`
# in the `autodiff` call that follows.
∂x = ones(Float32, 100) |> adapt(backend)
autodiff(Reverse, sincos_caller!, Duplicated(x, ∂x), Const(backend))

Which gives the following error:
ERROR: AssertionError: expectedTapeType === TapeType
Stacktrace:
[1] macro expansion
@ ~/BlochHole/.julia/packages/Enzyme/EITgk/src/compiler.jl:2597 [inlined]
[2] macro expansion
@ ~/BlochHole/.julia/packages/LLVM/iza6e/src/base.jl:97 [inlined]
[3] enzyme!(job::GPUCompiler.CompilerJob{…}, interp::Enzyme.Compiler.Interpreter.EnzymeInterpreter{…}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::Tuple{…} where N, returnPrimal::Bool, expectedTapeType::Type, loweredArgs::Set{…}, boxedArgs::Set{…})
@ Enzyme.Compiler ~/BlochHole/.julia/packages/Enzyme/EITgk/src/compiler.jl:2534
[4] compile_unhooked(output::Symbol, job::GPUCompiler.CompilerJob{…})
@ Enzyme.Compiler ~/BlochHole/.julia/packages/Enzyme/EITgk/src/compiler.jl:5138
[5] codegen(output::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler ~/BlochHole/.julia/packages/GPUCompiler/4uj4i/src/driver.jl:77
[6] codegen
@ ~/BlochHole/.julia/packages/GPUCompiler/4uj4i/src/driver.jl:71 [inlined]
[7] (::GPUCompiler.var"#156#161"{GPUCompiler.CompilerJob{…}})()
@ GPUCompiler ~/BlochHole/.julia/packages/GPUCompiler/4uj4i/src/driver.jl:256
[8] get!(default::GPUCompiler.var"#156#161"{…}, h::Dict{…}, key::GPUCompiler.CompilerJob{…})
@ Base ./dict.jl:479
[9] macro expansion
@ ~/BlochHole/.julia/packages/GPUCompiler/4uj4i/src/driver.jl:252 [inlined]
[10] emit_llvm(job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler ~/BlochHole/.julia/packages/GPUCompiler/4uj4i/src/utils.jl:116
[11] emit_llvm
@ ~/BlochHole/.julia/packages/GPUCompiler/4uj4i/src/utils.jl:114 [inlined]
[12] compile_unhooked(output::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler ~/BlochHole/.julia/packages/GPUCompiler/4uj4i/src/driver.jl:95
[13] compile_unhooked(output::Symbol, job::GPUCompiler.CompilerJob)
@ GPUCompiler ~/BlochHole/.julia/packages/GPUCompiler/4uj4i/src/driver.jl:80
[14] compile(target::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler ~/BlochHole/.julia/packages/GPUCompiler/4uj4i/src/driver.jl:67
[15] compile
@ ~/BlochHole/.julia/packages/GPUCompiler/4uj4i/src/driver.jl:55 [inlined]
[16] #1188
@ ~/BlochHole/.julia/packages/CUDA/g94EB/src/compiler/compilation.jl:250 [inlined]
[17] JuliaContext(f::CUDA.var"#1188#1191"{GPUCompiler.CompilerJob{…}}; kwargs::@Kwargs{})
@ GPUCompiler ~/BlochHole/.julia/packages/GPUCompiler/4uj4i/src/driver.jl:34
[18] JuliaContext(f::Function)
@ GPUCompiler ~/BlochHole/.julia/packages/GPUCompiler/4uj4i/src/driver.jl:25
[19] compile(job::GPUCompiler.CompilerJob)
@ CUDA ~/BlochHole/.julia/packages/CUDA/g94EB/src/compiler/compilation.jl:249
[20] actual_compilation(cache::Dict{…}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{…}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
@ GPUCompiler ~/BlochHole/.julia/packages/GPUCompiler/4uj4i/src/execution.jl:245
[21] cached_compilation(cache::Dict{…}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{…}, compiler::Function, linker::Function)
@ GPUCompiler ~/BlochHole/.julia/packages/GPUCompiler/4uj4i/src/execution.jl:159
[22] macro expansion
@ ~/BlochHole/.julia/packages/CUDA/g94EB/src/compiler/execution.jl:373 [inlined]
[23] macro expansion
@ ./lock.jl:267 [inlined]
[24] cufunction(f::typeof(EnzymeExt.gpu_aug_fwd), tt::Type{…}; kwargs::@Kwargs{…})
@ CUDA ~/BlochHole/.julia/packages/CUDA/g94EB/src/compiler/execution.jl:368
[25] macro expansion
@ ~/BlochHole/.julia/packages/CUDA/g94EB/src/compiler/execution.jl:112 [inlined]
[26] (::KernelAbstractions.Kernel{…})(::Function, ::Vararg{…}; ndrange::Tuple{…}, workgroupsize::Nothing)
@ CUDA.CUDAKernels ~/BlochHole/.julia/packages/CUDA/g94EB/src/CUDAKernels.jl:127
[27] #augmented_primal#7
@ ~/BlochHole/.julia/packages/KernelAbstractions/X5fk1/ext/EnzymeCore08Ext.jl:264
[28] augmented_primal
@ ~/BlochHole/.julia/packages/KernelAbstractions/X5fk1/ext/EnzymeCore08Ext.jl:214 [inlined]
[29] sincos_caller!
@ ./REPL[5]:2 [inlined]
[30] sincos_caller!
@ ./REPL[5]:0 [inlined]
[31] diffejulia_sincos_caller__1063_inner_1wrap
@ ./REPL[5]:0
[32] macro expansion
@ ~/BlochHole/.julia/packages/Enzyme/EITgk/src/compiler.jl:5923 [inlined]
[33] enzyme_call
@ ~/BlochHole/.julia/packages/Enzyme/EITgk/src/compiler.jl:5454 [inlined]
[34] CombinedAdjointThunk
@ ~/BlochHole/.julia/packages/Enzyme/EITgk/src/compiler.jl:5340 [inlined]
[35] autodiff
@ ~/BlochHole/.julia/packages/Enzyme/EITgk/src/Enzyme.jl:534 [inlined]
[36] autodiff
@ ~/BlochHole/.julia/packages/Enzyme/EITgk/src/Enzyme.jl:575 [inlined]
[37] autodiff(::ReverseMode{…}, ::typeof(sincos_caller!), ::Duplicated{…}, ::Const{…})
@ Enzyme ~/BlochHole/.julia/packages/Enzyme/EITgk/src/Enzyme.jl:547
[38] top-level scope
@ REPL[10]:1
[39] top-level scope
@ none:1
Some type information was truncated. Use `show(err)` to see complete types.

My understanding is that this would be an easy addition to the following list (adding `__nv_sincosf` should be enough (?)):
| ["__fd_sincos_1", "__fd_sincos_1f", "__fd_sincos_1l"], |
Thanks for all the help! If this belongs in the Enzyme.jl repo, feel free to move it.
Metadata
Metadata
Assignees
Labels
No labels