diff --git a/Project.toml b/Project.toml
index 483872b2..359b6670 100644
--- a/Project.toml
+++ b/Project.toml
@@ -21,9 +21,10 @@ TimerOutputs = "0.5"
 julia = "1.6"
 
 [extras]
+Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 SPIRV_LLVM_Translator_jll = "4a5d46fc-d8cf-5151-a261-86b458210efb"
 SPIRV_Tools_jll = "6ac6d60f-d740-5983-97d7-a4482c0689f4"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "SPIRV_LLVM_Translator_jll", "SPIRV_Tools_jll"]
+test = ["Test", "SPIRV_LLVM_Translator_jll", "SPIRV_Tools_jll", "Distributed"]
diff --git a/src/driver.jl b/src/driver.jl
index 9f36d720..2dd00514 100644
--- a/src/driver.jl
+++ b/src/driver.jl
@@ -219,8 +219,12 @@ const __llvm_initialized = Ref(false)
                 internalize!(pm, exports)
 
                 # eliminate all unused internal functions
+                add!(pm, ModulePass("ExternalizeJuliaGlobals",
+                                    externalize_julia_globals!))
                 global_optimizer!(pm)
                 global_dce!(pm)
+                add!(pm, ModulePass("InternalizeJuliaGlobals",
+                                    internalize_julia_globals!))
                 strip_dead_prototypes!(pm)
 
                 # merge constants (such as exception messages) from the runtime
@@ -309,6 +313,52 @@ const __llvm_initialized = Ref(false)
     return ir, (; entry, compiled)
 end
 
+# Whether `gbl` is named like one of the `jl_global*`/`jl_sym*` globals that the two
+# passes below toggle between internal and external linkage.
+function is_julia_global(gbl)
+    name = LLVM.name(gbl)
+    return startswith(name, "jl_global") || startswith(name, "jl_sym")
+end
+
+# Protect null globals from being killed and inlined: every internal, null-initialized
+# `jl_global*`/`jl_sym*` global gets external linkage, loses its initializer and is
+# marked as externally initialized. Returns whether the module was changed.
+function externalize_julia_globals!(mod::LLVM.Module)
+    changed = false
+    for gbl in LLVM.globals(mod)
+        if LLVM.linkage(gbl) == LLVM.API.LLVMInternalLinkage &&
+           LLVM.initializer(gbl) isa LLVM.PointerNull &&
+           is_julia_global(gbl)
+            LLVM.linkage!(gbl, LLVM.API.LLVMExternalLinkage)
+            LLVM.initializer!(gbl, nothing)
+            LLVM.extinit!(gbl, true)
+            changed = true
+        end
+    end
+    return changed
+end
+
+# And reset them back later: every external `jl_global*`/`jl_sym*` global without an
+# initializer gets a null initializer and internal linkage again. Returns whether the
+# module was changed.
+# NOTE(review): the match is on name, linkage and missing initializer only, so a global
+# that already was an external declaration before `externalize_julia_globals!` ran
+# would be internalized too -- confirm that this cannot happen.
+function internalize_julia_globals!(mod::LLVM.Module)
+    changed = false
+    for gbl in LLVM.globals(mod)
+        if LLVM.linkage(gbl) == LLVM.API.LLVMExternalLinkage &&
+           LLVM.initializer(gbl) === nothing &&
+           is_julia_global(gbl)
+            LLVM.extinit!(gbl, false)
+            LLVM.initializer!(gbl, null(eltype(llvmtype(gbl))))
+            LLVM.linkage!(gbl, LLVM.API.LLVMInternalLinkage)
+            changed = true
+        end
+    end
+    return changed
+end
+
 @locked function emit_asm(@nospecialize(job::CompilerJob), ir::LLVM.Module;
                           strip::Bool=false, validate::Bool=true, format::LLVM.API.LLVMCodeGenFileType)
     finish_module!(job, ir)
diff --git a/src/interface.jl b/src/interface.jl
index 299dbffe..0876a49c 100644
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -197,3 +197,6 @@ function llvm_debug_info(@nospecialize(job::CompilerJob))
         LLVM.API.LLVMDebugEmissionKindFullDebug
     end
 end
+
+# whether we should compile in imaging mode (`false` unless a target overrides it)
+extern_policy(::CompilerJob) = false
diff --git a/src/mcgen.jl b/src/mcgen.jl
index ba28e787..404abb39 100644
--- a/src/mcgen.jl
+++ b/src/mcgen.jl
@@ -7,11 +7,15 @@ function prepare_execution!(@nospecialize(job::CompilerJob), mod::LLVM.Module)
         global current_job
         current_job = job
 
+        add!(pm, ModulePass("ExternalizeJuliaGlobals",
+                            externalize_julia_globals!))
         global_optimizer!(pm)
 
         add!(pm, ModulePass("ResolveCPUReferences", resolve_cpu_references!))
 
         global_dce!(pm)
+        add!(pm, ModulePass("InternalizeJuliaGlobals",
+                            internalize_julia_globals!))
         strip_dead_prototypes!(pm)
 
         run!(pm, mod)
diff --git a/src/native.jl b/src/native.jl
index 738b76c2..cfc7f2a8 100644
--- a/src/native.jl
+++ b/src/native.jl
@@ -7,7 +7,9 @@ export NativeCompilerTarget
 Base.@kwdef struct NativeCompilerTarget <: AbstractCompilerTarget
     cpu::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUName())
     features::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures())
-    always_inline::Bool=false # will mark the job function as always inline
+    always_inline::Bool=false                               # will mark the job function as always inline
+    reloc::LLVM.API.LLVMRelocMode=LLVM.API.LLVMRelocDefault # relocation mode passed to the TargetMachine
+    extern::Bool=false                                      # returned by `extern_policy` for this target
 end
 
 llvm_triple(::NativeCompilerTarget) = Sys.MACHINE
@@ -17,7 +19,9 @@ function llvm_machine(target::NativeCompilerTarget)
 
     t = Target(triple=triple)
 
-    tm = TargetMachine(t, triple, target.cpu, target.features)
+    optlevel = LLVM.API.LLVMCodeGenLevelDefault
+    reloc = target.reloc
+    tm = TargetMachine(t, triple, target.cpu, target.features, optlevel, reloc)
     asm_verbosity!(tm, true)
 
     return tm
@@ -30,6 +34,9 @@ function process_entry!(job::CompilerJob{NativeCompilerTarget}, mod::LLVM.Module
     invoke(process_entry!, Tuple{CompilerJob, LLVM.Module, LLVM.Function}, job, mod, entry)
 end
 
+# the native target exposes the policy through its `extern` field
+extern_policy(job::CompilerJob{NativeCompilerTarget}) = job.target.extern
+
 ## job
 
 runtime_slug(job::CompilerJob{NativeCompilerTarget}) = "native_$(job.target.cpu)-$(hash(job.target.features))"
diff --git a/test/native.jl b/test/native.jl
index 2fbb2070..b0da63f9 100644
--- a/test/native.jl
+++ b/test/native.jl
@@ -1,3 +1,4 @@
+using Distributed
 @testset "native" begin
 
 include("definitions/native.jl")
@@ -262,6 +263,7 @@ end
         occursin("[2] func", msg)
     end
 end
+end
 
 @testset "LazyCodegen" begin
     import .LazyCodegen: call_delayed
@@ -311,7 +313,94 @@ end
     @test call_delayed(complex, 1.0, 2.0) == 1.0+2.0im
 end
 
+shlib_workers = addprocs(1) # extra process used to load an emitted library remotely
+
+@everywhere workers() begin
+    using GPUCompiler
+    using Libdl
+    include("definitions/native.jl")
+end
+@everywhere begin
+function generate_shlib(f, tt, name=GPUCompiler.safe_name(repr(f)))
+    mktemp() do path, io
+        source = FunctionSpec(f, Base.to_tuple_type(tt), false, name)
+        target = NativeCompilerTarget(;reloc=LLVM.API.LLVMRelocPIC, extern=true)
+        params = TestCompilerParams()
+        job = CompilerJob(target, source, params)
+        obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false)
+        write(io, obj)
+        flush(io)
+        # FIXME: Be more portable
+        run(`ld -shared -o $path.$dlext $path`)
+        return "$path.$dlext", name
+    end
+end
+function load_fptr(path, name)
+    ptr = dlopen(path, Libdl.RTLD_LOCAL)
+    fptr = dlsym(ptr, "julia_$name")
+    @assert fptr != C_NULL
+    atexit(() -> rm(path; force=true)) # best-effort cleanup; must not throw during exit
+    fptr
+end
+generate_shlib_fptr(f, tt, name=GPUCompiler.safe_name(repr(f))) =
+    load_fptr(generate_shlib(f, tt, name)...)
+end
+
+@static if VERSION >= v"1.7.0-DEV.600" && Sys.isunix()
+@testset "shared library emission" begin
+    @testset "primitive types" begin
+        f1(x) = x+1
+        @test ccall(generate_shlib_fptr(f1, (Int,)), Int, (Int,), 1) == 2
+        f2(x,y) = x+y
+        path, name = generate_shlib(f2, (Int,Int))
+        @test fetch(@spawnat first(shlib_workers) ccall(load_fptr(path, name), Int, (Int,Int), 1, 2)) == 3
+    end
+    @testset "runtime calls" begin
+        function f3()
+            # Something reasonably complicated
+            if isdir(homedir())
+                true
+            else
+                false
+            end
+        end
+        @test ccall(generate_shlib_fptr(f3, ()), Bool, ())
+    end
+    @testset "String/Symbol" begin
+        f4(str) = str*"!"
+        @test ccall(generate_shlib_fptr(f4, (String,)), String, (String,), "Hello") == "Hello!"
+
+        f5() = :asymbol
+        @test ccall(generate_shlib_fptr(f5, ()), Symbol, ()) == :asymbol
+
+        f6(x) = x == :asymbol ? true : false
+        @test ccall(generate_shlib_fptr(f6, (Symbol,)), Bool, (Symbol,), :asymbol)
+        @test !ccall(generate_shlib_fptr(f6, (Symbol,)), Bool, (Symbol,), :bsymbol)
+    end
+    @testset "closures" begin
+        y = [42.0]
+        function cf1(x)
+            x + y[1]
+        end
+        @test ccall(generate_shlib_fptr(cf1, (Float64,)), Float64, (Any, Float64,), cf1, 1.0) == 43.0
+    end
+    @testset "mutation" begin
+        function cf2(A, sym)
+            if sym != :asymbol
+                A[] = true
+            else
+                A[] = false
+            end
+            return nothing
+        end
+        A = Ref(false)
+        fptr = generate_shlib_fptr(cf2, (Base.RefValue{Bool}, Symbol))
+        ccall(fptr, Nothing, (Any, Symbol), A, :asymbol); @test !A[]
+        ccall(fptr, Nothing, (Any, Symbol), A, :bsymbol); @test A[]
+    end
+end
 end
+rmprocs(shlib_workers)
 
 ############################################################################################
 