Conversation
Codecov Report❌ Patch coverage is
Additional details and impacted files@@ Coverage Diff @@
## main #2675 +/- ##
==========================================
- Coverage 72.58% 68.05% -4.54%
==========================================
Files 58 58
Lines 18746 18460 -286
==========================================
- Hits 13607 12563 -1044
- Misses 5139 5897 +758 ☔ View full report in Codecov by Sentry. 🚀 New features to boost your workflow:
|
Benchmark Results
Benchmark Plots
A plot of the benchmark results has been uploaded as an artifact at https://github.com/EnzymeAD/Enzyme.jl/actions/runs/18876631945/artifacts/4393981985. |
|
Where are we registering the Enzyme passes? |
|
We are not yet; I am not even sure we need to. Right now I want a working legalization and pre-opt step, which is just Julia-based passes. |
|
Right now |
|
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.
diff --git a/src/compiler.jl b/src/compiler.jl
index 96b1c01c..2ab0e65b 100644
--- a/src/compiler.jl
+++ b/src/compiler.jl
@@ -1263,7 +1263,7 @@ function nested_codegen!(
edges = edges::Vector{Any}
push!(edges, funcspec)
- LLVM.@dispose pb=LLVM.NewPMPassBuilder() begin
+ LLVM.@dispose pb = LLVM.NewPMPassBuilder() begin
registerEnzymeAndPassPipeline!(pb)
LLVM.add!(pb, LLVM.NewPMModulePassManager()) do mpm
LLVM.add!(mpm, PreserveNVVMPass())
@@ -2755,7 +2755,7 @@ function enzyme!(
for f in collect(functions(mod))
API.EnzymeFixupBatchedJuliaCallingConvention(f)
end
- run!(DCEPass(), mod)
+ run!(DCEPass(), mod)
fix_decayaddr!(mod)
adjointf = adjointf == nothing ? nothing : functions(mod)[adjointfname]
augmented_primalf =
@@ -4502,7 +4502,7 @@ function GPUCompiler.compile_unhooked(output::Symbol, job::CompilerJob{<:EnzymeT
permit_inlining!(f)
end
- LLVM.@dispose pb=LLVM.NewPMPassBuilder() begin
+ LLVM.@dispose pb = LLVM.NewPMPassBuilder() begin
registerEnzymeAndPassPipeline!(pb)
LLVM.add!(pb, LLVM.NewPMModulePassManager()) do mpm
LLVM.add!(mpm, PreserveNVVMPass())
@@ -5186,7 +5186,7 @@ end
augmented_primalf = nothing
end
- LLVM.@dispose pb=LLVM.NewPMPassBuilder() begin
+ LLVM.@dispose pb = LLVM.NewPMPassBuilder() begin
registerEnzymeAndPassPipeline!(pb)
LLVM.add!(pb, LLVM.NewPMModulePassManager()) do mpm
LLVM.add!(mpm, PreserveNVVMEndPass())
diff --git a/src/compiler/optimize.jl b/src/compiler/optimize.jl
index a4f4334f..ab3f34d4 100644
--- a/src/compiler/optimize.jl
+++ b/src/compiler/optimize.jl
@@ -1,6 +1,6 @@
function registerEnzymeAndPassPipeline!(pb::NewPMPassBuilder)
enzyme_callback = cglobal((:registerEnzymeAndPassPipeline, API.libEnzyme))
- LLVM.API.LLVMPassBuilderExtensionsPushRegistrationCallbacks(pb.exts, enzyme_callback)
+ return LLVM.API.LLVMPassBuilderExtensionsPushRegistrationCallbacks(pb.exts, enzyme_callback)
end
LLVM.@function_pass "jl-inst-simplify" JLInstSimplifyPass
@@ -26,7 +26,7 @@ Addr13NoAliasPass() = NewPMModulePass("addr13_noalias", addr13NoAlias)
RewriteGenericMemoryPass() = NewPMModulePass("rewrite_generic_memory", rewrite_generic_memory!)
function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine)
- @dispose pb = NewPMPassBuilder() begin
+ return @dispose pb = NewPMPassBuilder() begin
registerEnzymeAndPassPipeline!(pb)
register!(pb, Addr13NoAliasPass())
register!(pb, RewriteGenericMemoryPass())
@@ -51,7 +51,7 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine)
add!(mpm, AlwaysInlinerPass())
add!(mpm, NewPMFunctionPassManager()) do fpm
add!(fpm, AllocOptPass())
- end
+ end
add!(mpm, GlobalOptPass())
add!(mpm, NewPMFunctionPassManager()) do fpm
@@ -74,7 +74,7 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine)
add!(fpm, ReassociatePass())
add!(fpm, EarlyCSEPass())
add!(fpm, AllocOptPass())
- add!(fpm, NewPMLoopPassManager(use_memory_ssa=true)) do lpm
+ add!(fpm, NewPMLoopPassManager(use_memory_ssa = true)) do lpm
add!(lpm, LoopIdiomRecognizePass())
add!(lpm, LoopRotatePass())
add!(lpm, LowerSIMDLoopPass())
@@ -89,7 +89,7 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine)
add!(lpm, IndVarSimplifyPass())
add!(lpm, LoopDeletionPass())
end
- add!(fpm, LoopUnrollPass(opt_level=2))
+ add!(fpm, LoopUnrollPass(opt_level = 2))
add!(fpm, AllocOptPass())
add!(fpm, SROAPass())
add!(fpm, GVNPass())
@@ -120,7 +120,7 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine)
add!(fpm, JLInstSimplifyPass())
# GC passes
- add!(fpm, GCInvariantVerifierPass(strong=false))
+ add!(fpm, GCInvariantVerifierPass(strong = false))
add!(fpm, SimplifyCFGPass())
add!(fpm, InstCombinePass())
add!(fpm, JLInstSimplifyPass())
@@ -158,7 +158,7 @@ function addOptimizationPasses!(mpm::LLVM.NewPMPassManager)
add!(mpm, AlwaysInlinerPass())
- add!(mpm, NewPMFunctionPassManager()) do fpm
+ return add!(mpm, NewPMFunctionPassManager()) do fpm
# Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard time
# merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt`
# pass.
@@ -182,7 +182,7 @@ function addOptimizationPasses!(mpm::LLVM.NewPMPassManager)
# remove those before optimizing loops.
add!(fpm, AllocOptPass())
- add!(fpm, NewPMLoopPassManager(use_memory_ssa=true)) do lpm
+ add!(fpm, NewPMLoopPassManager(use_memory_ssa = true)) do lpm
add!(lpm, LoopRotatePass())
# moving IndVarSimplify here prevented removing the loop in perf_sumcartesian(10:-1:1)
add!(lpm, LoopIdiomRecognizePass())
@@ -198,7 +198,7 @@ function addOptimizationPasses!(mpm::LLVM.NewPMPassManager)
add!(lpm, IndVarSimplifyPass())
add!(lpm, LoopDeletionPass())
end
- add!(fpm, LoopUnrollPass(opt_level=2))
+ add!(fpm, LoopUnrollPass(opt_level = 2))
# Run our own SROA on heap objects before LLVM's
add!(fpm, AllocOptPass())
@@ -242,18 +242,18 @@ function addOptimizationPasses!(mpm::LLVM.NewPMPassManager)
end
function addMachinePasses!(mpm::LLVM.NewPMPassManager)
- add!(mpm, NewPMFunctionPassManager()) do fpm
+ return add!(mpm, NewPMFunctionPassManager()) do fpm
if VERSION < v"1.12.0-DEV.1390"
add!(fpm, CombineMulAddPass())
end
add!(fpm, DivRemPairsPass())
add!(fpm, DemoteFloat16Pass())
- add!(fpm, GVNPass())
+ add!(fpm, GVNPass())
end
end
function addJuliaLegalizationPasses!(mpm::LLVM.NewPMPassManager, lower_intrinsics::Bool = true)
- if lower_intrinsics
+ return if lower_intrinsics
add!(mpm, NewPMFunctionPassManager()) do fpm
add!(fpm, ReinsertGCMarkerPass())
if VERSION < v"1.13.0-DEV.36"
@@ -275,7 +275,7 @@ function addJuliaLegalizationPasses!(mpm::LLVM.NewPMPassManager, lower_intrinsic
end
# We need these two passes and the instcombine below
# after GC lowering to let LLVM do some constant propagation on the tags.
- # and remove some unnecessary write barrier checks.
+ # and remove some unnecessary write barrier checks.
add!(mpm, NewPMFunctionPassManager()) do fpm
add!(fpm, GVNPass())
add!(fpm, SCCPPass())
@@ -288,10 +288,12 @@ function addJuliaLegalizationPasses!(mpm::LLVM.NewPMPassManager, lower_intrinsic
add!(fpm, InstCombinePass())
add!(fpm, JLInstSimplifyPass())
aggressiveSimplifyCFGOptions =
- (forward_switch_cond=true,
- switch_range_to_icmp=true,
- switch_to_lookup=true,
- hoist_common_insts=true)
+ (
+ forward_switch_cond = true,
+ switch_range_to_icmp = true,
+ switch_to_lookup = true,
+ hoist_common_insts = true,
+ )
add!(fpm, SimplifyCFGPass(; aggressiveSimplifyCFGOptions...))
end
else
diff --git a/src/llvm/transforms.jl b/src/llvm/transforms.jl
index 5462fd43..67255c3a 100644
--- a/src/llvm/transforms.jl
+++ b/src/llvm/transforms.jl
@@ -2372,7 +2372,7 @@ end
function rewrite_generic_memory!(mod::LLVM.Module)
@static if VERSION < v"1.11-"
return false
- else
+ else
for f in functions(mod), bb in blocks(f)
iter = LLVM.API.LLVMGetFirstInstruction(bb)
while iter != C_NULL
@@ -2381,7 +2381,7 @@ function rewrite_generic_memory!(mod::LLVM.Module)
if !isa(inst, LLVM.LoadInst)
continue
end
-
+
if isa(operands(inst)[1], LLVM.ConstantExpr)
legal2, obj = absint(inst)
if legal2 && obj isa Memory && obj == typeof(obj).instance |
|
Current status is that minimal functionality works.
Also switching the NewPM pieces on for 1.11. |
7dbd669 to
5c774db
Compare
|
Currently hitting JuliaLLVM/LLVM.jl#528 Still needs to handle:
|
|
@wsmoses the 1.11 error is https://gist.github.com/vchuravy/dc0c635a3ebcc6eb85e7990b587f7956 perhaps due to a small change in optimization. Any ideas on how to fix that? |
|
Can we perhaps split this into smaller pieces to figure out where the differences in pass setup come from? |
|
It is somewhere in |
|
Could we do a PR for everything except optimize, and then a separate one for optimize? I can review them both separately in depth and try to look at the error.
No description provided.