Rewrite early and late optimize pipeline with NewPM#2711
Conversation
|
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.
diff --git a/src/compiler/optimize.jl b/src/compiler/optimize.jl
index 027f1b22..98afb1bc 100644
--- a/src/compiler/optimize.jl
+++ b/src/compiler/optimize.jl
@@ -73,89 +73,89 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine)
run!(pb, mod, tm)
end
- function middle_optimize!(second_stage=false)
- @dispose pb = NewPMPassBuilder() begin
- registerEnzymeAndPassPipeline!(pb)
- register!(pb, RewriteGenericMemoryPass())
- add!(pb, NewPMAAManager()) do aam
- add!(aam, ScopedNoAliasAA())
- add!(aam, TypeBasedAA())
- add!(aam, BasicAA())
+ function middle_optimize!(second_stage = false)
+ return @dispose pb = NewPMPassBuilder() begin
+ registerEnzymeAndPassPipeline!(pb)
+ register!(pb, RewriteGenericMemoryPass())
+ add!(pb, NewPMAAManager()) do aam
+ add!(aam, ScopedNoAliasAA())
+ add!(aam, TypeBasedAA())
+ add!(aam, BasicAA())
end
- add!(pb, NewPMModulePassManager()) do mpm
- add!(mpm, RewriteGenericMemoryPass())
- add!(mpm, CPUFeaturesPass()) # why is this duplicated?
-
- add!(mpm, NewPMFunctionPassManager()) do fpm
- add!(fpm, InstCombinePass())
- add!(fpm, JLInstSimplifyPass())
- add!(fpm, SimplifyCFGPass())
- add!(fpm, SROAPass())
- add!(fpm, InstCombinePass())
- add!(fpm, JLInstSimplifyPass())
- add!(fpm, JumpThreadingPass())
- add!(fpm, CorrelatedValuePropagationPass())
- add!(fpm, InstCombinePass())
- add!(fpm, JLInstSimplifyPass())
- add!(fpm, ReassociatePass())
- add!(fpm, EarlyCSEPass())
- add!(fpm, AllocOptPass())
-
- add!(fpm, NewPMLoopPassManager(use_memory_ssa=true)) do lpm
- add!(lpm, LoopIdiomRecognizePass())
- add!(lpm, LoopRotatePass())
- add!(lpm, LowerSIMDLoopPass())
- add!(lpm, LICMPass())
- add!(lpm, JuliaLICMPass())
- add!(lpm, SimpleLoopUnswitchPass())
+ add!(pb, NewPMModulePassManager()) do mpm
+ add!(mpm, RewriteGenericMemoryPass())
+ add!(mpm, CPUFeaturesPass()) # why is this duplicated?
+
+ add!(mpm, NewPMFunctionPassManager()) do fpm
+ add!(fpm, InstCombinePass())
+ add!(fpm, JLInstSimplifyPass())
+ add!(fpm, SimplifyCFGPass())
+ add!(fpm, SROAPass())
+ add!(fpm, InstCombinePass())
+ add!(fpm, JLInstSimplifyPass())
+ add!(fpm, JumpThreadingPass())
+ add!(fpm, CorrelatedValuePropagationPass())
+ add!(fpm, InstCombinePass())
+ add!(fpm, JLInstSimplifyPass())
+ add!(fpm, ReassociatePass())
+ add!(fpm, EarlyCSEPass())
+ add!(fpm, AllocOptPass())
+
+ add!(fpm, NewPMLoopPassManager(use_memory_ssa = true)) do lpm
+ add!(lpm, LoopIdiomRecognizePass())
+ add!(lpm, LoopRotatePass())
+ add!(lpm, LowerSIMDLoopPass())
+ add!(lpm, LICMPass())
+ add!(lpm, JuliaLICMPass())
+ add!(lpm, SimpleLoopUnswitchPass())
+ end
+
+ add!(fpm, InstCombinePass())
+ add!(fpm, JLInstSimplifyPass())
+ add!(fpm, NewPMLoopPassManager()) do lpm
+ add!(lpm, IndVarSimplifyPass())
+ add!(lpm, LoopDeletionPass())
end
-
- add!(fpm, InstCombinePass())
- add!(fpm, JLInstSimplifyPass())
- add!(fpm, NewPMLoopPassManager()) do lpm
- add!(lpm, IndVarSimplifyPass())
- add!(lpm, LoopDeletionPass())
+ add!(fpm, LoopUnrollPass(opt_level = 2)) # what opt level?
+ add!(fpm, AllocOptPass())
+ add!(fpm, SROAPass())
+ add!(fpm, GVNPass())
+
+ # This InstCombine needs to be after GVN
+ # Otherwise it will generate load chains in GPU code...
+ add!(fpm, InstCombinePass())
+ add!(fpm, JLInstSimplifyPass())
+ add!(fpm, MemCpyOptPass())
+ add!(fpm, SCCPPass())
+ add!(fpm, InstCombinePass())
+ add!(fpm, JLInstSimplifyPass())
+ add!(fpm, JumpThreadingPass())
+ add!(fpm, DSEPass())
+ add!(fpm, AllocOptPass())
+ add!(fpm, SimplifyCFGPass())
+
+
+ add!(fpm, NewPMLoopPassManager()) do lpm
+ add!(lpm, LoopIdiomRecognizePass())
+ add!(lpm, LoopDeletionPass())
end
- add!(fpm, LoopUnrollPass(opt_level=2)) # what opt level?
- add!(fpm, AllocOptPass())
- add!(fpm, SROAPass())
- add!(fpm, GVNPass())
-
- # This InstCombine needs to be after GVN
- # Otherwise it will generate load chains in GPU code...
- add!(fpm, InstCombinePass())
- add!(fpm, JLInstSimplifyPass())
- add!(fpm, MemCpyOptPass())
- add!(fpm, SCCPPass())
- add!(fpm, InstCombinePass())
- add!(fpm, JLInstSimplifyPass())
- add!(fpm, JumpThreadingPass())
- add!(fpm, DSEPass())
- add!(fpm, AllocOptPass())
- add!(fpm, SimplifyCFGPass())
-
-
- add!(fpm, NewPMLoopPassManager()) do lpm
- add!(lpm, LoopIdiomRecognizePass())
- add!(lpm, LoopDeletionPass())
+ add!(fpm, JumpThreadingPass())
+ add!(fpm, CorrelatedValuePropagationPass())
+ if second_stage
+
+ add!(fpm, ADCEPass())
+ add!(fpm, InstCombinePass())
+ add!(fpm, JLInstSimplifyPass())
+
+ # GC passes
+ add!(fpm, GCInvariantVerifierPass(strong = false))
+ add!(fpm, SimplifyCFGPass())
+ add!(fpm, InstCombinePass())
+ add!(fpm, JLInstSimplifyPass())
+ end # second_stage
end
- add!(fpm, JumpThreadingPass())
- add!(fpm, CorrelatedValuePropagationPass())
- if second_stage
-
- add!(fpm, ADCEPass())
- add!(fpm, InstCombinePass())
- add!(fpm, JLInstSimplifyPass())
-
- # GC passes
- add!(fpm, GCInvariantVerifierPass(strong=false))
- add!(fpm, SimplifyCFGPass())
- add!(fpm, InstCombinePass())
- add!(fpm, JLInstSimplifyPass())
- end # second_stage
- end
end
- run!(pb, mod, tm)
+ run!(pb, mod, tm)
end
end # middle_optimize!
@@ -180,7 +180,7 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine)
end
removeDeadArgs!(mod, tm)
detect_writeonly!(mod)
- nodecayed_phis!(mod)
+ return nodecayed_phis!(mod)
end
function addOptimizationPasses!(mpm::LLVM.NewPMPassManager)
@@ -200,7 +200,7 @@ function addOptimizationPasses!(mpm::LLVM.NewPMPassManager)
add!(mpm, AlwaysInlinerPass())
- add!(mpm, NewPMFunctionPassManager()) do fpm
+ return add!(mpm, NewPMFunctionPassManager()) do fpm
# Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard time
# merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt`
# pass.
@@ -224,7 +224,7 @@ function addOptimizationPasses!(mpm::LLVM.NewPMPassManager)
# remove those before optimizing loops.
add!(fpm, AllocOptPass())
- add!(fpm, NewPMLoopPassManager(use_memory_ssa=true)) do lpm
+ add!(fpm, NewPMLoopPassManager(use_memory_ssa = true)) do lpm
add!(lpm, LoopRotatePass())
# moving IndVarSimplify here prevented removing the loop in perf_sumcartesian(10:-1:1)
add!(lpm, LoopIdiomRecognizePass())
@@ -240,7 +240,7 @@ function addOptimizationPasses!(mpm::LLVM.NewPMPassManager)
add!(lpm, IndVarSimplifyPass())
add!(lpm, LoopDeletionPass())
end
- add!(fpm, LoopUnrollPass(opt_level=2))
+ add!(fpm, LoopUnrollPass(opt_level = 2))
# Run our own SROA on heap objects before LLVM's
add!(fpm, AllocOptPass())
@@ -284,13 +284,13 @@ function addOptimizationPasses!(mpm::LLVM.NewPMPassManager)
end
function addMachinePasses!(mpm::LLVM.NewPMPassManager)
- add!(mpm, NewPMFunctionPassManager()) do fpm
+ return add!(mpm, NewPMFunctionPassManager()) do fpm
if VERSION < v"1.12.0-DEV.1390"
add!(fpm, CombineMulAddPass())
end
add!(fpm, DivRemPairsPass())
add!(fpm, DemoteFloat16Pass())
- add!(fpm, GVNPass())
+ add!(fpm, GVNPass())
end
end
@@ -317,7 +317,7 @@ function addJuliaLegalizationPasses!(mpm::LLVM.NewPMPassManager, lower_intrinsic
end
# We need these two passes and the instcombine below
# after GC lowering to let LLVM do some constant propagation on the tags.
- # and remove some unnecessary write barrier checks.
+ # and remove some unnecessary write barrier checks.
add!(mpm, NewPMFunctionPassManager()) do fpm
add!(fpm, GVNPass())
add!(fpm, SCCPPass())
@@ -330,10 +330,12 @@ function addJuliaLegalizationPasses!(mpm::LLVM.NewPMPassManager, lower_intrinsic
add!(fpm, InstCombinePass())
add!(fpm, JLInstSimplifyPass())
aggressiveSimplifyCFGOptions =
- (forward_switch_cond=true,
- switch_range_to_icmp=true,
- switch_to_lookup=true,
- hoist_common_insts=true)
+ (
+ forward_switch_cond = true,
+ switch_range_to_icmp = true,
+ switch_to_lookup = true,
+ hoist_common_insts = true,
+ )
add!(fpm, SimplifyCFGPass(; aggressiveSimplifyCFGOptions...))
end
else
diff --git a/src/llvm/transforms.jl b/src/llvm/transforms.jl
index 5462fd43..67255c3a 100644
--- a/src/llvm/transforms.jl
+++ b/src/llvm/transforms.jl
@@ -2372,7 +2372,7 @@ end
function rewrite_generic_memory!(mod::LLVM.Module)
@static if VERSION < v"1.11-"
return false
- else
+ else
for f in functions(mod), bb in blocks(f)
iter = LLVM.API.LLVMGetFirstInstruction(bb)
while iter != C_NULL
@@ -2381,7 +2381,7 @@ function rewrite_generic_memory!(mod::LLVM.Module)
if !isa(inst, LLVM.LoadInst)
continue
end
-
+
if isa(operands(inst)[1], LLVM.ConstantExpr)
legal2, obj = absint(inst)
if legal2 && obj isa Memory && obj == typeof(obj).instance |
46d3e06 to
6cd4a76
Compare
6cd4a76 to
e653a48
Compare
26f7d6d to
1a1a74a
Compare
Codecov Report❌ Patch coverage is
Additional details and impacted files
@@ Coverage Diff @@
## main #2711 +/- ##
==========================================
- Coverage 72.61% 72.48% -0.14%
==========================================
Files 58 58
Lines 18746 18635 -111
==========================================
- Hits 13613 13508 -105
+ Misses 5133 5127 -6
☔ View full report in Codecov by Sentry.
🚀 New features to boost your workflow:
|
Benchmark Results
Benchmark Plots
A plot of the benchmark results has been uploaded as an artifact at https://github.com/EnzymeAD/Enzyme.jl/actions/runs/18924163508/artifacts/4411690206. |
dcbfa45 to
00c7c44
Compare
00c7c44 to
06460fb
Compare
|
Sigh the |
Assuming you're referring to the |
|
That test has been blocked by a Julia patch release for a while =/. It is not in our control -- specifically JuliaLang/julia@8230e8b |
|
Well that's good then xD |
No description provided.