@@ -17,53 +17,72 @@
 if Reactant.Reactant_jll.is_available()
     @setup_workload begin
         orig_backend = Reactant.XLA.default_backend()
-        Reactant.set_default_backend("cpu") # always precompile on CPU
+        Reactant.set_default_backend("cpu")
 
-        dev = reactant_device(; force=true)
+        @compile_workload begin
+            @static if Reactant.precompilation_supported()
+                dev = reactant_device(; force=true)
 
-        # attention model
-        mha = Lux.MultiHeadAttention(4; nheads=2)
-        ps_mha, st_mha = Lux.setup(Random.default_rng(), mha) |> dev
+                # attention model
+                mha = Lux.MultiHeadAttention(4; nheads=2)
+                ps_mha, st_mha = Lux.setup(Random.default_rng(), mha) |> dev
 
-        q = rand(Float32, (4, 3, 2)) |> dev
-        k = rand(Float32, (4, 3, 2)) |> dev
-        v = rand(Float32, (4, 3, 2)) |> dev
+                q = ones(Float32, (4, 3, 2)) |> dev
+                k = ones(Float32, (4, 3, 2)) |> dev
+                v = ones(Float32, (4, 3, 2)) |> dev
 
-        # convolution + dense model
-        conv_model = Lux.Chain(
-            Lux.Conv((3, 3), 3 => 32),
-            Lux.Conv((3, 3), 32 => 64),
-            Lux.GlobalMaxPool(),
-            Lux.FlattenLayer(),
-            Lux.Dense(64 => 10),
-        )
-        ps_conv_model, st_conv_model = Lux.setup(Random.default_rng(), conv_model) |> dev
+                try
+                    @compile mha((q, k, v), ps_mha, LuxCore.testmode(st_mha))
 
-        x = rand(Float32, (28, 28, 3, 2)) |> dev
+                    Lux.Training.single_train_step(
+                        AutoEnzyme(),
+                        PrecompileWorkloads.sumabs2attnloss,
+                        (q, k, v),
+                        Lux.Training.TrainState(
+                            mha, ps_mha, st_mha, Optimisers.Adam(0.001f0)
+                        ),
+                    )
+                catch err
+                    if !(err isa Reactant.ReactantPrecompilationException)
+                        rethrow(err)
+                    end
+                end
 
-        @compile_workload begin
-            @compile mha((q, k, v), ps_mha, LuxCore.testmode(st_mha))
+                # convolution + dense model
+                conv_model = Lux.Chain(
+                    Lux.Conv((3, 3), 3 => 32),
+                    Lux.Conv((3, 3), 32 => 64),
+                    Lux.GlobalMaxPool(),
+                    Lux.FlattenLayer(),
+                    Lux.Dense(64 => 10),
+                )
+                ps_conv_model, st_conv_model =
+                    Lux.setup(Random.default_rng(), conv_model) |> dev
 
-            Lux.Training.single_train_step(
-                AutoEnzyme(),
-                PrecompileWorkloads.sumabs2attnloss,
-                (q, k, v),
-                Lux.Training.TrainState(mha, ps_mha, st_mha, Optimisers.Adam(0.001f0)),
-            )
+                x = ones(Float32, (28, 28, 3, 2)) |> dev
 
-            @compile conv_model(x, ps_conv_model, LuxCore.testmode(st_conv_model))
+                try
+                    @compile conv_model(x, ps_conv_model, LuxCore.testmode(st_conv_model))
 
-            Lux.Training.single_train_step(
-                AutoEnzyme(),
-                PrecompileWorkloads.sumabs2loss,
-                x,
-                Lux.Training.TrainState(
-                    conv_model, ps_conv_model, st_conv_model, Optimisers.Adam(0.001f0)
-                ),
-            )
+                    Lux.Training.single_train_step(
+                        AutoEnzyme(),
+                        PrecompileWorkloads.sumabs2loss,
+                        x,
+                        Lux.Training.TrainState(
+                            conv_model,
+                            ps_conv_model,
+                            st_conv_model,
+                            Optimisers.Adam(0.001f0),
+                        ),
+                    )
+                catch err
+                    if !(err isa Reactant.ReactantPrecompilationException)
+                        rethrow(err)
+                    end
+                end
+            end
         end
 
-        Reactant.clear_oc_cache()
         Reactant.set_default_backend(orig_backend)
     end
 end
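
What the change does: the device setup and both models now live inside `@compile_workload`, the whole workload is guarded by `@static if Reactant.precompilation_supported()`, and the example inputs switch from `rand` to `ones`, which makes the traced inputs deterministic from one precompile run to the next. For context, here is a minimal sketch of the PrecompileTools.jl pattern these macros come from (a toy workload, not the Lux one):

    using PrecompileTools

    @setup_workload begin
        # Setup code runs at precompile time, but calls made here are
        # not themselves cached; it only builds inputs for the workload.
        x = ones(Float32, 4)

        @compile_workload begin
            # Calls in this block are executed during precompilation and
            # the compiled code they exercise is cached into the package
            # image, so the first real call after loading is fast.
            sum(abs2, x)
        end
    end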
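
Each `@compile` plus `single_train_step` pair is wrapped in a try/catch that swallows only `Reactant.ReactantPrecompilationException`, so an environment where Reactant cannot compile during precompilation skips the workload instead of failing the whole package precompilation, while any other error still propagates. A self-contained sketch of the idiom, with a placeholder exception type standing in for Reactant's:

    # Placeholder for Reactant.ReactantPrecompilationException.
    struct PrecompileSkipped <: Exception end

    # Stand-in for an @compile call that may refuse to run here.
    compile_or_skip() = throw(PrecompileSkipped())

    try
        compile_or_skip()
    catch err
        # Swallow only the expected "cannot precompile in this
        # environment" signal; anything else is a real bug and
        # must still propagate.
        if !(err isa PrecompileSkipped)
            rethrow(err)
        end
    end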
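
The backend handling is unchanged in spirit: save the current default backend, force CPU for the workload, restore it afterwards; the `Reactant.clear_oc_cache()` call is simply dropped. One thing to note is that the restore runs after the workload block rather than in a `finally`, so an error that escapes the catch guards above would leave the CPU backend set. A generic sketch of the more defensive variant, using a plain `Ref` as a stand-in for Reactant's default-backend state:

    # BACKEND stands in for Reactant's default-backend setting.
    const BACKEND = Ref("gpu")

    orig = BACKEND[]
    BACKEND[] = "cpu"        # always precompile on CPU
    try
        # ... the precompile workload would run here ...
    finally
        BACKEND[] = orig     # restored even if the workload throws
    end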