Merge pull request #997 from ChrisRackauckas-Claude/precompile-improvements-20251230-111636

ChrisRackauckas · web-flow · commit 8d39975ed242 · 2025-12-30T18:50:19.000-01:00
Add PrecompileTools workload to improve startup time
diff --git a/Project.toml b/Project.toml
@@ -13,6 +13,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
 LuxLib = "82251201-b29d-42c6-8e01-566dec8acb11"
+PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
@@ -58,6 +59,7 @@ Optimization = "4"
 OptimizationOptimJL = "0.4"
 OptimizationOptimisers = "0.3"
 OrdinaryDiffEq = "6.76.0"
+PrecompileTools = "1.3.3"
 Printf = "1.10"
 Random = "1.10"
 ReTestItems = "1.25.1"
diff --git a/src/DiffEqFlux.jl b/src/DiffEqFlux.jl
@@ -55,4 +55,7 @@ export BacksolveAdjoint, QuadratureAdjoint, GaussAdjoint, InterpolatingAdjoint,
        AdjointLSS, NILSS, NILSAS
 export TrackerVJP, ZygoteVJP, EnzymeVJP, ReverseDiffVJP
 
+# Precompilation workload - must come last so everything it exercises is already defined
+include("precompilation.jl")
+
 end
diff --git a/src/precompilation.jl b/src/precompilation.jl
@@ -0,0 +1,56 @@
+# Precompilation workload for DiffEqFlux
+# This improves time-to-first-X (TTFX) by precompiling common code paths
+
+using PrecompileTools: @compile_workload, @setup_workload
+
+@setup_workload begin
+    # Setup code - imports and minimal test data
+    # This code is run during precompilation but the compilation results are discarded
+    using Random: MersenneTwister
+    using Lux: Chain, Dense
+
+    @compile_workload begin
+        # These operations will be precompiled
+        # Focus on the most common use cases
+
+        # Use a fixed RNG for reproducibility
+        rng = MersenneTwister(0)
+
+        # Create a simple model - this is the most common pattern
+        model = Chain(Dense(2, 4, tanh), Dense(4, 2))
+
+        # Create NeuralODE layer - the main entry point
+        # Note: We don't run the forward pass because it requires an ODE solver
+        # which is not a direct dependency of DiffEqFlux
+        tspan = (0.0f0, 1.0f0)
+        node = NeuralODE(model, tspan)
+
+        # Setup parameters and state - this is called often and benefits from precompilation
+        ps, st = Lux.setup(rng, node)
+
+        # Precompile StatefulLuxLayer creation (used in forward pass)
+        stateful = StatefulLuxLayer{true}(node.model, nothing, st)
+
+        # Precompile evaluating the model through the stateful layer on a Float32 vector
+        x0 = Float32[1.0, 0.0]
+        dudt_out = stateful(x0, ps)
+
+        # Precompile ODEFunction and ODEProblem creation
+        dudt(u, p, t) = stateful(u, p)
+        ff = ODEFunction{false}(dudt; tgrad = basic_tgrad)
+        prob = ODEProblem{false}(ff, x0, node.tspan, ps)
+
+        # Precompile FFJORD constructor
+        ffjord_model = Chain(Dense(2, 4, tanh), Dense(4, 2))
+        ffjord = FFJORD(ffjord_model, tspan, (2,))
+
+        # Precompile collocation kernels (best-effort: may fail with such small data)
+        tpoints = Float32[0.0, 0.5, 1.0]
+        data = Float32[1.0 1.1 1.2; 0.0 0.1 0.2]
+        try
+            collocate_data(data, tpoints, TriangularKernel())
+        catch err
+            @debug "collocate_data precompile workload failed" exception = err
+        end
+    end
+end