SciML · DhairyaLGandhi · Apr 24, 2025 · Mar 6, 2025 · Mar 6, 2025 · Mar 6, 2025
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -24,6 +24,7 @@ jobs:
           - Core5
           - Core6
           - Core7
+          - Core8
           - QA
           - SDE1
           - SDE2

diff --git a/Project.toml b/Project.toml
@@ -77,6 +77,7 @@ LinearSolve = "2, 3"
 Lux = "1"
 Markdown = "1.10"
 ModelingToolkit = "9.42"
+ModelingToolkitStandardLibrary = "2"
 Mooncake = "0.4.52"
 NLsolve = "4.5.1"
 NonlinearSolve = "3.0.1, 4"
@@ -117,6 +118,7 @@ DelayDiffEq = "bcd4f6db-9728-5f36-b5f7-82caef46ccdb"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78"
+ModelingToolkitStandardLibrary = "16a59e39-deab-5bd0-87e4-056b12336739"
 Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
 NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56"
 NonlinearSolve = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
@@ -131,4 +133,4 @@ StochasticDiffEq = "789caeaf-c7a9-5a7d-9973-96adeb23e2a0"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["AlgebraicMultigrid", "Aqua", "Calculus", "ComponentArrays", "DelayDiffEq", "Distributed", "Lux", "ModelingToolkit", "Mooncake", "NLsolve", "NonlinearSolve", "Optimization", "OptimizationOptimisers", "OrdinaryDiffEq", "Pkg", "SafeTestsets", "SparseArrays", "SteadyStateDiffEq", "StochasticDiffEq", "Test"]
+test = ["AlgebraicMultigrid", "Aqua", "Calculus", "ComponentArrays", "DelayDiffEq", "Distributed", "Lux", "ModelingToolkit", "ModelingToolkitStandardLibrary", "Mooncake", "NLsolve", "NonlinearSolve", "Optimization", "OptimizationOptimisers", "OrdinaryDiffEq", "Pkg", "SafeTestsets", "SparseArrays", "SteadyStateDiffEq", "StochasticDiffEq", "Test"]
diff --git a/src/SciMLSensitivity.jl b/src/SciMLSensitivity.jl
@@ -40,13 +40,14 @@ using SciMLBase: SciMLBase, AbstractOverloadingSensitivityAlgorithm,
                  solve, u_modified!, LinearAliasSpecifier
 
 # AD Backends
-using ChainRulesCore: unthunk, @thunk, NoTangent, @not_implemented, Tangent, ZeroTangent
+using ChainRulesCore: unthunk, @thunk, NoTangent, @not_implemented, Tangent, ZeroTangent, AbstractThunk
 using Enzyme: Enzyme
 using FiniteDiff: FiniteDiff
 using ForwardDiff: ForwardDiff
 using Tracker: Tracker, TrackedArray
 using ReverseDiff: ReverseDiff
 using Zygote: Zygote
+using SciMLBase.ConstructionBase
 
 # Std Libs
 using LinearAlgebra: LinearAlgebra, Diagonal, I, UniformScaling, adjoint, axpy!,
@@ -56,6 +57,8 @@ using Markdown: Markdown, @doc_str
 using Random: Random, rand!
 using Statistics: Statistics, mean
 
+using LinearAlgebra: diag
+
 abstract type SensitivityFunction end
 abstract type TransformedFunction end
 

diff --git a/src/adjoint_common.jl b/src/adjoint_common.jl
@@ -78,7 +78,7 @@ function adjointdiffcache(g::G, sensealg, discrete, sol, dgdu::DG1, dgdp::DG2, f
     unwrappedf = unwrapped_f(f)
 
     numparams = p === nothing || p === SciMLBase.NullParameters() ? 0 : length(tunables)
-    numindvar = length(u0)
+    numindvar = isnothing(u0) ? nothing : length(u0)
     isautojacvec = get_jacvec(sensealg)
 
     issemiexplicitdae = false
@@ -106,18 +106,22 @@ function adjointdiffcache(g::G, sensealg, discrete, sol, dgdu::DG1, dgdp::DG2, f
         isempty(algevar_idxs) || (issemiexplicitdae = true)
     end
     if !issemiexplicitdae
-        diffvar_idxs = eachindex(u0)
+        diffvar_idxs = isnothing(u0) ? nothing : eachindex(u0)
         algevar_idxs = 1:0
     end
 
     if !needs_jac && !issemiexplicitdae && !(autojacvec isa Bool)
         J = nothing
     else
         if alg === nothing || SciMLBase.forwarddiffs_model_time(alg)
-            # 1 chunk is fine because it's only t
-            _J = similar(u0, numindvar, numindvar)
-            _J .= 0
-            J = dualcache(_J, ForwardDiff.pickchunksize(length(u0)))
+            if !isnothing(u0)
+                # 1 chunk is fine because it's only t
+                _J = similar(u0, numindvar, numindvar)
+                _J .= 0
+                J = dualcache(_J, ForwardDiff.pickchunksize(length(u0)))
+            else
+                J = nothing
+            end
         else
             J = similar(u0, numindvar, numindvar)
             J .= 0
@@ -133,8 +137,12 @@ function adjointdiffcache(g::G, sensealg, discrete, sol, dgdu::DG1, dgdp::DG2, f
                 dg_val[1] .= false
                 dg_val[2] .= false
             else
-                dg_val = similar(u0, numindvar) # number of funcs size
-                dg_val .= false
+                if !isnothing(u0)
+                    dg_val = similar(u0, numindvar) # number of funcs size
+                    dg_val .= false
+                else
+                    dg_val = nothing
+                end
             end
         else
             pgpu = UGradientWrapper(g, _t, p)
@@ -241,8 +249,12 @@ function adjointdiffcache(g::G, sensealg, discrete, sol, dgdu::DG1, dgdp::DG2, f
     pJ = if (quad || !(autojacvec isa Bool))
         nothing
     else
-        _pJ = similar(u0, numindvar, numparams)
-        _pJ .= false
+        if !isnothing(u0)
+            _pJ = similar(u0, numindvar, numparams)
+            _pJ .= false
+        else
+            _pJ = nothing
+        end
     end
 
     f_cache = isinplace ? deepcopy(u0) : nothing
@@ -379,11 +391,11 @@ function get_paramjac_config(autojacvec::ReverseDiffVJP, p, f, y, _p, _t;
         if !isRODE
             __p = p isa SciMLBase.NullParameters ? _p :
                   SciMLStructures.replace(Tunable(), p, _p)
-            tape = ReverseDiff.GradientTape((y, __p, [_t])) do u, p, t
+            tape = ReverseDiff.GradientTape((y, _p, [_t])) do u, p, t
                 du1 = (p !== nothing && p !== SciMLBase.NullParameters()) ?
                       similar(p, size(u)) : similar(u)
                 du1 .= false
-                f(du1, u, p, first(t))
+                f(du1, u, repack(p), first(t))
                 return vec(du1)
             end
         else
@@ -402,8 +414,8 @@ function get_paramjac_config(autojacvec::ReverseDiffVJP, p, f, y, _p, _t;
             # because hasportion(Tunable(), NullParameters) == false
             __p = p isa SciMLBase.NullParameters ? _p :
                   SciMLStructures.replace(Tunable(), p, _p)
-            tape = ReverseDiff.GradientTape((y, __p, [_t])) do u, p, t
-                vec(f(u, p, first(t)))
+            tape = ReverseDiff.GradientTape((y, _p, [_t])) do u, p, t
+                vec(f(u, repack(p), first(t)))
             end
         else
             tape = ReverseDiff.GradientTape((y, _p, [_t], _W)) do u, p, t, W

diff --git a/src/concrete_solve.jl b/src/concrete_solve.jl
@@ -46,15 +46,16 @@ function inplace_vjp(prob, u0, p, verbose, repack)
 
     vjp = try
         f = unwrapped_f(prob.f)
+        tspan_ = prob isa AbstractNonlinearProblem ? nothing : [prob.tspan[1]]
         if p === nothing || p isa SciMLBase.NullParameters
-            ReverseDiff.GradientTape((copy(u0), [prob.tspan[1]])) do u, t
+            ReverseDiff.GradientTape((copy(u0), tspan_)) do u, t
                 du1 = similar(u, size(u))
                 du1 .= 0
                 f(du1, u, p, first(t))
                 return vec(du1)
             end
         else
-            ReverseDiff.GradientTape((copy(u0), p, [prob.tspan[1]])) do u, p, t
+            ReverseDiff.GradientTape((copy(u0), p, tspan_)) do u, p, t
                 du1 = similar(u, size(u))
                 du1 .= 0
                 f(du1, u, repack(p), first(t))
@@ -299,6 +300,7 @@ function DiffEqBase._concrete_solve_adjoint(
         tunables, repack = Functors.functor(p)
     end
 
+    u0 = state_values(prob) === nothing ? Float64[] : u0
     default_sensealg = automatic_sensealg_choice(prob, u0, tunables, verbose, repack)
     DiffEqBase._concrete_solve_adjoint(prob, alg, default_sensealg, u0, p,
         originator::SciMLBase.ADOriginator, args...; verbose,
@@ -412,16 +414,42 @@ function DiffEqBase._concrete_solve_adjoint(
         Base.diff_names(Base._nt_names(values(kwargs)),
         (:callback_adj, :callback))}(values(kwargs))
     isq = sensealg isa QuadratureAdjoint
+
+    igs, new_u0, new_p = if _prob.f.initialization_data !== nothing
+        local new_u0
+        local new_p
+        iy, back = Zygote.pullback(tunables) do tunables
+            new_prob = remake(_prob, p = repack(tunables))
+            new_u0, new_p, _ = SciMLBase.get_initial_values(new_prob, new_prob, new_prob.f, SciMLBase.OverrideInit(), Val(true);
+                                                            abstol = 1e-6,
+                                                            reltol = 1e-6,
+                                                            sensealg = SteadyStateAdjoint(autojacvec = sensealg.autojacvec),
+                                                            kwargs...)
+            new_tunables, _, _ = SciMLStructures.canonicalize(SciMLStructures.Tunable(), new_p)
+            if SciMLBase.initialization_status(_prob) == SciMLBase.OVERDETERMINED
+                sum(new_tunables)
+            else
+                sum(new_u0) + sum(new_tunables)
+            end
+        end
+        igs = back(one(iy))[1] .- one(eltype(tunables))
+
+        igs, new_u0, new_p
+    else
+        nothing, u0, p
+    end
+    _prob = remake(_prob, u0 = new_u0, p = new_p)
+
     if sensealg isa BacksolveAdjoint
-        sol = solve(_prob, alg, args...; save_noise = true,
+        sol = solve(_prob, alg, args...; initializealg = SciMLBase.NoInit(), save_noise = true,
             save_start = save_start, save_end = save_end,
             saveat = saveat, kwargs_fwd...)
     elseif ischeckpointing(sensealg)
-        sol = solve(_prob, alg, args...; save_noise = true,
+        sol = solve(_prob, alg, args...; initializealg = SciMLBase.NoInit(), save_noise = true,
             save_start = true, save_end = true,
             saveat = saveat, kwargs_fwd...)
     else
-        sol = solve(_prob, alg, args...; save_noise = true, save_start = true,
+        sol = solve(_prob, alg, args...; initializealg = SciMLBase.NoInit(), save_noise = true, save_start = true,
             save_end = true, kwargs_fwd...)
     end
 
@@ -491,6 +519,7 @@ function DiffEqBase._concrete_solve_adjoint(
     _save_idxs = save_idxs === nothing ? Colon() : save_idxs
 
     function adjoint_sensitivity_backpass(Δ)
+        Δ = Δ isa AbstractThunk ? unthunk(Δ) : Δ
         function df_iip(_out, u, p, t, i)
             outtype = _out isa SubArray ?
                       ArrayInterface.parameterless_type(_out.parent) :
@@ -642,6 +671,8 @@ function DiffEqBase._concrete_solve_adjoint(
         dp = p === nothing || p === DiffEqBase.NullParameters() ? nothing :
              dp isa AbstractArray ? reshape(dp', size(tunables)) : dp
 
+        dp = Zygote.accum(dp, igs)
+
         _, repack_adjoint = if p === nothing || p === DiffEqBase.NullParameters() ||
                                !isscimlstructure(p)
             nothing, x -> (x,)
@@ -1679,6 +1710,7 @@ function DiffEqBase._concrete_solve_adjoint(
         u0, p, originator::SciMLBase.ADOriginator,
         args...; save_idxs = nothing, kwargs...)
     _prob = remake(prob, u0 = u0, p = p)
+
     sol = solve(_prob, alg, args...; kwargs...)
     _save_idxs = save_idxs === nothing ? Colon() : save_idxs
 
@@ -1688,26 +1720,56 @@ function DiffEqBase._concrete_solve_adjoint(
         out = SciMLBase.sensitivity_solution(sol, sol[_save_idxs])
     end
 
+    _, repack_adjoint = if isscimlstructure(p)
+        Zygote.pullback(p) do p
+            t, _, _ = canonicalize(Tunable(), p)
+            t
+        end
+    else
+        nothing, x -> (x,)
+    end
+
     function steadystatebackpass(Δ)
+        Δ = Δ isa AbstractThunk ? unthunk(Δ) : Δ
         # Δ = dg/dx or diffcache.dg_val
         # del g/del p = 0
         function df(_out, u, p, t, i)
             if _save_idxs isa Number
                 _out[_save_idxs] = Δ[_save_idxs]
             elseif Δ isa Number
                 @. _out[_save_idxs] = Δ
-            else
+            elseif Δ isa AbstractArray{<:AbstractArray} || Δ isa AbstractVectorOfArray || Δ isa AbstractArray
                 @. _out[_save_idxs] = Δ[_save_idxs]
+            elseif isnothing(_out)
+                _out
+            else
+                @. _out[_save_idxs] = Δ.u[_save_idxs]
             end
         end
         dp = adjoint_sensitivities(sol, alg; sensealg = sensealg, dgdu = df)
 
+        dp, Δtunables = if Δ isa AbstractArray || Δ isa Number
+            # If Δ isa AbstractArray, the gradients correspond to `u`.
+            # This needs to change in the future, but it is the
+            # applicable behavior until the move to structural
+            # tangents is completed.
+            dp, _, _ = canonicalize(Tunable(), dp)
+            dp, nothing
+        else
+            Δp = setproperties(dp, to_nt(Δ.prob.p))
+            Δtunables, _, _ = canonicalize(Tunable(), Δp)
+            dp, _, _ = canonicalize(Tunable(), dp)
+            dp, Δtunables
+        end
+
+        dp = Zygote.accum(dp, Δtunables)
+
         if originator isa SciMLBase.TrackerOriginator ||
            originator isa SciMLBase.ReverseDiffOriginator
-            (NoTangent(), NoTangent(), NoTangent(), dp, NoTangent(),
+            (NoTangent(), NoTangent(), NoTangent(), repack_adjoint(dp)[1], NoTangent(),
                 ntuple(_ -> NoTangent(), length(args))...)
         else
-            (NoTangent(), NoTangent(), NoTangent(), NoTangent(), dp, NoTangent(),
+            (NoTangent(), NoTangent(), NoTangent(), NoTangent(), repack_adjoint(dp)[1], NoTangent(),
                 ntuple(_ -> NoTangent(), length(args))...)
         end
     end

diff --git a/src/derivative_wrappers.jl b/src/derivative_wrappers.jl
@@ -144,6 +144,13 @@ function jacobian(f, x::AbstractArray{<:Number},
     return J
 end
 
+function jacobian!(J::Nothing, f, x::AbstractArray{<:Number},  # method for `J === nothing` and `jac_config === nothing`
+    fx::Union{Nothing, AbstractArray{<:Number}},
+    alg::AbstractOverloadingSensitivityAlgorithm, jac_config::Nothing)
+    @assert isempty(x)  # a `nothing` Jacobian is only accepted when `x` is empty
+    J  # returned as-is (`nothing`); `f`, `fx` and `alg` are not used
+end
+jacobian!(J::PreallocationTools.DiffCache, x::SciMLBase.UJacobianWrapper, args...) = jacobian!(J.du, x, args...)  # forward to the method for the cache's `du` field; remaining arguments pass through unchanged
 function jacobian!(J::AbstractMatrix{<:Number}, f, x::AbstractArray{<:Number},
         fx::Union{Nothing, AbstractArray{<:Number}},
         alg::AbstractOverloadingSensitivityAlgorithm, jac_config)
@@ -456,9 +463,10 @@ function _vecjacobian!(dλ, y, λ, p, t, S::TS, isautojacvec::ReverseDiffVJP, dg
     elseif inplace_sensitivity(S)
         _y = eltype(y) === eltype(λ) ? y : convert.(promote_type(eltype(y), eltype(λ)), y)
         if W === nothing
-            tape = ReverseDiff.GradientTape((_y, _p, [t])) do u, p, t
+            _tunables, _repack, _ = canonicalize(Tunable(), _p)
+            tape = ReverseDiff.GradientTape((_y, _tunables, [t])) do u, p, t
                 du1 = similar(u, size(u))
-                f(du1, u, p, first(t))
+                f(du1, u, _repack(p), first(t))
                 return vec(du1)
             end
         else
@@ -474,8 +482,9 @@ function _vecjacobian!(dλ, y, λ, p, t, S::TS, isautojacvec::ReverseDiffVJP, dg
     else
         _y = eltype(y) === eltype(λ) ? y : convert.(promote_type(eltype(y), eltype(λ)), y)
         if W === nothing
-            tape = ReverseDiff.GradientTape((_y, _p, [t])) do u, p, t
-                vec(f(u, p, first(t)))
+            _tunables, _repack, _ = canonicalize(Tunable(), _p)
+            tape = ReverseDiff.GradientTape((_y, _tunables, [t])) do u, p, t
+                vec(f(u, _repack(p), first(t)))
             end
         else
             _W = eltype(W) === eltype(λ) ? W :
@@ -1047,6 +1056,7 @@ function accumulate_cost(dλ, y, p, t, S::TS,
     return dλ, dgrad
 end
 
+build_jac_config(alg, uf, u::Nothing) = nothing  # `u === nothing`: return no Jacobian config; `alg` and `uf` are not used
 function build_jac_config(alg, uf, u)
     if alg_autodiff(alg)
         jac_config = ForwardDiff.JacobianConfig(uf, u, u,
@@ -1068,9 +1078,10 @@ end
 
 function build_param_jac_config(alg, pf, u, p)
     if alg_autodiff(alg)
-        jac_config = ForwardDiff.JacobianConfig(pf, u, p,
+        tunables, repack, aliases = canonicalize(Tunable(), p)
+        jac_config = ForwardDiff.JacobianConfig(pf, u, tunables,
             ForwardDiff.Chunk{
-                determine_chunksize(p,
+                determine_chunksize(tunables,
                 alg)}())
     else
         if diff_type(alg) != Val{:complex}

diff --git a/src/parameters_handling.jl b/src/parameters_handling.jl
@@ -15,6 +15,9 @@ end
 recursive_copyto!(y::T, x::T) where {T} = fmap(recursive_copyto!, y, x)
 recursive_copyto!(y, ::Nothing) = y
 recursive_copyto!(::Nothing, ::Nothing) = nothing
+function recursive_copyto!(y::T, x::NamedTuple) where T  # method for a `NamedTuple` source `x` and a destination `y` of any type
+    fmap(recursive_copyto!, y, x)  # same `fmap`-based body as the `(y::T, x::T)` method
+end
 
 """
     neg!(x)
@@ -61,14 +64,14 @@ recursive_add!(::Nothing, ::Nothing) = nothing
 
 `similar(λ, size(x))` for generic `x`. This is used to handle non-array parameters!
 """
-allocate_vjp(λ::AbstractArray, x::AbstractArray) = similar(λ, size(x))
+allocate_vjp(λ::AbstractArray{T}, x::AbstractArray) where T = fill!(similar(λ, size(x)), zero(T))  # buffer like `λ` with the size of `x`, zero-filled rather than left uninitialized
 allocate_vjp(λ::AbstractArray, x::Tuple) = allocate_vjp.((λ,), x)
 function allocate_vjp(λ::AbstractArray, x::NamedTuple{F}) where {F}
     NamedTuple{F}(allocate_vjp.((λ,), values(x)))
 end
 allocate_vjp(λ::AbstractArray, x) = fmap(Base.Fix1(allocate_vjp, λ), x)
 
-allocate_vjp(x::AbstractArray) = similar(x)
+allocate_vjp(x::AbstractArray) = zero(x) # zero-initialized buffer; this replaces the uninitialized `similar(x)`
 allocate_vjp(x::Tuple) = allocate_vjp.(x)
 allocate_vjp(x::NamedTuple{F}) where {F} = NamedTuple{F}(allocate_vjp.(values(x)))
 allocate_vjp(x) = fmap(allocate_vjp, x)
-Original file line number
+Diff line change
@@ Expand Up / @@ -24,6 +24,7 @@ jobs: @@
               - Core5
               - Core6
               - Core7
+              - Core8
               - QA
               - SDE1
               - SDE2
@@ Expand Down @@