Enable keyword arguments for particle methods (#2660)

penelopeysm · web-flow · commit 751ae1d36427 · 2026-01-25T18:29:44.000Z
> [!NOTE] > ~~This PR requires some changes to AdvancedPS. TuringLang/AdvancedPS.jl#118~~ This is merged > > ~~It also needs the following Libtask patch: TuringLang/Libtask.jl#198~~ This is merged > > ~~This PR also lacks tests; some should be added.~~ Tests added. This PR allows models with keyword arguments to be run with SMC / PG. Example: ```julia julia> using Turing julia> @model function m(y; n=0) x ~ Normal(n) y ~ Normal(x) end m (generic function with 2 methods) julia> mean(sample(m(5.0), PG(20), 1000)) [...] ERROR: Models with keyword arguments need special treatment to be used with particle methods. Please run: using Libtask; Libtask.@might_produce(m) before sampling from this model with particle methods. Stacktrace: [...] julia> using Libtask; Libtask.@might_produce(m) julia> mean(sample(m(5.0), PG(20), 1000)) Sampling 100%|███████████████████████████████████████████████████████████████████| Time: 0:00:05 Mean parameters mean Symbol Float64 x 2.7182 julia> mean(sample(m(5.0; n=10.0), PG(20), 1000)) Sampling 100%|███████████████████████████████████████████████████████████████████| Time: 0:00:04 Mean parameters mean Symbol Float64 x 7.4854 ``` Closes #2007.
diff --git a/HISTORY.md b/HISTORY.md
@@ -1,3 +1,20 @@
+# 0.42.5
+
+SMC and PG can now be used for models with keyword arguments, albeit with one requirement: the user must mark the model function as potentially calling `Libtask.produce`, using the `@might_produce` macro (which Turing re-exports from Libtask).
+For example, if the model is
+
+```julia
+@model foo(x; y) = a ~ Normal(x, y)
+```
+
+then before sampling from this model with SMC or PG, you will have to run
+
+```julia
+using Turing
+
+@might_produce(foo)
+```
+
 # 0.42.4
 
 Fixes a typo that caused NUTS to perform one less adaptation step than in versions prior to 0.41.
diff --git a/Project.toml b/Project.toml
@@ -1,6 +1,6 @@
 name = "Turing"
 uuid = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"
-version = "0.42.4"
+version = "0.42.5"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
@@ -52,7 +52,7 @@ AbstractPPL = "0.11, 0.12, 0.13"
 Accessors = "0.1"
 AdvancedHMC = "0.8.3"
 AdvancedMH = "0.8.9"
-AdvancedPS = "0.7"
+AdvancedPS = "0.7.2"
 AdvancedVI = "0.6"
 BangBang = "0.4.2"
 Bijectors = "0.14, 0.15"
@@ -65,7 +65,7 @@ DynamicHMC = "3.4"
 DynamicPPL = "0.39.1"
 EllipticalSliceSampling = "0.5, 1, 2"
 ForwardDiff = "0.10.3, 1"
-Libtask = "0.9.3"
+Libtask = "0.9.5"
 LinearAlgebra = "1"
 LogDensityProblems = "2"
 MCMCChains = "5, 6, 7"
diff --git a/docs/make.jl b/docs/make.jl
@@ -6,6 +6,7 @@ using DocumenterInterLinks
 links = InterLinks(
     "DynamicPPL" => "https://turinglang.org/DynamicPPL.jl/stable/",
     "AbstractPPL" => "https://turinglang.org/AbstractPPL.jl/stable/",
+    "Libtask" => "https://turinglang.org/Libtask.jl/stable/",
     "LinearAlgebra" => "https://docs.julialang.org/en/v1/",
     "AbstractMCMC" => "https://turinglang.org/AbstractMCMC.jl/stable/",
     "ADTypes" => "https://sciml.github.io/ADTypes.jl/stable/",
diff --git a/docs/src/api.md b/docs/src/api.md
@@ -44,6 +44,8 @@ even though [`Prior()`](@ref) is actually defined in the `Turing.Inference` modu
 | `LogDensityFunction` | [`DynamicPPL.LogDensityFunction`](@extref) | A struct containing all information about how to evaluate a model. Mostly for advanced users |
 | `@addlogprob!`       | [`DynamicPPL.@addlogprob!`](@extref)       | Add arbitrary log-probability terms during model evaluation                                  |
 | `setthreadsafe`      | [`DynamicPPL.setthreadsafe`](@extref)      | Mark a model as requiring threadsafe evaluation                                              |
+| `might_produce`      | [`Libtask.might_produce`](@extref)         | Mark a method signature as potentially calling `Libtask.produce`                             |
+| `@might_produce`     | [`Libtask.@might_produce`](@extref)        | Mark a function name as potentially calling `Libtask.produce`                                |
 
 ### Inference
 
@@ -110,19 +112,19 @@ Turing.jl provides several strategies to initialise parameters for models.
 
 See the [docs of AdvancedVI.jl](https://turinglang.org/AdvancedVI.jl/stable/) for detailed usage and the [variational inference tutorial](https://turinglang.org/docs/tutorials/09-variational-inference/) for a basic walkthrough.
 
-| Exported symbol               | Documentation                                            | Description                                                                                                                                       |
-|:----------------------------- |:-------------------------------------------------------- |:------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `vi`                          | [`Turing.vi`](@ref)                                      | Perform variational inference                                                                                                                     |
-| `q_locationscale`             | [`Turing.Variational.q_locationscale`](@ref)             | Find a numerically non-degenerate initialization for a location-scale variational family                                                          |
-| `q_meanfield_gaussian`        | [`Turing.Variational.q_meanfield_gaussian`](@ref)        | Find a numerically non-degenerate initialization for a mean-field Gaussian family                                                                 |
-| `q_fullrank_gaussian`         | [`Turing.Variational.q_fullrank_gaussian`](@ref)         | Find a numerically non-degenerate initialization for a full-rank Gaussian family                                                                  |
-| `KLMinRepGradDescent`         | [`Turing.Variational.KLMinRepGradDescent`](@ref)         | KL divergence minimization via stochastic gradient descent with the reparameterization gradient                                                   |
-| `KLMinRepGradProxDescent`     | [`Turing.Variational.KLMinRepGradProxDescent`](@ref)     | KL divergence minimization via stochastic proximal gradient descent with the reparameterization gradient over location-scale variational families |
-| `KLMinScoreGradDescent`       | [`Turing.Variational.KLMinScoreGradDescent`](@ref)       | KL divergence minimization via stochastic gradient descent with the score gradient                                                                |
-| `KLMinWassFwdBwd`             | [`Turing.Variational.KLMinWassFwdBwd`](@ref)             | KL divergence minimization via Wasserstein proximal gradient descent                                                                              |
-| `KLMinNaturalGradDescent`     | [`Turing.Variational.KLMinNaturalGradDescent`](@ref)     | KL divergence minimization via natural gradient descent                                                                                           |
-| `KLMinSqrtNaturalGradDescent` | [`Turing.Variational.KLMinSqrtNaturalGradDescent`](@ref) | KL divergence minimization via natural gradient descent in the square-root parameterization                                                       |
-| `FisherMinBatchMatch`         | [`Turing.Variational.FisherMinBatchMatch`](@ref)         | Covariance-weighted Fisher divergence minimization via the batch-and-match algorithm                                                              |
+| Exported symbol               | Documentation                                       | Description                                                                                                                                       |
+|:----------------------------- |:--------------------------------------------------- |:------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `vi`                          | [`Turing.vi`](@ref)                                 | Perform variational inference                                                                                                                     |
+| `q_locationscale`             | [`Turing.Variational.q_locationscale`](@ref)        | Find a numerically non-degenerate initialization for a location-scale variational family                                                          |
+| `q_meanfield_gaussian`        | [`Turing.Variational.q_meanfield_gaussian`](@ref)   | Find a numerically non-degenerate initialization for a mean-field Gaussian family                                                                 |
+| `q_fullrank_gaussian`         | [`Turing.Variational.q_fullrank_gaussian`](@ref)    | Find a numerically non-degenerate initialization for a full-rank Gaussian family                                                                  |
+| `KLMinRepGradDescent`         | [`AdvancedVI.KLMinRepGradDescent`](@extref)         | KL divergence minimization via stochastic gradient descent with the reparameterization gradient                                                   |
+| `KLMinRepGradProxDescent`     | [`AdvancedVI.KLMinRepGradProxDescent`](@extref)     | KL divergence minimization via stochastic proximal gradient descent with the reparameterization gradient over location-scale variational families |
+| `KLMinScoreGradDescent`       | [`AdvancedVI.KLMinScoreGradDescent`](@extref)       | KL divergence minimization via stochastic gradient descent with the score gradient                                                                |
+| `KLMinWassFwdBwd`             | [`AdvancedVI.KLMinWassFwdBwd`](@extref)             | KL divergence minimization via Wasserstein proximal gradient descent                                                                              |
+| `KLMinNaturalGradDescent`     | [`AdvancedVI.KLMinNaturalGradDescent`](@extref)     | KL divergence minimization via natural gradient descent                                                                                           |
+| `KLMinSqrtNaturalGradDescent` | [`AdvancedVI.KLMinSqrtNaturalGradDescent`](@extref) | KL divergence minimization via natural gradient descent in the square-root parameterization                                                       |
+| `FisherMinBatchMatch`         | [`AdvancedVI.FisherMinBatchMatch`](@extref)         | Covariance-weighted Fisher divergence minimization via the batch-and-match algorithm                                                              |
 
 ### Automatic differentiation types
 
diff --git a/src/Turing.jl b/src/Turing.jl
@@ -80,6 +80,7 @@ using DynamicPPL:
     setthreadsafe
 using StatsBase: predict
 using OrderedCollections: OrderedDict
+using Libtask: might_produce, @might_produce
 
 # Turing essentials - modelling macros and inference algorithms
 export
@@ -172,6 +173,9 @@ export
     MAP,
     MLE,
     # Chain save/resume
-    loadstate
+    loadstate,
+    # Keyword-argument support for particle methods (SMC / PG)
+    might_produce,
+    @might_produce
 
 end
diff --git a/src/mcmc/particle_mcmc.jl b/src/mcmc/particle_mcmc.jl
@@ -19,23 +19,22 @@ struct ParticleMCMCContext{R<:AbstractRNG} <: DynamicPPL.AbstractContext
     rng::R
 end
 
-struct TracedModel{V<:AbstractVarInfo,M<:Model,E<:Tuple} <: AdvancedPS.AbstractGenericModel
+struct TracedModel{V<:AbstractVarInfo,M<:Model,T<:Tuple,NT<:NamedTuple} <:
+       AdvancedPS.AbstractTuringLibtaskModel
     model::M
     varinfo::V
-    evaluator::E
     resample::Bool
+    fargs::T
+    kwargs::NT
 end
 
 function TracedModel(
     model::Model, varinfo::AbstractVarInfo, rng::Random.AbstractRNG, resample::Bool
 )
     model = DynamicPPL.setleafcontext(model, ParticleMCMCContext(rng))
     args, kwargs = DynamicPPL.make_evaluate_args_and_kwargs(model, varinfo)
-    isempty(kwargs) || error(
-        "Particle sampling methods do not currently support models with keyword arguments.",
-    )
-    evaluator = (model.f, args...)
-    return TracedModel(model, varinfo, evaluator, resample)
+    fargs = (model.f, args...)
+    return TracedModel(model, varinfo, resample, fargs, kwargs)
 end
 
 function AdvancedPS.advance!(
@@ -53,16 +52,16 @@ function AdvancedPS.delete_retained!(trace::TracedModel)
     # In such a case, we need to ensure that when we continue sampling (i.e.
     # the next time we hit tilde_assume!!), we don't use the values in the 
     # reference particle but rather sample new values.
-    return TracedModel(trace.model, trace.varinfo, trace.evaluator, true)
+    return TracedModel(trace.model, trace.varinfo, true, trace.fargs, trace.kwargs)
 end
 
 function AdvancedPS.reset_model(trace::TracedModel)
     return trace
 end
 
-function Libtask.TapedTask(taped_globals, model::TracedModel; kwargs...)
+function Libtask.TapedTask(taped_globals, model::TracedModel)
     return Libtask.TapedTask(
-        taped_globals, model.evaluator[1], model.evaluator[2:end]...; kwargs...
+        taped_globals, model.fargs[1], model.fargs[2:end]...; model.kwargs...
     )
 end
 
@@ -124,6 +123,7 @@ function AbstractMCMC.sample(
 )
     check_model && _check_model(model, sampler)
     error_if_threadsafe_eval(model)
+    check_model_kwargs(model)
     # need to add on the `nparticles` keyword argument for `initialstep` to make use of
     return AbstractMCMC.mcmcsample(
         rng,
@@ -138,6 +138,28 @@ function AbstractMCMC.sample(
     )
 end
 
+function check_model_kwargs(model::DynamicPPL.Model)
+    if !isempty(model.defaults)
+        # Calls with keyword arguments dispatch through `Core.kwcall`, so a model with
+        # keyword arguments is only usable here if that signature is `might_produce`.
+        might_produce = Libtask.might_produce(typeof((Core.kwcall, NamedTuple(), model.f)))
+        if !might_produce
+            io = IOBuffer()
+            ctx = IOContext(io, :color => true)  # so `printstyled` emits ANSI styling
+            print(
+                ctx,
+                "Models with keyword arguments need special treatment to be used" *
+                " with particle methods. Please run:\n\n",
+            )
+            printstyled(
+                ctx, "    Turing.@might_produce($(model.f))"; bold=true, color=:blue
+            )
+            print(ctx, "\n\nbefore sampling from this model with particle methods.\n")
+            error(String(take!(io)))
+        end
+    end
+end
+
 function Turing.Inference.initialstep(
     rng::AbstractRNG,
     model::DynamicPPL.Model,
@@ -146,6 +168,7 @@ function Turing.Inference.initialstep(
     nparticles::Int,
     kwargs...,
 )
+    check_model_kwargs(model)
     # Reset the VarInfo.
     vi = DynamicPPL.setacc!!(vi, ProduceLogLikelihoodAccumulator())
     vi = DynamicPPL.empty!!(vi)
@@ -254,6 +277,7 @@ function Turing.Inference.initialstep(
     rng::AbstractRNG, model::DynamicPPL.Model, spl::PG, vi::AbstractVarInfo; kwargs...
 )
     error_if_threadsafe_eval(model)
+    check_model_kwargs(model)
     vi = DynamicPPL.setacc!!(vi, ProduceLogLikelihoodAccumulator())
 
     # Create a new set of particles
@@ -495,7 +519,7 @@ end
 # details of the compiler, we set a bunch of methods as might_produce = true. We start with
 # adding to ProduceLogLikelihoodAccumulator, which is what calls `produce`, and go up the
 # call stack.
-Libtask.might_produce(::Type{<:Tuple{typeof(DynamicPPL.accloglikelihood!!),Vararg}}) = true
+Libtask.@might_produce(DynamicPPL.accloglikelihood!!)
 function Libtask.might_produce(
     ::Type{
         <:Tuple{
@@ -507,15 +531,11 @@ function Libtask.might_produce(
 )
     return true
 end
-function Libtask.might_produce(
-    ::Type{<:Tuple{typeof(DynamicPPL.accumulate_observe!!),Vararg}}
-)
-    return true
-end
-Libtask.might_produce(::Type{<:Tuple{typeof(DynamicPPL.tilde_observe!!),Vararg}}) = true
-# Could the next two could have tighter type bounds on the arguments, namely a GibbsContext?
+Libtask.@might_produce(DynamicPPL.accumulate_observe!!)
+Libtask.@might_produce(DynamicPPL.tilde_observe!!)
+# Could tilde_assume!! have tighter type bounds on the arguments, namely a GibbsContext?
 # That's the only thing that makes tilde_assume calls result in tilde_observe calls.
-Libtask.might_produce(::Type{<:Tuple{typeof(DynamicPPL.tilde_assume!!),Vararg}}) = true
-Libtask.might_produce(::Type{<:Tuple{typeof(DynamicPPL.evaluate!!),Vararg}}) = true
-Libtask.might_produce(::Type{<:Tuple{typeof(DynamicPPL.init!!),Vararg}}) = true
+Libtask.@might_produce(DynamicPPL.tilde_assume!!)
+Libtask.@might_produce(DynamicPPL.evaluate!!)
+Libtask.@might_produce(DynamicPPL.init!!)
 Libtask.might_produce(::Type{<:Tuple{<:DynamicPPL.Model,Vararg}}) = true
diff --git a/test/Project.toml b/test/Project.toml
@@ -43,7 +43,7 @@ ADTypes = "1"
 AbstractMCMC = "5.9"
 AbstractPPL = "0.11, 0.12, 0.13"
 AdvancedMH = "0.8.9"
-AdvancedPS = "0.7"
+AdvancedPS = "0.7.2"
 AdvancedVI = "0.6"
 Aqua = "0.8"
 BangBang = "0.4"
diff --git a/test/mcmc/particle_mcmc.jl b/test/mcmc/particle_mcmc.jl
@@ -162,9 +162,23 @@ end
     end
 
     # https://github.com/TuringLang/Turing.jl/issues/2007
-    @testset "keyword arguments not supported" begin
-        @model kwarg_demo(; x=2) = return x
-        @test_throws ErrorException sample(kwarg_demo(), PG(1), 10)
+    @testset "keyword argument handling" begin
+        @model function kwarg_demo(y; n=0.0)
+            x ~ Normal(n)
+            return y ~ Normal(x)
+        end
+        @test_throws "Models with keyword arguments" sample(kwarg_demo(5.0), PG(20), 10)
+
+        # Check that enabling `might_produce` does allow sampling
+        @might_produce kwarg_demo
+        chain = sample(StableRNG(468), kwarg_demo(5.0), PG(20), 1000)
+        @test chain isa MCMCChains.Chains
+        @test mean(chain[:x]) ≈ 2.5 atol = 0.2
+
+        # Check that the keyword argument's value is respected
+        chain2 = sample(StableRNG(468), kwarg_demo(5.0; n=10.0), PG(20), 1000)
+        @test chain2 isa MCMCChains.Chains
+        @test mean(chain2[:x]) ≈ 7.5 atol = 0.2
     end
 
     @testset "refuses to run threadsafe eval" begin