Various minor+doc improvements

Closed-Limelike-Curves · Closed-Limelike-Curves · commit bb20f5217f51 · 2021-08-25T12:26:19.000-07:00
diff --git a/src/AbstractCV.jl b/src/AbstractCV.jl
@@ -14,9 +14,7 @@ const CV_DESC = """
         estimated using leave-one-out cross validation.
       - `:naive_est` contains estimates of the in-sample prediction error.
       - `:p_eff` is the effective number of parameters -- a model with a `p_eff` of 2 is 
-        "about as overfit" as a model with 2 parameters and no regularization. It equals the 
-        difference between the previous two estimators, and measures how much your model
-        tends to overfit the data.
+        "about as overfit" as a model with 2 parameters and no regularization.
   - `pointwise::KeyedArray`: A `KeyedArray` of pointwise estimates with 5 columns --
       - `:cv_est` contains the estimated out-of-sample error for this point, as measured
       using leave-one-out cross validation.
diff --git a/src/GPD.jl b/src/GPD.jl
@@ -77,6 +77,7 @@ function gpdfit(
 
 end
 
+
 """
     gpd_quantile(p::T, k::T, sigma::T) where {T<:Real} -> T
 
@@ -95,4 +96,3 @@ A quantile of the Generalized Pareto Distribution.
 function gpd_quantile(p, ξ::T, sigma::T) where {T <: Real}
     return sigma * expm1(-ξ * log1p(-p)) / ξ
 end
-
diff --git a/src/ImportanceSampling.jl b/src/ImportanceSampling.jl
@@ -85,11 +85,9 @@ Implements Pareto-smoothed importance sampling (PSIS).
 
 # Arguments
 ## Positional Arguments
-  - `log_ratios::AbstractArray`: A 2d or 3d array of importance ratios on the log scale (for 
-    PSIS-LOO these are *negative* log-likelihood values). Indices must be ordered as 
-    `[data, step, chain]`: `log_ratios[1, 2, 3]` should be the log-likelihood of the first 
-    data point, evaluated at the second step in the third chain. Chain indices can be
-    left off if there is only one chain, or if keyword argument `chain_index` is provided.
+  - `log_ratios::AbstractArray`: A 2d or 3d array of (unnormalized) importance ratios on the
+    log scale. Indices must be ordered as `[data, step, chain]`. The chain index can be left 
+    off if there is only one chain, or if keyword argument `chain_index` is provided.
   - $R_EFF_DOC
 
 ## Keyword Arguments
diff --git a/src/InternalHelpers.jl b/src/InternalHelpers.jl
@@ -4,6 +4,10 @@ const CHAIN_INDEX_DOC = """
     `log_likelihood[:, step]` belongs to the second chain.
 """
 
+const DATA_ARG = """
+`data`: An array of data points used to estimate the parameters of the model.
+"""
+
 const LIKELIHOOD_FUNCTION_ARG = """
 `ll_fun::Function`: A function taking a single data point and returning the log-likelihood 
 of that point. This function must take the form `f(θ[1], ..., θ[n], data)`, where `θ` is the
diff --git a/src/LeaveOneOut.jl b/src/LeaveOneOut.jl
@@ -109,18 +109,15 @@ score.
   - `log_likelihood::Array`: A matrix or 3d array of log-likelihood values indexed as
     `[data, step, chain]`. The chain argument can be left off if `chain_index` is provided
     or if all posterior samples were drawn from a single chain.
-  - `args...`: Positional arguments to be passed to [`psis`](@ref).
-  - `chain_index::Vector`: An optional vector of integers specifying which chain each
-    step belongs to. For instance, `chain_index[3]` should return `2` if
-    `log_likelihood[:, 3]` belongs to the second chain.
-  - `kwargs...`: Keyword arguments to be passed to [`psis`](@ref).
+  - $ARGS [`psis`](@ref).
+  - $CHAIN_INDEX_DOC
+  - $KWARGS [`psis`](@ref).
 
 See also: [`psis`](@ref), [`loo`](@ref), [`PsisLoo`](@ref).
 """
 function psis_loo(
-    log_likelihood::T, args...; kwargs...
-) where {F <: Real, T <: AbstractArray{F, 3}}
-
+    log_likelihood::AbstractArray{<:Real, 3}, args...; kwargs...
+)
 
     dims = size(log_likelihood)
     data_size = dims[1]
@@ -139,14 +136,14 @@ function psis_loo(
 
     @tullio pointwise_loo[i] := weights[i, j, k] * exp(log_likelihood[i, j, k]) |> log
     @tullio pointwise_naive[i] := exp(log_likelihood[i, j, k] - log_count) |> log
-    pointwise_overfit = pointwise_naive - pointwise_loo
+    pointwise_p_eff = pointwise_naive - pointwise_loo
     pointwise_mcse = _calc_mcse(weights, log_likelihood, pointwise_loo, r_eff)
 
 
     pointwise = KeyedArray(
-        hcat(pointwise_loo, pointwise_naive, pointwise_overfit, pointwise_mcse, ξ);
+        hcat(pointwise_loo, pointwise_naive, pointwise_p_eff, pointwise_mcse, ξ);
         data=1:length(pointwise_loo),
-        statistic=[:cv_est, :naive_est, :overfit, :mcse, :pareto_k],
+        statistic=[:cv_est, :naive_est, :p_eff, :mcse, :pareto_k],
     )
 
     table = _generate_loo_table(pointwise)
@@ -160,28 +157,28 @@ end
 
 
 function psis_loo(
-    log_likelihood::T,
+    log_likelihood::AbstractMatrix{<:Real},
     args...;
     chain_index::AbstractVector=ones(size(log_likelihood, 1)),
     kwargs...,
-) where {F <: Real, T <: AbstractMatrix{F}}
+)
     new_log_ratios = _convert_to_array(log_likelihood, chain_index)
     return psis_loo(new_log_ratios, args...; kwargs...)
 end
 
 
-function _generate_loo_table(pointwise::AbstractArray)
+function _generate_loo_table(pointwise::AbstractArray{<:Real})
 
     data_size = size(pointwise, :data)
     # create table with the right labels
     table = KeyedArray(
         similar(NamedDims.unname(pointwise), 3, 4);
-        criterion=[:cv_est, :naive_est, :overfit],
+        criterion=[:cv_est, :naive_est, :p_eff],
         statistic=[:total, :se_total, :mean, :se_mean],
     )
 
     # calculate the sample expectation for the total score
-    to_sum = pointwise([:cv_est, :naive_est, :overfit])
+    to_sum = pointwise([:cv_est, :naive_est, :p_eff])
     @tullio averages[crit] := to_sum[data, crit] / data_size
     averages = reshape(averages, 3)
     table(:, :mean) .= averages
@@ -197,6 +194,11 @@ function _generate_loo_table(pointwise::AbstractArray)
     # calculate the sample expectation for the standard error in averages
     table(:, :se_total) .= se_mean * data_size
 
+    if table(:p_eff, :total) ≤ 0
+        @warn "The calculated effective number of parameters is negative, which should " *
+        "not be possible. PSIS has failed to approximate the target distribution."
+    end
+
     return table
 end
 
@@ -211,4 +213,4 @@ function _calc_mcse(weights, log_likelihood, pointwise_loo, r_eff)
     # (google "log-normal method of moments" for a proof)
     # apply MCMC correlation correction:
     return @turbo @. sqrt(pointwise_var / r_eff)
-end
+end
diff --git a/src/MCMCChainsHelpers.jl b/src/MCMCChainsHelpers.jl
@@ -1,15 +1,21 @@
 using .MCMCChains
 export pointwise_log_likelihoods
 
+
+const CHAINS_ARG = """
+`chains::Chains`: A chain object from MCMCChains.
+"""
+
+
 """
     pointwise_log_likelihoods(ll_fun::Function, chains::Chains, data)
 
 Compute the pointwise log likelihoods.
 
 # Arguments
   - $LIKELIHOOD_FUNCTION_ARG
-  - `chain::Chains`: A chain object from MCMCChains.
-  - `data`: An array of data points used to estimate the parameters of the model.
+  - $CHAINS_ARG
+  - $DATA_ARG
 
 # Returns
   - `Array`: a three dimensional array of pointwise log-likelihoods. Dimensions are ordered
@@ -22,6 +28,7 @@ function pointwise_log_likelihoods(
     return pointwise_log_likelihoods(ll_fun, samples, data; kwargs...)
 end
 
+
 """
     function psis_loo(
         ll_fun::Function, 
@@ -37,8 +44,8 @@ score from an MCMCChains object.
 # Arguments
 
   - $LIKELIHOOD_FUNCTION_ARG
-  - `chain::Chain`: A chain object from MCMCChains.
-  - `data`: A vector of data points used to estimate the parameters of the model.
+  - $CHAINS_ARG
+  - $DATA_ARG
   - $ARGS [`psis_loo`](@ref).
   - $KWARGS [`psis_loo`](@ref).
 
@@ -57,8 +64,8 @@ Implements Pareto-smoothed importance sampling (PSIS) based on MCMCChain object.
 # Arguments
 
   - $LIKELIHOOD_FUNCTION_ARG
-  - `chain::Chain`: A chain object from MCMCChains.
-  - `data`: A vector of data points used to estimate the parameters of the model.
+  - $CHAINS_ARG
+  - $DATA_ARG
   - $ARGS [`psis`](@ref).
   - $KWARGS [`psis`](@ref).
 
@@ -67,4 +74,4 @@ See also: [`psis`](@ref), [`psis_loo`](@ref), [`PsisLoo`](@ref).
 function psis(ll_fun::Function, chain::Chains, data::AbstractVector, args...; kwargs...)
     pointwise_log_likes = pointwise_log_likelihoods(ll_fun, chain, data)
     return psis(-pointwise_log_likes, args...; kwargs...)
-end
+end
diff --git a/src/ModelComparison.jl b/src/ModelComparison.jl
@@ -110,15 +110,14 @@ function loo_compare(
     pointwise = KeyedArray(
         pointwise;
         data=1:size(pointwise, :data),
-        statistic=[:cv_est, :naive_est, :overfit, :mcse, :pareto_k],
+        statistic=[:cv_est, :naive_est, :p_eff, :mcse, :pareto_k],
         model=model_names,
     )
 
     # Subtract the effective number of params and elpd ests; leave mcse+pareto_k the same
     base_case = pointwise[data=:, statistic=1:3, model=1]
     @inbounds @simd for model_number in axes(pointwise, :model)
-        @. pointwise[:, 1:3, model_number] = 
-            pointwise[:, 1:3, model_number] - base_case
+        @. pointwise[:, 1:3, model_number] = pointwise[:, 1:3, model_number] - base_case
     end
 
     return ModelComparison(pointwise, table)
diff --git a/src/NaiveLPD.jl b/src/NaiveLPD.jl
@@ -0,0 +1,21 @@
+using LoopVectorization
+using Tullio
+
+
+"""
+    naive_lpd(log_likelihood::AbstractArray{<:Real, 3})
+
+Calculate the naive (in-sample) estimate of the expected log probability density, otherwise
+known as the in-sample Bayes score. Not recommended for most uses.
+"""
+function naive_lpd(log_likelihood::AbstractArray{<:Real, 3})
+
+    dims = size(log_likelihood)
+    data_size = dims[1]
+    mcmc_count = dims[2] * dims[3]  # total number of samples from posterior
+    log_count = log(mcmc_count)
+
+    @tullio pointwise_naive[i] := exp(log_likelihood[i, j, k] - log_count) |> log
+
+    return sum(pointwise_naive)
+end
diff --git a/src/ParetoSmooth.jl b/src/ParetoSmooth.jl
@@ -4,9 +4,9 @@ using DocStringExtensions
 
 function __init__()
     @require Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" include("TuringHelpers.jl")
-    @require MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" include(
-        "MCMCChainsHelpers.jl"
-    )
+    @require MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" begin 
+        include("MCMCChainsHelpers.jl")
+    end
 end
 
 include("AbstractCV.jl")
@@ -16,6 +16,7 @@ include("InternalHelpers.jl")
 include("ImportanceSampling.jl")
 include("LeaveOneOut.jl")
 include("ModelComparison.jl")
+include("NaiveLPD.jl")
 include("PublicHelpers.jl")
 
 end
diff --git a/src/PublicHelpers.jl b/src/PublicHelpers.jl
@@ -1,27 +1,29 @@
 export pointwise_log_likelihoods
 
-const ARRAY_DIMS_WARNING = "The supplied array of mcmc samples indicates you have more 
-parameters than mcmc samples.This is possible, but highly unusual. Please check that your
-array of mcmc samples has the following dimensions: [n_samples,n_params,n_chains]."
+const ARRAY_DIMS_WARNING = """
+The supplied array of mcmc samples indicates you have more parameters than samples. This is 
+possible, but highly unusual. Please check that your array has the following dimensions, in 
+order: [n_samples,n_params,n_chains].
+"""
+
 
 """
     pointwise_log_likelihoods(
-        ll_fun::Function, 
-        samples::AbstractArray{<:Real,3}, 
-        data;
-        splat::Bool=true
+        ll_fun::Function, samples::AbstractArray{<:Real,3}, data;
+        splat::Bool=true[, chain_index::Vector{<:Integer}]
     ) 
 
-Compute the pointwise log likelihood.
+Compute the pointwise log likelihoods.
 
 # Arguments
   - $LIKELIHOOD_FUNCTION_ARG
   - `samples::AbstractArray`: A three dimensional array of MCMC samples. Here, the first
-    dimension should indicate the iteration of the MCMC ; the second dimension should
-    indicate the parameter ; and the third dimension represents the chains. 
-  - `data`: A vector of data used to estimate the parameters of the model.
+    dimension should indicate the step of the MCMC algorithm; the second dimension should
+    indicate the parameter; and the third should indicate the chain. 
+  - $DATA_ARG
   - `splat`: If `true` (default), `f` must be a function of `n` different parameters. 
-    Otherwise, `f` is assumed to be a function of a single parameter vector.
+    Otherwise, `f` is assumed to be a function of a single parameter vector.
+  - $CHAIN_INDEX_DOC
 
 # Returns
   - `Array`: A three dimensional array of pointwise log-likelihoods.
diff --git a/src/TuringHelpers.jl b/src/TuringHelpers.jl
@@ -1,14 +1,6 @@
 using .Turing
-export pointwise_log_likelihoods
+export pointwise_log_likelihoods, psis_loo, psis
 
-const TURING_LOOP_WARN = """
-**Important Note:** The posterior log-likelihood must be computed with a `for` loop inside a
-Turing model; broadcasting will result in all observations being treated as if they are a
-single point. 
-"""
-const CHAINS_ARG = """
-`chains::Chains`: A chain object from MCMCChains.
-"""
 const TURING_MODEL_ARG = """
 `model`: A Turing model with data in the form of `model(data)`.
 """
@@ -17,9 +9,7 @@ const TURING_MODEL_ARG = """
 """
     pointwise_log_likelihoods(model::DynamicPPL.Model, chain::Chains)
 
-Compute the pointwise log-likelihoods from a Turing model. 
-
-$TURING_LOOP_WARN 
+Compute the pointwise log-likelihoods from a Turing model.  
 
 # Arguments
   - $TURING_MODEL_ARG
@@ -30,9 +20,6 @@ $TURING_LOOP_WARN
     indexed using `array[data, sample, chain]`.
 """
 function pointwise_log_likelihoods(model::DynamicPPL.Model, chain::Chains)
-
-    @info TURING_LOOP_WARN
-
     # subset of chain for mcmc samples
     chain_params = MCMCChains.get_sections(chain, :parameters)
     # compute the pointwise log likelihoods
@@ -57,9 +44,7 @@ end
     ) -> PsisLoo
 
 Use Pareto-Smoothed Importance Sampling to calculate the leave-one-out cross validation
-score from an MCMCChain object and a Turing model. 
-
-$TURING_LOOP_WARN
+score from a `Chains` object and a Turing model.
 
 # Arguments
 
@@ -71,7 +56,6 @@ $TURING_LOOP_WARN
 See also: [`psis`](@ref), [`loo`](@ref), [`PsisLoo`](@ref).
 """
 function psis_loo(model::DynamicPPL.Model, chain::Chains, args...; kwargs...)
-    @info TURING_LOOP_WARN
     pointwise_log_likes = pointwise_log_likelihoods(model, chain)
     return psis_loo(pointwise_log_likes, args...; kwargs...)
 end
@@ -87,8 +71,6 @@ end
 
 Generate samples using Pareto smoothed importance sampling (PSIS).
 
-$TURING_LOOP_WARN
-
 # Arguments
   - $TURING_MODEL_ARG
   - $CHAINS_ARG
@@ -98,7 +80,6 @@ $TURING_LOOP_WARN
 See also: [`psis`](@ref), [`loo`](@ref), [`PsisLoo`](@ref).
 """
 function psis(model::DynamicPPL.Model, chain::Chains, args...; kwargs...)
-    @info TURING_LOOP_WARN
     log_ratios = pointwise_log_likelihoods(model, chain)
     return psis(-log_ratios, args...; kwargs...)
 end
diff --git a/test/runtests.jl b/test/runtests.jl
diff --git a/test/tests/BasicTests.jl b/test/tests/BasicTests.jl
diff --git a/test/tests/ComparisonTests.jl b/test/tests/ComparisonTests.jl
diff --git a/test/tests/TuringTests.jl b/test/tests/TuringTests.jl