From 7bf2ef81038da1f16648c311bc5585bf9091e55a Mon Sep 17 00:00:00 2001 From: Daniel Schwen Date: Mon, 27 Apr 2026 09:51:01 -0600 Subject: [PATCH] Add inequality constraints to Gaussian Process trainer (#32848) Three mechanisms for enforcing inequality constraints on GP predictions: - Link functions (log, logit): exact constraints via output warping; GP trains in unconstrained latent space and predictions are inverted back to physical space via the inverse link. - Derivative observations: monotonicity constraints via virtual observations that augment the covariance matrix with cross-covariance blocks between function values and directional derivatives. - Penalty constraints: soft bounds added to NLML loss/gradient during Adam hyperparameter tuning. New files: GPLinkFunction.h/C, GP_log_link.i, GP_logit_link.i, GP_derivative_obs.i, GP_penalty_constraint.i, GPLinkFunction.md. Updated: GaussianProcess.h/C, GaussianProcessTrainer.h/C, GaussianProcessSurrogate.C, CovarianceFunctionBase.h/C, SquaredExponentialCovariance.h/C, tests spec, and doc pages. Co-Authored-By: Claude Sonnet 4.6 --- .../source/trainers/GaussianProcessTrainer.md | 79 +++++ .../content/source/utils/GPLinkFunction.md | 81 ++++++ .../content/source/utils/GaussianProcess.md | 30 ++ .../covariances/CovarianceFunctionBase.h | 70 +++++ .../SquaredExponentialCovariance.h | 26 ++ .../include/trainers/GaussianProcessTrainer.h | 1 + .../include/utils/GPLinkFunction.h | 140 +++++++++ .../include/utils/GaussianProcess.h | 112 ++++++- .../src/covariances/CovarianceFunctionBase.C | 45 +++ .../SquaredExponentialCovariance.C | 146 ++++++++++ .../src/surrogates/GaussianProcessSurrogate.C | 50 +++- .../src/trainers/GaussianProcessTrainer.C | 175 ++++++++++- .../src/utils/GPLinkFunction.C | 107 +++++++ .../src/utils/GaussianProcess.C | 275 +++++++++++++++++- .../gaussian_process/GP_derivative_obs.i | 122 ++++++++ .../surrogates/gaussian_process/GP_log_link.i | 116 ++++++++ .../gaussian_process/GP_logit_link.i | 115 ++++++++ .../gaussian_process/GP_penalty_constraint.i | 120 ++++++++ .../tests/surrogates/gaussian_process/tests | 36 ++- 19 files changed, 1819 insertions(+), 27 deletions(-) create mode 100644 modules/stochastic_tools/doc/content/source/utils/GPLinkFunction.md create mode 100644 modules/stochastic_tools/include/utils/GPLinkFunction.h create mode 100644 modules/stochastic_tools/src/utils/GPLinkFunction.C create mode 100644 modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_derivative_obs.i create mode 100644 modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_log_link.i create mode 100644 modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_logit_link.i create mode 100644 modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_penalty_constraint.i diff --git a/modules/stochastic_tools/doc/content/source/trainers/GaussianProcessTrainer.md b/modules/stochastic_tools/doc/content/source/trainers/GaussianProcessTrainer.md index ea2ed40baee1..09ae0dab9831 100644 --- a/modules/stochastic_tools/doc/content/source/trainers/GaussianProcessTrainer.md +++ b/modules/stochastic_tools/doc/content/source/trainers/GaussianProcessTrainer.md @@ -238,6 +238,85 @@ with $M$ outputs, each training iteration of Adam has a cost of $\mathcal{O} (M^3N^3)$. Adam permits using $n < N$ random training points during each iteration (mini-batches) which has a cost of $\mathcal{O}(M^3n^3)<<\mathcal{O}(M^3N^3)$. 
+## Inequality Constraints + +Three complementary mechanisms are available to enforce inequality constraints on +GP predictions. They can be combined freely. + +### Link Functions (Output Warping) + +A link function $g$ maps physical outputs to an unconstrained latent space. +The GP trains in latent space and predictions are inverted back to physical space, +guaranteeing constraint satisfaction by construction. + +| `link_function` | Constraint | Required parameters | +| - | - | - | +| `identity` (default) | none | — | +| `log` | $y > \ell$ | `link_lower_bound` | +| `logit` | $\ell < y < u$ | `link_lower_bound`, `link_upper_bound` | + +See [GPLinkFunction.md] for the mathematical details. + ++Example — positivity constraint:+ + +!listing stochastic_tools/test/tests/surrogates/gaussian_process/GP_log_link.i + block=Trainers + +### Derivative Observations (Monotonicity Constraints) + +Virtual derivative observations augment the covariance matrix with cross-covariance +blocks between function values and directional derivatives at specified points. +This enforces monotonicity (or zero-derivative) constraints structurally via +the GP prior. + +The augmented covariance matrix takes the block form: + +!equation +\mathbf{K}_{\text{aug}} = \begin{bmatrix} +\mathbf{K}_{ff} + \sigma_n^2 \mathbf{I} & \mathbf{K}_{fd} \\ +\mathbf{K}_{df} & \mathbf{K}_{dd} + \sigma_d^2 \mathbf{I} +\end{bmatrix} + +where $\mathbf{K}_{fd}$ and $\mathbf{K}_{dd}$ contain cross-covariances between +function values and partial derivatives. Only the +[SquaredExponentialCovariance.md] kernel supports derivative covariances. Only +single-output GPs are supported. + +The virtual observations are placed at user-specified locations +(`monotone_constraint_points`) and target either zero derivative, positive +derivative (`increasing`), or negative derivative (`decreasing`) at those points. + ++Example — increasing in second parameter:+ + +!listing stochastic_tools/test/tests/surrogates/gaussian_process/GP_derivative_obs.i + block=Trainers + +### Penalty Constraints (Soft Bounds During Tuning) + +Penalty terms are added to the negative log-marginal likelihood (NLML) and its +gradient during Adam hyperparameter optimization. This steers hyperparameter +selection toward configurations that respect lower/upper bounds on predicted means +at specified evaluation points. + +The augmented loss is: + +!equation +\mathcal{L}_{\text{aug}} = \mathcal{L}_{\text{NLML}} + \lambda \sum_c \left[ + \max(0,\, \ell_c - \hat{y}_c)^2 + \max(0,\, \hat{y}_c - u_c)^2 +\right] + +where $\hat{y}_c$ is the predicted mean at constraint point $c$, and $\lambda$ +is `penalty_weight`. + +The bounds are specified in physical space and are internally transformed through +the link function and data standardizer. Penalty constraints require the +`tune_parameters` option to be active. 
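+
+For intuition, the following sketch shows the penalty term in isolation. This is
+illustrative standalone C++, not the code added by this patch; the function name
+and its inputs are hypothetical stand-ins for the NLML value and the predicted
+means $\hat{y}_c$ at the constraint points:
+
+```cpp
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <vector>
+
+// Hinge-squared penalty added to the NLML loss (illustrative sketch).
+double penalizedLoss(double nlml,
+                     const std::vector<double> & mu, // predicted means at constraint points
+                     const std::vector<double> & lb, // lower bounds (-inf disables)
+                     const std::vector<double> & ub, // upper bounds (+inf disables)
+                     double lambda)                  // penalty_weight
+{
+  double loss = nlml;
+  for (std::size_t c = 0; c < mu.size(); ++c)
+  {
+    loss += lambda * std::pow(std::max(0.0, lb[c] - mu[c]), 2); // lower-bound violation
+    loss += lambda * std::pow(std::max(0.0, mu[c] - ub[c]), 2); // upper-bound violation
+  }
+  return loss;
+}
+```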
+
++Example — lower-bound penalty at two points:+
+
+!listing stochastic_tools/test/tests/surrogates/gaussian_process/GP_penalty_constraint.i
+         block=Trainers
+
 !syntax parameters /Trainers/GaussianProcessTrainer
 
 !syntax inputs /Trainers/GaussianProcessTrainer
diff --git a/modules/stochastic_tools/doc/content/source/utils/GPLinkFunction.md b/modules/stochastic_tools/doc/content/source/utils/GPLinkFunction.md
new file mode 100644
index 000000000000..2d34dbd48882
--- /dev/null
+++ b/modules/stochastic_tools/doc/content/source/utils/GPLinkFunction.md
@@ -0,0 +1,81 @@
+# GPLinkFunction
+
+## Overview
+
+`GPLinkFunction` provides output warping (link functions) for [GaussianProcessTrainer.md]
+that enforce inequality constraints on GP predictions. A link function $g$ maps
+the physical output $y$ to a latent (unconstrained) variable $z = g(y)$. The GP
+is trained in latent $z$-space, and predictions are mapped back to physical space
+via the inverse $y = g^{-1}(z)$.
+
+This approach is sometimes called *warped Gaussian processes*
+[!cite](snelson2004warped) and provides an exact constraint: every prediction
+satisfies the bound by construction.
+
+## Available Link Functions
+
+### Identity (default)
+
+No transformation is applied. The GP trains and predicts directly in physical space.
+
+!equation
+g(y) = y, \quad g^{-1}(z) = z
+
+Use this (or omit `link_function`) for unconstrained problems.
+
+### Log Link
+
+Enforces a strict lower bound: $y > \ell$.
+
+!equation
+g(y) = \ln(y - \ell), \quad g^{-1}(z) = e^{z} + \ell
+
+where $\ell$ is `link_lower_bound` (default 0, giving a positivity constraint).
+
+The Jacobian correction $\ln |g'(y)| = -\ln(y - \ell)$ is included in the
+negative log-marginal likelihood when hyperparameter tuning is active.
+
+### Logit Link
+
+Enforces both a lower and an upper bound: $\ell < y < u$.
+
+!equation
+g(y) = \ln\!\left(\frac{y - \ell}{u - y}\right), \quad
+g^{-1}(z) = \ell + \frac{u - \ell}{1 + e^{-z}}
+
+where $\ell$ is `link_lower_bound` and $u$ is `link_upper_bound`. Requires
+$u > \ell$.
+
+## Uncertainty Propagation
+
+When a link function is active, the GP posterior standard deviation $\sigma_z$ in
+latent space is mapped to physical space via the delta method:
+
+!equation
+\sigma_y \approx \left| (g^{-1})'(\mu_z) \right| \, \sigma_z
+
+This approximation is accurate when $\sigma_z$ is small enough that $g^{-1}$ is
+nearly linear over the bulk of the posterior; for larger $\sigma_z$ the physical-space
+posterior becomes skewed and a single standard deviation is only a rough summary.
+
+## Interaction with Standardization
+
+The link transform is applied to training data *before* standardization.
+The training pipeline in [GaussianProcessTrainer.md] proceeds as:
+
+1. Collect physical responses $y_i$.
+2. Apply the link: $z_i = g(y_i)$.
+3. Standardize (center/scale) the $z_i$.
+4. Train the GP in standardized $z$-space.
+
+At prediction time, the GP mean and standard deviation are de-standardized to
+$z$-space and then the inverse link is applied.
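+
+As a concrete illustration of this round trip, the sketch below walks one value
+through the log link and back, and propagates a hypothetical latent standard
+deviation with the delta method. This is a standalone example, not the MOOSE API,
+and the numerical values are made up:
+
+```cpp
+#include <cmath>
+#include <cstdio>
+
+int main()
+{
+  const double lb = 0.0; // link_lower_bound
+  const double y = 2.5;  // physical training value, must satisfy y > lb
+
+  const double z = std::log(y - lb);      // forward link: latent value the GP trains on
+  const double y_back = std::exp(z) + lb; // inverse link: recovers y exactly
+
+  // Hypothetical GP posterior in latent space; delta method for the physical std.
+  const double mu_z = z;
+  const double sigma_z = 0.1;
+  const double sigma_y = std::exp(mu_z) * sigma_z; // |(g^{-1})'(mu_z)| = exp(mu_z)
+
+  std::printf("y = %g, z = %g, y_back = %g, sigma_y = %g\n", y, z, y_back, sigma_y);
+  return 0;
+}
+```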
+ +## Usage + +Link functions are configured via parameters on [GaussianProcessTrainer.md]: + +!listing stochastic_tools/test/tests/surrogates/gaussian_process/GP_log_link.i + block=Trainers + +!listing stochastic_tools/test/tests/surrogates/gaussian_process/GP_logit_link.i + block=Trainers diff --git a/modules/stochastic_tools/doc/content/source/utils/GaussianProcess.md b/modules/stochastic_tools/doc/content/source/utils/GaussianProcess.md index 970c40c75137..8b907d6325a1 100644 --- a/modules/stochastic_tools/doc/content/source/utils/GaussianProcess.md +++ b/modules/stochastic_tools/doc/content/source/utils/GaussianProcess.md @@ -52,3 +52,33 @@ Or by simply calling the optimizer with Adam (stochastic algorithm): !listing stochastic_tools/src/utils/GaussianProcess.C start=tuneHyperParamsAdam(training_params end=); +### Applying a link function + +Before setting up the covariance matrix, training data can be transformed through +a [GPLinkFunction.md] to enforce inequality constraints. The link type and bounds +are configured through [GaussianProcessTrainer.md]: + +!listing stochastic_tools/src/trainers/GaussianProcessTrainer.C + start=// ---- Link function ---- + end=// Apply link function to training data + +The transformed values are standardized and the GP trains entirely in latent space. +At prediction time the inverse link is applied to recover physical outputs. + +### Derivative observations + +Monotonicity constraints are encoded via virtual derivative observations appended +to the training set. The `GaussianProcess` object stores the virtual parameter +locations and their associated derivative dimensions, and assembles an augmented +covariance matrix at setup time: + +!listing stochastic_tools/src/trainers/GaussianProcessTrainer.C + start=// ---- Derivative constraint (virtual observations) ---- + end=// ---- Penalty constraints ---- + +### Penalty constraints + +Soft bounds on predicted means at specified evaluation points are enforced by +adding a penalty term to the NLML loss and gradient during Adam optimization. +The penalty is computed inside `getLoss` and `getGradient` in `GaussianProcess.C`. + diff --git a/modules/stochastic_tools/include/covariances/CovarianceFunctionBase.h b/modules/stochastic_tools/include/covariances/CovarianceFunctionBase.h index 585e7624b0d4..404ce9f0120c 100644 --- a/modules/stochastic_tools/include/covariances/CovarianceFunctionBase.h +++ b/modules/stochastic_tools/include/covariances/CovarianceFunctionBase.h @@ -77,6 +77,76 @@ class CovarianceFunctionBase : public MooseObject, public CovarianceInterface const std::string & hyper_param_name, unsigned int ind) const; + /** + * Compute the cross-covariance between function values and derivatives: + * K_fd[i,j] = Cov[f(x_i), df(xd_j)/dx'_{dim}] = dK(x_i, xd_j)/dx'_{dim} + * For SE kernel: K_fd[i,j] = K(x_i,xd_j) * (x_{i,dim} - xd_{j,dim}) / ell_dim^2 + * Default implementation calls mooseError; override in kernels that support + * derivative observations. 
+ * @param K_fd Output matrix of size (x.rows() x xd.rows()) + * @param x Function-value observation locations + * @param xd Derivative observation locations + * @param dim Input dimension for the derivative + */ + virtual void computeCovarianceFD(RealEigenMatrix & K_fd, + const RealEigenMatrix & x, + const RealEigenMatrix & xd, + unsigned int dim) const; + + /** + * Compute the cross-covariance between derivatives and function values: + * K_df[i,j] = Cov[df(xd_i)/dx_{dim}, f(xp_j)] = dK(xd_i, xp_j)/dx_{d,dim} + * For SE kernel: K_df[i,j] = K(xd_i,xp_j) * (xp_{j,dim} - xd_{i,dim}) / ell_dim^2 + * Note: K_df = K_fd^T when xd and xp are swapped. + * Default implementation calls mooseError. + * @param K_df Output matrix of size (xd.rows() x xp.rows()) + * @param xd Derivative observation locations + * @param xp Function-value locations (e.g. test points) + * @param dim Input dimension for the derivative + */ + virtual void computeCovarianceDf(RealEigenMatrix & K_df, + const RealEigenMatrix & xd, + const RealEigenMatrix & xp, + unsigned int dim) const; + + /** + * Compute the covariance between two sets of derivative observations: + * K_dd[i,j] = Cov[df(xd_i)/dx_{dim_i}, df(xdp_j)/dx'_{dim_j}] + * = d^2 K(xd_i, xdp_j) / (dx_{dim_i} dx'_{dim_j}) + * For SE kernel when dim_i == dim_j: + * = K(xd_i,xdp_j) * [1/ell_k^2 - (xd_{i,k}-xdp_{j,k})^2/ell_k^4] + * For SE kernel when dim_i != dim_j: + * = K(xd_i,xdp_j) * [-(xd_{i,ki}-xdp_{j,ki})*(xd_{i,kj}-xdp_{j,kj})/(ell_ki^2*ell_kj^2)] + * Default implementation calls mooseError. + * @param K_dd Output matrix of size (xd.rows() x xdp.rows()) + * @param xd First set of derivative observation locations + * @param xdp Second set of derivative observation locations + * @param dim_i Derivative dimension for the first set + * @param dim_j Derivative dimension for the second set + */ + virtual void computeCovarianceDD(RealEigenMatrix & K_dd, + const RealEigenMatrix & xd, + const RealEigenMatrix & xdp, + unsigned int dim_i, + unsigned int dim_j) const; + + /** + * Compute dK_cross/dhp: the derivative of the cross-covariance K(x, xc) with + * respect to a hyperparameter. Used in the penalty constraint gradient. + * For kernels that do not implement this, the default returns a zero matrix + * (penalty loss is computed but no gradient contribution is added). 
+   * @param dKdhp Output matrix of size (x.rows() x xc.rows())
+   * @param x Training point locations (batch)
+   * @param xc Constraint point location (1 x d)
+   * @param hp_name Hyperparameter name (prefixed with covariance object name)
+   * @param ind Index within vector hyperparameter (0 for scalars)
+   */
+  virtual void computedKdhyper_cross(RealEigenMatrix & dKdhp,
+                                     const RealEigenMatrix & x,
+                                     const RealEigenMatrix & xc,
+                                     const std::string & hp_name,
+                                     unsigned int ind) const;
+
   /// Check if a given parameter is tunable
   /// @param The name of the hyperparameter
   virtual bool isTunable(const std::string & name) const;
diff --git a/modules/stochastic_tools/include/covariances/SquaredExponentialCovariance.h b/modules/stochastic_tools/include/covariances/SquaredExponentialCovariance.h
index f77d018f33dd..8493d6a8dbf9 100644
--- a/modules/stochastic_tools/include/covariances/SquaredExponentialCovariance.h
+++ b/modules/stochastic_tools/include/covariances/SquaredExponentialCovariance.h
@@ -44,6 +44,32 @@ class SquaredExponentialCovariance : public CovarianceFunctionBase
                                const Real sigma_f_squared,
                                const int ind);
 
+  /// Cross-covariance K_fd[i,j] = Cov[f(x_i), df(xd_j)/dx'_{dim}] = dK(x_i,xd_j)/dx'_{dim}
+  void computeCovarianceFD(RealEigenMatrix & K_fd,
+                           const RealEigenMatrix & x,
+                           const RealEigenMatrix & xd,
+                           unsigned int dim) const override;
+
+  /// Cross-covariance K_df[i,j] = Cov[df(xd_i)/dx_{dim}, f(xp_j)] = dK(xd_i,xp_j)/dx_{d,dim}
+  void computeCovarianceDf(RealEigenMatrix & K_df,
+                           const RealEigenMatrix & xd,
+                           const RealEigenMatrix & xp,
+                           unsigned int dim) const override;
+
+  /// Second-derivative covariance K_dd[i,j] = d^2K/(dx_{dim_i} dx'_{dim_j})
+  void computeCovarianceDD(RealEigenMatrix & K_dd,
+                           const RealEigenMatrix & xd,
+                           const RealEigenMatrix & xdp,
+                           unsigned int dim_i,
+                           unsigned int dim_j) const override;
+
+  /// dK_cross(x, xc)/dhp: derivative of the cross-covariance w.r.t. hyperparameters,
+  /// used for the penalty constraint gradient
+  void computedKdhyper_cross(RealEigenMatrix & dKdhp,
+                             const RealEigenMatrix & x,
+                             const RealEigenMatrix & xc,
+                             const std::string & hp_name,
+                             unsigned int ind) const override;
+
 protected:
   /// length factor (\ell) for the kernel, in vector form for multiple parameters
   const std::vector<Real> & _length_factor;
diff --git a/modules/stochastic_tools/include/trainers/GaussianProcessTrainer.h b/modules/stochastic_tools/include/trainers/GaussianProcessTrainer.h
index 371d578fd455..83068e3a65db 100644
--- a/modules/stochastic_tools/include/trainers/GaussianProcessTrainer.h
+++ b/modules/stochastic_tools/include/trainers/GaussianProcessTrainer.h
@@ -19,6 +19,7 @@
 #include "CovarianceInterface.h"
 #include "GaussianProcess.h"
+#include "GPLinkFunction.h"
 
 class GaussianProcessTrainer : public SurrogateTrainer, public CovarianceInterface
 {
diff --git a/modules/stochastic_tools/include/utils/GPLinkFunction.h b/modules/stochastic_tools/include/utils/GPLinkFunction.h
new file mode 100644
index 000000000000..acb9053558ca
--- /dev/null
+++ b/modules/stochastic_tools/include/utils/GPLinkFunction.h
@@ -0,0 +1,140 @@
+//* This file is part of the MOOSE framework
+//* https://mooseframework.inl.gov
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#pragma once
+
+#include "MooseTypes.h"
+#include <limits>
+#include <memory>
+#include <string>
+
+namespace StochasticTools
+{
+
+/**
+ * Enumeration of supported output link function types.
+ * Stored as an integer in model data for serialization.
+ */
+enum class GPLinkFunctionType : int
+{
+  Identity = 0, ///< No transformation (default)
+  Log = 1,      ///< g(y) = log(y - lb), enforces y > lb
+  Logit = 2     ///< g(y) = log((y-lb)/(ub-y)), enforces lb < y < ub
+};
+
+/**
+ * Utility class for GP output link functions. A link function g maps
+ * physical-space outputs y to the GP training space z = g(y) and back via g^{-1}.
+ * This allows GP regression to operate in a transformed space that naturally
+ * satisfies inequality constraints such as positivity or bounded outputs.
+ *
+ * The transformation changes the negative log-marginal likelihood (NLML) by the
+ * Jacobian correction: NLML_total = NLML_z - sum_i log|g'(y_i)|.
+ * Since this Jacobian does not depend on the kernel hyperparameters, it does not
+ * alter the hyperparameter gradient computation.
+ *
+ * Prediction uncertainty propagation uses the delta method:
+ * sigma_y ≈ |g^{-1}'(mu_z)| * sigma_z
+ */
+class GPLinkFunction
+{
+public:
+  virtual ~GPLinkFunction() = default;
+
+  /// Forward: physical space y -> GP training space z = g(y)
+  virtual Real forward(Real y) const = 0;
+
+  /// Inverse: GP training space z -> physical space y = g^{-1}(z)
+  virtual Real inverse(Real z) const = 0;
+
+  /// Derivative of the inverse: d/dz [g^{-1}(z)], used in delta-method uncertainty propagation
+  virtual Real inverseDeriv(Real z) const = 0;
+
+  /// log |g'(y)| = log |d/dy g(y)|, used as the Jacobian correction to NLML reporting
+  virtual Real logJacobian(Real y) const = 0;
+
+  /// Lowest physical-space value that is valid for this link function
+  virtual Real lowerBound() const = 0;
+
+  /// Highest physical-space value that is valid for this link function
+  virtual Real upperBound() const = 0;
+
+  /// Type tag for serialization
+  virtual GPLinkFunctionType type() const = 0;
+
+  /// Factory: create a link function from type tag and optional bounds
+  static std::unique_ptr<GPLinkFunction>
+  build(GPLinkFunctionType type, Real lb = 0.0, Real ub = 1.0);
+
+  /// Factory: create a link function from a string name ("identity", "log", "logit")
+  static std::unique_ptr<GPLinkFunction>
+  buildFromString(const std::string & type_name, Real lb = 0.0, Real ub = 1.0);
+};
+
+/// Identity link: g(y) = y. No constraint is enforced.
+class IdentityLinkFunction : public GPLinkFunction
+{
+public:
+  Real forward(Real y) const override { return y; }
+  Real inverse(Real z) const override { return z; }
+  Real inverseDeriv(Real /*z*/) const override { return 1.0; }
+  Real logJacobian(Real /*y*/) const override { return 0.0; }
+  Real lowerBound() const override { return -std::numeric_limits<Real>::infinity(); }
+  Real upperBound() const override { return std::numeric_limits<Real>::infinity(); }
+  GPLinkFunctionType type() const override { return GPLinkFunctionType::Identity; }
+};
+
+/**
+ * Log link: g(y) = log(y - lb), g^{-1}(z) = exp(z) + lb.
+ * Enforces y > lb (typically lb = 0 for positivity).
+ * Uncertainty propagation: sigma_y ≈ exp(mu_z) * sigma_z.
+ */
+class LogLinkFunction : public GPLinkFunction
+{
+public:
+  LogLinkFunction(Real lb = 0.0) : _lb(lb) {}
+
+  Real forward(Real y) const override;
+  Real inverse(Real z) const override;
+  Real inverseDeriv(Real z) const override;
+  Real logJacobian(Real y) const override;
+  Real lowerBound() const override { return _lb; }
+  Real upperBound() const override { return std::numeric_limits<Real>::infinity(); }
+  GPLinkFunctionType type() const override { return GPLinkFunctionType::Log; }
+  Real lb() const { return _lb; }
+
+private:
+  Real _lb;
+};
+
+/**
+ * Logit link: g(y) = log((y-lb)/(ub-y)), g^{-1}(z) = lb + (ub-lb)/(1+exp(-z)).
+ * Enforces lb < y < ub.
+ * Uncertainty propagation: sigma_y ≈ (ub-lb)*sigmoid(mu_z)*(1-sigmoid(mu_z)) * sigma_z.
+ */ +class LogitLinkFunction : public GPLinkFunction +{ +public: + LogitLinkFunction(Real lb = 0.0, Real ub = 1.0) : _lb(lb), _ub(ub) {} + + Real forward(Real y) const override; + Real inverse(Real z) const override; + Real inverseDeriv(Real z) const override; + Real logJacobian(Real y) const override; + Real lowerBound() const override { return _lb; } + Real upperBound() const override { return _ub; } + GPLinkFunctionType type() const override { return GPLinkFunctionType::Logit; } + Real lb() const { return _lb; } + Real ub() const { return _ub; } + +private: + Real _lb, _ub; +}; + +} // namespace StochasticTools diff --git a/modules/stochastic_tools/include/utils/GaussianProcess.h b/modules/stochastic_tools/include/utils/GaussianProcess.h index b59951e4e4d2..2db6f9adcb18 100644 --- a/modules/stochastic_tools/include/utils/GaussianProcess.h +++ b/modules/stochastic_tools/include/utils/GaussianProcess.h @@ -13,6 +13,7 @@ #include #include "CovarianceFunctionBase.h" +#include "GPLinkFunction.h" namespace StochasticTools { @@ -89,16 +90,91 @@ class GaussianProcess }; /** * Sets up the covariance matrix given data and optimization options. - * @param training_params The training parameter values (x values) for the - * covariance matrix. - * @param training_data The training data (y values) for the inversion of the - * covariance matrix. + * If virtual derivative observations have been configured via setDerivativeConstraints(), + * builds the full augmented covariance matrix incorporating those observations. + * @param training_params The training parameter values (x values, already standardized). + * @param training_data The training data (y values, link-transformed and standardized). * @param opts The optimizer options. */ void setupCovarianceMatrix(const RealEigenMatrix & training_params, const RealEigenMatrix & training_data, const GPOptimizerOptions & opts); + /** + * Configure the output link function. Call before setupCovarianceMatrix. + * @param type Link function type (Identity, Log, or Logit) + * @param lb Lower bound (required for Log and Logit) + * @param ub Upper bound (required for Logit) + */ + void setLinkFunction(GPLinkFunctionType type, Real lb = 0.0, Real ub = 1.0); + + /** + * Apply the forward link function to all entries of a data matrix (in-place). + * Validates that all values respect the link function's domain. + */ + void applyLinkTransform(RealEigenMatrix & data) const; + + /** + * Configure virtual derivative observations for monotonicity/derivative constraints. + * Call before setupCovarianceMatrix. The virtual_params must be in the same + * standardized space as the training parameters. + * @param virtual_params Locations of virtual observations (n_virt x n_dims), standardized + * @param deriv_dims Derivative dimension for each virtual observation + * @param target_value Target derivative value (0 = zero-derivative constraint, + * positive = increasing, negative = decreasing) + * @param noise_variance Noise variance added to the derivative covariance diagonal + */ + void setDerivativeConstraints(const RealEigenMatrix & virtual_params, + const std::vector & deriv_dims, + Real target_value, + Real noise_variance); + + /** + * Configure penalty constraints on the predicted mean at specified points. + * These are evaluated and added to the NLML loss and its gradient during Adam. + * The points and bounds must be in the standardized GP output space (post-link, + * post-standardize). Call after standardization is set up. 
+ * @param penalty_points_std Constraint point locations in standardized param space (n_c x d) + * @param lower_bounds_std Lower bounds in standardized output space (use -inf to disable) + * @param upper_bounds_std Upper bounds in standardized output space (use +inf to disable) + * @param weight Penalty weight lambda + */ + void setPenaltyConstraints(const RealEigenMatrix & penalty_points_std, + const std::vector & lower_bounds_std, + const std::vector & upper_bounds_std, + Real weight); + + /// Apply forward link function to a scalar + Real applyLink(Real y) const; + /// Apply inverse link function to a scalar + Real applyInvLink(Real z) const; + /// Derivative of the inverse link at z, for delta-method uncertainty propagation + Real invLinkDeriv(Real z) const; + /// log|g'(y)| for the Jacobian correction to NLML reporting + Real logLinkJacobian(Real y) const; + /// True if a non-identity link function is set + bool hasLinkFunction() const { return _link_type != GPLinkFunctionType::Identity; } + + /// @{ Accessors for link function parameters (for serialization) + GPLinkFunctionType & linkType() { return _link_type; } + Real & linkLb() { return _link_lb; } + Real & linkUb() { return _link_ub; } + const GPLinkFunctionType & linkType() const { return _link_type; } + const Real & linkLb() const { return _link_lb; } + const Real & linkUb() const { return _link_ub; } + /// @} + + /// @{ Accessors for virtual derivative observations (for serialization and prediction) + RealEigenMatrix & virtualParams() { return _virtual_params; } + std::vector & virtualDerivDims() { return _virtual_deriv_dims; } + Real & virtualNoise() { return _virtual_noise; } + Real & virtualTarget() { return _virtual_target; } + const RealEigenMatrix & virtualParams() const { return _virtual_params; } + const std::vector & virtualDerivDims() const { return _virtual_deriv_dims; } + const Real & virtualNoise() const { return _virtual_noise; } + bool hasDerivativeConstraints() const { return _virtual_params.rows() > 0; } + /// @} + /** * Sets up the Cholesky decomposition and inverse action of the covariance matrix. * @param input The vector/matrix which right multiples the inverse of the covariance matrix. 
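
For orientation, a sketch of the call sequence a trainer follows with this API when link, derivative, and penalty constraints are all in play. This uses only the declarations above; the helper function, its arguments, and the numeric values are hypothetical, and standardization of the data is elided:

```cpp
#include "GaussianProcess.h"
#include "GPLinkFunction.h"

// Sketch: configuring a StochasticTools::GaussianProcess with constraints.
// Assumes `gp` was initialized with a covariance function and that the
// parameter/data standardizers have already been set up by the caller.
void configureConstrainedGP(StochasticTools::GaussianProcess & gp,
                            const RealEigenMatrix & params_std, // standardized x values
                            RealEigenMatrix & data,             // physical y values (modified in place)
                            const RealEigenMatrix & virt_std,   // standardized virtual points
                            const std::vector<unsigned int> & deriv_dims,
                            const StochasticTools::GaussianProcess::GPOptimizerOptions & opts)
{
  using namespace StochasticTools;

  gp.setLinkFunction(GPLinkFunctionType::Log, /*lb=*/0.0); // enforce y > 0
  gp.applyLinkTransform(data); // z = log(y - lb); must precede standardizing the data

  // ... standardize `data` here, as the trainer does ...

  gp.setDerivativeConstraints(virt_std, deriv_dims, /*target_value=*/0.01, /*noise_variance=*/1e-4);
  gp.setupCovarianceMatrix(params_std, data, opts); // assembles the augmented K
}
```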
@@ -282,6 +358,34 @@ class GaussianProcess /// To return the GP length scales for active learning std::vector _length_scales; + + // ---- Link function (serialized) ---- + /// Link function type tag (stored as enum, serialized as int) + GPLinkFunctionType _link_type = GPLinkFunctionType::Identity; + /// Lower bound for Log/Logit link + Real _link_lb = 0.0; + /// Upper bound for Logit link + Real _link_ub = 1.0; + + // ---- Virtual derivative observations (serialized; needed for prediction) ---- + /// Locations of virtual observations in standardized parameter space (n_virt x n_dims) + RealEigenMatrix _virtual_params; + /// Derivative dimension for each virtual observation + std::vector _virtual_deriv_dims; + /// Noise variance added to the derivative-derivative diagonal blocks of K_aug + Real _virtual_noise = 1e-6; + /// Target derivative value for virtual observations + Real _virtual_target = 0.0; + + // ---- Penalty constraints (not serialized; only used during Adam tuning) ---- + /// Constraint point locations in standardized parameter space (n_c x n_dims) + RealEigenMatrix _penalty_points_std; + /// Lower bounds in standardized output space (-inf disables lower constraint) + std::vector _penalty_lower_std; + /// Upper bounds in standardized output space (+inf disables upper constraint) + std::vector _penalty_upper_std; + /// Penalty weight lambda + Real _penalty_weight = 0.0; }; } // StochasticTools namespac diff --git a/modules/stochastic_tools/src/covariances/CovarianceFunctionBase.C b/modules/stochastic_tools/src/covariances/CovarianceFunctionBase.C index c7582889ba0f..f690c8e0b764 100644 --- a/modules/stochastic_tools/src/covariances/CovarianceFunctionBase.C +++ b/modules/stochastic_tools/src/covariances/CovarianceFunctionBase.C @@ -48,6 +48,51 @@ CovarianceFunctionBase::computedKdhyper(RealEigenMatrix & /*dKdhp*/, "computedKdhyper() to compute gradient."); } +void +CovarianceFunctionBase::computeCovarianceFD(RealEigenMatrix & /*K_fd*/, + const RealEigenMatrix & /*x*/, + const RealEigenMatrix & /*xd*/, + unsigned int /*dim*/) const +{ + mooseError("Derivative covariance (function vs. derivative) not implemented for '", + type(), + "'. Override computeCovarianceFD() to use derivative observations."); +} + +void +CovarianceFunctionBase::computeCovarianceDf(RealEigenMatrix & /*K_df*/, + const RealEigenMatrix & /*xd*/, + const RealEigenMatrix & /*xp*/, + unsigned int /*dim*/) const +{ + mooseError("Derivative covariance (derivative vs. function) not implemented for '", + type(), + "'. Override computeCovarianceDf() to use derivative observations."); +} + +void +CovarianceFunctionBase::computeCovarianceDD(RealEigenMatrix & /*K_dd*/, + const RealEigenMatrix & /*xd*/, + const RealEigenMatrix & /*xdp*/, + unsigned int /*dim_i*/, + unsigned int /*dim_j*/) const +{ + mooseError("Second-derivative covariance not implemented for '", + type(), + "'. 
Override computeCovarianceDD() to use derivative observations with this kernel."); +} + +void +CovarianceFunctionBase::computedKdhyper_cross(RealEigenMatrix & dKdhp, + const RealEigenMatrix & x, + const RealEigenMatrix & xc, + const std::string & /*hp_name*/, + unsigned int /*ind*/) const +{ + // Default: zero matrix — penalty gradient is not added for this kernel type + dKdhp.setZero(x.rows(), xc.rows()); +} + const Real & CovarianceFunctionBase::addRealHyperParameter(const std::string & name, const Real value, diff --git a/modules/stochastic_tools/src/covariances/SquaredExponentialCovariance.C b/modules/stochastic_tools/src/covariances/SquaredExponentialCovariance.C index de043f58e788..bee8df18d02d 100644 --- a/modules/stochastic_tools/src/covariances/SquaredExponentialCovariance.C +++ b/modules/stochastic_tools/src/covariances/SquaredExponentialCovariance.C @@ -139,3 +139,149 @@ SquaredExponentialCovariance::computedKdlf(RealEigenMatrix & K, } } } + +void +SquaredExponentialCovariance::computeCovarianceFD(RealEigenMatrix & K_fd, + const RealEigenMatrix & x, + const RealEigenMatrix & xd, + unsigned int dim) const +{ + // K_fd[i,j] = Cov[f(x_i), df(xd_j)/dx'_{dim}] + // = dK(x_i, xd_j)/dx'_{dim} + // = K(x_i,xd_j) * (x_{i,dim} - xd_{j,dim}) / ell_dim^2 + const unsigned int n_x = x.rows(); + const unsigned int n_xd = xd.rows(); + const unsigned int n_params = x.cols(); + mooseAssert(dim < n_params, "dim out of range for computeCovarianceFD"); + mooseAssert((unsigned)xd.cols() == n_params, "x and xd must have same number of columns"); + + K_fd.resize(n_x, n_xd); + for (unsigned int ii = 0; ii < n_x; ++ii) + { + for (unsigned int jj = 0; jj < n_xd; ++jj) + { + Real r_sq = 0.0; + for (unsigned int kk = 0; kk < n_params; ++kk) + r_sq += std::pow((x(ii, kk) - xd(jj, kk)) / _length_factor[kk], 2); + const Real k_val = _sigma_f_squared * std::exp(-r_sq / 2.0); + K_fd(ii, jj) = + k_val * (x(ii, dim) - xd(jj, dim)) / (_length_factor[dim] * _length_factor[dim]); + } + } +} + +void +SquaredExponentialCovariance::computeCovarianceDf(RealEigenMatrix & K_df, + const RealEigenMatrix & xd, + const RealEigenMatrix & xp, + unsigned int dim) const +{ + // K_df[i,j] = Cov[df(xd_i)/dx_{dim}, f(xp_j)] + // = dK(xd_i, xp_j)/dx_{d,dim} + // = K(xd_i,xp_j) * (xp_{j,dim} - xd_{i,dim}) / ell_dim^2 + // Note: K_df = K_fd^T (transpose of computeCovarianceFD with swapped args) + RealEigenMatrix K_fd; + computeCovarianceFD(K_fd, xp, xd, dim); + K_df = K_fd.transpose(); +} + +void +SquaredExponentialCovariance::computeCovarianceDD(RealEigenMatrix & K_dd, + const RealEigenMatrix & xd, + const RealEigenMatrix & xdp, + unsigned int dim_i, + unsigned int dim_j) const +{ + // K_dd[i,j] = d^2 K(xd_i, xdp_j) / (dx_{dim_i} dx'_{dim_j}) + // + // For SE kernel: + // if dim_i == dim_j (call the dimension k): + // = K(xd_i,xdp_j) * [1/ell_k^2 - (xd_{i,k}-xdp_{j,k})^2/ell_k^4] + // if dim_i != dim_j: + // = K(xd_i,xdp_j) * [-(xd_{i,ki}-xdp_{j,ki})*(xd_{i,kj}-xdp_{j,kj})/(ell_ki^2*ell_kj^2)] + const unsigned int n_i = xd.rows(); + const unsigned int n_j = xdp.rows(); + const unsigned int n_params = xd.cols(); + mooseAssert(dim_i < n_params, "dim_i out of range for computeCovarianceDD"); + mooseAssert(dim_j < n_params, "dim_j out of range for computeCovarianceDD"); + mooseAssert((unsigned)xdp.cols() == n_params, "xd and xdp must have same number of columns"); + + K_dd.resize(n_i, n_j); + const Real ell_i_sq = _length_factor[dim_i] * _length_factor[dim_i]; + const Real ell_j_sq = _length_factor[dim_j] * _length_factor[dim_j]; + + for 
(unsigned int ii = 0; ii < n_i; ++ii) + { + for (unsigned int jj = 0; jj < n_j; ++jj) + { + Real r_sq = 0.0; + for (unsigned int kk = 0; kk < n_params; ++kk) + r_sq += std::pow((xd(ii, kk) - xdp(jj, kk)) / _length_factor[kk], 2); + const Real k_val = _sigma_f_squared * std::exp(-r_sq / 2.0); + + const Real d_i = xd(ii, dim_i) - xdp(jj, dim_i); + const Real d_j = xd(ii, dim_j) - xdp(jj, dim_j); + + if (dim_i == dim_j) + K_dd(ii, jj) = k_val * (1.0 / ell_i_sq - d_i * d_j / (ell_i_sq * ell_j_sq)); + else + K_dd(ii, jj) = k_val * (-d_i * d_j / (ell_i_sq * ell_j_sq)); + } + } +} + +void +SquaredExponentialCovariance::computedKdhyper_cross(RealEigenMatrix & dKdhp, + const RealEigenMatrix & x, + const RealEigenMatrix & xc, + const std::string & hyper_param_name, + unsigned int ind) const +{ + // Derivative of K(x_i, xc) w.r.t. each hyperparameter. + // Used for the approximate penalty constraint gradient. + if (name().length() + 1 > hyper_param_name.length()) + { + dKdhp.setZero(x.rows(), xc.rows()); + return; + } + + const std::string name_without_prefix = hyper_param_name.substr(name().length() + 1); + + if (name_without_prefix == "noise_variance") + { + // Cross-covariance does not depend on noise variance + dKdhp.setZero(x.rows(), xc.rows()); + return; + } + + if (name_without_prefix == "signal_variance") + { + // d(K(x_i,xc))/d(sigma_f^2) = exp(-r^2/2) = K(x_i,xc)/sigma_f^2 + SquaredExponentialFunction(dKdhp, x, xc, _length_factor, 1.0, 0.0, false); + return; + } + + if (name_without_prefix == "length_factor") + { + // d(K(x_i, xc)) / d(ell_ind) = K(x_i, xc) * (x_{i,ind} - xc_{ind})^2 / ell_ind^3 + const unsigned int n_x = x.rows(); + const unsigned int n_xc = xc.rows(); + const unsigned int n_params = x.cols(); + dKdhp.resize(n_x, n_xc); + for (unsigned int ii = 0; ii < n_x; ++ii) + { + for (unsigned int jj = 0; jj < n_xc; ++jj) + { + Real r_sq = 0.0; + for (unsigned int kk = 0; kk < n_params; ++kk) + r_sq += std::pow((x(ii, kk) - xc(jj, kk)) / _length_factor[kk], 2); + const Real k_val = _sigma_f_squared * std::exp(-r_sq / 2.0); + dKdhp(ii, jj) = + k_val * std::pow(x(ii, ind) - xc(jj, ind), 2) / std::pow(_length_factor[ind], 3); + } + } + return; + } + + dKdhp.setZero(x.rows(), xc.rows()); +} diff --git a/modules/stochastic_tools/src/surrogates/GaussianProcessSurrogate.C b/modules/stochastic_tools/src/surrogates/GaussianProcessSurrogate.C index d9343d6f4ea0..1cda9cc0b2e5 100644 --- a/modules/stochastic_tools/src/surrogates/GaussianProcessSurrogate.C +++ b/modules/stochastic_tools/src/surrogates/GaussianProcessSurrogate.C @@ -75,6 +75,9 @@ GaussianProcessSurrogate::evaluate(const std::vector & x, "Number of parameters provided for evaluation does not match number of parameters " "used for training."); const unsigned int n_outputs = _gp.getCovarFunction().numOutputs(); + const unsigned int n_train = _training_params.rows(); + const unsigned int n_virt = _gp.virtualParams().rows(); + const unsigned int n_total = n_train + n_virt; y = std::vector(n_outputs, 0.0); std = std::vector(n_outputs, 0.0); @@ -85,28 +88,59 @@ GaussianProcessSurrogate::evaluate(const std::vector & x, _gp.getParamStandardizer().getStandardized(test_points); - RealEigenMatrix K_train_test(_training_params.rows() * n_outputs, n_outputs); + // Build K_train_test: (n_total x n_outputs) — extended with derivative rows if needed + RealEigenMatrix K_train_test(n_total * n_outputs, n_outputs); - _gp.getCovarFunction().computeCovarianceMatrix( - K_train_test, _training_params, test_points, false); + // Standard rows: 
Cov[f(X_train), f(x*)] + RealEigenMatrix K_ff_test(n_train * n_outputs, n_outputs); + _gp.getCovarFunction().computeCovarianceMatrix(K_ff_test, _training_params, test_points, false); + K_train_test.topRows(n_train * n_outputs) = K_ff_test; + + // Derivative rows: Cov[df(x_d^j)/dx_{k_j}, f(x*)] — only for single-output GP + if (n_virt > 0) + { + for (unsigned int j = 0; j < n_virt; ++j) + { + RealEigenMatrix xd_j = _gp.virtualParams().row(j); + RealEigenMatrix K_df_j(1, 1); + _gp.getCovarFunction().computeCovarianceDf( + K_df_j, xd_j, test_points, _gp.virtualDerivDims()[j]); + K_train_test(n_train + j, 0) = K_df_j(0, 0); + } + } + + // Self-covariance at test point RealEigenMatrix K_test(n_outputs, n_outputs); _gp.getCovarFunction().computeCovarianceMatrix(K_test, test_points, test_points, true); - // Compute the predicted mean value (centered) + // Predicted mean in standardized z-space RealEigenMatrix pred_value = (K_train_test.transpose() * _gp.getKResultsSolve()).transpose(); - // De-center/scale the value and store for return + + // De-standardize to z-space (post-link, pre-inverse-link) _gp.getDataStandardizer().getDestandardized(pred_value); + // Posterior variance: K_** - k_*^T K_aug^{-1} k_* RealEigenMatrix pred_var = K_test - (K_train_test.transpose() * _gp.getKCholeskyDecomp().solve(K_train_test)); - // Vairance computed, take sqrt for standard deviation, scale up by training data std and store RealEigenMatrix std_dev_mat = pred_var.array().sqrt(); _gp.getDataStandardizer().getDescaled(std_dev_mat); for (const auto output_i : make_range(n_outputs)) { - y[output_i] = pred_value(0, output_i); - std[output_i] = std_dev_mat(output_i, output_i); + const Real mu_z = pred_value(0, output_i); + const Real sigma_z = std_dev_mat(output_i, output_i); + + if (_gp.hasLinkFunction()) + { + // Apply inverse link to mean; propagate uncertainty via delta method + y[output_i] = _gp.applyInvLink(mu_z); + std[output_i] = std::abs(_gp.invLinkDeriv(mu_z)) * sigma_z; + } + else + { + y[output_i] = mu_z; + std[output_i] = sigma_z; + } } } diff --git a/modules/stochastic_tools/src/trainers/GaussianProcessTrainer.C b/modules/stochastic_tools/src/trainers/GaussianProcessTrainer.C index 7835e7f3d8c8..c20682ac27cd 100644 --- a/modules/stochastic_tools/src/trainers/GaussianProcessTrainer.C +++ b/modules/stochastic_tools/src/trainers/GaussianProcessTrainer.C @@ -18,6 +18,7 @@ #include "libmesh/petsc_matrix.h" #include +#include registerMooseObject("StochasticToolsApp", GaussianProcessTrainer); @@ -33,7 +34,6 @@ GaussianProcessTrainer::validParams() "standardize_params", true, "Standardize (center and scale) training parameters (x values)"); params.addParam( "standardize_data", true, "Standardize (center and scale) training data (y values)"); - // Already preparing to use Adam here params.addParam("num_iters", 1000, "Tolerance value for Adam optimization"); params.addParam("batch_size", 0, "The batch size for Adam optimization"); params.addParam("learning_rate", 0.001, "The learning rate for Adam optimization"); @@ -45,6 +45,67 @@ GaussianProcessTrainer::validParams() "Select hyperparameters to be tuned"); params.addParam>("tuning_min", "Minimum allowable tuning value"); params.addParam>("tuning_max", "Maximum allowable tuning value"); + + // ---- Link function parameters ---- + MooseEnum link_function_options("identity log logit", "identity"); + params.addParam( + "link_function", + link_function_options, + "Output link function enforcing inequality constraints. 
'identity' (default): no constraint; " + "'log': enforces y > link_lower_bound (e.g. positivity); " + "'logit': enforces link_lower_bound < y < link_upper_bound."); + params.addParam( + "link_lower_bound", 0.0, "Lower bound for the 'log' or 'logit' link function."); + params.addParam("link_upper_bound", 1.0, "Upper bound for the 'logit' link function."); + + // ---- Derivative observation (monotonicity constraint) parameters ---- + params.addParam>( + "monotone_constraint_points", + "Locations of virtual derivative observations as a flat row-major vector " + "(n_points x n_dims). Used to enforce monotonicity or zero-derivative constraints. " + "Requires a covariance kernel that supports derivative covariances (e.g. " + "SquaredExponentialCovariance)."); + params.addParam>( + "monotone_constraint_dims", + "Input dimension index for each virtual derivative observation. Must have the same " + "number of entries as the number of points in monotone_constraint_points."); + MooseEnum constraint_type_options("zero increasing decreasing", "zero"); + params.addParam( + "monotone_constraint_type", + constraint_type_options, + "Target derivative type: 'zero' (df/dx_k ≈ 0), 'increasing' (df/dx_k > 0), " + "or 'decreasing' (df/dx_k < 0)."); + params.addParam("derivative_target_value", + 0.0, + "Magnitude of the target derivative for 'increasing' or 'decreasing' " + "constraints, in the standardized output space. Default 0 means the " + "constraint softly pins the derivative to zero."); + params.addParam( + "derivative_noise_variance", + 1e-4, + "Noise variance (sigma_d^2) added to the derivative-derivative diagonal blocks of the " + "augmented covariance matrix. Must be > 0. Smaller values enforce constraints more strictly " + "but may cause numerical issues."); + + // ---- Penalty constraint parameters ---- + params.addParam>( + "penalty_constraint_points", + "Locations of penalty constraint evaluation points as a flat row-major vector " + "(n_points x n_dims). The penalty is added to the NLML loss and gradient during " + "hyperparameter tuning when predicted means violate the specified bounds."); + params.addParam>( + "penalty_constraint_lower_bounds", + "Lower bounds on the GP output (in physical space, before link function) at each " + "penalty constraint point. Use -1e30 or similar to disable a lower bound."); + params.addParam>( + "penalty_constraint_upper_bounds", + "Upper bounds on the GP output (in physical space, before link function) at each " + "penalty constraint point. 
Use 1e30 or similar to disable an upper bound."); + params.addParam( + "penalty_weight", + 1.0, + "Weight lambda for the penalty constraint term: lambda * sum_c max(0, violation_c)^2."); + return params; } @@ -136,20 +197,126 @@ GaussianProcessTrainer::postTrain() _training_data(ii, jj) = _data_buffer[ii][jj]; } + // ---- Link function ---- + const std::string link_str = getParam("link_function"); + StochasticTools::GPLinkFunctionType link_type = StochasticTools::GPLinkFunctionType::Identity; + if (link_str == "log") + link_type = StochasticTools::GPLinkFunctionType::Log; + else if (link_str == "logit") + link_type = StochasticTools::GPLinkFunctionType::Logit; + const Real link_lb = getParam("link_lower_bound"); + const Real link_ub = getParam("link_upper_bound"); + if (link_str == "logit" && link_ub <= link_lb) + paramError("link_upper_bound", + "link_upper_bound must be strictly greater than link_lower_bound for logit link."); + _gp.setLinkFunction(link_type, link_lb, link_ub); + + // Apply link function to training data BEFORE standardization + _gp.applyLinkTransform(_training_data); + // Standardize (center and scale) training params if (_standardize_params) _gp.standardizeParameters(_training_params); - // if not standardizing data set mean=0, std=1 for use in surrogate else _gp.paramStandardizer().set(0, 1, _n_dims); - // Standardize (center and scale) training data + // Standardize (center and scale) training data (in link-transformed z-space) if (_standardize_data) _gp.standardizeData(_training_data); - // if not standardizing data set mean=0, std=1 for use in surrogate else _gp.dataStandardizer().set(0, 1, _n_outputs); + // ---- Derivative constraint (virtual observations) ---- + if (isParamValid("monotone_constraint_points")) + { + const auto & raw_pts = getParam>("monotone_constraint_points"); + if (!isParamValid("monotone_constraint_dims")) + paramError("monotone_constraint_dims", + "monotone_constraint_dims is required when monotone_constraint_points is set."); + const auto & dims = getParam>("monotone_constraint_dims"); + const unsigned int n_virt = dims.size(); + if (raw_pts.size() != n_virt * (unsigned)_n_dims) + paramError("monotone_constraint_points", + "monotone_constraint_points must contain n_points * n_dims = ", + n_virt, + " * ", + _n_dims, + " = ", + n_virt * _n_dims, + " values. Got ", + raw_pts.size(), + "."); + + // Fill virtual_params in physical space, then standardize + RealEigenMatrix virtual_params(n_virt, _n_dims); + for (unsigned int i = 0; i < n_virt; ++i) + for (unsigned int j = 0; j < (unsigned)_n_dims; ++j) + virtual_params(i, j) = raw_pts[i * _n_dims + j]; + + _gp.getParamStandardizer().getStandardized(virtual_params); + + const std::string ctype = getParam("monotone_constraint_type"); + const Real magnitude = getParam("derivative_target_value"); + Real target_value = 0.0; + if (ctype == "increasing") + target_value = magnitude; + else if (ctype == "decreasing") + target_value = -magnitude; + + _gp.setDerivativeConstraints( + virtual_params, dims, target_value, getParam("derivative_noise_variance")); + } + + // ---- Penalty constraints ---- + if (isParamValid("penalty_constraint_points")) + { + const auto & raw_pts = getParam>("penalty_constraint_points"); + const auto & lower_raw = isParamValid("penalty_constraint_lower_bounds") + ? getParam>("penalty_constraint_lower_bounds") + : std::vector{}; + const auto & upper_raw = isParamValid("penalty_constraint_upper_bounds") + ? 
getParam>("penalty_constraint_upper_bounds") + : std::vector{}; + + const unsigned int n_penalty = lower_raw.empty() ? upper_raw.size() : lower_raw.size(); + if (!lower_raw.empty() && !upper_raw.empty() && lower_raw.size() != upper_raw.size()) + paramError("penalty_constraint_lower_bounds", + "penalty_constraint_lower_bounds and penalty_constraint_upper_bounds must have " + "the same number of entries."); + if (raw_pts.size() != n_penalty * (unsigned)_n_dims) + paramError("penalty_constraint_points", + "penalty_constraint_points must have n_penalty * n_dims entries."); + + RealEigenMatrix penalty_pts(n_penalty, _n_dims); + for (unsigned int i = 0; i < n_penalty; ++i) + for (unsigned int j = 0; j < (unsigned)_n_dims; ++j) + penalty_pts(i, j) = raw_pts[i * _n_dims + j]; + + _gp.getParamStandardizer().getStandardized(penalty_pts); + + // Transform bounds through link function then standardize into z-space + const Real neg_inf = -std::numeric_limits::max(); + const Real pos_inf = std::numeric_limits::max(); + std::vector lower_std(n_penalty, neg_inf), upper_std(n_penalty, pos_inf); + const Real z_mean = _gp.getDataStandardizer().getMean()[0]; + const Real z_std_val = _gp.getDataStandardizer().getStdDev()[0]; + for (unsigned int c = 0; c < n_penalty; ++c) + { + if (!lower_raw.empty() && lower_raw[c] > neg_inf) + { + const Real z_lb = _gp.applyLink(lower_raw[c]); + lower_std[c] = (z_lb - z_mean) / z_std_val; + } + if (!upper_raw.empty() && upper_raw[c] < pos_inf) + { + const Real z_ub = _gp.applyLink(upper_raw[c]); + upper_std[c] = (z_ub - z_mean) / z_std_val; + } + } + + _gp.setPenaltyConstraints(penalty_pts, lower_std, upper_std, getParam("penalty_weight")); + } + // Setup the covariance _gp.setupCovarianceMatrix(_training_params, _training_data, _optimization_opts); } diff --git a/modules/stochastic_tools/src/utils/GPLinkFunction.C b/modules/stochastic_tools/src/utils/GPLinkFunction.C new file mode 100644 index 000000000000..e322df2a23ba --- /dev/null +++ b/modules/stochastic_tools/src/utils/GPLinkFunction.C @@ -0,0 +1,107 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#include "GPLinkFunction.h" +#include "MooseError.h" + +#include + +namespace StochasticTools +{ + +// LogLinkFunction ----------------------------------------------------------------- + +Real +LogLinkFunction::forward(Real y) const +{ + return std::log(y - _lb); +} + +Real +LogLinkFunction::inverse(Real z) const +{ + return std::exp(z) + _lb; +} + +Real +LogLinkFunction::inverseDeriv(Real z) const +{ + return std::exp(z); +} + +Real +LogLinkFunction::logJacobian(Real y) const +{ + // log|g'(y)| = log|1/(y-lb)| = -log(y-lb) + return -std::log(y - _lb); +} + +// LogitLinkFunction --------------------------------------------------------------- + +Real +LogitLinkFunction::forward(Real y) const +{ + const Real p = (y - _lb) / (_ub - _lb); + return std::log(p / (1.0 - p)); +} + +Real +LogitLinkFunction::inverse(Real z) const +{ + return _lb + (_ub - _lb) / (1.0 + std::exp(-z)); +} + +Real +LogitLinkFunction::inverseDeriv(Real z) const +{ + const Real sig = 1.0 / (1.0 + std::exp(-z)); + return (_ub - _lb) * sig * (1.0 - sig); +} + +Real +LogitLinkFunction::logJacobian(Real y) const +{ + // g'(y) = (ub-lb) / ((y-lb)*(ub-y)) + // log|g'(y)| = log(ub-lb) - 
log(y-lb) - log(ub-y) + return std::log(_ub - _lb) - std::log(y - _lb) - std::log(_ub - y); +} + +// Factory ------------------------------------------------------------------------- + +std::unique_ptr +GPLinkFunction::build(GPLinkFunctionType type, Real lb, Real ub) +{ + switch (type) + { + case GPLinkFunctionType::Identity: + return std::make_unique(); + case GPLinkFunctionType::Log: + return std::make_unique(lb); + case GPLinkFunctionType::Logit: + return std::make_unique(lb, ub); + default: + mooseError("Unknown GPLinkFunctionType"); + } +} + +std::unique_ptr +GPLinkFunction::buildFromString(const std::string & type_name, Real lb, Real ub) +{ + if (type_name == "identity") + return build(GPLinkFunctionType::Identity, lb, ub); + if (type_name == "log") + return build(GPLinkFunctionType::Log, lb, ub); + if (type_name == "logit") + return build(GPLinkFunctionType::Logit, lb, ub); + mooseError("Unknown link function type '", + type_name, + "'. Supported options: 'identity', 'log', 'logit'."); +} + +} // namespace StochasticTools diff --git a/modules/stochastic_tools/src/utils/GaussianProcess.C b/modules/stochastic_tools/src/utils/GaussianProcess.C index 53eb4c1354e1..c940023161c9 100644 --- a/modules/stochastic_tools/src/utils/GaussianProcess.C +++ b/modules/stochastic_tools/src/utils/GaussianProcess.C @@ -17,6 +17,7 @@ #include "libmesh/petsc_matrix.h" #include +#include #include "MooseRandom.h" #include "Shuffle.h" @@ -43,7 +44,114 @@ GaussianProcess::GPOptimizerOptions::GPOptimizerOptions(const bool show_every_nt { } -GaussianProcess::GaussianProcess() {} +GaussianProcess::GaussianProcess() : _virtual_params(0, 0), _penalty_points_std(0, 0) {} + +// ---- Link function ------------------------------------------------------- + +void +GaussianProcess::setLinkFunction(GPLinkFunctionType type, Real lb, Real ub) +{ + _link_type = type; + _link_lb = lb; + _link_ub = ub; +} + +void +GaussianProcess::applyLinkTransform(RealEigenMatrix & data) const +{ + if (_link_type == GPLinkFunctionType::Identity) + return; + + auto link = GPLinkFunction::build(_link_type, _link_lb, _link_ub); + for (int ii = 0; ii < data.rows(); ++ii) + for (int jj = 0; jj < data.cols(); ++jj) + { + const Real y = data(ii, jj); + if (y <= link->lowerBound()) + mooseError("Training data value ", + y, + " is at or below the link function lower bound ", + link->lowerBound(), + ". Adjust the link lower bound or check training data."); + if (y >= link->upperBound()) + mooseError("Training data value ", + y, + " is at or above the link function upper bound ", + link->upperBound(), + ". 
Adjust the link upper bound or check training data."); + data(ii, jj) = link->forward(y); + } +} + +Real +GaussianProcess::applyLink(Real y) const +{ + if (_link_type == GPLinkFunctionType::Identity) + return y; + return GPLinkFunction::build(_link_type, _link_lb, _link_ub)->forward(y); +} + +Real +GaussianProcess::applyInvLink(Real z) const +{ + if (_link_type == GPLinkFunctionType::Identity) + return z; + return GPLinkFunction::build(_link_type, _link_lb, _link_ub)->inverse(z); +} + +Real +GaussianProcess::invLinkDeriv(Real z) const +{ + if (_link_type == GPLinkFunctionType::Identity) + return 1.0; + return GPLinkFunction::build(_link_type, _link_lb, _link_ub)->inverseDeriv(z); +} + +Real +GaussianProcess::logLinkJacobian(Real y) const +{ + if (_link_type == GPLinkFunctionType::Identity) + return 0.0; + return GPLinkFunction::build(_link_type, _link_lb, _link_ub)->logJacobian(y); +} + +// ---- Constraint setters -------------------------------------------------- + +void +GaussianProcess::setDerivativeConstraints(const RealEigenMatrix & virtual_params, + const std::vector & deriv_dims, + Real target_value, + Real noise_variance) +{ + if ((unsigned)virtual_params.rows() != deriv_dims.size()) + mooseError("virtual_params rows (", + virtual_params.rows(), + ") must match deriv_dims size (", + deriv_dims.size(), + ")."); + if (noise_variance <= 0.0) + mooseError("derivative_noise_variance must be > 0 to keep the augmented covariance matrix " + "positive-definite."); + _virtual_params = virtual_params; + _virtual_deriv_dims = deriv_dims; + _virtual_target = target_value; + _virtual_noise = noise_variance; +} + +void +GaussianProcess::setPenaltyConstraints(const RealEigenMatrix & penalty_points_std, + const std::vector & lower_bounds_std, + const std::vector & upper_bounds_std, + Real weight) +{ + if ((unsigned)penalty_points_std.rows() != lower_bounds_std.size() || + (unsigned)penalty_points_std.rows() != upper_bounds_std.size()) + mooseError("penalty_constraint_points rows must match the number of lower/upper bounds."); + _penalty_points_std = penalty_points_std; + _penalty_lower_std = lower_bounds_std; + _penalty_upper_std = upper_bounds_std; + _penalty_weight = weight; +} void GaussianProcess::initialize(CovarianceFunctionBase * covariance_function, @@ -71,6 +179,10 @@ GaussianProcess::setupCovarianceMatrix(const RealEigenMatrix & training_params, const RealEigenMatrix & training_data, const GPOptimizerOptions & opts) { + if (_virtual_params.rows() > 0 && _num_outputs > 1) + mooseError("Derivative observations (monotonicity constraints) are not supported for " + "multi-output GPs. Use a single-output covariance function."); + const bool batch_decision = opts.batch_size > 0 && (opts.batch_size <= training_params.rows()); _batch_size = batch_decision ? 
opts.batch_size : training_params.rows(); _K.resize(_num_outputs * _batch_size, _num_outputs * _batch_size); @@ -78,15 +190,64 @@ GaussianProcess::setupCovarianceMatrix(const RealEigenMatrix & training_params, if (_tuning_data.size()) tuneHyperParamsAdam(training_params, training_data, opts); - _K.resize(training_params.rows() * training_data.cols(), - training_params.rows() * training_data.cols()); - _covariance_function->computeCovarianceMatrix(_K, training_params, training_params, true); + const unsigned int n_train = training_params.rows(); + const unsigned int n_outputs = training_data.cols(); + const unsigned int n_virt = _virtual_params.rows(); - RealEigenMatrix flattened_data = - training_data.reshaped(training_params.rows() * training_data.cols(), 1); + if (n_virt == 0) + { + _K.resize(n_train * n_outputs, n_train * n_outputs); + _covariance_function->computeCovarianceMatrix(_K, training_params, training_params, true); + RealEigenMatrix flattened_data = training_data.reshaped(n_train * n_outputs, 1); + setupStoredMatrices(flattened_data); + } + else + { + // Augmented covariance matrix incorporating virtual derivative observations. + // K_aug = [ K_ff + sigma_n^2*I K_fd ] + // [ K_df K_dd + sigma_d^2*I ] + const unsigned int n_total = n_train + n_virt; + _K.resize(n_total, n_total); + _K.setZero(); + + // Top-left: K(X_train, X_train) + sigma_n^2*I (noise added by is_self_covariance=true) + RealEigenMatrix K_ff(n_train, n_train); + _covariance_function->computeCovarianceMatrix(K_ff, training_params, training_params, true); + _K.topLeftCorner(n_train, n_train) = K_ff; + + // Off-diagonal: K_fd and its transpose K_df + for (unsigned int j = 0; j < n_virt; ++j) + { + RealEigenMatrix xd_j = _virtual_params.row(j); + RealEigenMatrix K_fd_j(n_train, 1); + _covariance_function->computeCovarianceFD( + K_fd_j, training_params, xd_j, _virtual_deriv_dims[j]); + _K.block(0, n_train + j, n_train, 1) = K_fd_j; + _K.block(n_train + j, 0, 1, n_train) = K_fd_j.transpose(); + } - // Compute the Cholesky decomposition and inverse action of the covariance matrix - setupStoredMatrices(flattened_data); + // Bottom-right: K_dd + sigma_d^2*I + for (unsigned int i = 0; i < n_virt; ++i) + { + RealEigenMatrix xd_i = _virtual_params.row(i); + for (unsigned int j = 0; j < n_virt; ++j) + { + RealEigenMatrix xd_j = _virtual_params.row(j); + RealEigenMatrix K_dd_ij(1, 1); + _covariance_function->computeCovarianceDD( + K_dd_ij, xd_i, xd_j, _virtual_deriv_dims[i], _virtual_deriv_dims[j]); + _K(n_train + i, n_train + j) = K_dd_ij(0, 0); + } + _K(n_train + i, n_train + i) += _virtual_noise; + } + + // Augmented RHS: [y_train; v_virtual_target * ones] + RealEigenMatrix augmented_data(n_total, 1); + augmented_data.topRows(n_train) = training_data.reshaped(n_train, 1); + augmented_data.bottomRows(n_virt).setConstant(_virtual_target); + + setupStoredMatrices(augmented_data); + } _covariance_function->buildHyperParamMap(_hyperparam_map, _hyperparam_vec_map); } @@ -255,6 +416,27 @@ GaussianProcess::getLoss(RealEigenMatrix & inputs, RealEigenMatrix & outputs) log_likelihood += -std::log(_K.determinant()); log_likelihood -= _batch_size * std::log(2 * M_PI); log_likelihood = -log_likelihood / 2; + + // Add penalty constraint terms (evaluated at constraint points using current batch model) + const unsigned int n_penalty = _penalty_points_std.rows(); + if (n_penalty > 0 && _penalty_weight > 0.0) + { + for (unsigned int c = 0; c < n_penalty; ++c) + { + RealEigenMatrix xc = _penalty_points_std.row(c); + RealEigenMatrix 
+      RealEigenMatrix K_star(_batch_size, 1);
+      _covariance_function->computeCovarianceMatrix(K_star, inputs, xc, false);
+      const Real mu_c = (K_star.transpose() * _K_results_solve)(0, 0);
+
+      const Real lb = _penalty_lower_std[c];
+      const Real ub = _penalty_upper_std[c];
+      if (lb > -std::numeric_limits<Real>::max() && mu_c < lb)
+        log_likelihood += _penalty_weight * std::pow(lb - mu_c, 2);
+      if (ub < std::numeric_limits<Real>::max() && mu_c > ub)
+        log_likelihood += _penalty_weight * std::pow(mu_c - ub, 2);
+    }
+  }
+
   return log_likelihood;
 }
 
@@ -263,8 +445,9 @@ GaussianProcess::getGradient(RealEigenMatrix & inputs) const
 {
   RealEigenMatrix dKdhp(_batch_size, _batch_size);
   RealEigenMatrix alpha = _K_results_solve * _K_results_solve.transpose();
-  std::vector<Real> grad_vec;
-  grad_vec.resize(_num_tunable);
+  std::vector<Real> grad_vec(_num_tunable, 0.0);
+
+  // NLML gradient (existing computation)
   for (auto iter = _tuning_data.begin(); iter != _tuning_data.end(); ++iter)
   {
     std::string hyper_param_name = iter->first;
@@ -278,6 +461,57 @@ GaussianProcess::getGradient(RealEigenMatrix & inputs) const
       grad_vec[global_index] = -tmp.trace() / 2.0;
     }
   }
+
+  // Penalty gradient contribution (approximate: only the dK_star/dtheta term)
+  //   d(penalty)/d(theta_i) ≈ sum_c [violation_c * (-d(mu_c)/d(theta_i))]
+  // where d(mu_c)/d(theta_i) ≈ (dK_star/d(theta_i))^T * alpha_solve
+  const unsigned int n_penalty = _penalty_points_std.rows();
+  if (n_penalty > 0 && _penalty_weight > 0.0)
+  {
+    RealEigenMatrix dK_star_dhp(_batch_size, 1);
+    for (unsigned int c = 0; c < n_penalty; ++c)
+    {
+      RealEigenMatrix xc = _penalty_points_std.row(c);
+      RealEigenMatrix K_star(_batch_size, 1);
+      _covariance_function->computeCovarianceMatrix(K_star, inputs, xc, false);
+      const Real mu_c = (K_star.transpose() * _K_results_solve)(0, 0);
+
+      const Real lb = _penalty_lower_std[c];
+      const Real ub = _penalty_upper_std[c];
+
+      Real slack = 0.0;
+      int sign = 0;
+      if (lb > -std::numeric_limits<Real>::max() && mu_c < lb)
+      {
+        slack = lb - mu_c;
+        sign = -1; // penalty increases when mu_c decreases → gradient pushes mu_c up
+      }
+      else if (ub < std::numeric_limits<Real>::max() && mu_c > ub)
+      {
+        slack = mu_c - ub;
+        sign = 1; // penalty increases when mu_c increases → gradient pushes mu_c down
+      }
+
+      if (sign == 0)
+        continue;
+
+      for (auto iter = _tuning_data.begin(); iter != _tuning_data.end(); ++iter)
+      {
+        const auto & hp_name = iter->first;
+        const auto first_index = std::get<0>(iter->second);
+        const auto num_entries = std::get<1>(iter->second);
+        for (unsigned int ii = 0; ii < num_entries; ++ii)
+        {
+          const auto global_index = first_index + ii;
+          _covariance_function->computedKdhyper_cross(dK_star_dhp, inputs, xc, hp_name, ii);
+          const Real d_mu_c = (dK_star_dhp.transpose() * _K_results_solve)(0, 0);
+          // d(penalty)/d(theta_i) = 2*lambda*slack * sign * d(mu_c)/d(theta_i)
+          grad_vec[global_index] += 2.0 * _penalty_weight * slack * sign * d_mu_c;
+        }
+      }
+    }
+  }
+
   return grad_vec;
 }
 
@@ -361,6 +595,16 @@ dataStore(std::ostream & stream, StochasticTools::GaussianProcess & gp_utils, vo
   dataStore(stream, gp_utils.KCholeskyDecomp(), context);
   dataStore(stream, gp_utils.paramStandardizer(), context);
   dataStore(stream, gp_utils.dataStandardizer(), context);
+  // Link function
+  int link_type_int = static_cast<int>(gp_utils.linkType());
+  dataStore(stream, link_type_int, context);
+  dataStore(stream, gp_utils.linkLb(), context);
+  dataStore(stream, gp_utils.linkUb(), context);
+  // Virtual derivative observations
+  dataStore(stream, gp_utils.virtualParams(), context);
+  dataStore(stream, gp_utils.virtualDerivDims(), context);
+  dataStore(stream, gp_utils.virtualNoise(), context);
+  dataStore(stream, gp_utils.virtualTarget(), context);
 }
 
 template <>
@@ -379,4 +623,15 @@ dataLoad(std::istream & stream, StochasticTools::GaussianProcess & gp_utils, voi
   dataLoad(stream, gp_utils.KCholeskyDecomp(), context);
   dataLoad(stream, gp_utils.paramStandardizer(), context);
   dataLoad(stream, gp_utils.dataStandardizer(), context);
+  // Link function
+  int link_type_int;
+  dataLoad(stream, link_type_int, context);
+  gp_utils.linkType() = static_cast<StochasticTools::GPLinkFunctionType>(link_type_int);
+  dataLoad(stream, gp_utils.linkLb(), context);
+  dataLoad(stream, gp_utils.linkUb(), context);
+  // Virtual derivative observations
+  dataLoad(stream, gp_utils.virtualParams(), context);
+  dataLoad(stream, gp_utils.virtualDerivDims(), context);
+  dataLoad(stream, gp_utils.virtualNoise(), context);
+  dataLoad(stream, gp_utils.virtualTarget(), context);
 }
diff --git a/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_derivative_obs.i b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_derivative_obs.i
new file mode 100644
index 000000000000..2f3cf807ab08
--- /dev/null
+++ b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_derivative_obs.i
@@ -0,0 +1,122 @@
+# Gaussian Process with derivative (virtual) observations for monotonicity constraint.
+# The heat diffusion response is monotonically increasing in q (source term).
+# Virtual derivative observations at selected points enforce this monotonicity
+# by augmenting the covariance matrix with cross-covariance blocks between
+# function values and derivative observations.
+
+[StochasticTools]
+[]
+
+[Distributions]
+  [k_dist]
+    type = Uniform
+    lower_bound = 1
+    upper_bound = 10
+  []
+  [q_dist]
+    type = Uniform
+    lower_bound = 9000
+    upper_bound = 11000
+  []
+[]
+
+[Samplers]
+  [train_sample]
+    type = MonteCarlo
+    num_rows = 10
+    distributions = 'k_dist q_dist'
+    execute_on = PRE_MULTIAPP_SETUP
+  []
+  [test_sample]
+    type = MonteCarlo
+    num_rows = 20
+    distributions = 'k_dist q_dist'
+    execute_on = PRE_MULTIAPP_SETUP
+  []
+[]
+
+[MultiApps]
+  [sub]
+    type = SamplerFullSolveMultiApp
+    input_files = sub.i
+    sampler = train_sample
+  []
+[]
+
+[Controls]
+  [cmdline]
+    type = MultiAppSamplerControl
+    multi_app = sub
+    sampler = train_sample
+    param_names = 'Materials/conductivity/prop_values Kernels/source/value'
+  []
+[]
+
+[Transfers]
+  [data]
+    type = SamplerReporterTransfer
+    from_multi_app = sub
+    sampler = train_sample
+    stochastic_reporter = results
+    from_reporter = 'avg/value'
+  []
+[]
+
+[Reporters]
+  [results]
+    type = StochasticReporter
+    parallel_type = ROOT
+  []
+  [samp_avg]
+    type = EvaluateSurrogate
+    model = GP_avg
+    sampler = test_sample
+    evaluate_std = 'true'
+    parallel_type = ROOT
+    execute_on = final
+  []
+[]
+
+[Trainers]
+  [GP_avg_trainer]
+    type = GaussianProcessTrainer
+    execute_on = timestep_end
+    covariance_function = 'covar'
+    standardize_params = 'true'
+    standardize_data = 'true'
+    sampler = train_sample
+    response = results/data:avg:value
+    # Enforce df/dq > 0 (increasing in q, dimension index 1)
+    # Virtual obs placed at three locations spanning the q range
+    monotone_constraint_points = '5.5 9000
+                                  5.5 10000
+                                  5.5 11000'
+    monotone_constraint_dims = '1 1 1'
+    monotone_constraint_type = increasing
+    derivative_target_value = 0.01
+    derivative_noise_variance = 1e-4
+  []
+[]
+
+[Surrogates]
+  [GP_avg]
+    type = GaussianProcessSurrogate
+    trainer = GP_avg_trainer
+  []
+[]
+
+[Covariance]
+  [covar]
+    type = SquaredExponentialCovariance
+    signal_variance = 1
+    noise_variance = 1e-3
+    length_factor = '0.4 0.4'
+  []
+[]
+
+[Outputs]
+  [out]
+    type = CSV
+    execute_on = FINAL
+  []
+[]
diff --git a/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_log_link.i b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_log_link.i
new file mode 100644
index 000000000000..4bced44391d5
--- /dev/null
+++ b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_log_link.i
@@ -0,0 +1,116 @@
+# Gaussian Process with log link function for positivity constraint.
+# The heat diffusion sub-problem produces strictly positive temperatures.
+# The log link g(y) = log(y - 0) ensures all GP predictions satisfy y > 0.
+# This test verifies that the log link can be used without errors and that
+# the basic prediction structure is correct.
+
+[StochasticTools]
+[]
+
+[Distributions]
+  [k_dist]
+    type = Uniform
+    lower_bound = 1
+    upper_bound = 10
+  []
+  [q_dist]
+    type = Uniform
+    lower_bound = 9000
+    upper_bound = 11000
+  []
+[]
+
+[Samplers]
+  [train_sample]
+    type = MonteCarlo
+    num_rows = 10
+    distributions = 'k_dist q_dist'
+    execute_on = PRE_MULTIAPP_SETUP
+  []
+  [test_sample]
+    type = MonteCarlo
+    num_rows = 20
+    distributions = 'k_dist q_dist'
+    execute_on = PRE_MULTIAPP_SETUP
+  []
+[]
+
+[MultiApps]
+  [sub]
+    type = SamplerFullSolveMultiApp
+    input_files = sub.i
+    sampler = train_sample
+  []
+[]
+
+[Controls]
+  [cmdline]
+    type = MultiAppSamplerControl
+    multi_app = sub
+    sampler = train_sample
+    param_names = 'Materials/conductivity/prop_values Kernels/source/value'
+  []
+[]
+
+[Transfers]
+  [data]
+    type = SamplerReporterTransfer
+    from_multi_app = sub
+    sampler = train_sample
+    stochastic_reporter = results
+    from_reporter = 'avg/value'
+  []
+[]
+
+[Reporters]
+  [results]
+    type = StochasticReporter
+    parallel_type = ROOT
+  []
+  [samp_avg]
+    type = EvaluateSurrogate
+    model = GP_avg
+    sampler = test_sample
+    evaluate_std = 'true'
+    parallel_type = ROOT
+    execute_on = final
+  []
+[]
+
+[Trainers]
+  [GP_avg_trainer]
+    type = GaussianProcessTrainer
+    execute_on = timestep_end
+    covariance_function = 'covar'
+    standardize_params = 'true'
+    standardize_data = 'true'
+    sampler = train_sample
+    response = results/data:avg:value
+    # Enforce positivity: log link with lower bound 0
+    link_function = log
+    link_lower_bound = 0.0
+  []
+[]
+
+[Surrogates]
+  [GP_avg]
+    type = GaussianProcessSurrogate
+    trainer = GP_avg_trainer
+  []
+[]
+
+[Covariance]
+  [covar]
+    type = SquaredExponentialCovariance
+    signal_variance = 1
+    noise_variance = 1e-3
+    length_factor = '0.4 0.4'
+  []
+[]
+
+[Outputs]
+  [out]
+    type = CSV
+    execute_on = FINAL
+  []
+[]
diff --git a/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_logit_link.i b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_logit_link.i
new file mode 100644
index 000000000000..d15e1344595d
--- /dev/null
+++ b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_logit_link.i
@@ -0,0 +1,115 @@
+# Gaussian Process with logit link function for bounded output constraint.
+# The logit link g(y) = log((y - lb)/(ub - y)) maps a bounded response
+# into an unbounded GP latent space, ensuring predictions satisfy lb < y < ub.
+
+[StochasticTools]
+[]
+
+[Distributions]
+  [k_dist]
+    type = Uniform
+    lower_bound = 1
+    upper_bound = 10
+  []
+  [q_dist]
+    type = Uniform
+    lower_bound = 9000
+    upper_bound = 11000
+  []
+[]
+
+[Samplers]
+  [train_sample]
+    type = MonteCarlo
+    num_rows = 10
+    distributions = 'k_dist q_dist'
+    execute_on = PRE_MULTIAPP_SETUP
+  []
+  [test_sample]
+    type = MonteCarlo
+    num_rows = 20
+    distributions = 'k_dist q_dist'
+    execute_on = PRE_MULTIAPP_SETUP
+  []
+[]
+
+[MultiApps]
+  [sub]
+    type = SamplerFullSolveMultiApp
+    input_files = sub.i
+    sampler = train_sample
+  []
+[]
+
+[Controls]
+  [cmdline]
+    type = MultiAppSamplerControl
+    multi_app = sub
+    sampler = train_sample
+    param_names = 'Materials/conductivity/prop_values Kernels/source/value'
+  []
+[]
+
+[Transfers]
+  [data]
+    type = SamplerReporterTransfer
+    from_multi_app = sub
+    sampler = train_sample
+    stochastic_reporter = results
+    from_reporter = 'avg/value'
+  []
+[]
+
+[Reporters]
+  [results]
+    type = StochasticReporter
+    parallel_type = ROOT
+  []
+  [samp_avg]
+    type = EvaluateSurrogate
+    model = GP_avg
+    sampler = test_sample
+    evaluate_std = 'true'
+    parallel_type = ROOT
+    execute_on = final
+  []
+[]
+
+[Trainers]
+  [GP_avg_trainer]
+    type = GaussianProcessTrainer
+    execute_on = timestep_end
+    covariance_function = 'covar'
+    standardize_params = 'true'
+    standardize_data = 'true'
+    sampler = train_sample
+    response = results/data:avg:value
+    # Bound the output: logit link with physical bounds [250, 2000]
+    link_function = logit
+    link_lower_bound = 250.0
+    link_upper_bound = 2000.0
+  []
+[]
+
+[Surrogates]
+  [GP_avg]
+    type = GaussianProcessSurrogate
+    trainer = GP_avg_trainer
+  []
+[]
+
+[Covariance]
+  [covar]
+    type = SquaredExponentialCovariance
+    signal_variance = 1
+    noise_variance = 1e-3
+    length_factor = '0.4 0.4'
+  []
+[]
+
+[Outputs]
+  [out]
+    type = CSV
+    execute_on = FINAL
+  []
+[]
diff --git a/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_penalty_constraint.i b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_penalty_constraint.i
new file mode 100644
index 000000000000..3192d92d33ca
--- /dev/null
+++ b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_penalty_constraint.i
@@ -0,0 +1,120 @@
+# Gaussian Process with penalty constraints during hyperparameter tuning.
+# Penalty terms are added to the NLML loss to steer the Adam optimizer toward
+# hyperparameter configurations that respect lower/upper bound constraints at
+# specified evaluation points.
+
+[StochasticTools]
+[]
+
+[Distributions]
+  [k_dist]
+    type = Uniform
+    lower_bound = 1
+    upper_bound = 10
+  []
+  [q_dist]
+    type = Uniform
+    lower_bound = 9000
+    upper_bound = 11000
+  []
+[]
+
+[Samplers]
+  [train_sample]
+    type = MonteCarlo
+    num_rows = 10
+    distributions = 'k_dist q_dist'
+    execute_on = PRE_MULTIAPP_SETUP
+  []
+  [test_sample]
+    type = MonteCarlo
+    num_rows = 20
+    distributions = 'k_dist q_dist'
+    execute_on = PRE_MULTIAPP_SETUP
+  []
+[]
+
+[MultiApps]
+  [sub]
+    type = SamplerFullSolveMultiApp
+    input_files = sub.i
+    sampler = train_sample
+  []
+[]
+
+[Controls]
+  [cmdline]
+    type = MultiAppSamplerControl
+    multi_app = sub
+    sampler = train_sample
+    param_names = 'Materials/conductivity/prop_values Kernels/source/value'
+  []
+[]
+
+[Transfers]
+  [data]
+    type = SamplerReporterTransfer
+    from_multi_app = sub
+    sampler = train_sample
+    stochastic_reporter = results
+    from_reporter = 'avg/value'
+  []
+[]
+
+[Reporters]
+  [results]
+    type = StochasticReporter
+    parallel_type = ROOT
+  []
+  [samp_avg]
+    type = EvaluateSurrogate
+    model = GP_avg
+    sampler = test_sample
+    evaluate_std = 'true'
+    parallel_type = ROOT
+    execute_on = final
+  []
+[]
+
+[Trainers]
+  [GP_avg_trainer]
+    type = GaussianProcessTrainer
+    execute_on = timestep_end
+    covariance_function = 'covar'
+    standardize_params = 'true'
+    standardize_data = 'true'
+    sampler = train_sample
+    response = results/data:avg:value
+    tune_parameters = 'signal_variance noise_variance length_factor'
+    num_iters = 50
+    learning_rate = 0.001
+    # Penalize predictions below 300 (floor near BC value) at two constraint points
+    penalty_constraint_points = '5.5 9000
+                                 5.5 11000'
+    penalty_constraint_lower_bounds = '300.0 300.0'
+    penalty_weight = 10.0
+  []
+[]
+
+[Surrogates]
+  [GP_avg]
+    type = GaussianProcessSurrogate
+    trainer = GP_avg_trainer
+  []
+[]
+
+[Covariance]
+  [covar]
+    type = SquaredExponentialCovariance
+    signal_variance = 1
+    noise_variance = 1e-3
+    length_factor = '0.4 0.4'
+  []
+[]
+
+[Outputs]
+  [out]
+    type = CSV
+    execute_on = FINAL
+  []
+[]
diff --git a/modules/stochastic_tools/test/tests/surrogates/gaussian_process/tests b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/tests
index 23603d80028e..760e3d9bb27f 100644
--- a/modules/stochastic_tools/test/tests/surrogates/gaussian_process/tests
+++ b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/tests
@@ -1,6 +1,6 @@
 [Tests]
   issues = "#15482"
-  design = "GaussianProcessTrainer.md GaussianProcess.md"
+  design = "GaussianProcessTrainer.md GaussianProcess.md GPLinkFunction.md"
 
   [store_load]
     requirement = 'The system shall demonstrate a gaussian process surrogate by '
@@ -79,6 +79,33 @@
       detail = 'a Matern half integer kernel using Adam with mini-batch sampling;'
     []
   []
+  [constraints]
+    requirement = 'The system shall support inequality constraints on Gaussian process predictions via'
+    [GP_log_link]
+      type = RunApp
+      input = GP_log_link.i
+      allow_test_objects = true
+      detail = 'a log link function enforcing strict positivity (y > lower_bound);'
+    []
+    [GP_logit_link]
+      type = RunApp
+      input = GP_logit_link.i
+      allow_test_objects = true
+      detail = 'a logit link function enforcing bounded outputs (lower_bound < y < upper_bound);'
+    []
+    [GP_derivative_obs]
+      type = RunApp
+      input = GP_derivative_obs.i
+      allow_test_objects = true
+      detail = 'virtual derivative observations that enforce monotonicity constraints;'
+    []
+    [GP_penalty_constraint]
+      type = RunApp
+      input = GP_penalty_constraint.i
+      allow_test_objects = true
+      detail = 'penalty-augmented NLML that steers hyperparameter tuning toward feasible predictions.'
+    []
+  []
   [errors]
     requirement = 'The system shall throw an error when '
     [optimization_adam_batch_size]
@@ -88,5 +115,12 @@
       detail = 'the batch size is greater than the training data set size for Adam optimization. '
      expect_err = "Batch size cannot be greater than the training data set size."
     []
+    [logit_invalid_bounds]
+      type = RunException
+      input = GP_logit_link.i
+      cli_args = "Trainers/GP_avg_trainer/link_upper_bound=250.0"
+      detail = 'the logit link upper bound is not greater than the lower bound. '
+      expect_err = "link_upper_bound must be strictly greater than link_lower_bound"
+    []
   []
 []
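
Notes for reviewers (reference material, not part of the applied diff):

The bodies of the new computeCovarianceFD()/computeCovarianceDD() methods in
SquaredExponentialCovariance.C are not reproduced in the hunks above. For the
squared-exponential kernel the required blocks have standard closed forms; the
standalone sketch below (illustrative helper names, not the module's code)
shows the scalar quantities the two methods are expected to return for one
pair of points x, x' and derivative dimensions d, e:

    #include <Eigen/Dense>
    #include <cmath>

    using Vec = Eigen::VectorXd;

    // k(x, x') = sf2 * exp(-0.5 * sum_d ((x_d - x'_d) / l_d)^2)
    double se_kernel(const Vec & x, const Vec & xp, double sf2, const Vec & l)
    {
      const Vec d = (x - xp).cwiseQuotient(l);
      return sf2 * std::exp(-0.5 * d.squaredNorm());
    }

    // Cov(f(x), df/dx'_d) = k(x, x') * (x_d - x'_d) / l_d^2   ("FD" block)
    double se_cov_fd(const Vec & x, const Vec & xp, int d, double sf2, const Vec & l)
    {
      return se_kernel(x, xp, sf2, l) * (x(d) - xp(d)) / (l(d) * l(d));
    }

    // Cov(df/dx_d, df/dx'_e) =
    //   k(x, x') * (delta_de / l_d^2 - (x_d - x'_d)(x_e - x'_e) / (l_d^2 l_e^2))   ("DD" block)
    double se_cov_dd(const Vec & x, const Vec & xp, int d, int e, double sf2, const Vec & l)
    {
      const double k = se_kernel(x, xp, sf2, l);
      const double diag = (d == e) ? 1.0 / (l(d) * l(d)) : 0.0;
      return k * (diag - (x(d) - xp(d)) * (x(e) - xp(e)) / (l(d) * l(d) * l(e) * l(e)));
    }

These formulas also explain why only the squared-exponential kernel is
supported: the derivative blocks require a twice-differentiable kernel with
tractable derivatives.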
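On the "approximate" tag in the penalty gradient of getGradient(): with the
predicted mean written as a function of the hyperparameters, the exact
derivative has two terms, and the implementation keeps only the first (the
dK_star/dtheta term named in the code comment), dropping the term that arises
from the dependence of K^{-1} on theta:

    \mu_c = \mathbf{k}_*^{\top} \mathbf{K}^{-1} \mathbf{y}, \qquad
    \frac{\partial \mu_c}{\partial \theta_i}
      = \underbrace{\left(\frac{\partial \mathbf{k}_*}{\partial \theta_i}\right)^{\!\top}
          \mathbf{K}^{-1} \mathbf{y}}_{\text{kept}}
      - \underbrace{\mathbf{k}_*^{\top} \mathbf{K}^{-1}
          \frac{\partial \mathbf{K}}{\partial \theta_i} \mathbf{K}^{-1} \mathbf{y}}_{\text{dropped}}

    \frac{\partial}{\partial \theta_i}\,\lambda \max(0,\ \ell_c - \mu_c)^2
      = -2\lambda \max(0,\ \ell_c - \mu_c)\,\frac{\partial \mu_c}{\partial \theta_i}

The second identity (and its mirror image for the upper bound) is exactly the
update `grad_vec[global_index] += 2.0 * _penalty_weight * slack * sign * d_mu_c`,
with sign = -1 for lower-bound violations and sign = +1 for upper-bound
violations.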
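For reference, the link pairs exercised by GP_log_link.i and GP_logit_link.i,
written out with ell = link_lower_bound and u = link_upper_bound (consistent
with the comments in those inputs):

    g_{\log}(y) = \log(y - \ell), \qquad
    g_{\log}^{-1}(z) = \ell + e^{z}

    g_{\mathrm{logit}}(y) = \log\!\frac{y - \ell}{u - y}, \qquad
    g_{\mathrm{logit}}^{-1}(z) = \ell + \frac{u - \ell}{1 + e^{-z}}

Both inverses map the entire real line into the constraint set, which is why
the constraints hold by construction for any latent GP prediction.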
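The surrogate-side back-transform in GaussianProcessSurrogate.C is likewise
not shown in this excerpt. Given that the utility exposes both inverse() and
inverseDeriv(), a plausible reading is a plug-in mean with a first-order
(delta-method) standard deviation; the sketch below is a hypothetical helper
under that assumption for the log link, not the patch's actual code:

    #include <cmath>
    #include <utility>

    // Map a latent prediction (mu_z, sigma_z) back through g(y) = log(y - lb):
    //   mean  ~= g^{-1}(mu_z)            = lb + exp(mu_z)   (always > lb)
    //   std   ~= (g^{-1})'(mu_z) * sigma_z = exp(mu_z) * sigma_z
    std::pair<double, double>
    log_link_back_transform(double mu_z, double sigma_z, double lb)
    {
      const double mean = lb + std::exp(mu_z);
      const double dinv = std::exp(mu_z);
      return {mean, dinv * sigma_z};
    }

Whether the module uses this first-order estimate or exact log-normal moments
is not determinable from the hunks above; the delta-method form is the one
implied by the inverseDeriv() accessor.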