From 7bf2ef81038da1f16648c311bc5585bf9091e55a Mon Sep 17 00:00:00 2001 From: Daniel Schwen Date: Mon, 27 Apr 2026 09:51:01 -0600 Subject: [PATCH] Add inequality constraints to Gaussian Process trainer (#32848) Three mechanisms for enforcing inequality constraints on GP predictions: - Link functions (log, logit): exact constraints via output warping; GP trains in unconstrained latent space and predictions are inverted back to physical space via the inverse link. - Derivative observations: monotonicity constraints via virtual observations that augment the covariance matrix with cross-covariance blocks between function values and directional derivatives. - Penalty constraints: soft bounds added to NLML loss/gradient during Adam hyperparameter tuning. New files: GPLinkFunction.h/C, GP_log_link.i, GP_logit_link.i, GP_derivative_obs.i, GP_penalty_constraint.i, GPLinkFunction.md. Updated: GaussianProcess.h/C, GaussianProcessTrainer.h/C, GaussianProcessSurrogate.C, CovarianceFunctionBase.h/C, SquaredExponentialCovariance.h/C, tests spec, and doc pages. Co-Authored-By: Claude Sonnet 4.6 --- .../source/trainers/GaussianProcessTrainer.md | 79 +++++ .../content/source/utils/GPLinkFunction.md | 81 ++++++ .../content/source/utils/GaussianProcess.md | 30 ++ .../covariances/CovarianceFunctionBase.h | 70 +++++ .../SquaredExponentialCovariance.h | 26 ++ .../include/trainers/GaussianProcessTrainer.h | 1 + .../include/utils/GPLinkFunction.h | 140 +++++++++ .../include/utils/GaussianProcess.h | 112 ++++++- .../src/covariances/CovarianceFunctionBase.C | 45 +++ .../SquaredExponentialCovariance.C | 146 ++++++++++ .../src/surrogates/GaussianProcessSurrogate.C | 50 +++- .../src/trainers/GaussianProcessTrainer.C | 175 ++++++++++- .../src/utils/GPLinkFunction.C | 107 +++++++ .../src/utils/GaussianProcess.C | 275 +++++++++++++++++- .../gaussian_process/GP_derivative_obs.i | 122 ++++++++ .../surrogates/gaussian_process/GP_log_link.i | 116 ++++++++ .../gaussian_process/GP_logit_link.i | 115 ++++++++ .../gaussian_process/GP_penalty_constraint.i | 120 ++++++++ .../tests/surrogates/gaussian_process/tests | 36 ++- 19 files changed, 1819 insertions(+), 27 deletions(-) create mode 100644 modules/stochastic_tools/doc/content/source/utils/GPLinkFunction.md create mode 100644 modules/stochastic_tools/include/utils/GPLinkFunction.h create mode 100644 modules/stochastic_tools/src/utils/GPLinkFunction.C create mode 100644 modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_derivative_obs.i create mode 100644 modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_log_link.i create mode 100644 modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_logit_link.i create mode 100644 modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_penalty_constraint.i diff --git a/modules/stochastic_tools/doc/content/source/trainers/GaussianProcessTrainer.md b/modules/stochastic_tools/doc/content/source/trainers/GaussianProcessTrainer.md index ea2ed40baee1..09ae0dab9831 100644 --- a/modules/stochastic_tools/doc/content/source/trainers/GaussianProcessTrainer.md +++ b/modules/stochastic_tools/doc/content/source/trainers/GaussianProcessTrainer.md @@ -238,6 +238,85 @@ with $M$ outputs, each training iteration of Adam has a cost of $\mathcal{O} (M^3N^3)$. Adam permits using $n < N$ random training points during each iteration (mini-batches) which has a cost of $\mathcal{O}(M^3n^3)<<\mathcal{O}(M^3N^3)$. 
+## Inequality Constraints + +Three complementary mechanisms are available to enforce inequality constraints on +GP predictions. They can be combined freely. + +### Link Functions (Output Warping) + +A link function $g$ maps physical outputs to an unconstrained latent space. +The GP trains in latent space and predictions are inverted back to physical space, +guaranteeing constraint satisfaction by construction. + +| `link_function` | Constraint | Required parameters | +| - | - | - | +| `identity` (default) | none | — | +| `log` | $y > \ell$ | `link_lower_bound` | +| `logit` | $\ell < y < u$ | `link_lower_bound`, `link_upper_bound` | + +See [GPLinkFunction.md] for the mathematical details. + ++Example — positivity constraint:+ + +!listing stochastic_tools/test/tests/surrogates/gaussian_process/GP_log_link.i + block=Trainers + +### Derivative Observations (Monotonicity Constraints) + +Virtual derivative observations augment the covariance matrix with cross-covariance +blocks between function values and directional derivatives at specified points. +This enforces monotonicity (or zero-derivative) constraints structurally via +the GP prior. + +The augmented covariance matrix takes the block form: + +!equation +\mathbf{K}_{\text{aug}} = \begin{bmatrix} +\mathbf{K}_{ff} + \sigma_n^2 \mathbf{I} & \mathbf{K}_{fd} \\ +\mathbf{K}_{df} & \mathbf{K}_{dd} + \sigma_d^2 \mathbf{I} +\end{bmatrix} + +where $\mathbf{K}_{fd}$ and $\mathbf{K}_{dd}$ contain cross-covariances between +function values and partial derivatives. Only the +[SquaredExponentialCovariance.md] kernel supports derivative covariances. Only +single-output GPs are supported. + +The virtual observations are placed at user-specified locations +(`monotone_constraint_points`) and target either zero derivative, positive +derivative (`increasing`), or negative derivative (`decreasing`) at those points. + ++Example — increasing in second parameter:+ + +!listing stochastic_tools/test/tests/surrogates/gaussian_process/GP_derivative_obs.i + block=Trainers + +### Penalty Constraints (Soft Bounds During Tuning) + +Penalty terms are added to the negative log-marginal likelihood (NLML) and its +gradient during Adam hyperparameter optimization. This steers hyperparameter +selection toward configurations that respect lower/upper bounds on predicted means +at specified evaluation points. + +The augmented loss is: + +!equation +\mathcal{L}_{\text{aug}} = \mathcal{L}_{\text{NLML}} + \lambda \sum_c \left[ + \max(0,\, \ell_c - \hat{y}_c)^2 + \max(0,\, \hat{y}_c - u_c)^2 +\right] + +where $\hat{y}_c$ is the predicted mean at constraint point $c$, and $\lambda$ +is `penalty_weight`. + +The bounds are specified in physical space and are internally transformed through +the link function and data standardizer. Penalty constraints require the +`tune_parameters` option to be active. 
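+
+For intuition, the following sketch shows the penalty term in isolation. This is
+illustrative standalone C++, not the code added by this patch; the function name
+and its inputs are hypothetical stand-ins for the NLML value and the predicted
+means $\hat{y}_c$ at the constraint points:
+
+```cpp
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <vector>
+
+// Hinge-squared penalty added to the NLML loss (illustrative sketch).
+double penalizedLoss(double nlml,
+                     const std::vector<double> & mu, // predicted means at constraint points
+                     const std::vector<double> & lb, // lower bounds (-inf disables)
+                     const std::vector<double> & ub, // upper bounds (+inf disables)
+                     double lambda)                  // penalty_weight
+{
+  double loss = nlml;
+  for (std::size_t c = 0; c < mu.size(); ++c)
+  {
+    loss += lambda * std::pow(std::max(0.0, lb[c] - mu[c]), 2); // lower-bound violation
+    loss += lambda * std::pow(std::max(0.0, mu[c] - ub[c]), 2); // upper-bound violation
+  }
+  return loss;
+}
+```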
+
++Example — lower-bound penalty at two points:+
+
+!listing stochastic_tools/test/tests/surrogates/gaussian_process/GP_penalty_constraint.i
+         block=Trainers
+
 !syntax parameters /Trainers/GaussianProcessTrainer
 
 !syntax inputs /Trainers/GaussianProcessTrainer
diff --git a/modules/stochastic_tools/doc/content/source/utils/GPLinkFunction.md b/modules/stochastic_tools/doc/content/source/utils/GPLinkFunction.md
new file mode 100644
index 000000000000..2d34dbd48882
--- /dev/null
+++ b/modules/stochastic_tools/doc/content/source/utils/GPLinkFunction.md
@@ -0,0 +1,81 @@
+# GPLinkFunction
+
+## Overview
+
+`GPLinkFunction` provides output warping (link functions) for [GaussianProcessTrainer.md]
+that enforce inequality constraints on GP predictions. A link function $g$ maps
+the physical output $y$ to a latent (unconstrained) variable $z = g(y)$. The GP
+is trained in latent $z$-space, and predictions are mapped back to physical space
+via the inverse $y = g^{-1}(z)$.
+
+This approach is sometimes called *warped Gaussian processes*
+[!cite](snelson2004warped) and provides an exact constraint: every prediction
+satisfies the bound by construction.
+
+## Available Link Functions
+
+### Identity (default)
+
+No transformation is applied. The GP trains and predicts directly in physical space.
+
+!equation
+g(y) = y, \quad g^{-1}(z) = z
+
+Use this (or omit `link_function`) for unconstrained problems.
+
+### Log Link
+
+Enforces a strict lower bound: $y > \ell$.
+
+!equation
+g(y) = \ln(y - \ell), \quad g^{-1}(z) = e^{z} + \ell
+
+where $\ell$ is `link_lower_bound` (default 0, giving a positivity constraint).
+
+The Jacobian correction $\ln |g'(y)| = -\ln(y - \ell)$ is included in the
+negative log-marginal likelihood when hyperparameter tuning is active.
+
+### Logit Link
+
+Enforces both a lower and an upper bound: $\ell < y < u$.
+
+!equation
+g(y) = \ln\!\left(\frac{y - \ell}{u - y}\right), \quad
+g^{-1}(z) = \ell + \frac{u - \ell}{1 + e^{-z}}
+
+where $\ell$ is `link_lower_bound` and $u$ is `link_upper_bound`. Requires
+$u > \ell$.
+
+## Uncertainty Propagation
+
+When a link function is active, the GP posterior standard deviation $\sigma_z$ in
+latent space is mapped to physical space via the delta method:
+
+!equation
+\sigma_y \approx \left| (g^{-1})'(\mu_z) \right| \, \sigma_z
+
+This approximation is accurate when $\sigma_z$ is small enough that $g^{-1}$ is
+nearly linear over the bulk of the posterior; for larger $\sigma_z$ the physical-space
+posterior becomes skewed and a single standard deviation is only a rough summary.
+
+## Interaction with Standardization
+
+The link transform is applied to training data *before* standardization.
+The training pipeline in [GaussianProcessTrainer.md] proceeds as:
+
+1. Collect physical responses $y_i$.
+2. Apply the link: $z_i = g(y_i)$.
+3. Standardize (center/scale) the $z_i$.
+4. Train the GP in standardized $z$-space.
+
+At prediction time, the GP mean and standard deviation are de-standardized to
+$z$-space and then the inverse link is applied.
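+
+As a concrete illustration of this round trip, the sketch below walks one value
+through the log link and back, and propagates a hypothetical latent standard
+deviation with the delta method. This is a standalone example, not the MOOSE API,
+and the numerical values are made up:
+
+```cpp
+#include <cmath>
+#include <cstdio>
+
+int main()
+{
+  const double lb = 0.0; // link_lower_bound
+  const double y = 2.5;  // physical training value, must satisfy y > lb
+
+  const double z = std::log(y - lb);      // forward link: latent value the GP trains on
+  const double y_back = std::exp(z) + lb; // inverse link: recovers y exactly
+
+  // Hypothetical GP posterior in latent space; delta method for the physical std.
+  const double mu_z = z;
+  const double sigma_z = 0.1;
+  const double sigma_y = std::exp(mu_z) * sigma_z; // |(g^{-1})'(mu_z)| = exp(mu_z)
+
+  std::printf("y = %g, z = %g, y_back = %g, sigma_y = %g\n", y, z, y_back, sigma_y);
+  return 0;
+}
+```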
+ +## Usage + +Link functions are configured via parameters on [GaussianProcessTrainer.md]: + +!listing stochastic_tools/test/tests/surrogates/gaussian_process/GP_log_link.i + block=Trainers + +!listing stochastic_tools/test/tests/surrogates/gaussian_process/GP_logit_link.i + block=Trainers diff --git a/modules/stochastic_tools/doc/content/source/utils/GaussianProcess.md b/modules/stochastic_tools/doc/content/source/utils/GaussianProcess.md index 970c40c75137..8b907d6325a1 100644 --- a/modules/stochastic_tools/doc/content/source/utils/GaussianProcess.md +++ b/modules/stochastic_tools/doc/content/source/utils/GaussianProcess.md @@ -52,3 +52,33 @@ Or by simply calling the optimizer with Adam (stochastic algorithm): !listing stochastic_tools/src/utils/GaussianProcess.C start=tuneHyperParamsAdam(training_params end=); +### Applying a link function + +Before setting up the covariance matrix, training data can be transformed through +a [GPLinkFunction.md] to enforce inequality constraints. The link type and bounds +are configured through [GaussianProcessTrainer.md]: + +!listing stochastic_tools/src/trainers/GaussianProcessTrainer.C + start=// ---- Link function ---- + end=// Apply link function to training data + +The transformed values are standardized and the GP trains entirely in latent space. +At prediction time the inverse link is applied to recover physical outputs. + +### Derivative observations + +Monotonicity constraints are encoded via virtual derivative observations appended +to the training set. The `GaussianProcess` object stores the virtual parameter +locations and their associated derivative dimensions, and assembles an augmented +covariance matrix at setup time: + +!listing stochastic_tools/src/trainers/GaussianProcessTrainer.C + start=// ---- Derivative constraint (virtual observations) ---- + end=// ---- Penalty constraints ---- + +### Penalty constraints + +Soft bounds on predicted means at specified evaluation points are enforced by +adding a penalty term to the NLML loss and gradient during Adam optimization. +The penalty is computed inside `getLoss` and `getGradient` in `GaussianProcess.C`. + diff --git a/modules/stochastic_tools/include/covariances/CovarianceFunctionBase.h b/modules/stochastic_tools/include/covariances/CovarianceFunctionBase.h index 585e7624b0d4..404ce9f0120c 100644 --- a/modules/stochastic_tools/include/covariances/CovarianceFunctionBase.h +++ b/modules/stochastic_tools/include/covariances/CovarianceFunctionBase.h @@ -77,6 +77,76 @@ class CovarianceFunctionBase : public MooseObject, public CovarianceInterface const std::string & hyper_param_name, unsigned int ind) const; + /** + * Compute the cross-covariance between function values and derivatives: + * K_fd[i,j] = Cov[f(x_i), df(xd_j)/dx'_{dim}] = dK(x_i, xd_j)/dx'_{dim} + * For SE kernel: K_fd[i,j] = K(x_i,xd_j) * (x_{i,dim} - xd_{j,dim}) / ell_dim^2 + * Default implementation calls mooseError; override in kernels that support + * derivative observations. 
+ * @param K_fd Output matrix of size (x.rows() x xd.rows()) + * @param x Function-value observation locations + * @param xd Derivative observation locations + * @param dim Input dimension for the derivative + */ + virtual void computeCovarianceFD(RealEigenMatrix & K_fd, + const RealEigenMatrix & x, + const RealEigenMatrix & xd, + unsigned int dim) const; + + /** + * Compute the cross-covariance between derivatives and function values: + * K_df[i,j] = Cov[df(xd_i)/dx_{dim}, f(xp_j)] = dK(xd_i, xp_j)/dx_{d,dim} + * For SE kernel: K_df[i,j] = K(xd_i,xp_j) * (xp_{j,dim} - xd_{i,dim}) / ell_dim^2 + * Note: K_df = K_fd^T when xd and xp are swapped. + * Default implementation calls mooseError. + * @param K_df Output matrix of size (xd.rows() x xp.rows()) + * @param xd Derivative observation locations + * @param xp Function-value locations (e.g. test points) + * @param dim Input dimension for the derivative + */ + virtual void computeCovarianceDf(RealEigenMatrix & K_df, + const RealEigenMatrix & xd, + const RealEigenMatrix & xp, + unsigned int dim) const; + + /** + * Compute the covariance between two sets of derivative observations: + * K_dd[i,j] = Cov[df(xd_i)/dx_{dim_i}, df(xdp_j)/dx'_{dim_j}] + * = d^2 K(xd_i, xdp_j) / (dx_{dim_i} dx'_{dim_j}) + * For SE kernel when dim_i == dim_j: + * = K(xd_i,xdp_j) * [1/ell_k^2 - (xd_{i,k}-xdp_{j,k})^2/ell_k^4] + * For SE kernel when dim_i != dim_j: + * = K(xd_i,xdp_j) * [-(xd_{i,ki}-xdp_{j,ki})*(xd_{i,kj}-xdp_{j,kj})/(ell_ki^2*ell_kj^2)] + * Default implementation calls mooseError. + * @param K_dd Output matrix of size (xd.rows() x xdp.rows()) + * @param xd First set of derivative observation locations + * @param xdp Second set of derivative observation locations + * @param dim_i Derivative dimension for the first set + * @param dim_j Derivative dimension for the second set + */ + virtual void computeCovarianceDD(RealEigenMatrix & K_dd, + const RealEigenMatrix & xd, + const RealEigenMatrix & xdp, + unsigned int dim_i, + unsigned int dim_j) const; + + /** + * Compute dK_cross/dhp: the derivative of the cross-covariance K(x, xc) with + * respect to a hyperparameter. Used in the penalty constraint gradient. + * For kernels that do not implement this, the default returns a zero matrix + * (penalty loss is computed but no gradient contribution is added). 
+   * @param dKdhp Output matrix of size (x.rows() x xc.rows())
+   * @param x Training point locations (batch)
+   * @param xc Constraint point location (1 x d)
+   * @param hp_name Hyperparameter name (prefixed with covariance object name)
+   * @param ind Index within vector hyperparameter (0 for scalars)
+   */
+  virtual void computedKdhyper_cross(RealEigenMatrix & dKdhp,
+                                     const RealEigenMatrix & x,
+                                     const RealEigenMatrix & xc,
+                                     const std::string & hp_name,
+                                     unsigned int ind) const;
+
   /// Check if a given parameter is tunable
   /// @param The name of the hyperparameter
   virtual bool isTunable(const std::string & name) const;
diff --git a/modules/stochastic_tools/include/covariances/SquaredExponentialCovariance.h b/modules/stochastic_tools/include/covariances/SquaredExponentialCovariance.h
index f77d018f33dd..8493d6a8dbf9 100644
--- a/modules/stochastic_tools/include/covariances/SquaredExponentialCovariance.h
+++ b/modules/stochastic_tools/include/covariances/SquaredExponentialCovariance.h
@@ -44,6 +44,32 @@ class SquaredExponentialCovariance : public CovarianceFunctionBase
                                const Real sigma_f_squared,
                                const int ind);
 
+  /// Cross-covariance K_fd[i,j] = Cov[f(x_i), df(xd_j)/dx'_{dim}] = dK(x_i,xd_j)/dx'_{dim}
+  void computeCovarianceFD(RealEigenMatrix & K_fd,
+                           const RealEigenMatrix & x,
+                           const RealEigenMatrix & xd,
+                           unsigned int dim) const override;
+
+  /// Cross-covariance K_df[i,j] = Cov[df(xd_i)/dx_{dim}, f(xp_j)] = dK(xd_i,xp_j)/dx_{d,dim}
+  void computeCovarianceDf(RealEigenMatrix & K_df,
+                           const RealEigenMatrix & xd,
+                           const RealEigenMatrix & xp,
+                           unsigned int dim) const override;
+
+  /// Second-derivative covariance K_dd[i,j] = d^2K/(dx_{dim_i} dx'_{dim_j})
+  void computeCovarianceDD(RealEigenMatrix & K_dd,
+                           const RealEigenMatrix & xd,
+                           const RealEigenMatrix & xdp,
+                           unsigned int dim_i,
+                           unsigned int dim_j) const override;
+
+  /// dK_cross(x, xc)/dhp: derivative of the cross-covariance w.r.t. hyperparameters,
+  /// used for the penalty constraint gradient
+  void computedKdhyper_cross(RealEigenMatrix & dKdhp,
+                             const RealEigenMatrix & x,
+                             const RealEigenMatrix & xc,
+                             const std::string & hp_name,
+                             unsigned int ind) const override;
+
 protected:
   /// length factor (\ell) for the kernel, in vector form for multiple parameters
   const std::vector<Real> & _length_factor;
diff --git a/modules/stochastic_tools/include/trainers/GaussianProcessTrainer.h b/modules/stochastic_tools/include/trainers/GaussianProcessTrainer.h
index 371d578fd455..83068e3a65db 100644
--- a/modules/stochastic_tools/include/trainers/GaussianProcessTrainer.h
+++ b/modules/stochastic_tools/include/trainers/GaussianProcessTrainer.h
@@ -19,6 +19,7 @@
 #include "CovarianceInterface.h"
 #include "GaussianProcess.h"
+#include "GPLinkFunction.h"
 
 class GaussianProcessTrainer : public SurrogateTrainer, public CovarianceInterface
 {
diff --git a/modules/stochastic_tools/include/utils/GPLinkFunction.h b/modules/stochastic_tools/include/utils/GPLinkFunction.h
new file mode 100644
index 000000000000..acb9053558ca
--- /dev/null
+++ b/modules/stochastic_tools/include/utils/GPLinkFunction.h
@@ -0,0 +1,140 @@
+//* This file is part of the MOOSE framework
+//* https://mooseframework.inl.gov
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#pragma once
+
+#include "MooseTypes.h"
+#include <limits>
+#include <memory>
+#include <string>
+
+namespace StochasticTools
+{
+
+/**
+ * Enumeration of supported output link function types.
+ * Stored as an integer in model data for serialization.
+ */
+enum class GPLinkFunctionType : int
+{
+  Identity = 0, ///< No transformation (default)
+  Log = 1,      ///< g(y) = log(y - lb), enforces y > lb
+  Logit = 2     ///< g(y) = log((y-lb)/(ub-y)), enforces lb < y < ub
+};
+
+/**
+ * Utility class for GP output link functions. A link function g maps
+ * physical-space outputs y to the GP training space z = g(y) and back via g^{-1}.
+ * This allows GP regression to operate in a transformed space that naturally
+ * satisfies inequality constraints such as positivity or bounded outputs.
+ *
+ * The transformation changes the negative log-marginal likelihood (NLML) by the
+ * Jacobian correction: NLML_total = NLML_z - sum_i log|g'(y_i)|.
+ * Since this Jacobian does not depend on the kernel hyperparameters, it does not
+ * alter the hyperparameter gradient computation.
+ *
+ * Prediction uncertainty propagation uses the delta method:
+ * sigma_y ≈ |g^{-1}'(mu_z)| * sigma_z
+ */
+class GPLinkFunction
+{
+public:
+  virtual ~GPLinkFunction() = default;
+
+  /// Forward: physical space y -> GP training space z = g(y)
+  virtual Real forward(Real y) const = 0;
+
+  /// Inverse: GP training space z -> physical space y = g^{-1}(z)
+  virtual Real inverse(Real z) const = 0;
+
+  /// Derivative of the inverse: d/dz [g^{-1}(z)], used in delta-method uncertainty propagation
+  virtual Real inverseDeriv(Real z) const = 0;
+
+  /// log |g'(y)| = log |d/dy g(y)|, used as the Jacobian correction to NLML reporting
+  virtual Real logJacobian(Real y) const = 0;
+
+  /// Lowest physical-space value that is valid for this link function
+  virtual Real lowerBound() const = 0;
+
+  /// Highest physical-space value that is valid for this link function
+  virtual Real upperBound() const = 0;
+
+  /// Type tag for serialization
+  virtual GPLinkFunctionType type() const = 0;
+
+  /// Factory: create a link function from type tag and optional bounds
+  static std::unique_ptr<GPLinkFunction>
+  build(GPLinkFunctionType type, Real lb = 0.0, Real ub = 1.0);
+
+  /// Factory: create a link function from a string name ("identity", "log", "logit")
+  static std::unique_ptr<GPLinkFunction>
+  buildFromString(const std::string & type_name, Real lb = 0.0, Real ub = 1.0);
+};
+
+/// Identity link: g(y) = y. No constraint is enforced.
+class IdentityLinkFunction : public GPLinkFunction
+{
+public:
+  Real forward(Real y) const override { return y; }
+  Real inverse(Real z) const override { return z; }
+  Real inverseDeriv(Real /*z*/) const override { return 1.0; }
+  Real logJacobian(Real /*y*/) const override { return 0.0; }
+  Real lowerBound() const override { return -std::numeric_limits<Real>::infinity(); }
+  Real upperBound() const override { return std::numeric_limits<Real>::infinity(); }
+  GPLinkFunctionType type() const override { return GPLinkFunctionType::Identity; }
+};
+
+/**
+ * Log link: g(y) = log(y - lb), g^{-1}(z) = exp(z) + lb.
+ * Enforces y > lb (typically lb = 0 for positivity).
+ * Uncertainty propagation: sigma_y ≈ exp(mu_z) * sigma_z.
+ */
+class LogLinkFunction : public GPLinkFunction
+{
+public:
+  LogLinkFunction(Real lb = 0.0) : _lb(lb) {}
+
+  Real forward(Real y) const override;
+  Real inverse(Real z) const override;
+  Real inverseDeriv(Real z) const override;
+  Real logJacobian(Real y) const override;
+  Real lowerBound() const override { return _lb; }
+  Real upperBound() const override { return std::numeric_limits<Real>::infinity(); }
+  GPLinkFunctionType type() const override { return GPLinkFunctionType::Log; }
+  Real lb() const { return _lb; }
+
+private:
+  Real _lb;
+};
+
+/**
+ * Logit link: g(y) = log((y-lb)/(ub-y)), g^{-1}(z) = lb + (ub-lb)/(1+exp(-z)).
+ * Enforces lb < y < ub.
+ * Uncertainty propagation: sigma_y ≈ (ub-lb)*sigmoid(mu_z)*(1-sigmoid(mu_z)) * sigma_z.
+ */ +class LogitLinkFunction : public GPLinkFunction +{ +public: + LogitLinkFunction(Real lb = 0.0, Real ub = 1.0) : _lb(lb), _ub(ub) {} + + Real forward(Real y) const override; + Real inverse(Real z) const override; + Real inverseDeriv(Real z) const override; + Real logJacobian(Real y) const override; + Real lowerBound() const override { return _lb; } + Real upperBound() const override { return _ub; } + GPLinkFunctionType type() const override { return GPLinkFunctionType::Logit; } + Real lb() const { return _lb; } + Real ub() const { return _ub; } + +private: + Real _lb, _ub; +}; + +} // namespace StochasticTools diff --git a/modules/stochastic_tools/include/utils/GaussianProcess.h b/modules/stochastic_tools/include/utils/GaussianProcess.h index b59951e4e4d2..2db6f9adcb18 100644 --- a/modules/stochastic_tools/include/utils/GaussianProcess.h +++ b/modules/stochastic_tools/include/utils/GaussianProcess.h @@ -13,6 +13,7 @@ #include #include "CovarianceFunctionBase.h" +#include "GPLinkFunction.h" namespace StochasticTools { @@ -89,16 +90,91 @@ class GaussianProcess }; /** * Sets up the covariance matrix given data and optimization options. - * @param training_params The training parameter values (x values) for the - * covariance matrix. - * @param training_data The training data (y values) for the inversion of the - * covariance matrix. + * If virtual derivative observations have been configured via setDerivativeConstraints(), + * builds the full augmented covariance matrix incorporating those observations. + * @param training_params The training parameter values (x values, already standardized). + * @param training_data The training data (y values, link-transformed and standardized). * @param opts The optimizer options. */ void setupCovarianceMatrix(const RealEigenMatrix & training_params, const RealEigenMatrix & training_data, const GPOptimizerOptions & opts); + /** + * Configure the output link function. Call before setupCovarianceMatrix. + * @param type Link function type (Identity, Log, or Logit) + * @param lb Lower bound (required for Log and Logit) + * @param ub Upper bound (required for Logit) + */ + void setLinkFunction(GPLinkFunctionType type, Real lb = 0.0, Real ub = 1.0); + + /** + * Apply the forward link function to all entries of a data matrix (in-place). + * Validates that all values respect the link function's domain. + */ + void applyLinkTransform(RealEigenMatrix & data) const; + + /** + * Configure virtual derivative observations for monotonicity/derivative constraints. + * Call before setupCovarianceMatrix. The virtual_params must be in the same + * standardized space as the training parameters. + * @param virtual_params Locations of virtual observations (n_virt x n_dims), standardized + * @param deriv_dims Derivative dimension for each virtual observation + * @param target_value Target derivative value (0 = zero-derivative constraint, + * positive = increasing, negative = decreasing) + * @param noise_variance Noise variance added to the derivative covariance diagonal + */ + void setDerivativeConstraints(const RealEigenMatrix & virtual_params, + const std::vector & deriv_dims, + Real target_value, + Real noise_variance); + + /** + * Configure penalty constraints on the predicted mean at specified points. + * These are evaluated and added to the NLML loss and its gradient during Adam. + * The points and bounds must be in the standardized GP output space (post-link, + * post-standardize). Call after standardization is set up. 
+ * @param penalty_points_std Constraint point locations in standardized param space (n_c x d) + * @param lower_bounds_std Lower bounds in standardized output space (use -inf to disable) + * @param upper_bounds_std Upper bounds in standardized output space (use +inf to disable) + * @param weight Penalty weight lambda + */ + void setPenaltyConstraints(const RealEigenMatrix & penalty_points_std, + const std::vector & lower_bounds_std, + const std::vector & upper_bounds_std, + Real weight); + + /// Apply forward link function to a scalar + Real applyLink(Real y) const; + /// Apply inverse link function to a scalar + Real applyInvLink(Real z) const; + /// Derivative of the inverse link at z, for delta-method uncertainty propagation + Real invLinkDeriv(Real z) const; + /// log|g'(y)| for the Jacobian correction to NLML reporting + Real logLinkJacobian(Real y) const; + /// True if a non-identity link function is set + bool hasLinkFunction() const { return _link_type != GPLinkFunctionType::Identity; } + + /// @{ Accessors for link function parameters (for serialization) + GPLinkFunctionType & linkType() { return _link_type; } + Real & linkLb() { return _link_lb; } + Real & linkUb() { return _link_ub; } + const GPLinkFunctionType & linkType() const { return _link_type; } + const Real & linkLb() const { return _link_lb; } + const Real & linkUb() const { return _link_ub; } + /// @} + + /// @{ Accessors for virtual derivative observations (for serialization and prediction) + RealEigenMatrix & virtualParams() { return _virtual_params; } + std::vector & virtualDerivDims() { return _virtual_deriv_dims; } + Real & virtualNoise() { return _virtual_noise; } + Real & virtualTarget() { return _virtual_target; } + const RealEigenMatrix & virtualParams() const { return _virtual_params; } + const std::vector & virtualDerivDims() const { return _virtual_deriv_dims; } + const Real & virtualNoise() const { return _virtual_noise; } + bool hasDerivativeConstraints() const { return _virtual_params.rows() > 0; } + /// @} + /** * Sets up the Cholesky decomposition and inverse action of the covariance matrix. * @param input The vector/matrix which right multiples the inverse of the covariance matrix. 
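
For orientation, a sketch of the call sequence a trainer follows with this API when link, derivative, and penalty constraints are all in play. This uses only the declarations above; the helper function, its arguments, and the numeric values are hypothetical, and standardization of the data is elided:

```cpp
#include "GaussianProcess.h"
#include "GPLinkFunction.h"

// Sketch: configuring a StochasticTools::GaussianProcess with constraints.
// Assumes `gp` was initialized with a covariance function and that the
// parameter/data standardizers have already been set up by the caller.
void configureConstrainedGP(StochasticTools::GaussianProcess & gp,
                            const RealEigenMatrix & params_std, // standardized x values
                            RealEigenMatrix & data,             // physical y values (modified in place)
                            const RealEigenMatrix & virt_std,   // standardized virtual points
                            const std::vector<unsigned int> & deriv_dims,
                            const StochasticTools::GaussianProcess::GPOptimizerOptions & opts)
{
  using namespace StochasticTools;

  gp.setLinkFunction(GPLinkFunctionType::Log, /*lb=*/0.0); // enforce y > 0
  gp.applyLinkTransform(data); // z = log(y - lb); must precede standardizing the data

  // ... standardize `data` here, as the trainer does ...

  gp.setDerivativeConstraints(virt_std, deriv_dims, /*target_value=*/0.01, /*noise_variance=*/1e-4);
  gp.setupCovarianceMatrix(params_std, data, opts); // assembles the augmented K
}
```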
@@ -282,6 +358,34 @@ class GaussianProcess /// To return the GP length scales for active learning std::vector _length_scales; + + // ---- Link function (serialized) ---- + /// Link function type tag (stored as enum, serialized as int) + GPLinkFunctionType _link_type = GPLinkFunctionType::Identity; + /// Lower bound for Log/Logit link + Real _link_lb = 0.0; + /// Upper bound for Logit link + Real _link_ub = 1.0; + + // ---- Virtual derivative observations (serialized; needed for prediction) ---- + /// Locations of virtual observations in standardized parameter space (n_virt x n_dims) + RealEigenMatrix _virtual_params; + /// Derivative dimension for each virtual observation + std::vector _virtual_deriv_dims; + /// Noise variance added to the derivative-derivative diagonal blocks of K_aug + Real _virtual_noise = 1e-6; + /// Target derivative value for virtual observations + Real _virtual_target = 0.0; + + // ---- Penalty constraints (not serialized; only used during Adam tuning) ---- + /// Constraint point locations in standardized parameter space (n_c x n_dims) + RealEigenMatrix _penalty_points_std; + /// Lower bounds in standardized output space (-inf disables lower constraint) + std::vector _penalty_lower_std; + /// Upper bounds in standardized output space (+inf disables upper constraint) + std::vector _penalty_upper_std; + /// Penalty weight lambda + Real _penalty_weight = 0.0; }; } // StochasticTools namespac diff --git a/modules/stochastic_tools/src/covariances/CovarianceFunctionBase.C b/modules/stochastic_tools/src/covariances/CovarianceFunctionBase.C index c7582889ba0f..f690c8e0b764 100644 --- a/modules/stochastic_tools/src/covariances/CovarianceFunctionBase.C +++ b/modules/stochastic_tools/src/covariances/CovarianceFunctionBase.C @@ -48,6 +48,51 @@ CovarianceFunctionBase::computedKdhyper(RealEigenMatrix & /*dKdhp*/, "computedKdhyper() to compute gradient."); } +void +CovarianceFunctionBase::computeCovarianceFD(RealEigenMatrix & /*K_fd*/, + const RealEigenMatrix & /*x*/, + const RealEigenMatrix & /*xd*/, + unsigned int /*dim*/) const +{ + mooseError("Derivative covariance (function vs. derivative) not implemented for '", + type(), + "'. Override computeCovarianceFD() to use derivative observations."); +} + +void +CovarianceFunctionBase::computeCovarianceDf(RealEigenMatrix & /*K_df*/, + const RealEigenMatrix & /*xd*/, + const RealEigenMatrix & /*xp*/, + unsigned int /*dim*/) const +{ + mooseError("Derivative covariance (derivative vs. function) not implemented for '", + type(), + "'. Override computeCovarianceDf() to use derivative observations."); +} + +void +CovarianceFunctionBase::computeCovarianceDD(RealEigenMatrix & /*K_dd*/, + const RealEigenMatrix & /*xd*/, + const RealEigenMatrix & /*xdp*/, + unsigned int /*dim_i*/, + unsigned int /*dim_j*/) const +{ + mooseError("Second-derivative covariance not implemented for '", + type(), + "'. 
Override computeCovarianceDD() to use derivative observations with this kernel."); +} + +void +CovarianceFunctionBase::computedKdhyper_cross(RealEigenMatrix & dKdhp, + const RealEigenMatrix & x, + const RealEigenMatrix & xc, + const std::string & /*hp_name*/, + unsigned int /*ind*/) const +{ + // Default: zero matrix — penalty gradient is not added for this kernel type + dKdhp.setZero(x.rows(), xc.rows()); +} + const Real & CovarianceFunctionBase::addRealHyperParameter(const std::string & name, const Real value, diff --git a/modules/stochastic_tools/src/covariances/SquaredExponentialCovariance.C b/modules/stochastic_tools/src/covariances/SquaredExponentialCovariance.C index de043f58e788..bee8df18d02d 100644 --- a/modules/stochastic_tools/src/covariances/SquaredExponentialCovariance.C +++ b/modules/stochastic_tools/src/covariances/SquaredExponentialCovariance.C @@ -139,3 +139,149 @@ SquaredExponentialCovariance::computedKdlf(RealEigenMatrix & K, } } } + +void +SquaredExponentialCovariance::computeCovarianceFD(RealEigenMatrix & K_fd, + const RealEigenMatrix & x, + const RealEigenMatrix & xd, + unsigned int dim) const +{ + // K_fd[i,j] = Cov[f(x_i), df(xd_j)/dx'_{dim}] + // = dK(x_i, xd_j)/dx'_{dim} + // = K(x_i,xd_j) * (x_{i,dim} - xd_{j,dim}) / ell_dim^2 + const unsigned int n_x = x.rows(); + const unsigned int n_xd = xd.rows(); + const unsigned int n_params = x.cols(); + mooseAssert(dim < n_params, "dim out of range for computeCovarianceFD"); + mooseAssert((unsigned)xd.cols() == n_params, "x and xd must have same number of columns"); + + K_fd.resize(n_x, n_xd); + for (unsigned int ii = 0; ii < n_x; ++ii) + { + for (unsigned int jj = 0; jj < n_xd; ++jj) + { + Real r_sq = 0.0; + for (unsigned int kk = 0; kk < n_params; ++kk) + r_sq += std::pow((x(ii, kk) - xd(jj, kk)) / _length_factor[kk], 2); + const Real k_val = _sigma_f_squared * std::exp(-r_sq / 2.0); + K_fd(ii, jj) = + k_val * (x(ii, dim) - xd(jj, dim)) / (_length_factor[dim] * _length_factor[dim]); + } + } +} + +void +SquaredExponentialCovariance::computeCovarianceDf(RealEigenMatrix & K_df, + const RealEigenMatrix & xd, + const RealEigenMatrix & xp, + unsigned int dim) const +{ + // K_df[i,j] = Cov[df(xd_i)/dx_{dim}, f(xp_j)] + // = dK(xd_i, xp_j)/dx_{d,dim} + // = K(xd_i,xp_j) * (xp_{j,dim} - xd_{i,dim}) / ell_dim^2 + // Note: K_df = K_fd^T (transpose of computeCovarianceFD with swapped args) + RealEigenMatrix K_fd; + computeCovarianceFD(K_fd, xp, xd, dim); + K_df = K_fd.transpose(); +} + +void +SquaredExponentialCovariance::computeCovarianceDD(RealEigenMatrix & K_dd, + const RealEigenMatrix & xd, + const RealEigenMatrix & xdp, + unsigned int dim_i, + unsigned int dim_j) const +{ + // K_dd[i,j] = d^2 K(xd_i, xdp_j) / (dx_{dim_i} dx'_{dim_j}) + // + // For SE kernel: + // if dim_i == dim_j (call the dimension k): + // = K(xd_i,xdp_j) * [1/ell_k^2 - (xd_{i,k}-xdp_{j,k})^2/ell_k^4] + // if dim_i != dim_j: + // = K(xd_i,xdp_j) * [-(xd_{i,ki}-xdp_{j,ki})*(xd_{i,kj}-xdp_{j,kj})/(ell_ki^2*ell_kj^2)] + const unsigned int n_i = xd.rows(); + const unsigned int n_j = xdp.rows(); + const unsigned int n_params = xd.cols(); + mooseAssert(dim_i < n_params, "dim_i out of range for computeCovarianceDD"); + mooseAssert(dim_j < n_params, "dim_j out of range for computeCovarianceDD"); + mooseAssert((unsigned)xdp.cols() == n_params, "xd and xdp must have same number of columns"); + + K_dd.resize(n_i, n_j); + const Real ell_i_sq = _length_factor[dim_i] * _length_factor[dim_i]; + const Real ell_j_sq = _length_factor[dim_j] * _length_factor[dim_j]; + + for 
(unsigned int ii = 0; ii < n_i; ++ii) + { + for (unsigned int jj = 0; jj < n_j; ++jj) + { + Real r_sq = 0.0; + for (unsigned int kk = 0; kk < n_params; ++kk) + r_sq += std::pow((xd(ii, kk) - xdp(jj, kk)) / _length_factor[kk], 2); + const Real k_val = _sigma_f_squared * std::exp(-r_sq / 2.0); + + const Real d_i = xd(ii, dim_i) - xdp(jj, dim_i); + const Real d_j = xd(ii, dim_j) - xdp(jj, dim_j); + + if (dim_i == dim_j) + K_dd(ii, jj) = k_val * (1.0 / ell_i_sq - d_i * d_j / (ell_i_sq * ell_j_sq)); + else + K_dd(ii, jj) = k_val * (-d_i * d_j / (ell_i_sq * ell_j_sq)); + } + } +} + +void +SquaredExponentialCovariance::computedKdhyper_cross(RealEigenMatrix & dKdhp, + const RealEigenMatrix & x, + const RealEigenMatrix & xc, + const std::string & hyper_param_name, + unsigned int ind) const +{ + // Derivative of K(x_i, xc) w.r.t. each hyperparameter. + // Used for the approximate penalty constraint gradient. + if (name().length() + 1 > hyper_param_name.length()) + { + dKdhp.setZero(x.rows(), xc.rows()); + return; + } + + const std::string name_without_prefix = hyper_param_name.substr(name().length() + 1); + + if (name_without_prefix == "noise_variance") + { + // Cross-covariance does not depend on noise variance + dKdhp.setZero(x.rows(), xc.rows()); + return; + } + + if (name_without_prefix == "signal_variance") + { + // d(K(x_i,xc))/d(sigma_f^2) = exp(-r^2/2) = K(x_i,xc)/sigma_f^2 + SquaredExponentialFunction(dKdhp, x, xc, _length_factor, 1.0, 0.0, false); + return; + } + + if (name_without_prefix == "length_factor") + { + // d(K(x_i, xc)) / d(ell_ind) = K(x_i, xc) * (x_{i,ind} - xc_{ind})^2 / ell_ind^3 + const unsigned int n_x = x.rows(); + const unsigned int n_xc = xc.rows(); + const unsigned int n_params = x.cols(); + dKdhp.resize(n_x, n_xc); + for (unsigned int ii = 0; ii < n_x; ++ii) + { + for (unsigned int jj = 0; jj < n_xc; ++jj) + { + Real r_sq = 0.0; + for (unsigned int kk = 0; kk < n_params; ++kk) + r_sq += std::pow((x(ii, kk) - xc(jj, kk)) / _length_factor[kk], 2); + const Real k_val = _sigma_f_squared * std::exp(-r_sq / 2.0); + dKdhp(ii, jj) = + k_val * std::pow(x(ii, ind) - xc(jj, ind), 2) / std::pow(_length_factor[ind], 3); + } + } + return; + } + + dKdhp.setZero(x.rows(), xc.rows()); +} diff --git a/modules/stochastic_tools/src/surrogates/GaussianProcessSurrogate.C b/modules/stochastic_tools/src/surrogates/GaussianProcessSurrogate.C index d9343d6f4ea0..1cda9cc0b2e5 100644 --- a/modules/stochastic_tools/src/surrogates/GaussianProcessSurrogate.C +++ b/modules/stochastic_tools/src/surrogates/GaussianProcessSurrogate.C @@ -75,6 +75,9 @@ GaussianProcessSurrogate::evaluate(const std::vector & x, "Number of parameters provided for evaluation does not match number of parameters " "used for training."); const unsigned int n_outputs = _gp.getCovarFunction().numOutputs(); + const unsigned int n_train = _training_params.rows(); + const unsigned int n_virt = _gp.virtualParams().rows(); + const unsigned int n_total = n_train + n_virt; y = std::vector(n_outputs, 0.0); std = std::vector(n_outputs, 0.0); @@ -85,28 +88,59 @@ GaussianProcessSurrogate::evaluate(const std::vector & x, _gp.getParamStandardizer().getStandardized(test_points); - RealEigenMatrix K_train_test(_training_params.rows() * n_outputs, n_outputs); + // Build K_train_test: (n_total x n_outputs) — extended with derivative rows if needed + RealEigenMatrix K_train_test(n_total * n_outputs, n_outputs); - _gp.getCovarFunction().computeCovarianceMatrix( - K_train_test, _training_params, test_points, false); + // Standard rows: 
Cov[f(X_train), f(x*)] + RealEigenMatrix K_ff_test(n_train * n_outputs, n_outputs); + _gp.getCovarFunction().computeCovarianceMatrix(K_ff_test, _training_params, test_points, false); + K_train_test.topRows(n_train * n_outputs) = K_ff_test; + + // Derivative rows: Cov[df(x_d^j)/dx_{k_j}, f(x*)] — only for single-output GP + if (n_virt > 0) + { + for (unsigned int j = 0; j < n_virt; ++j) + { + RealEigenMatrix xd_j = _gp.virtualParams().row(j); + RealEigenMatrix K_df_j(1, 1); + _gp.getCovarFunction().computeCovarianceDf( + K_df_j, xd_j, test_points, _gp.virtualDerivDims()[j]); + K_train_test(n_train + j, 0) = K_df_j(0, 0); + } + } + + // Self-covariance at test point RealEigenMatrix K_test(n_outputs, n_outputs); _gp.getCovarFunction().computeCovarianceMatrix(K_test, test_points, test_points, true); - // Compute the predicted mean value (centered) + // Predicted mean in standardized z-space RealEigenMatrix pred_value = (K_train_test.transpose() * _gp.getKResultsSolve()).transpose(); - // De-center/scale the value and store for return + + // De-standardize to z-space (post-link, pre-inverse-link) _gp.getDataStandardizer().getDestandardized(pred_value); + // Posterior variance: K_** - k_*^T K_aug^{-1} k_* RealEigenMatrix pred_var = K_test - (K_train_test.transpose() * _gp.getKCholeskyDecomp().solve(K_train_test)); - // Vairance computed, take sqrt for standard deviation, scale up by training data std and store RealEigenMatrix std_dev_mat = pred_var.array().sqrt(); _gp.getDataStandardizer().getDescaled(std_dev_mat); for (const auto output_i : make_range(n_outputs)) { - y[output_i] = pred_value(0, output_i); - std[output_i] = std_dev_mat(output_i, output_i); + const Real mu_z = pred_value(0, output_i); + const Real sigma_z = std_dev_mat(output_i, output_i); + + if (_gp.hasLinkFunction()) + { + // Apply inverse link to mean; propagate uncertainty via delta method + y[output_i] = _gp.applyInvLink(mu_z); + std[output_i] = std::abs(_gp.invLinkDeriv(mu_z)) * sigma_z; + } + else + { + y[output_i] = mu_z; + std[output_i] = sigma_z; + } } } diff --git a/modules/stochastic_tools/src/trainers/GaussianProcessTrainer.C b/modules/stochastic_tools/src/trainers/GaussianProcessTrainer.C index 7835e7f3d8c8..c20682ac27cd 100644 --- a/modules/stochastic_tools/src/trainers/GaussianProcessTrainer.C +++ b/modules/stochastic_tools/src/trainers/GaussianProcessTrainer.C @@ -18,6 +18,7 @@ #include "libmesh/petsc_matrix.h" #include +#include registerMooseObject("StochasticToolsApp", GaussianProcessTrainer); @@ -33,7 +34,6 @@ GaussianProcessTrainer::validParams() "standardize_params", true, "Standardize (center and scale) training parameters (x values)"); params.addParam( "standardize_data", true, "Standardize (center and scale) training data (y values)"); - // Already preparing to use Adam here params.addParam("num_iters", 1000, "Tolerance value for Adam optimization"); params.addParam("batch_size", 0, "The batch size for Adam optimization"); params.addParam("learning_rate", 0.001, "The learning rate for Adam optimization"); @@ -45,6 +45,67 @@ GaussianProcessTrainer::validParams() "Select hyperparameters to be tuned"); params.addParam>("tuning_min", "Minimum allowable tuning value"); params.addParam>("tuning_max", "Maximum allowable tuning value"); + + // ---- Link function parameters ---- + MooseEnum link_function_options("identity log logit", "identity"); + params.addParam( + "link_function", + link_function_options, + "Output link function enforcing inequality constraints. 
'identity' (default): no constraint; " + "'log': enforces y > link_lower_bound (e.g. positivity); " + "'logit': enforces link_lower_bound < y < link_upper_bound."); + params.addParam( + "link_lower_bound", 0.0, "Lower bound for the 'log' or 'logit' link function."); + params.addParam("link_upper_bound", 1.0, "Upper bound for the 'logit' link function."); + + // ---- Derivative observation (monotonicity constraint) parameters ---- + params.addParam>( + "monotone_constraint_points", + "Locations of virtual derivative observations as a flat row-major vector " + "(n_points x n_dims). Used to enforce monotonicity or zero-derivative constraints. " + "Requires a covariance kernel that supports derivative covariances (e.g. " + "SquaredExponentialCovariance)."); + params.addParam>( + "monotone_constraint_dims", + "Input dimension index for each virtual derivative observation. Must have the same " + "number of entries as the number of points in monotone_constraint_points."); + MooseEnum constraint_type_options("zero increasing decreasing", "zero"); + params.addParam( + "monotone_constraint_type", + constraint_type_options, + "Target derivative type: 'zero' (df/dx_k ≈ 0), 'increasing' (df/dx_k > 0), " + "or 'decreasing' (df/dx_k < 0)."); + params.addParam("derivative_target_value", + 0.0, + "Magnitude of the target derivative for 'increasing' or 'decreasing' " + "constraints, in the standardized output space. Default 0 means the " + "constraint softly pins the derivative to zero."); + params.addParam( + "derivative_noise_variance", + 1e-4, + "Noise variance (sigma_d^2) added to the derivative-derivative diagonal blocks of the " + "augmented covariance matrix. Must be > 0. Smaller values enforce constraints more strictly " + "but may cause numerical issues."); + + // ---- Penalty constraint parameters ---- + params.addParam>( + "penalty_constraint_points", + "Locations of penalty constraint evaluation points as a flat row-major vector " + "(n_points x n_dims). The penalty is added to the NLML loss and gradient during " + "hyperparameter tuning when predicted means violate the specified bounds."); + params.addParam>( + "penalty_constraint_lower_bounds", + "Lower bounds on the GP output (in physical space, before link function) at each " + "penalty constraint point. Use -1e30 or similar to disable a lower bound."); + params.addParam>( + "penalty_constraint_upper_bounds", + "Upper bounds on the GP output (in physical space, before link function) at each " + "penalty constraint point. 
Use 1e30 or similar to disable an upper bound."); + params.addParam( + "penalty_weight", + 1.0, + "Weight lambda for the penalty constraint term: lambda * sum_c max(0, violation_c)^2."); + return params; } @@ -136,20 +197,126 @@ GaussianProcessTrainer::postTrain() _training_data(ii, jj) = _data_buffer[ii][jj]; } + // ---- Link function ---- + const std::string link_str = getParam("link_function"); + StochasticTools::GPLinkFunctionType link_type = StochasticTools::GPLinkFunctionType::Identity; + if (link_str == "log") + link_type = StochasticTools::GPLinkFunctionType::Log; + else if (link_str == "logit") + link_type = StochasticTools::GPLinkFunctionType::Logit; + const Real link_lb = getParam("link_lower_bound"); + const Real link_ub = getParam("link_upper_bound"); + if (link_str == "logit" && link_ub <= link_lb) + paramError("link_upper_bound", + "link_upper_bound must be strictly greater than link_lower_bound for logit link."); + _gp.setLinkFunction(link_type, link_lb, link_ub); + + // Apply link function to training data BEFORE standardization + _gp.applyLinkTransform(_training_data); + // Standardize (center and scale) training params if (_standardize_params) _gp.standardizeParameters(_training_params); - // if not standardizing data set mean=0, std=1 for use in surrogate else _gp.paramStandardizer().set(0, 1, _n_dims); - // Standardize (center and scale) training data + // Standardize (center and scale) training data (in link-transformed z-space) if (_standardize_data) _gp.standardizeData(_training_data); - // if not standardizing data set mean=0, std=1 for use in surrogate else _gp.dataStandardizer().set(0, 1, _n_outputs); + // ---- Derivative constraint (virtual observations) ---- + if (isParamValid("monotone_constraint_points")) + { + const auto & raw_pts = getParam>("monotone_constraint_points"); + if (!isParamValid("monotone_constraint_dims")) + paramError("monotone_constraint_dims", + "monotone_constraint_dims is required when monotone_constraint_points is set."); + const auto & dims = getParam>("monotone_constraint_dims"); + const unsigned int n_virt = dims.size(); + if (raw_pts.size() != n_virt * (unsigned)_n_dims) + paramError("monotone_constraint_points", + "monotone_constraint_points must contain n_points * n_dims = ", + n_virt, + " * ", + _n_dims, + " = ", + n_virt * _n_dims, + " values. Got ", + raw_pts.size(), + "."); + + // Fill virtual_params in physical space, then standardize + RealEigenMatrix virtual_params(n_virt, _n_dims); + for (unsigned int i = 0; i < n_virt; ++i) + for (unsigned int j = 0; j < (unsigned)_n_dims; ++j) + virtual_params(i, j) = raw_pts[i * _n_dims + j]; + + _gp.getParamStandardizer().getStandardized(virtual_params); + + const std::string ctype = getParam("monotone_constraint_type"); + const Real magnitude = getParam("derivative_target_value"); + Real target_value = 0.0; + if (ctype == "increasing") + target_value = magnitude; + else if (ctype == "decreasing") + target_value = -magnitude; + + _gp.setDerivativeConstraints( + virtual_params, dims, target_value, getParam("derivative_noise_variance")); + } + + // ---- Penalty constraints ---- + if (isParamValid("penalty_constraint_points")) + { + const auto & raw_pts = getParam>("penalty_constraint_points"); + const auto & lower_raw = isParamValid("penalty_constraint_lower_bounds") + ? getParam>("penalty_constraint_lower_bounds") + : std::vector{}; + const auto & upper_raw = isParamValid("penalty_constraint_upper_bounds") + ? 
getParam>("penalty_constraint_upper_bounds") + : std::vector{}; + + const unsigned int n_penalty = lower_raw.empty() ? upper_raw.size() : lower_raw.size(); + if (!lower_raw.empty() && !upper_raw.empty() && lower_raw.size() != upper_raw.size()) + paramError("penalty_constraint_lower_bounds", + "penalty_constraint_lower_bounds and penalty_constraint_upper_bounds must have " + "the same number of entries."); + if (raw_pts.size() != n_penalty * (unsigned)_n_dims) + paramError("penalty_constraint_points", + "penalty_constraint_points must have n_penalty * n_dims entries."); + + RealEigenMatrix penalty_pts(n_penalty, _n_dims); + for (unsigned int i = 0; i < n_penalty; ++i) + for (unsigned int j = 0; j < (unsigned)_n_dims; ++j) + penalty_pts(i, j) = raw_pts[i * _n_dims + j]; + + _gp.getParamStandardizer().getStandardized(penalty_pts); + + // Transform bounds through link function then standardize into z-space + const Real neg_inf = -std::numeric_limits::max(); + const Real pos_inf = std::numeric_limits::max(); + std::vector lower_std(n_penalty, neg_inf), upper_std(n_penalty, pos_inf); + const Real z_mean = _gp.getDataStandardizer().getMean()[0]; + const Real z_std_val = _gp.getDataStandardizer().getStdDev()[0]; + for (unsigned int c = 0; c < n_penalty; ++c) + { + if (!lower_raw.empty() && lower_raw[c] > neg_inf) + { + const Real z_lb = _gp.applyLink(lower_raw[c]); + lower_std[c] = (z_lb - z_mean) / z_std_val; + } + if (!upper_raw.empty() && upper_raw[c] < pos_inf) + { + const Real z_ub = _gp.applyLink(upper_raw[c]); + upper_std[c] = (z_ub - z_mean) / z_std_val; + } + } + + _gp.setPenaltyConstraints(penalty_pts, lower_std, upper_std, getParam("penalty_weight")); + } + // Setup the covariance _gp.setupCovarianceMatrix(_training_params, _training_data, _optimization_opts); } diff --git a/modules/stochastic_tools/src/utils/GPLinkFunction.C b/modules/stochastic_tools/src/utils/GPLinkFunction.C new file mode 100644 index 000000000000..e322df2a23ba --- /dev/null +++ b/modules/stochastic_tools/src/utils/GPLinkFunction.C @@ -0,0 +1,107 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#include "GPLinkFunction.h" +#include "MooseError.h" + +#include + +namespace StochasticTools +{ + +// LogLinkFunction ----------------------------------------------------------------- + +Real +LogLinkFunction::forward(Real y) const +{ + return std::log(y - _lb); +} + +Real +LogLinkFunction::inverse(Real z) const +{ + return std::exp(z) + _lb; +} + +Real +LogLinkFunction::inverseDeriv(Real z) const +{ + return std::exp(z); +} + +Real +LogLinkFunction::logJacobian(Real y) const +{ + // log|g'(y)| = log|1/(y-lb)| = -log(y-lb) + return -std::log(y - _lb); +} + +// LogitLinkFunction --------------------------------------------------------------- + +Real +LogitLinkFunction::forward(Real y) const +{ + const Real p = (y - _lb) / (_ub - _lb); + return std::log(p / (1.0 - p)); +} + +Real +LogitLinkFunction::inverse(Real z) const +{ + return _lb + (_ub - _lb) / (1.0 + std::exp(-z)); +} + +Real +LogitLinkFunction::inverseDeriv(Real z) const +{ + const Real sig = 1.0 / (1.0 + std::exp(-z)); + return (_ub - _lb) * sig * (1.0 - sig); +} + +Real +LogitLinkFunction::logJacobian(Real y) const +{ + // g'(y) = (ub-lb) / ((y-lb)*(ub-y)) + // log|g'(y)| = log(ub-lb) - 
log(y-lb) - log(ub-y) + return std::log(_ub - _lb) - std::log(y - _lb) - std::log(_ub - y); +} + +// Factory ------------------------------------------------------------------------- + +std::unique_ptr +GPLinkFunction::build(GPLinkFunctionType type, Real lb, Real ub) +{ + switch (type) + { + case GPLinkFunctionType::Identity: + return std::make_unique(); + case GPLinkFunctionType::Log: + return std::make_unique(lb); + case GPLinkFunctionType::Logit: + return std::make_unique(lb, ub); + default: + mooseError("Unknown GPLinkFunctionType"); + } +} + +std::unique_ptr +GPLinkFunction::buildFromString(const std::string & type_name, Real lb, Real ub) +{ + if (type_name == "identity") + return build(GPLinkFunctionType::Identity, lb, ub); + if (type_name == "log") + return build(GPLinkFunctionType::Log, lb, ub); + if (type_name == "logit") + return build(GPLinkFunctionType::Logit, lb, ub); + mooseError("Unknown link function type '", + type_name, + "'. Supported options: 'identity', 'log', 'logit'."); +} + +} // namespace StochasticTools diff --git a/modules/stochastic_tools/src/utils/GaussianProcess.C b/modules/stochastic_tools/src/utils/GaussianProcess.C index 53eb4c1354e1..c940023161c9 100644 --- a/modules/stochastic_tools/src/utils/GaussianProcess.C +++ b/modules/stochastic_tools/src/utils/GaussianProcess.C @@ -17,6 +17,7 @@ #include "libmesh/petsc_matrix.h" #include +#include #include "MooseRandom.h" #include "Shuffle.h" @@ -43,7 +44,114 @@ GaussianProcess::GPOptimizerOptions::GPOptimizerOptions(const bool show_every_nt { } -GaussianProcess::GaussianProcess() {} +GaussianProcess::GaussianProcess() : _virtual_params(0, 0), _penalty_points_std(0, 0) {} + +// ---- Link function ------------------------------------------------------- + +void +GaussianProcess::setLinkFunction(GPLinkFunctionType type, Real lb, Real ub) +{ + _link_type = type; + _link_lb = lb; + _link_ub = ub; +} + +void +GaussianProcess::applyLinkTransform(RealEigenMatrix & data) const +{ + if (_link_type == GPLinkFunctionType::Identity) + return; + + auto link = GPLinkFunction::build(_link_type, _link_lb, _link_ub); + for (int ii = 0; ii < data.rows(); ++ii) + for (int jj = 0; jj < data.cols(); ++jj) + { + const Real y = data(ii, jj); + if (y <= link->lowerBound()) + mooseError("Training data value ", + y, + " is at or below the link function lower bound ", + link->lowerBound(), + ". Adjust the link lower bound or check training data."); + if (y >= link->upperBound()) + mooseError("Training data value ", + y, + " is at or above the link function upper bound ", + link->upperBound(), + ". 
Adjust the link upper bound or check training data."); + data(ii, jj) = link->forward(y); + } +} + +Real +GaussianProcess::applyLink(Real y) const +{ + if (_link_type == GPLinkFunctionType::Identity) + return y; + return GPLinkFunction::build(_link_type, _link_lb, _link_ub)->forward(y); +} + +Real +GaussianProcess::applyInvLink(Real z) const +{ + if (_link_type == GPLinkFunctionType::Identity) + return z; + return GPLinkFunction::build(_link_type, _link_lb, _link_ub)->inverse(z); +} + +Real +GaussianProcess::invLinkDeriv(Real z) const +{ + if (_link_type == GPLinkFunctionType::Identity) + return 1.0; + return GPLinkFunction::build(_link_type, _link_lb, _link_ub)->inverseDeriv(z); +} + +Real +GaussianProcess::logLinkJacobian(Real y) const +{ + if (_link_type == GPLinkFunctionType::Identity) + return 0.0; + return GPLinkFunction::build(_link_type, _link_lb, _link_ub)->logJacobian(y); +} + +// ---- Constraint setters -------------------------------------------------- + +void +GaussianProcess::setDerivativeConstraints(const RealEigenMatrix & virtual_params, + const std::vector & deriv_dims, + Real target_value, + Real noise_variance) +{ + if ((unsigned)virtual_params.rows() != deriv_dims.size()) + mooseError("virtual_params rows (", + virtual_params.rows(), + ") must match deriv_dims size (", + deriv_dims.size(), + ")."); + if (noise_variance <= 0.0) + mooseError("derivative_noise_variance must be > 0 to keep the augmented covariance matrix " + "positive-definite."); + _virtual_params = virtual_params; + _virtual_deriv_dims = deriv_dims; + _virtual_target = target_value; + _virtual_noise = noise_variance; +} + +void +GaussianProcess::setPenaltyConstraints(const RealEigenMatrix & penalty_points_std, + const std::vector & lower_bounds_std, + const std::vector & upper_bounds_std, + Real weight) +{ + if ((unsigned)penalty_points_std.rows() != lower_bounds_std.size() || + (unsigned)penalty_points_std.rows() != upper_bounds_std.size()) + mooseError("penalty_constraint_points rows must match the number of lower/upper bounds."); + _penalty_points_std = penalty_points_std; + _penalty_lower_std = lower_bounds_std; + _penalty_upper_std = upper_bounds_std; + _penalty_weight = weight; +} void GaussianProcess::initialize(CovarianceFunctionBase * covariance_function, @@ -71,6 +179,10 @@ GaussianProcess::setupCovarianceMatrix(const RealEigenMatrix & training_params, const RealEigenMatrix & training_data, const GPOptimizerOptions & opts) { + if (_virtual_params.rows() > 0 && _num_outputs > 1) + mooseError("Derivative observations (monotonicity constraints) are not supported for " + "multi-output GPs. Use a single-output covariance function."); + const bool batch_decision = opts.batch_size > 0 && (opts.batch_size <= training_params.rows()); _batch_size = batch_decision ? 
opts.batch_size : training_params.rows(); _K.resize(_num_outputs * _batch_size, _num_outputs * _batch_size); @@ -78,15 +190,64 @@ GaussianProcess::setupCovarianceMatrix(const RealEigenMatrix & training_params, if (_tuning_data.size()) tuneHyperParamsAdam(training_params, training_data, opts); - _K.resize(training_params.rows() * training_data.cols(), - training_params.rows() * training_data.cols()); - _covariance_function->computeCovarianceMatrix(_K, training_params, training_params, true); + const unsigned int n_train = training_params.rows(); + const unsigned int n_outputs = training_data.cols(); + const unsigned int n_virt = _virtual_params.rows(); - RealEigenMatrix flattened_data = - training_data.reshaped(training_params.rows() * training_data.cols(), 1); + if (n_virt == 0) + { + _K.resize(n_train * n_outputs, n_train * n_outputs); + _covariance_function->computeCovarianceMatrix(_K, training_params, training_params, true); + RealEigenMatrix flattened_data = training_data.reshaped(n_train * n_outputs, 1); + setupStoredMatrices(flattened_data); + } + else + { + // Augmented covariance matrix incorporating virtual derivative observations. + // K_aug = [ K_ff + sigma_n^2*I K_fd ] + // [ K_df K_dd + sigma_d^2*I ] + const unsigned int n_total = n_train + n_virt; + _K.resize(n_total, n_total); + _K.setZero(); + + // Top-left: K(X_train, X_train) + sigma_n^2*I (noise added by is_self_covariance=true) + RealEigenMatrix K_ff(n_train, n_train); + _covariance_function->computeCovarianceMatrix(K_ff, training_params, training_params, true); + _K.topLeftCorner(n_train, n_train) = K_ff; + + // Off-diagonal: K_fd and its transpose K_df + for (unsigned int j = 0; j < n_virt; ++j) + { + RealEigenMatrix xd_j = _virtual_params.row(j); + RealEigenMatrix K_fd_j(n_train, 1); + _covariance_function->computeCovarianceFD( + K_fd_j, training_params, xd_j, _virtual_deriv_dims[j]); + _K.block(0, n_train + j, n_train, 1) = K_fd_j; + _K.block(n_train + j, 0, 1, n_train) = K_fd_j.transpose(); + } - // Compute the Cholesky decomposition and inverse action of the covariance matrix - setupStoredMatrices(flattened_data); + // Bottom-right: K_dd + sigma_d^2*I + for (unsigned int i = 0; i < n_virt; ++i) + { + RealEigenMatrix xd_i = _virtual_params.row(i); + for (unsigned int j = 0; j < n_virt; ++j) + { + RealEigenMatrix xd_j = _virtual_params.row(j); + RealEigenMatrix K_dd_ij(1, 1); + _covariance_function->computeCovarianceDD( + K_dd_ij, xd_i, xd_j, _virtual_deriv_dims[i], _virtual_deriv_dims[j]); + _K(n_train + i, n_train + j) = K_dd_ij(0, 0); + } + _K(n_train + i, n_train + i) += _virtual_noise; + } + + // Augmented RHS: [y_train; v_virtual_target * ones] + RealEigenMatrix augmented_data(n_total, 1); + augmented_data.topRows(n_train) = training_data.reshaped(n_train, 1); + augmented_data.bottomRows(n_virt).setConstant(_virtual_target); + + setupStoredMatrices(augmented_data); + } _covariance_function->buildHyperParamMap(_hyperparam_map, _hyperparam_vec_map); } @@ -255,6 +416,27 @@ GaussianProcess::getLoss(RealEigenMatrix & inputs, RealEigenMatrix & outputs) log_likelihood += -std::log(_K.determinant()); log_likelihood -= _batch_size * std::log(2 * M_PI); log_likelihood = -log_likelihood / 2; + + // Add penalty constraint terms (evaluated at constraint points using current batch model) + const unsigned int n_penalty = _penalty_points_std.rows(); + if (n_penalty > 0 && _penalty_weight > 0.0) + { + for (unsigned int c = 0; c < n_penalty; ++c) + { + RealEigenMatrix xc = _penalty_points_std.row(c); + RealEigenMatrix 
+      RealEigenMatrix K_star(_batch_size, 1);
+      _covariance_function->computeCovarianceMatrix(K_star, inputs, xc, false);
+      const Real mu_c = (K_star.transpose() * _K_results_solve)(0, 0);
+
+      const Real lb = _penalty_lower_std[c];
+      const Real ub = _penalty_upper_std[c];
+      if (lb > -std::numeric_limits<Real>::max() && mu_c < lb)
+        log_likelihood += _penalty_weight * std::pow(lb - mu_c, 2);
+      if (ub < std::numeric_limits<Real>::max() && mu_c > ub)
+        log_likelihood += _penalty_weight * std::pow(mu_c - ub, 2);
+    }
+  }
+
   return log_likelihood;
 }
 
@@ -263,8 +445,9 @@ GaussianProcess::getGradient(RealEigenMatrix & inputs) const
 {
   RealEigenMatrix dKdhp(_batch_size, _batch_size);
   RealEigenMatrix alpha = _K_results_solve * _K_results_solve.transpose();
-  std::vector<Real> grad_vec;
-  grad_vec.resize(_num_tunable);
+  std::vector<Real> grad_vec(_num_tunable, 0.0);
+
+  // NLML gradient (existing computation)
   for (auto iter = _tuning_data.begin(); iter != _tuning_data.end(); ++iter)
   {
     std::string hyper_param_name = iter->first;
@@ -278,6 +461,57 @@ GaussianProcess::getGradient(RealEigenMatrix & inputs) const
       grad_vec[global_index] = -tmp.trace() / 2.0;
     }
   }
+
+  // Penalty gradient contribution (approximate: only the dK_star/dtheta term)
+  //   d(penalty)/d(theta_i) ≈ sum_c [violation_c * (-d(mu_c)/d(theta_i))]
+  // where d(mu_c)/d(theta_i) ≈ (dK_star/d(theta_i))^T * alpha_solve
+  const unsigned int n_penalty = _penalty_points_std.rows();
+  if (n_penalty > 0 && _penalty_weight > 0.0)
+  {
+    RealEigenMatrix dK_star_dhp(_batch_size, 1);
+    for (unsigned int c = 0; c < n_penalty; ++c)
+    {
+      RealEigenMatrix xc = _penalty_points_std.row(c);
+      RealEigenMatrix K_star(_batch_size, 1);
+      _covariance_function->computeCovarianceMatrix(K_star, inputs, xc, false);
+      const Real mu_c = (K_star.transpose() * _K_results_solve)(0, 0);
+
+      const Real lb = _penalty_lower_std[c];
+      const Real ub = _penalty_upper_std[c];
+
+      Real slack = 0.0;
+      int sign = 0;
+      if (lb > -std::numeric_limits<Real>::max() && mu_c < lb)
+      {
+        slack = lb - mu_c;
+        sign = -1; // penalty increases when mu_c decreases → gradient pushes mu_c up
+      }
+      else if (ub < std::numeric_limits<Real>::max() && mu_c > ub)
+      {
+        slack = mu_c - ub;
+        sign = 1; // penalty increases when mu_c increases → gradient pushes mu_c down
+      }
+
+      if (sign == 0)
+        continue;
+
+      for (auto iter = _tuning_data.begin(); iter != _tuning_data.end(); ++iter)
+      {
+        const auto & hp_name = iter->first;
+        const auto first_index = std::get<0>(iter->second);
+        const auto num_entries = std::get<1>(iter->second);
+        for (unsigned int ii = 0; ii < num_entries; ++ii)
+        {
+          const auto global_index = first_index + ii;
+          _covariance_function->computedKdhyper_cross(dK_star_dhp, inputs, xc, hp_name, ii);
+          const Real d_mu_c = (dK_star_dhp.transpose() * _K_results_solve)(0, 0);
+          // d(penalty)/d(theta_i) = 2*lambda*slack * sign * d(mu_c)/d(theta_i)
+          grad_vec[global_index] += 2.0 * _penalty_weight * slack * sign * d_mu_c;
+        }
+      }
+    }
+  }
+
   return grad_vec;
 }
 
@@ -361,6 +595,16 @@ dataStore(std::ostream & stream, StochasticTools::GaussianProcess & gp_utils, vo
   dataStore(stream, gp_utils.KCholeskyDecomp(), context);
   dataStore(stream, gp_utils.paramStandardizer(), context);
   dataStore(stream, gp_utils.dataStandardizer(), context);
+  // Link function
+  int link_type_int = static_cast<int>(gp_utils.linkType());
+  dataStore(stream, link_type_int, context);
+  dataStore(stream, gp_utils.linkLb(), context);
+  dataStore(stream, gp_utils.linkUb(), context);
+  // Virtual derivative observations
+  dataStore(stream, gp_utils.virtualParams(), context);
+  dataStore(stream, gp_utils.virtualDerivDims(), context);
+  dataStore(stream, gp_utils.virtualNoise(), context);
+  dataStore(stream, gp_utils.virtualTarget(), context);
 }
 
 template <>
@@ -379,4 +623,15 @@ dataLoad(std::istream & stream, StochasticTools::GaussianProcess & gp_utils, voi
   dataLoad(stream, gp_utils.KCholeskyDecomp(), context);
   dataLoad(stream, gp_utils.paramStandardizer(), context);
   dataLoad(stream, gp_utils.dataStandardizer(), context);
+  // Link function
+  int link_type_int;
+  dataLoad(stream, link_type_int, context);
+  gp_utils.linkType() = static_cast<StochasticTools::GPLinkFunctionType>(link_type_int);
+  dataLoad(stream, gp_utils.linkLb(), context);
+  dataLoad(stream, gp_utils.linkUb(), context);
+  // Virtual derivative observations
+  dataLoad(stream, gp_utils.virtualParams(), context);
+  dataLoad(stream, gp_utils.virtualDerivDims(), context);
+  dataLoad(stream, gp_utils.virtualNoise(), context);
+  dataLoad(stream, gp_utils.virtualTarget(), context);
 }
diff --git a/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_derivative_obs.i b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_derivative_obs.i
new file mode 100644
index 000000000000..2f3cf807ab08
--- /dev/null
+++ b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_derivative_obs.i
@@ -0,0 +1,122 @@
+# Gaussian Process with derivative (virtual) observations for monotonicity constraint.
+# The heat diffusion response is monotonically increasing in q (source term).
+# Virtual derivative observations at selected points enforce this monotonicity
+# by augmenting the covariance matrix with cross-covariance blocks between
+# function values and derivative observations.
+
+[StochasticTools]
+[]
+
+[Distributions]
+  [k_dist]
+    type = Uniform
+    lower_bound = 1
+    upper_bound = 10
+  []
+  [q_dist]
+    type = Uniform
+    lower_bound = 9000
+    upper_bound = 11000
+  []
+[]
+
+[Samplers]
+  [train_sample]
+    type = MonteCarlo
+    num_rows = 10
+    distributions = 'k_dist q_dist'
+    execute_on = PRE_MULTIAPP_SETUP
+  []
+  [test_sample]
+    type = MonteCarlo
+    num_rows = 20
+    distributions = 'k_dist q_dist'
+    execute_on = PRE_MULTIAPP_SETUP
+  []
+[]
+
+[MultiApps]
+  [sub]
+    type = SamplerFullSolveMultiApp
+    input_files = sub.i
+    sampler = train_sample
+  []
+[]
+
+[Controls]
+  [cmdline]
+    type = MultiAppSamplerControl
+    multi_app = sub
+    sampler = train_sample
+    param_names = 'Materials/conductivity/prop_values Kernels/source/value'
+  []
+[]
+
+[Transfers]
+  [data]
+    type = SamplerReporterTransfer
+    from_multi_app = sub
+    sampler = train_sample
+    stochastic_reporter = results
+    from_reporter = 'avg/value'
+  []
+[]
+
+[Reporters]
+  [results]
+    type = StochasticReporter
+    parallel_type = ROOT
+  []
+  [samp_avg]
+    type = EvaluateSurrogate
+    model = GP_avg
+    sampler = test_sample
+    evaluate_std = 'true'
+    parallel_type = ROOT
+    execute_on = final
+  []
+[]
+
+[Trainers]
+  [GP_avg_trainer]
+    type = GaussianProcessTrainer
+    execute_on = timestep_end
+    covariance_function = 'covar'
+    standardize_params = 'true'
+    standardize_data = 'true'
+    sampler = train_sample
+    response = results/data:avg:value
+    # Enforce df/dq > 0 (increasing in q, dimension index 1)
+    # Virtual obs placed at three locations spanning the q range
+    monotone_constraint_points = '5.5 9000
+                                  5.5 10000
+                                  5.5 11000'
+    monotone_constraint_dims = '1 1 1'
+    monotone_constraint_type = increasing
+    derivative_target_value = 0.01
+    derivative_noise_variance = 1e-4
+  []
+[]
+
+[Surrogates]
+  [GP_avg]
+    type = GaussianProcessSurrogate
+    trainer = GP_avg_trainer
+  []
+[]
+
+[Covariance]
+  [covar]
+    type = SquaredExponentialCovariance
+    signal_variance = 1
+    noise_variance = 1e-3
+    length_factor = '0.4 0.4'
+  []
+[]
+
+[Outputs]
+  [out]
+    type = CSV
+    execute_on = FINAL
+  []
+[]
diff --git a/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_log_link.i b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_log_link.i
new file mode 100644
index 000000000000..4bced44391d5
--- /dev/null
+++ b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_log_link.i
@@ -0,0 +1,116 @@
+# Gaussian Process with log link function for positivity constraint.
+# The heat diffusion sub-problem produces strictly positive temperatures.
+# The log link g(y) = log(y - 0) ensures all GP predictions satisfy y > 0.
+# This test verifies that the log link can be used without errors and that
+# the basic prediction structure is correct.
+
+[StochasticTools]
+[]
+
+[Distributions]
+  [k_dist]
+    type = Uniform
+    lower_bound = 1
+    upper_bound = 10
+  []
+  [q_dist]
+    type = Uniform
+    lower_bound = 9000
+    upper_bound = 11000
+  []
+[]
+
+[Samplers]
+  [train_sample]
+    type = MonteCarlo
+    num_rows = 10
+    distributions = 'k_dist q_dist'
+    execute_on = PRE_MULTIAPP_SETUP
+  []
+  [test_sample]
+    type = MonteCarlo
+    num_rows = 20
+    distributions = 'k_dist q_dist'
+    execute_on = PRE_MULTIAPP_SETUP
+  []
+[]
+
+[MultiApps]
+  [sub]
+    type = SamplerFullSolveMultiApp
+    input_files = sub.i
+    sampler = train_sample
+  []
+[]
+
+[Controls]
+  [cmdline]
+    type = MultiAppSamplerControl
+    multi_app = sub
+    sampler = train_sample
+    param_names = 'Materials/conductivity/prop_values Kernels/source/value'
+  []
+[]
+
+[Transfers]
+  [data]
+    type = SamplerReporterTransfer
+    from_multi_app = sub
+    sampler = train_sample
+    stochastic_reporter = results
+    from_reporter = 'avg/value'
+  []
+[]
+
+[Reporters]
+  [results]
+    type = StochasticReporter
+    parallel_type = ROOT
+  []
+  [samp_avg]
+    type = EvaluateSurrogate
+    model = GP_avg
+    sampler = test_sample
+    evaluate_std = 'true'
+    parallel_type = ROOT
+    execute_on = final
+  []
+[]
+
+[Trainers]
+  [GP_avg_trainer]
+    type = GaussianProcessTrainer
+    execute_on = timestep_end
+    covariance_function = 'covar'
+    standardize_params = 'true'
+    standardize_data = 'true'
+    sampler = train_sample
+    response = results/data:avg:value
+    # Enforce positivity: log link with lower bound 0
+    link_function = log
+    link_lower_bound = 0.0
+  []
+[]
+
+[Surrogates]
+  [GP_avg]
+    type = GaussianProcessSurrogate
+    trainer = GP_avg_trainer
+  []
+[]
+
+[Covariance]
+  [covar]
+    type = SquaredExponentialCovariance
+    signal_variance = 1
+    noise_variance = 1e-3
+    length_factor = '0.4 0.4'
+  []
+[]
+
+[Outputs]
+  [out]
+    type = CSV
+    execute_on = FINAL
+  []
+[]
diff --git a/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_logit_link.i b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_logit_link.i
new file mode 100644
index 000000000000..d15e1344595d
--- /dev/null
+++ b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_logit_link.i
@@ -0,0 +1,115 @@
+# Gaussian Process with logit link function for bounded output constraint.
+# The logit link g(y) = log((y - lb)/(ub - y)) maps a bounded response
+# into an unbounded GP latent space, ensuring predictions satisfy lb < y < ub.
+
+[StochasticTools]
+[]
+
+[Distributions]
+  [k_dist]
+    type = Uniform
+    lower_bound = 1
+    upper_bound = 10
+  []
+  [q_dist]
+    type = Uniform
+    lower_bound = 9000
+    upper_bound = 11000
+  []
+[]
+
+[Samplers]
+  [train_sample]
+    type = MonteCarlo
+    num_rows = 10
+    distributions = 'k_dist q_dist'
+    execute_on = PRE_MULTIAPP_SETUP
+  []
+  [test_sample]
+    type = MonteCarlo
+    num_rows = 20
+    distributions = 'k_dist q_dist'
+    execute_on = PRE_MULTIAPP_SETUP
+  []
+[]
+
+[MultiApps]
+  [sub]
+    type = SamplerFullSolveMultiApp
+    input_files = sub.i
+    sampler = train_sample
+  []
+[]
+
+[Controls]
+  [cmdline]
+    type = MultiAppSamplerControl
+    multi_app = sub
+    sampler = train_sample
+    param_names = 'Materials/conductivity/prop_values Kernels/source/value'
+  []
+[]
+
+[Transfers]
+  [data]
+    type = SamplerReporterTransfer
+    from_multi_app = sub
+    sampler = train_sample
+    stochastic_reporter = results
+    from_reporter = 'avg/value'
+  []
+[]
+
+[Reporters]
+  [results]
+    type = StochasticReporter
+    parallel_type = ROOT
+  []
+  [samp_avg]
+    type = EvaluateSurrogate
+    model = GP_avg
+    sampler = test_sample
+    evaluate_std = 'true'
+    parallel_type = ROOT
+    execute_on = final
+  []
+[]
+
+[Trainers]
+  [GP_avg_trainer]
+    type = GaussianProcessTrainer
+    execute_on = timestep_end
+    covariance_function = 'covar'
+    standardize_params = 'true'
+    standardize_data = 'true'
+    sampler = train_sample
+    response = results/data:avg:value
+    # Bound the output: logit link with physical bounds [250, 2000]
+    link_function = logit
+    link_lower_bound = 250.0
+    link_upper_bound = 2000.0
+  []
+[]
+
+[Surrogates]
+  [GP_avg]
+    type = GaussianProcessSurrogate
+    trainer = GP_avg_trainer
+  []
+[]
+
+[Covariance]
+  [covar]
+    type = SquaredExponentialCovariance
+    signal_variance = 1
+    noise_variance = 1e-3
+    length_factor = '0.4 0.4'
+  []
+[]
+
+[Outputs]
+  [out]
+    type = CSV
+    execute_on = FINAL
+  []
+[]
diff --git a/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_penalty_constraint.i b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_penalty_constraint.i
new file mode 100644
index 000000000000..3192d92d33ca
--- /dev/null
+++ b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/GP_penalty_constraint.i
@@ -0,0 +1,120 @@
+# Gaussian Process with penalty constraints during hyperparameter tuning.
+# Penalty terms are added to the NLML loss to steer the Adam optimizer toward
+# hyperparameter configurations that respect lower/upper bound constraints at
+# specified evaluation points.
+
+[StochasticTools]
+[]
+
+[Distributions]
+  [k_dist]
+    type = Uniform
+    lower_bound = 1
+    upper_bound = 10
+  []
+  [q_dist]
+    type = Uniform
+    lower_bound = 9000
+    upper_bound = 11000
+  []
+[]
+
+[Samplers]
+  [train_sample]
+    type = MonteCarlo
+    num_rows = 10
+    distributions = 'k_dist q_dist'
+    execute_on = PRE_MULTIAPP_SETUP
+  []
+  [test_sample]
+    type = MonteCarlo
+    num_rows = 20
+    distributions = 'k_dist q_dist'
+    execute_on = PRE_MULTIAPP_SETUP
+  []
+[]
+
+[MultiApps]
+  [sub]
+    type = SamplerFullSolveMultiApp
+    input_files = sub.i
+    sampler = train_sample
+  []
+[]
+
+[Controls]
+  [cmdline]
+    type = MultiAppSamplerControl
+    multi_app = sub
+    sampler = train_sample
+    param_names = 'Materials/conductivity/prop_values Kernels/source/value'
+  []
+[]
+
+[Transfers]
+  [data]
+    type = SamplerReporterTransfer
+    from_multi_app = sub
+    sampler = train_sample
+    stochastic_reporter = results
+    from_reporter = 'avg/value'
+  []
+[]
+
+[Reporters]
+  [results]
+    type = StochasticReporter
+    parallel_type = ROOT
+  []
+  [samp_avg]
+    type = EvaluateSurrogate
+    model = GP_avg
+    sampler = test_sample
+    evaluate_std = 'true'
+    parallel_type = ROOT
+    execute_on = final
+  []
+[]
+
+[Trainers]
+  [GP_avg_trainer]
+    type = GaussianProcessTrainer
+    execute_on = timestep_end
+    covariance_function = 'covar'
+    standardize_params = 'true'
+    standardize_data = 'true'
+    sampler = train_sample
+    response = results/data:avg:value
+    tune_parameters = 'signal_variance noise_variance length_factor'
+    num_iters = 50
+    learning_rate = 0.001
+    # Penalize predictions below 300 (floor near BC value) at two constraint points
+    penalty_constraint_points = '5.5 9000
+                                 5.5 11000'
+    penalty_constraint_lower_bounds = '300.0 300.0'
+    penalty_weight = 10.0
+  []
+[]
+
+[Surrogates]
+  [GP_avg]
+    type = GaussianProcessSurrogate
+    trainer = GP_avg_trainer
+  []
+[]
+
+[Covariance]
+  [covar]
+    type = SquaredExponentialCovariance
+    signal_variance = 1
+    noise_variance = 1e-3
+    length_factor = '0.4 0.4'
+  []
+[]
+
+[Outputs]
+  [out]
+    type = CSV
+    execute_on = FINAL
+  []
+[]
diff --git a/modules/stochastic_tools/test/tests/surrogates/gaussian_process/tests b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/tests
index 23603d80028e..760e3d9bb27f 100644
--- a/modules/stochastic_tools/test/tests/surrogates/gaussian_process/tests
+++ b/modules/stochastic_tools/test/tests/surrogates/gaussian_process/tests
@@ -1,6 +1,6 @@
 [Tests]
   issues = "#15482"
-  design = "GaussianProcessTrainer.md GaussianProcess.md"
+  design = "GaussianProcessTrainer.md GaussianProcess.md GPLinkFunction.md"
 
   [store_load]
     requirement = 'The system shall demonstrate a gaussian process surrogate by '
@@ -79,6 +79,33 @@
       detail = 'a Matern half integer kernel using Adam with mini-batch sampling;'
     []
   []
+  [constraints]
+    requirement = 'The system shall support inequality constraints on Gaussian process predictions via'
+    [GP_log_link]
+      type = RunApp
+      input = GP_log_link.i
+      allow_test_objects = true
+      detail = 'a log link function enforcing strict positivity (y > lower_bound);'
+    []
+    [GP_logit_link]
+      type = RunApp
+      input = GP_logit_link.i
+      allow_test_objects = true
+      detail = 'a logit link function enforcing bounded outputs (lower_bound < y < upper_bound);'
+    []
+    [GP_derivative_obs]
+      type = RunApp
+      input = GP_derivative_obs.i
+      allow_test_objects = true
+      detail = 'virtual derivative observations that enforce monotonicity constraints;'
+    []
+    [GP_penalty_constraint]
+      type = RunApp
+      input = GP_penalty_constraint.i
+      allow_test_objects = true
+      detail = 'penalty-augmented NLML that steers hyperparameter tuning toward feasible predictions.'
+    []
+  []
   [errors]
     requirement = 'The system shall throw an error when '
     [optimization_adam_batch_size]
@@ -88,5 +115,12 @@
       detail = 'the batch size is greater than the training data set size for Adam optimization. '
      expect_err = "Batch size cannot be greater than the training data set size."
     []
+    [logit_invalid_bounds]
+      type = RunException
+      input = GP_logit_link.i
+      cli_args = "Trainers/GP_avg_trainer/link_upper_bound=250.0"
+      detail = 'the logit link upper bound is not greater than the lower bound. '
+      expect_err = "link_upper_bound must be strictly greater than link_lower_bound"
+    []
   []
 []
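
Notes for reviewers (reference material, not part of the applied diff):

The bodies of the new computeCovarianceFD()/computeCovarianceDD() methods in
SquaredExponentialCovariance.C are not reproduced in the hunks above. For the
squared-exponential kernel the required blocks have standard closed forms; the
standalone sketch below (illustrative helper names, not the module's code)
shows the scalar quantities the two methods are expected to return for one
pair of points x, x' and derivative dimensions d, e:

    #include <Eigen/Dense>
    #include <cmath>

    using Vec = Eigen::VectorXd;

    // k(x, x') = sf2 * exp(-0.5 * sum_d ((x_d - x'_d) / l_d)^2)
    double se_kernel(const Vec & x, const Vec & xp, double sf2, const Vec & l)
    {
      const Vec d = (x - xp).cwiseQuotient(l);
      return sf2 * std::exp(-0.5 * d.squaredNorm());
    }

    // Cov(f(x), df/dx'_d) = k(x, x') * (x_d - x'_d) / l_d^2   ("FD" block)
    double se_cov_fd(const Vec & x, const Vec & xp, int d, double sf2, const Vec & l)
    {
      return se_kernel(x, xp, sf2, l) * (x(d) - xp(d)) / (l(d) * l(d));
    }

    // Cov(df/dx_d, df/dx'_e) =
    //   k(x, x') * (delta_de / l_d^2 - (x_d - x'_d)(x_e - x'_e) / (l_d^2 l_e^2))   ("DD" block)
    double se_cov_dd(const Vec & x, const Vec & xp, int d, int e, double sf2, const Vec & l)
    {
      const double k = se_kernel(x, xp, sf2, l);
      const double diag = (d == e) ? 1.0 / (l(d) * l(d)) : 0.0;
      return k * (diag - (x(d) - xp(d)) * (x(e) - xp(e)) / (l(d) * l(d) * l(e) * l(e)));
    }

These formulas also explain why only the squared-exponential kernel is
supported: the derivative blocks require a twice-differentiable kernel with
tractable derivatives.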
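On the "approximate" tag in the penalty gradient of getGradient(): with the
predicted mean written as a function of the hyperparameters, the exact
derivative has two terms, and the implementation keeps only the first (the
dK_star/dtheta term named in the code comment), dropping the term that arises
from the dependence of K^{-1} on theta:

    \mu_c = \mathbf{k}_*^{\top} \mathbf{K}^{-1} \mathbf{y}, \qquad
    \frac{\partial \mu_c}{\partial \theta_i}
      = \underbrace{\left(\frac{\partial \mathbf{k}_*}{\partial \theta_i}\right)^{\!\top}
          \mathbf{K}^{-1} \mathbf{y}}_{\text{kept}}
      - \underbrace{\mathbf{k}_*^{\top} \mathbf{K}^{-1}
          \frac{\partial \mathbf{K}}{\partial \theta_i} \mathbf{K}^{-1} \mathbf{y}}_{\text{dropped}}

    \frac{\partial}{\partial \theta_i}\,\lambda \max(0,\ \ell_c - \mu_c)^2
      = -2\lambda \max(0,\ \ell_c - \mu_c)\,\frac{\partial \mu_c}{\partial \theta_i}

The second identity (and its mirror image for the upper bound) is exactly the
update `grad_vec[global_index] += 2.0 * _penalty_weight * slack * sign * d_mu_c`,
with sign = -1 for lower-bound violations and sign = +1 for upper-bound
violations.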
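For reference, the link pairs exercised by GP_log_link.i and GP_logit_link.i,
written out with ell = link_lower_bound and u = link_upper_bound (consistent
with the comments in those inputs):

    g_{\log}(y) = \log(y - \ell), \qquad
    g_{\log}^{-1}(z) = \ell + e^{z}

    g_{\mathrm{logit}}(y) = \log\!\frac{y - \ell}{u - y}, \qquad
    g_{\mathrm{logit}}^{-1}(z) = \ell + \frac{u - \ell}{1 + e^{-z}}

Both inverses map the entire real line into the constraint set, which is why
the constraints hold by construction for any latent GP prediction.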
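The surrogate-side back-transform in GaussianProcessSurrogate.C is likewise
not shown in this excerpt. Given that the utility exposes both inverse() and
inverseDeriv(), a plausible reading is a plug-in mean with a first-order
(delta-method) standard deviation; the sketch below is a hypothetical helper
under that assumption for the log link, not the patch's actual code:

    #include <cmath>
    #include <utility>

    // Map a latent prediction (mu_z, sigma_z) back through g(y) = log(y - lb):
    //   mean  ~= g^{-1}(mu_z)            = lb + exp(mu_z)   (always > lb)
    //   std   ~= (g^{-1})'(mu_z) * sigma_z = exp(mu_z) * sigma_z
    std::pair<double, double>
    log_link_back_transform(double mu_z, double sigma_z, double lb)
    {
      const double mean = lb + std::exp(mu_z);
      const double dinv = std::exp(mu_z);
      return {mean, dinv * sigma_z};
    }

Whether the module uses this first-order estimate or exact log-normal moments
is not determinable from the hunks above; the delta-method form is the one
implied by the inverseDeriv() accessor.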