Replace jaxopt with optax L-BFGS and remove scipy.stats from tests

katosh · katosh · commit 5a10effdc53e · 2026-03-05T01:50:29.000-08:00
- Replace deprecated jaxopt.ScipyMinimize with optax.lbfgs (JAX-native,
  JIT-compatible L-BFGS optimizer)
- Replace scipy.stats.spearmanr in tests with a simple rank-correlation
  helper using jax.numpy
- Update dependency: jaxopt -> optax
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,7 @@
  - new convenience methods on `FunctionEstimator`: `leverage(X)`, `empirical_variance(X, y)`, `get_obs_variance(X)`
  - `obs_variance` weights are included in predictor serialization (`to_json`/`from_json`)
  - `sigma` now accepts per-feature vectors of shape `(p,)` or `(1, p)` for multi-output GPs, giving each output column its own noise level
+ - replace deprecated `jaxopt` dependency with `optax` for L-BFGS optimization
  - fix `requires-python` from `>=3.6` to `>=3.10`
 
 # v1.6.1
diff --git a/mellon/compute_ls_time.py b/mellon/compute_ls_time.py
@@ -2,7 +2,6 @@
 from jax.numpy import exp, unique, corrcoef, zeros, abs, stack
 from jax.numpy import sum as arraysum
 from jax.numpy.linalg import norm
-from jaxopt import ScipyMinimize
 from .density_estimator import DensityEstimator
 from .validation import validate_time_x
 
@@ -95,8 +94,10 @@ def ls_loss(log_ls):
         covs = cov_func_curry(ls)(delta_t, zeros((1, 1))).reshape((n_times, n_times))
         return norm(covs - corrs)
 
-    opt = ScipyMinimize(fun=ls_loss, method="L-BFGS-B", jit=False).run(0.0)
-    ls = exp(opt.params).item()
+    from .inference import minimize_lbfgsb
+
+    result = minimize_lbfgsb(ls_loss, 0.0, jit=False)
+    ls = exp(result.pre_transformation).item()
 
     if return_data:
         return ls, densities, predictors, unique_times
diff --git a/mellon/inference.py b/mellon/inference.py
@@ -7,8 +7,8 @@
 from jax.scipy.special import gammaln
 import jax.scipy.stats.norm as norm
 import jax
+import optax
 from jax.example_libraries.optimizers import adam
-from jaxopt import ScipyMinimize
 from .conditional import (
     FullConditional,
     ExpFullConditional,
@@ -269,22 +269,50 @@ def step(step, opt_state):
     return results
 
 
-def minimize_lbfgsb(loss_func, initial_value, jit=DEFAULT_JIT):
+def minimize_lbfgsb(loss_func, initial_value, jit=DEFAULT_JIT, maxiter=500, tol=1e-8):
     R"""
-    Minimizes function with a starting guess of initial_value.
+    Minimizes function using L-BFGS via optax.
 
     :param loss_func: Loss function to minimize.
     :type loss_func: function
     :param initial_value: Initial guess.
     :type initial_value: array-like
+    :param jit: Whether to JIT-compile the optimization step.
+    :type jit: bool
+    :param maxiter: Maximum number of iterations.
+    :type maxiter: int
+    :param tol: Gradient norm tolerance for convergence.
+    :type tol: float
     :return: Results - A named tuple containing pre_transformation, opt_state,
         loss: The optimized parameters, final state of the optimizer, and the
         final loss value,
     :rtype: array-like, array-like, Object
     """
-    opt = ScipyMinimize(fun=loss_func, method="L-BFGS-B", jit=jit).run(initial_value)
+    solver = optax.lbfgs()
+
+    def step(x, opt_state):
+        value, grad = jax.value_and_grad(loss_func)(x)
+        updates, new_state = solver.update(
+            grad, opt_state, x,
+            value=value, grad=grad, value_fn=loss_func,
+        )
+        new_x = optax.apply_updates(x, updates)
+        return new_x, new_state, value, grad
+
+    if jit:
+        step = jax.jit(step)
+
+    x = jax.numpy.asarray(initial_value)
+    opt_state = solver.init(x)
+    # step() reports loss/grad at the iterate it starts from, not the one it returns.
+
+    for _ in range(maxiter):
+        x, opt_state, _prev_loss, grad = step(x, opt_state)
+        if jax.numpy.linalg.norm(grad) < tol:
+            break
+
     Results = namedtuple("Results", "pre_transformation opt_state loss")
-    results = Results(opt.params, opt.state, opt.state.fun_val.item())
+    results = Results(x, opt_state, float(loss_func(x)))
     return results
 
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -22,7 +22,7 @@ dependencies = [
     # flexible foundations to avoid broken resolutions.
     # See: https://github.com/astral-sh/uv/issues/5161
     "jax",
-    "jaxopt",
+    "optax",
     "scikit-learn",
     "pynndescent",
 ]
@@ -80,4 +80,4 @@ python_version = "3.9"
 warn_return_any = true
 warn_unused_configs = true
 disallow_untyped_defs = false
-disallow_incomplete_defs = false
+disallow_incomplete_defs = false
diff --git a/tests/test_leverage.py b/tests/test_leverage.py
@@ -4,6 +4,14 @@
 import mellon
 
 
+def _spearman_correlation(a, b):
+    """Spearman rank correlation via tie-aware average ranks (no SciPy needed)."""
+    from jax.scipy.stats import rankdata  # local import keeps the helper self-contained
+
+    rank_a, rank_b = (rankdata(jnp.asarray(v).ravel()) for v in (a, b))
+    return jnp.corrcoef(rank_a, rank_b)[0, 1]
+
+
 @pytest.fixture
 def setup_data():
     n = 50
@@ -49,10 +57,7 @@ def test_sparse_gp_leverage_correlates_with_full(setup_data):
     est_sparse.fit(X, y)
     h_sparse = est_sparse.predict.leverage(X, sigma=sigma)
 
-    # Spearman correlation via ranks
-    from scipy.stats import spearmanr
-
-    corr, _ = spearmanr(h_full, h_sparse)
+    corr = _spearman_correlation(h_full, h_sparse)
     assert corr > 0.8, f"Spearman correlation {corr} too low between full and sparse leverage."
 
 
@@ -190,9 +195,7 @@ def test_obs_variance_correlates_with_true_noise():
 
     var = est.predict.obs_variance(X)
 
-    from scipy.stats import spearmanr
-
-    corr, _ = spearmanr(true_noise_std**2, var)
+    corr = _spearman_correlation(true_noise_std**2, var)
     assert corr > 0.3, (
         f"obs_variance should correlate with true noise variance, got Spearman={corr}"
     )