[MRG] Warmstart for exact sparse and lazy solvers (#795)

nathanneike · web-flow · commit e164e78e8d7f · 2026-02-17T15:20:52.000+01:00
* Implementation of warmstart for network simplex; it can make use of precomputed potentials from Sinkhorn or even a related simplex solve

* optimise initial setup for warmstart using heap

* Update Releases and test file

* changed some interfaces and function names

* small doc fix

* Add warmstart potentials to sparse and lazy solver also

* Adding warmstart to missing functions

* small wrapper issue in solve_sample
diff --git a/RELEASES.md b/RELEASES.md
@@ -7,6 +7,7 @@ This new release adds support for sparse cost matrices and a new lazy EMD solver
 #### New features 
 - Add lazy EMD solver with on-the-fly distance computation from coordinates (PR #788)
 - Add Warmstart feature to the EMD solver for existing potentials (PR #793)
+- Add Warmstart potentials feature to the lazy and sparse EMD solvers (PR #795)
 - Migrate backend from deprecated `scipy.sparse.coo_matrix` to modern `scipy.sparse.coo_array` (PR #782)
 - Geomloss function now handles both scalar and slice indices for i and j (PR #785)
 - Add support for sparse cost matrices in EMD solver (PR #778, Issue #397)
diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h
@@ -48,7 +48,9 @@ int EMD_wrap_sparse(
     double *alpha,               // Output: dual variables for sources (n1)
     double *beta,                // Output: dual variables for targets (n2)
     double *cost,                // Output: total transportation cost
-    uint64_t maxIter             // Maximum iterations for solver
+    uint64_t maxIter,            // Maximum iterations for solver
+    double *alpha_init,          // Initial dual variables for sources (warmstart)
+    double *beta_init            // Initial dual variables for targets (warmstart)
 );
 
 int EMD_wrap_lazy(
@@ -64,7 +66,9 @@ int EMD_wrap_lazy(
     double *alpha,               // Output: dual variables for sources (n1)
     double *beta,                // Output: dual variables for targets (n2)
     double *cost,                // Output: total transportation cost
-    uint64_t maxIter             // Maximum iterations for solver
+    uint64_t maxIter,            // Maximum iterations for solver
+    double *alpha_init,          // Initial dual variables for sources (warmstart)
+    double *beta_init            // Initial dual variables for targets (warmstart)
 );
 
 
diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp
@@ -253,7 +253,9 @@ int EMD_wrap_sparse(
     double *alpha,
     double *beta,
     double *cost,
-    uint64_t maxIter
+    uint64_t maxIter,
+    double *alpha_init,
+    double *beta_init
 ) {
     using namespace lemon;
     
@@ -351,6 +353,22 @@ int EMD_wrap_sparse(
         }
     }
     
+    // Initialize warmstart if provided
+    if (alpha_init != nullptr && beta_init != nullptr) {
+        // Map original indices to graph indices for warmstart
+        std::vector<double> alpha_filtered(n);
+        std::vector<double> beta_filtered(m);
+        for (uint64_t i = 0; i < n; i++) {
+            uint64_t orig_i = indI[i];
+            alpha_filtered[i] = alpha_init[orig_i];
+        }
+        for (uint64_t j = 0; j < m; j++) {
+            uint64_t orig_j = indJ[j];
+            beta_filtered[j] = beta_init[orig_j];
+        }
+        net.setWarmstartPotentials(&alpha_filtered[0], &beta_filtered[0], n, m);
+    }
+    
     int ret = net.run();
 
     if (ret == (int)net.OPTIMAL || ret == (int)net.MAX_ITER_REACHED) {
@@ -389,7 +407,7 @@ int EMD_wrap_sparse(
 
 int EMD_wrap_lazy(int n1, int n2, double *X, double *Y, double *coords_a, double *coords_b, 
                   int dim, int metric, double *G, double *alpha, double *beta, 
-                  double *cost, uint64_t maxIter) {
+                  double *cost, uint64_t maxIter, double *alpha_init, double *beta_init) {
     using namespace lemon;
     typedef FullBipartiteDigraph Digraph;
     DIGRAPH_TYPEDEFS(Digraph);
@@ -454,6 +472,22 @@ int EMD_wrap_lazy(int n1, int n2, double *X, double *Y, double *coords_a, double
     // Enable lazy cost computation - costs will be computed on-the-fly
     net.setLazyCost(&coords_a_filtered[0], &coords_b_filtered[0], dim, metric, n, m);
     
+    // Initialize warmstart if provided
+    if (alpha_init != nullptr && beta_init != nullptr) {
+        // Map original indices to graph indices for warmstart
+        std::vector<double> alpha_filtered(n);
+        std::vector<double> beta_filtered(m);
+        for (int i = 0; i < n; i++) {
+            int orig_i = idx_a[i];
+            alpha_filtered[i] = alpha_init[orig_i];
+        }
+        for (int j = 0; j < m; j++) {
+            int orig_j = idx_b[j];
+            beta_filtered[j] = beta_init[orig_j];
+        }
+        net.setWarmstartPotentials(&alpha_filtered[0], &beta_filtered[0], n, m);
+    }
+    
     // Run solver
     int ret = net.run();
     
diff --git a/ot/lp/_network_simplex.py b/ot/lp/_network_simplex.py
@@ -820,6 +820,7 @@ def emd2_lazy(
     return_matrix=True,
     center_dual=True,
     check_marginals=True,
+    potentials_init=None,
 ):
     r"""Solves the Earth Movers distance problem with lazy cost computation and returns the loss
 
@@ -873,6 +874,9 @@ def emd2_lazy(
         If True, centers the dual potential using :py:func:`ot.lp.center_ot_dual`
     check_marginals: bool, optional (default=True)
         If True, checks that the marginals mass are equal
+    potentials_init : tuple of (ns,) and (nt,) arrays, optional
+        Initial dual potentials (u, v) to warmstart the solver. If provided,
+        the solver starts from these potentials instead of a cold start.
 
     Returns
     -------
@@ -942,8 +946,18 @@ def emd2_lazy(
         )
     b_np = b_np * a_np.sum() / b_np.sum()
 
+    # Handle warmstart potentials
+    alpha_init_np = None
+    beta_init_np = None
+    if potentials_init is not None:
+        alpha_init, beta_init = potentials_init
+        alpha_init_np = nx.to_numpy(alpha_init)
+        beta_init_np = nx.to_numpy(beta_init)
+        alpha_init_np = np.asarray(alpha_init_np, dtype=np.float64, order="C")
+        beta_init_np = np.asarray(beta_init_np, dtype=np.float64, order="C")
+
     G, cost, u, v, result_code = emd_c_lazy(
-        a_np, b_np, X_a_np, X_b_np, metric, numItermax
+        a_np, b_np, X_a_np, X_b_np, metric, numItermax, alpha_init_np, beta_init_np
     )
 
     if center_dual:
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
@@ -22,8 +22,8 @@ import warnings
 cdef extern from "EMD.h":
     int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, uint64_t maxIter, double* alpha_init, double* beta_init) nogil
     int EMD_wrap_omp(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, uint64_t maxIter, int numThreads) nogil
-    int EMD_wrap_sparse(int n1, int n2, double *X, double *Y, uint64_t n_edges, uint64_t *edge_sources, uint64_t *edge_targets, double *edge_costs, uint64_t *flow_sources_out, uint64_t *flow_targets_out, double *flow_values_out, uint64_t *n_flows_out, double *alpha, double *beta, double *cost, uint64_t maxIter) nogil
-    int EMD_wrap_lazy(int n1, int n2, double *X, double *Y, double *coords_a, double *coords_b, int dim, int metric, double *G, double* alpha, double* beta, double *cost, uint64_t maxIter) nogil
+    int EMD_wrap_sparse(int n1, int n2, double *X, double *Y, uint64_t n_edges, uint64_t *edge_sources, uint64_t *edge_targets, double *edge_costs, uint64_t *flow_sources_out, uint64_t *flow_targets_out, double *flow_values_out, uint64_t *n_flows_out, double *alpha, double *beta, double *cost, uint64_t maxIter, double* alpha_init, double* beta_init) nogil
+    int EMD_wrap_lazy(int n1, int n2, double *X, double *Y, double *coords_a, double *coords_b, int dim, int metric, double *G, double* alpha, double* beta, double *cost, uint64_t maxIter, double* alpha_init, double* beta_init) nogil
     cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED
 
 
@@ -233,7 +233,9 @@ def emd_c_sparse(np.ndarray[double, ndim=1, mode="c"] a,
                 np.ndarray[uint64_t, ndim=1, mode="c"] edge_sources,
                 np.ndarray[uint64_t, ndim=1, mode="c"] edge_targets,
                 np.ndarray[double, ndim=1, mode="c"] edge_costs,
-                uint64_t max_iter):
+                uint64_t max_iter,
+                np.ndarray[double, ndim=1, mode="c"] alpha_init=None,
+                np.ndarray[double, ndim=1, mode="c"] beta_init=None):
     """
     Sparse EMD solver using cost matrix in COO (Coordinate) sparse format.
     
@@ -255,6 +257,10 @@ def emd_c_sparse(np.ndarray[double, ndim=1, mode="c"] a,
         Cost for each edge (non-zero values in COO format)
     max_iter : uint64_t
         Maximum number of iterations
+    alpha_init : (n1,) array, float64, optional
+        Initial dual variables for sources (warmstart)
+    beta_init : (n2,) array, float64, optional
+        Initial dual variables for targets (warmstart)
 
     Returns
     -------
@@ -287,6 +293,12 @@ def emd_c_sparse(np.ndarray[double, ndim=1, mode="c"] a,
     cdef np.ndarray[double, ndim=1, mode="c"] alpha = np.zeros(n1)
     cdef np.ndarray[double, ndim=1, mode="c"] beta = np.zeros(n2)
 
+    cdef double* alpha_init_ptr = NULL
+    cdef double* beta_init_ptr = NULL
+    if alpha_init is not None and beta_init is not None:
+        alpha_init_ptr = <double*> alpha_init.data
+        beta_init_ptr = <double*> beta_init.data
+    
     with nogil:
         result_code = EMD_wrap_sparse(
             n1, n2,
@@ -295,7 +307,8 @@ def emd_c_sparse(np.ndarray[double, ndim=1, mode="c"] a,
             <uint64_t*> edge_sources.data, <uint64_t*> edge_targets.data, <double*> edge_costs.data,
             <uint64_t*> flow_sources.data, <uint64_t*> flow_targets.data, <double*> flow_values.data,
             &n_flows_out,
-            <double*> alpha.data, <double*> beta.data, &cost, max_iter
+            <double*> alpha.data, <double*> beta.data, &cost, max_iter,
+            alpha_init_ptr, beta_init_ptr
         )
 
     # Trim to actual number of flows
@@ -308,7 +321,7 @@ def emd_c_sparse(np.ndarray[double, ndim=1, mode="c"] a,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def emd_c_lazy(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] coords_a, np.ndarray[double, ndim=2, mode="c"] coords_b, str metric='sqeuclidean', uint64_t max_iter=100000):
+def emd_c_lazy(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] coords_a, np.ndarray[double, ndim=2, mode="c"] coords_b, str metric='sqeuclidean', uint64_t max_iter=100000, np.ndarray[double, ndim=1, mode="c"] alpha_init=None, np.ndarray[double, ndim=1, mode="c"] beta_init=None):
     """Solves the Earth Movers distance problem with lazy cost computation from coordinates."""
     cdef int n1 = coords_a.shape[0]
     cdef int n2 = coords_b.shape[0]
@@ -339,6 +352,13 @@ def emd_c_lazy(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1
         a = np.ones((n1,)) / n1
     if not len(b):
         b = np.ones((n2,)) / n2
+    
+    cdef double* alpha_init_ptr = NULL
+    cdef double* beta_init_ptr = NULL
+    if alpha_init is not None and beta_init is not None:
+        alpha_init_ptr = <double*> alpha_init.data
+        beta_init_ptr = <double*> beta_init.data
+    
     with nogil:
-        result_code = EMD_wrap_lazy(n1, n2, <double*> a.data, <double*> b.data, <double*> coords_a.data, <double*> coords_b.data, dim, metric_code, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
+        result_code = EMD_wrap_lazy(n1, n2, <double*> a.data, <double*> b.data, <double*> coords_a.data, <double*> coords_b.data, dim, metric_code, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter, alpha_init_ptr, beta_init_ptr)
     return G, cost, alpha, beta, result_code
diff --git a/ot/lp/network_simplex_simple.h b/ot/lp/network_simplex_simple.h
@@ -679,10 +679,10 @@ namespace lemon {
             if (!_lazy_cost) {
                 return _cost[arc_id];
             } else {
-                // For artificial arcs (>= _arc_num), return 0
-                // These are not real transport arcs
+                // For artificial arcs (>= _arc_num), return stored cost
+                // (0 for positive supply, ART_COST for negative supply)
                 if (arc_id >= _arc_num) {
-                    return 0;
+                    return _cost[arc_id];
                 }
                 // Compute lazily from coordinates
                 // _source and _target use reversed node numbering: _node_id(n) = _node_num - n - 1
@@ -1138,7 +1138,13 @@ namespace lemon {
 
                 for (ArcsType e = 0; e < _arc_num; ++e) {
                     _state[e] = STATE_LOWER;
-                    Cost c = _cost[e];
+                    Cost c;
+                    if (_lazy_cost) {
+                        // Compute cost on-the-fly for lazy mode
+                        c = getCostForArc(e);
+                    } else {
+                        c = _cost[e];
+                    }
                     if (c > ART_COST) ART_COST = c;
                     Cost rc = fabs(c + _pi[_source[e]] - _pi[_target[e]]);
                     if ((ArcsType)maxheap.size() < K) {
@@ -1436,10 +1442,11 @@ namespace lemon {
                 while (u != _root) {
                     ArcsType e = _pred[u];
                     int v = _parent[u];
+                    Cost c = getCostForArc(e);
                     if (_forward[u]) {
-                        _pi[u] = _pi[v] - _cost[e];
+                        _pi[u] = _pi[v] - c;
                     } else {
-                        _pi[u] = _pi[v] + _cost[e];
+                        _pi[u] = _pi[v] + c;
                     }
                     u = _thread[u];
                 }
diff --git a/ot/solvers.py b/ot/solvers.py
@@ -1772,6 +1772,7 @@ def solve_sample(
             numItermax=max_iter if max_iter is not None else 100000,
             log=True,
             return_matrix=True,
+            potentials_init=potentials_init,
         )
 
         res = OTResult(
diff --git a/test/test_ot.py b/test/test_ot.py

Original file line number	Diff line number	Diff line change
`@@ -1772,6 +1772,7 @@ def solve_sample(`
`1772`	`1772`	`numItermax=max_iter if max_iter is not None else 100000,`
`1773`	`1773`	`log=True,`
`1774`	`1774`	`return_matrix=True,`
	`1775`	`+ potentials_init=potentials_init,`
`1775`	`1776`	`)`
`1776`	`1777`
`1777`	`1778`	`res = OTResult(`