178 changes: 178 additions & 0 deletions Wrappers/Python/cil/optimisation/algorithms/SARAH.py
@@ -0,0 +1,178 @@
from cil.optimisation.algorithms import Algorithm
from cil.optimisation.functions import ApproximateGradientSumFunction
from cil.optimisation.utilities import ConstantStepSize
from numbers import Number
import logging

class SARAH(Algorithm):

r"""SARAH algorithm.

StochAstic Recursive grAdient algoritHm (SARAH)
Lam M. Nguyen, Jie Liu, Katya Scheinberg, Martin Takáč
Proceedings of the 34th International Conference on Machine Learning, PMLR 70:2613-2621, 2017.
https://proceedings.mlr.press/v70/nguyen17b/nguyen17b.pdf

SARAH solves

.. math:: \min_{x} f(x) + g(x)

where :math:`f = \sum_{i=1}^{n} f_i` is differentiable and :math:`g` has a *simple* proximal operator, using the updates

.. math::

    \begin{align*}
    v_k &= n \left( \nabla f_{i_k}(x_k) - \nabla f_{i_k}(x_{k-1}) \right) + v_{k-1} \\
    x_{k+1} &= \mathrm{prox}_{\eta g}\left( x_k - \eta v_k \right)
    \end{align*}

where :math:`\eta` is the :code:`step_size`, the index :math:`i_k` is drawn by the :code:`selection` rule of :math:`f`, and the estimator is reset to the full gradient, :math:`v_k = \nabla f(x_k)`, every :code:`update_frequency` iterations.


Parameters
----------

initial : DataContainer
Starting point of the algorithm
f : Function
Differentiable function
g : Function
Convex function with *simple* proximal operator
step_size : positive :obj:`float` or a step size rule, default = None
Step size for the gradient step of SARAH.
If ``None``, the constant step size :math:`\frac{1.99}{L}` is used, where :math:`L` is the Lipschitz constant of the gradient of :math:`f`.
update_frequency : positive :obj:`int`, default = None
Number of iterations between full gradient evaluations.
If ``None``, it defaults to the number of functions in :math:`f`.
kwargs: Keyword arguments
Arguments from the base class :class:`.Algorithm`.

See also
--------
:class:`.ISTA`
:class:`.GD`
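
Example
-------

A minimal usage sketch; the construction of ``f`` follows the unit tests in ``test_SARAH.py``, and ``fi_cil``, ``initial`` and ``g`` are illustrative names:

.. code-block:: python

    from cil.optimisation.algorithms import SARAH
    from cil.optimisation.functions import ApproximateGradientSumFunction
    from cil.optimisation.utilities import RandomSampling

    # f is the sum of the differentiable functions in fi_cil
    sampling = RandomSampling.uniform(len(fi_cil))
    f = ApproximateGradientSumFunction(functions=fi_cil, selection=sampling, data_passes=[0.])

    sarah = SARAH(initial=initial, f=f, g=g, update_objective_interval=10,
                  max_iteration=10 * len(fi_cil))
    sarah.run(verbose=0)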

"""

@property
def step_size(self):
return self._step_size

def set_step_size(self, step_size):

""" Set the step size.
If ``step_size`` is ``None``, the default constant step size :math:`1.99/L` is used; a :obj:`Number` is wrapped in a :class:`ConstantStepSize`; any other object is assumed to be a step size rule.
"""
if step_size is None:
if isinstance(self.f.L, Number):
self.initial_step_size = 1.99/self.f.L
self._step_size = ConstantStepSize(self.initial_step_size)
else:
raise ValueError("Function f is not differentiable")
else:
if isinstance(step_size, Number):
self.initial_step_size = step_size
self._step_size = ConstantStepSize(self.initial_step_size)
else:
self._step_size = step_size

def __init__(self, initial, f, g, step_size = None, update_frequency = None, **kwargs):

if not isinstance(f, ApproximateGradientSumFunction):
raise ValueError("An ApproximateGradientSumFunction is required for f, {} is passed".format(f.__class__.__name__))
super(SARAH, self).__init__(**kwargs)

# step size
self._step_size = None

# initial step size for adaptive step size
self.initial_step_size = None

self.set_up(initial=initial, f=f, g=g, step_size=step_size, update_frequency=update_frequency,**kwargs)

def set_up(self, initial, f, g, step_size, update_frequency, **kwargs):
""" Set up the algorithm
"""

logging.info("{} setting up".format(self.__class__.__name__, ))

self.initial = initial
self.f = f # f must be an ApproximateGradientSumFunction, so that the data_passes and function selection members exist
self.g = g

# set problem parameters
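# update_frequency defaults to the number of functions in f, so the full gradient is refreshed roughly once per epoch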
self.update_frequency = update_frequency
if self.update_frequency is None:
self.update_frequency = self.f.num_functions

self.set_step_size(step_size=step_size)

# Initialise iterates, the gradient estimator, and the temporary variables
self.x_old = initial.copy()
self.x = initial.copy()

self.gradient_estimator = self.x * 0.0
self.stoch_grad_at_iterate = self.x * 0.0
self.stochastic_grad_difference = self.x * 0.0

self.configured = True
logging.info("{} configured".format(self.__class__.__name__, ))

def update(self):

r"""Performs a single iteration of SARAH

.. math::
# TODO: change maths
\begin{cases}

\end{cases}

"""

self.approximate_gradient(self.x, out=self.gradient_estimator)
self.x_old = self.x.copy()
step_size = self.step_size(self)
self.x.sapyb(1., self.gradient_estimator, -step_size, out = self.x)
self.x = self.g.proximal(self.x, step_size)

def approximate_gradient(self, x, out = None):

""" Returns the SARAH gradient estimator at ``x``: the full gradient of ``f`` at the start and every ``update_frequency`` iterations, and the recursive stochastic update otherwise.
"""

update_flag = (self.iteration % self.update_frequency == 0)

if update_flag:

# update the full gradient estimator
self.f.full_gradient(x, out=self.gradient_estimator)

if self.iteration == 0:
if len(self.f.data_passes) == 0:
self.f.data_passes.append(1)
else:
self.f.data_passes[0] = 1.
else:
self.f.data_passes.append(self.f.data_passes[-1]+1.)

if out is None:
return self.gradient_estimator
else:
out.fill(self.gradient_estimator)
else:
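# recursive update: select the next function and form the gradient difference between the current and previous iterates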

self.f.next_function()
self.f.functions[self.f.function_num].gradient(x, out=self.stoch_grad_at_iterate)
self.stoch_grad_at_iterate.sapyb(1., self.f.functions[self.f.function_num].gradient(self.x_old), -1., out=self.stochastic_grad_difference)

# update the data passes
self.f.data_passes.append(round(self.f.data_passes[-1] + 1./self.f.num_functions,2))

# Compute the estimator: v_k = n * (stochastic gradient difference) + v_{k-1}
if out is None:
return self.stochastic_grad_difference.sapyb(self.f.num_functions, self.gradient_estimator, 1.)
else:
return self.stochastic_grad_difference.sapyb(self.f.num_functions, self.gradient_estimator, 1., out=out)


def update_objective(self):
""" Updates the objective
.. math:: f(x) + g(x)
"""
self.loss.append( self.f(self.get_output()) + self.g(self.get_output()) )

138 changes: 138 additions & 0 deletions Wrappers/Python/test/test_SARAH.py
@@ -0,0 +1,138 @@
import unittest
from utils import initialise_tests
from cil.optimisation.operators import MatrixOperator
from cil.optimisation.algorithms import SARAH
from cil.optimisation.functions import LeastSquares, L2NormSquared, ZeroFunction, ApproximateGradientSumFunction
from cil.framework import VectorData
import numpy as np
from cil.optimisation.utilities import RandomSampling

initialise_tests()


from utils import has_cvxpy

if has_cvxpy:
import cvxpy


class TestSARAH(unittest.TestCase):

def setUp(self):

np.random.seed(10)
n = 10
m = 200
A = np.random.uniform(0,1, (m, n)).astype('float32')
b = (A.dot(np.random.randn(n)) + 0.1*np.random.randn(m)).astype('float32')

self.Aop = MatrixOperator(A)
self.bop = VectorData(b)

self.n_subsets = 5

Ai = np.vsplit(A, self.n_subsets)
bi = [b[i:i+int(m/self.n_subsets)] for i in range(0, m, int(m/self.n_subsets))]

self.fi_cil = []
for i in range(self.n_subsets):
self.Ai_cil = MatrixOperator(Ai[i])
self.bi_cil = VectorData(bi[i])
self.fi_cil.append(LeastSquares(self.Ai_cil, self.bi_cil, c = 0.5))

self.F = LeastSquares(self.Aop, b=self.bop, c = 0.5)
self.G = ZeroFunction()

self.ig = self.Aop.domain

self.sampling = RandomSampling.uniform(self.n_subsets)
self.fi = ApproximateGradientSumFunction(functions=self.fi_cil, selection=self.sampling, data_passes=[0.])

self.initial = self.ig.allocate()


def test_signature(self):

# required args
with np.testing.assert_raises(TypeError):
sarah = SARAH(initial = self.initial, f = self.fi)

with np.testing.assert_raises(TypeError):
sarah = SARAH(f = self.fi, g = self.G)

with np.testing.assert_raises(TypeError):
sarah = SARAH(initial = self.initial, g = self.G)

with np.testing.assert_raises(ValueError):
sarah = SARAH(initial = self.initial, f = L2NormSquared(), g = self.G)

tmp_step_size = 10
tmp_update_frequency = 3
sarah = SARAH(initial = self.initial, g = self.G, f = self.fi, step_size=tmp_step_size, update_frequency=tmp_update_frequency)
np.testing.assert_equal(sarah.step_size.initial, tmp_step_size)
np.testing.assert_equal(sarah.update_frequency, tmp_update_frequency)

self.assertTrue( id(sarah.x)!=id(sarah.initial))
self.assertTrue( id(sarah.x_old)!=id(sarah.initial))

def test_data_passes(self):

sampling = RandomSampling.uniform(self.n_subsets)
fi = ApproximateGradientSumFunction(functions=self.fi_cil,
selection=sampling,
data_passes=[0.])

sarah = SARAH(f=fi, g=self.G, update_objective_interval=1,
initial=self.initial, max_iteration=6)
sarah.run(verbose=0)
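# iterations 0 and 5 recompute the full gradient (update_frequency = n_subsets = 5); each other iteration adds 1/n_subsets of a pass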

correct_passes = [1., 1+1/self.n_subsets,
1.+2./self.n_subsets, 1+3./self.n_subsets, 1+4/self.n_subsets, 2+4/self.n_subsets]
np.testing.assert_equal(correct_passes, sarah.f.data_passes)


@unittest.skipUnless(has_cvxpy, "CVXpy not installed")
def test_with_cvxpy(self):
epochs = 300
initial = self.ig.allocate()
sarah = SARAH(f=self.fi, g=self.G, update_objective_interval=200, initial=initial, max_iteration=epochs*self.n_subsets)
sarah.run(verbose=0)

u_cvxpy = cvxpy.Variable(self.ig.shape[0])
objective = cvxpy.Minimize(0.5 * cvxpy.sum_squares(self.Aop.A @ u_cvxpy - self.bop.array))
p = cvxpy.Problem(objective)
p.solve(verbose=False, solver=cvxpy.SCS, eps=1e-4)
np.testing.assert_allclose(p.value, sarah.objective[-1], rtol=5e-3)
np.testing.assert_allclose(u_cvxpy.value, sarah.solution.array, rtol=5e-3)

def test_update(self):

initial = self.ig.allocate()
sarah = SARAH(f=self.fi, g=self.G, update_objective_interval=1,
initial=initial, max_iteration=2)
# iteration 0 computes the full gradient, iteration 1 performs one stochastic update
sarah.run(verbose=0)

x = initial.copy()
x_old = initial.copy()

step_size = sarah.step_size.initial
F_new = ApproximateGradientSumFunction(functions=self.fi_cil, selection=self.sampling, data_passes=[0.])

gradient_estimator = F_new.full_gradient(x)
x.sapyb(1., gradient_estimator, -step_size, out = x)
x = self.G.proximal(x, step_size) # proximal step of g (identity here, since g is ZeroFunction)

function_num = sarah.f.function_num
stoch_grad_at_iterate = F_new.functions[function_num].gradient(x)
stochastic_grad_difference = stoch_grad_at_iterate.sapyb(1., F_new.functions[function_num].gradient(x_old), -1.)
gradient_estimator = stochastic_grad_difference.sapyb(self.n_subsets, gradient_estimator, 1.)
x_old = x.copy()
x.sapyb(1., gradient_estimator, -step_size, out = x)
x = self.G.proximal(x, step_size) # proximal step of g (identity here, since g is ZeroFunction)

np.testing.assert_allclose(sarah.solution.array, x.array, atol=1e-2)

res1 = sarah.objective[-1]
res2 = F_new(x) + self.G(x)
np.testing.assert_allclose(res1, res2, rtol=1e-5)