178 changes: 178 additions & 0 deletions Wrappers/Python/cil/optimisation/algorithms/SARAH.py
@@ -0,0 +1,178 @@
from cil.optimisation.algorithms import Algorithm
from cil.optimisation.functions import ApproximateGradientSumFunction
from cil.optimisation.utilities import ConstantStepSize
from numbers import Number
import logging

class SARAH(Algorithm):

r"""SARAH algorithm.

StochAstic Recursive grAdient algoritHm (SARAH)
Lam M. Nguyen, Jie Liu, Katya Scheinberg, Martin Takáč
Proceedings of the 34th International Conference on Machine Learning, PMLR 70:2613-2621, 2017.
https://proceedings.mlr.press/v70/nguyen17b/nguyen17b.pdf

SARAH solves

.. math:: \min_{x} f(x) + g(x)

where :math:`f = \sum_{i=1}^{n} f_i` is differentiable and :math:`g` has a *simple* proximal operator, using the updates

.. math::

    \begin{align*}
    v_k &= n \left( \nabla f_{i_k}(x_k) - \nabla f_{i_k}(x_{k-1}) \right) + v_{k-1} \\
    x_{k+1} &= \mathrm{prox}_{\eta g}\left( x_k - \eta v_k \right)
    \end{align*}

where :math:`\eta` is the :code:`step_size`, the index :math:`i_k` is drawn by the :code:`selection` rule of :math:`f`, and the estimator is reset to the full gradient, :math:`v_k = \nabla f(x_k)`, every :code:`update_frequency` iterations.


Parameters
----------

initial : DataContainer
Starting point of the algorithm
f : Function
Differentiable function
g : Function
Convex function with *simple* proximal operator
step_size : positive :obj:`float` or a step size rule, default = None
Step size for the gradient step of SARAH.
If ``None``, the constant step size :math:`\frac{1.99}{L}` is used, where :math:`L` is the Lipschitz constant of the gradient of :math:`f`.
update_frequency : positive :obj:`int`, default = None
Number of iterations between full gradient evaluations.
If ``None``, it defaults to the number of functions in :math:`f`.
kwargs: Keyword arguments
Arguments from the base class :class:`.Algorithm`.

See also
--------
:class:`.ISTA`
:class:`.GD`
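
Example
-------

A minimal usage sketch; the construction of ``f`` follows the unit tests in ``test_SARAH.py``, and ``fi_cil``, ``initial`` and ``g`` are illustrative names:

.. code-block:: python

    from cil.optimisation.algorithms import SARAH
    from cil.optimisation.functions import ApproximateGradientSumFunction
    from cil.optimisation.utilities import RandomSampling

    # f is the sum of the differentiable functions in fi_cil
    sampling = RandomSampling.uniform(len(fi_cil))
    f = ApproximateGradientSumFunction(functions=fi_cil, selection=sampling, data_passes=[0.])

    sarah = SARAH(initial=initial, f=f, g=g, update_objective_interval=10,
                  max_iteration=10 * len(fi_cil))
    sarah.run(verbose=0)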

"""

@property
def step_size(self):
return self._step_size

def set_step_size(self, step_size):

""" Set the step size.
If ``step_size`` is ``None``, the default constant step size :math:`1.99/L` is used; a :obj:`Number` is wrapped in a :class:`ConstantStepSize`; any other object is assumed to be a step size rule.
"""
if step_size is None:
if isinstance(self.f.L, Number):
self.initial_step_size = 1.99/self.f.L
self._step_size = ConstantStepSize(self.initial_step_size)
else:
raise ValueError("Function f is not differentiable")
else:
if isinstance(step_size, Number):
self.initial_step_size = step_size
self._step_size = ConstantStepSize(self.initial_step_size)
else:
self._step_size = step_size

def __init__(self, initial, f, g, step_size = None, update_frequency = None, **kwargs):

if not isinstance(f, ApproximateGradientSumFunction):
raise ValueError("An ApproximateGradientSumFunction is required for f, {} is passed".format(f.__class__.__name__))
super(SARAH, self).__init__(**kwargs)

# step size
self._step_size = None

# initial step size for adaptive step size
self.initial_step_size = None

self.set_up(initial=initial, f=f, g=g, step_size=step_size, update_frequency=update_frequency,**kwargs)

def set_up(self, initial, f, g, step_size, update_frequency, **kwargs):
""" Set up the algorithm
"""

logging.info("{} setting up".format(self.__class__.__name__, ))

self.initial = initial
self.f = f # f must be an ApproximateGradientSumFunction, so that the data_passes and function selection members exist
self.g = g

# set problem parameters
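# update_frequency defaults to the number of functions in f, so the full gradient is refreshed roughly once per epoch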
self.update_frequency = update_frequency
if self.update_frequency is None:
self.update_frequency = self.f.num_functions

self.set_step_size(step_size=step_size)

# Initialise iterates, the gradient estimator, and the temporary variables
self.x_old = initial.copy()
self.x = initial.copy()

self.gradient_estimator = self.x * 0.0
self.stoch_grad_at_iterate = self.x * 0.0
self.stochastic_grad_difference = self.x * 0.0

self.configured = True
logging.info("{} configured".format(self.__class__.__name__, ))

def update(self):

r"""Performs a single iteration of SARAH

.. math::
# TODO: change maths
\begin{cases}

\end{cases}

"""

self.approximate_gradient(self.x, out=self.gradient_estimator)
self.x_old = self.x.copy()
step_size = self.step_size(self)
self.x.sapyb(1., self.gradient_estimator, -step_size, out = self.x)
self.x = self.g.proximal(self.x, step_size)

def approximate_gradient(self, x, out = None):

""" Returns the SARAH gradient estimator at ``x``: the full gradient of ``f`` at the start and every ``update_frequency`` iterations, and the recursive stochastic update otherwise.
"""

update_flag = (self.iteration % self.update_frequency == 0)

if update_flag:

# update the full gradient estimator
self.f.full_gradient(x, out=self.gradient_estimator)

if self.iteration == 0:
if len(self.f.data_passes) == 0:
self.f.data_passes.append(1)
else:
self.f.data_passes[0] = 1.
else:
self.f.data_passes.append(self.f.data_passes[-1]+1.)

if out is None:
return self.gradient_estimator
else:
out.fill(self.gradient_estimator)
else:
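# recursive update: select the next function and form the gradient difference between the current and previous iterates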

self.f.next_function()
self.f.functions[self.f.function_num].gradient(x, out=self.stoch_grad_at_iterate)
self.stoch_grad_at_iterate.sapyb(1., self.f.functions[self.f.function_num].gradient(self.x_old), -1., out=self.stochastic_grad_difference)

# update the data passes
self.f.data_passes.append(round(self.f.data_passes[-1] + 1./self.f.num_functions,2))

# Compute the estimator: v_k = n * (stochastic gradient difference) + v_{k-1}
if out is None:
return self.stochastic_grad_difference.sapyb(self.f.num_functions, self.gradient_estimator, 1.)
else:
return self.stochastic_grad_difference.sapyb(self.f.num_functions, self.gradient_estimator, 1., out=out)


def update_objective(self):
""" Updates the objective
.. math:: f(x) + g(x)
"""
self.loss.append( self.f(self.get_output()) + self.g(self.get_output()) )

138 changes: 138 additions & 0 deletions Wrappers/Python/test/test_SARAH.py
@@ -0,0 +1,138 @@
import unittest
from utils import initialise_tests
from cil.optimisation.operators import MatrixOperator
from cil.optimisation.algorithms import SARAH
from cil.optimisation.functions import LeastSquares, L2NormSquared, ZeroFunction, ApproximateGradientSumFunction
from cil.framework import VectorData
import numpy as np
from cil.optimisation.utilities import RandomSampling

initialise_tests()


from utils import has_cvxpy

if has_cvxpy:
import cvxpy


class TestSARAH(unittest.TestCase):

def setUp(self):

np.random.seed(10)
n = 10
m = 200
A = np.random.uniform(0,1, (m, n)).astype('float32')
b = (A.dot(np.random.randn(n)) + 0.1*np.random.randn(m)).astype('float32')

self.Aop = MatrixOperator(A)
self.bop = VectorData(b)

self.n_subsets = 5

Ai = np.vsplit(A, self.n_subsets)
bi = [b[i:i+int(m/self.n_subsets)] for i in range(0, m, int(m/self.n_subsets))]

self.fi_cil = []
for i in range(self.n_subsets):
self.Ai_cil = MatrixOperator(Ai[i])
self.bi_cil = VectorData(bi[i])
self.fi_cil.append(LeastSquares(self.Ai_cil, self.bi_cil, c = 0.5))

self.F = LeastSquares(self.Aop, b=self.bop, c = 0.5)
self.G = ZeroFunction()

self.ig = self.Aop.domain

self.sampling = RandomSampling.uniform(self.n_subsets)
self.fi = ApproximateGradientSumFunction(functions=self.fi_cil, selection=self.sampling, data_passes=[0.])

self.initial = self.ig.allocate()


def test_signature(self):

# required args
with np.testing.assert_raises(TypeError):
sarah = SARAH(initial = self.initial, f = self.fi)

with np.testing.assert_raises(TypeError):
sarah = SARAH(f = self.fi, g = self.G)

with np.testing.assert_raises(TypeError):
sarah = SARAH(initial = self.initial, g = self.G)

with np.testing.assert_raises(ValueError):
sarah = SARAH(initial = self.initial, f = L2NormSquared(), g = self.G)

tmp_step_size = 10
tmp_update_frequency = 3
sarah = SARAH(initial = self.initial, g = self.G, f = self.fi, step_size=tmp_step_size, update_frequency=tmp_update_frequency)
np.testing.assert_equal(sarah.step_size.initial, tmp_step_size)
np.testing.assert_equal(sarah.update_frequency, tmp_update_frequency)

self.assertTrue( id(sarah.x)!=id(sarah.initial))
self.assertTrue( id(sarah.x_old)!=id(sarah.initial))

def test_data_passes(self):

sampling = RandomSampling.uniform(self.n_subsets)
fi = ApproximateGradientSumFunction(functions=self.fi_cil,
selection=sampling,
data_passes=[0.])

sarah = SARAH(f=fi, g=self.G, update_objective_interval=1,
initial=self.initial, max_iteration=6)
sarah.run(verbose=0)
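# iterations 0 and 5 recompute the full gradient (update_frequency = n_subsets = 5); each other iteration adds 1/n_subsets of a pass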

correct_passes = [1., 1+1/self.n_subsets,
1.+2./self.n_subsets, 1+3./self.n_subsets, 1+4/self.n_subsets, 2+4/self.n_subsets]
np.testing.assert_equal(correct_passes, sarah.f.data_passes)


@unittest.skipUnless(has_cvxpy, "CVXpy not installed")
def test_with_cvxpy(self):
epochs = 300
initial = self.ig.allocate()
sarah = SARAH(f=self.fi, g=self.G, update_objective_interval=200, initial=initial, max_iteration=epochs*self.n_subsets)
sarah.run(verbose=0)

u_cvxpy = cvxpy.Variable(self.ig.shape[0])
objective = cvxpy.Minimize(0.5 * cvxpy.sum_squares(self.Aop.A @ u_cvxpy - self.bop.array))
p = cvxpy.Problem(objective)
p.solve(verbose=False, solver=cvxpy.SCS, eps=1e-4)
np.testing.assert_allclose(p.value, sarah.objective[-1], rtol=5e-3)
np.testing.assert_allclose(u_cvxpy.value, sarah.solution.array, rtol=5e-3)

def test_update(self):

initial = self.ig.allocate()
sarah = SARAH(f=self.fi, g=self.G, update_objective_interval=1,
initial=initial, max_iteration=2)
# iteration 0 computes the full gradient, iteration 1 performs one stochastic update
sarah.run(verbose=0)

x = initial.copy()
x_old = initial.copy()

step_size = sarah.step_size.initial
F_new = ApproximateGradientSumFunction(functions=self.fi_cil, selection=self.sampling, data_passes=[0.])

gradient_estimator = F_new.full_gradient(x)
x.sapyb(1., gradient_estimator, -step_size, out = x)
x = self.G.proximal(x, step_size) # proximal step of g (identity here, since g is ZeroFunction)

function_num = sarah.f.function_num
stoch_grad_at_iterate = F_new.functions[function_num].gradient(x)
stochastic_grad_difference = stoch_grad_at_iterate.sapyb(1., F_new.functions[function_num].gradient(x_old), -1.)
gradient_estimator = stochastic_grad_difference.sapyb(self.n_subsets, gradient_estimator, 1.)
x_old = x.copy()
x.sapyb(1., gradient_estimator, -step_size, out = x)
x = self.G.proximal(x, step_size) # proximal step of g (identity here, since g is ZeroFunction)

np.testing.assert_allclose(sarah.solution.array, x.array, atol=1e-2)

res1 = sarah.objective[-1]
res2 = F_new(x) + self.G(x)
np.testing.assert_allclose(res1, res2, rtol=1e-5)