Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Removed rtol and atol from GD so that it doesn't do early stopping automatically #1944

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions Wrappers/Python/cil/optimisation/algorithms/GD.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,27 +40,28 @@ class GD(Algorithm):
preconditioner: class with a `apply` method or a function that takes an initialised CIL function as an argument and modifies a provided `gradient`.
This could be a custom `preconditioner` or one provided in :meth:`~cil.optimisation.utilities.preconditioner`. If None is passed then `self.gradient_update` will remain unmodified.

rtol: positive float, default 1e-5
optional parameter defining the relative tolerance comparing the current objective function to 0, default 1e-5, see numpy.isclose
atol: positive float, default 1e-8
optional parameter defining the absolute tolerance comparing the current objective function to 0, default 1e-8, see numpy.isclose

"""

def __init__(self, initial=None, objective_function=None, step_size=None, rtol=1e-5, atol=1e-8, preconditioner=None, **kwargs):
def __init__(self, initial=None, objective_function=None, step_size=None, preconditioner=None, **kwargs):
'''GD algorithm creator
'''

self.alpha = kwargs.pop('alpha', None)
self.beta = kwargs.pop('beta', None)

self.rtol = kwargs.pop('rtol', 0) # to be deprecated
self.atol = kwargs.pop('atol', 0) # to be deprecated

super().__init__(**kwargs)

if self.alpha is not None or self.beta is not None:
warn('To modify the parameters for the Armijo rule please use `step_size_rule=ArmijoStepSizeRule(alpha, beta, kmax)`. The arguments `alpha` and `beta` will be deprecated. ', DeprecationWarning, stacklevel=2)

self.rtol = rtol
self.atol = atol

if self.rtol!=0 or self.atol!=0: # to be deprecated
warn('`rtol` and `atol` are deprecated. For early stopping, please use a callback (cil.optimisation.utilities.callbacks) instead for example `EarlyStoppingObjectiveValue`.', DeprecationWarning, stacklevel=2)
else:
logging.info('In a break with backwards compatibility, GD no longer automatically stops if the objective function is close to zero. For this functionality, please use a callback (cil.optimisation.utilities.callbacks).' )

if initial is not None and objective_function is not None:
self.set_up(initial=initial, objective_function=objective_function,
step_size=step_size, preconditioner=preconditioner)
Expand Down Expand Up @@ -119,7 +120,7 @@ def update(self):
def update_objective(self):
self.loss.append(self.objective_function(self.solution))

def should_stop(self):
def should_stop(self): # to be deprecated
'''Stopping criterion for the gradient descent algorithm '''
return super().should_stop() or \
numpy.isclose(self.get_last_objective(), 0., rtol=self.rtol,
Expand Down
20 changes: 9 additions & 11 deletions Wrappers/Python/test/test_stepsizes.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,39 +207,37 @@ def test_bb(self):
def test_bb_converge(self):
n = 10
m = 5
np.random.seed(4)
np.random.seed(2)
A = np.random.uniform(0, 1, (m, n)).astype('float32')
b = (A.dot(np.random.randn(n)) + 0.1 *
np.random.randn(m)).astype('float32')

Aop = MatrixOperator(A)
bop = VectorData(b)
ig=Aop.domain
initial = ig.allocate()
initial = ig.allocate(0)
f = LeastSquares(Aop, b=bop, c=2)

ss_rule=ArmijoStepSizeRule(max_iterations=40)
alg_true = GD(initial=initial, objective_function=f, step_size=ss_rule)
alg_true .run(300, verbose=0)

alg_true = GD(initial=initial, objective_function=f, step_size=1/f.L)
alg_true.run(220, verbose=0)


ss_rule=BarzilaiBorweinStepSizeRule(1/f.L, 'short')
alg = GD(initial=initial, objective_function=f, step_size=ss_rule)
alg.run(80, verbose=0)
self.assertNumpyArrayAlmostEqual(alg.x.as_array(), alg_true.x.as_array(), decimal=3)

self.assertNumpyArrayAlmostEqual(alg.x.as_array(), alg_true.x.as_array(), decimal=2)


ss_rule=BarzilaiBorweinStepSizeRule(1/f.L, 'long')
alg = GD(initial=initial, objective_function=f, step_size=ss_rule)
alg.run(80, verbose=0)
self.assertNumpyArrayAlmostEqual(alg.x.as_array(), alg_true.x.as_array(), decimal=3)
self.assertNumpyArrayAlmostEqual(alg.x.as_array(), alg_true.x.as_array(), decimal=2)


ss_rule=BarzilaiBorweinStepSizeRule(1/f.L, 'alternate')
alg = GD(initial=initial, objective_function=f, step_size=ss_rule)

alg.run(80, verbose=0)
self.assertNumpyArrayAlmostEqual(alg.x.as_array(), alg_true.x.as_array(), decimal=3)
alg.run(100, verbose=0)
self.assertNumpyArrayAlmostEqual(alg.x.as_array(), alg_true.x.as_array(), decimal=2)