Armijo: Work with ISTA and FISTA and new default (#1934)
* Armijo now works with ISTA and FISTA, and by default uses the previously calculated step size to initialise each new search

---------

Signed-off-by: Margaret Duff <[email protected]>
MargaretDuff authored Oct 8, 2024
1 parent b91c0eb commit 0181b3f
Showing 6 changed files with 190 additions and 105 deletions.
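To see what this change means in practice, here is a minimal sketch of pairing the Armijo rule with FISTA, which is the combination this commit enables. It assumes the CIL 24.x import paths visible in the diffs below; `f`, `g` and `initial` are hypothetical placeholders for a user's differentiable term, proximable term and starting point.

    from cil.optimisation.algorithms import FISTA
    from cil.optimisation.utilities import ArmijoStepSizeRule

    # warmstart=True is the new default: each Armijo backtracking search starts
    # from the step size accepted at the previous iteration, instead of
    # resetting to the large initial alpha every time.
    rule = ArmijoStepSizeRule(alpha=1e6, beta=0.5, warmstart=True)

    # f, g, initial are assumed to be set up elsewhere (placeholders).
    algo = FISTA(initial=initial, f=f, g=g, step_size=rule)
    algo.run(100)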
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -28,8 +28,10 @@
 - Make Binner accept accelerated=False (#1887)
 - Added checks on memory allocations within `FiniteDifferenceLibrary.cpp` and verified the status of the return in `GradientOperator` (#1929)
 - Build release version of `cilacc.dll` for Windows. Previously was defaulting to the debug build (#1928)
+- Armijo step size rule now by default initialises the search for a step size from the previously calculated step size (#1934)
 - Changes that break backwards compatibility:
   - CGLS will no longer automatically stop iterations once a default tolerance is reached. The option to pass `tolerance` will be deprecated to be replaced by `optimisation.utilities.callbacks` (#1892)


 * 24.1.0
 - New Features:
13 changes: 12 additions & 1 deletion Wrappers/Python/cil/optimisation/algorithms/FISTA.py
@@ -213,8 +213,19 @@ def update_objective(self):
         .. math:: f(x) + g(x)
         """
-        self.loss.append(self.f(self.x_old) + self.g(self.x_old))
+        self.loss.append(self.calculate_objective_function_at_point(self.x_old))

+    def calculate_objective_function_at_point(self, x):
+        """ Calculates the objective at a given point x
+
+        .. math:: f(x) + g(x)
+
+        Parameters
+        ----------
+        x : DataContainer
+        """
+        return self.f(x) + self.g(x)
+
 class FISTA(ISTA):

23 changes: 20 additions & 3 deletions Wrappers/Python/cil/optimisation/algorithms/GD.py
@@ -84,7 +84,7 @@ def set_up(self, initial, objective_function, step_size, preconditioner):
         log.info("%s setting up", self.__class__.__name__)

         self.x = initial.copy()
-        self.objective_function = objective_function
+        self._objective_function = objective_function

         if step_size is None:
             self.step_size_rule = ArmijoStepSizeRule(
@@ -106,7 +106,7 @@ def set_up(self, initial, objective_function, step_size, preconditioner):

     def update(self):
         '''Performs a single iteration of the gradient descent algorithm'''
-        self.objective_function.gradient(self.x, out=self.gradient_update)
+        self._objective_function.gradient(self.x, out=self.gradient_update)

         if self.preconditioner is not None:
             self.preconditioner.apply(
@@ -117,7 +117,7 @@ def update(self):
         self.x.sapyb(1.0, self.gradient_update, -step_size, out=self.x)

     def update_objective(self):
-        self.loss.append(self.objective_function(self.solution))
+        self.loss.append(self._objective_function(self.solution))

     def should_stop(self):
         '''Stopping criterion for the gradient descent algorithm '''
@@ -132,3 +132,20 @@ def step_size(self):
         else:
             raise TypeError(
                 "There is not a constant step size, it is set by a step-size rule")
+
+    def calculate_objective_function_at_point(self, x):
+        """ Calculates the objective at a given point x
+
+        .. math:: f(x) + g(x)
+
+        Parameters
+        ----------
+        x : DataContainer
+        """
+        return self._objective_function(x)
+
+    @property
+    def objective_function(self):
+        warn('The attribute `objective_function` will be deprecated in the future. Please use `calculate_objective_function_at_point` instead.', DeprecationWarning, stacklevel=2)
+        return self._objective_function
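Because the old attribute is kept behind a deprecation shim, existing user code keeps working while new code can call the explicit method. A hypothetical before/after, assuming `gd` is a set-up GD instance and `x` a DataContainer:

    # Preferred after this change:
    value = gd.calculate_objective_function_at_point(x)

    # Still works, but now emits a DeprecationWarning via the property above:
    value = gd.objective_function(x)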
27 changes: 21 additions & 6 deletions Wrappers/Python/cil/optimisation/utilities/StepSizeMethods.py
@@ -19,6 +19,9 @@
 from abc import ABC, abstractmethod
 import numpy
 from numbers import Number
+import logging
+
+log = logging.getLogger(__name__)

 class StepSizeRule(ABC):
     """
@@ -82,6 +85,9 @@ class ArmijoStepSizeRule(StepSizeRule):
         The amount the step_size is reduced if the criterion is not met
     max_iterations: integer, optional, default is numpy.ceil(2 * numpy.log10(alpha) / numpy.log10(2))
         The maximum number of iterations to find a suitable step size
+    warmstart: Boolean, default is True
+        If `warmstart = True`, the initial step size for each Armijo search is the step size calculated at the previous iteration. If `warmstart = False`, the initial step size is reset to the original, large `alpha` at each iteration.
+        For *well-behaved* convex functions, `warmstart = True` is likely to be computationally cheaper. For non-convex or otherwise tricky functions, `warmstart = False` may be beneficial.

     Reference
     ------------
@@ -91,21 +97,23 @@
     """

-    def __init__(self, alpha=1e6, beta=0.5, max_iterations=None):
+    def __init__(self, alpha=1e6, beta=0.5, max_iterations=None, warmstart=True):
         '''Initialises the step size rule
         '''

         self.alpha_orig = alpha
         if self.alpha_orig is None:  # Can be removed when alpha and beta are deprecated in GD
             self.alpha_orig = 1e6

+        self.alpha = self.alpha_orig
         self.beta = beta
         if self.beta is None:  # Can be removed when alpha and beta are deprecated in GD
             self.beta = 0.5

         self.max_iterations = max_iterations
         if self.max_iterations is None:
             self.max_iterations = numpy.ceil(2 * numpy.log10(self.alpha_orig) / numpy.log10(2))

+        self.warmstart = warmstart

     def get_step_size(self, algorithm):
         """
@@ -117,26 +125,33 @@ def get_step_size(self, algorithm):
         """
         k = 0
-        self.alpha = self.alpha_orig
-        f_x = algorithm.objective_function(algorithm.solution)
+        if not self.warmstart:
+            self.alpha = self.alpha_orig
+
+        f_x = algorithm.calculate_objective_function_at_point(algorithm.solution)

         self.x_armijo = algorithm.solution.copy()

+        log.debug("Starting Armijo backtracking with initial step size: %f", self.alpha)

         while k < self.max_iterations:

             algorithm.gradient_update.multiply(self.alpha, out=self.x_armijo)
             algorithm.solution.subtract(self.x_armijo, out=self.x_armijo)

-            f_x_a = algorithm.objective_function(self.x_armijo)
+            f_x_a = algorithm.calculate_objective_function_at_point(self.x_armijo)
             sqnorm = algorithm.gradient_update.squared_norm()
             if f_x_a - f_x <= - (self.alpha/2.) * sqnorm:
                 break
             k += 1.
             self.alpha *= self.beta

+        log.info("Armijo rule took %d iterations to find step size", k)

         if k == self.max_iterations:
             raise ValueError(
                 'Could not find a proper step_size in {} loops. Consider increasing alpha or max_iterations.'.format(self.max_iterations))

         return self.alpha
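To make the warmstart behaviour concrete outside of CIL, here is a small self-contained NumPy sketch of the same backtracking logic; it illustrates the rule only, it is not the CIL implementation, and all names in it are hypothetical.

    import numpy as np

    def armijo_step(f, grad, x, alpha, beta=0.5, max_iterations=40):
        # Backtrack from alpha until f(x - alpha*g) - f(x) <= -(alpha/2)*||g||^2.
        g = grad(x)
        f_x = f(x)
        sqnorm = np.dot(g, g)
        for _ in range(max_iterations):
            if f(x - alpha * g) - f_x <= -(alpha / 2.0) * sqnorm:
                return alpha
            alpha *= beta
        raise ValueError("Could not find a proper step size; increase alpha or max_iterations.")

    f = lambda x: 0.5 * np.dot(x, x)   # toy quadratic
    grad = lambda x: x
    x, alpha = np.ones(3), 1e6

    for _ in range(5):
        # warmstart amounts to threading the accepted alpha into the next search:
        alpha = armijo_step(f, grad, x, alpha)
        x = x - alpha * grad(x)

On this toy quadratic the Armijo condition reduces to alpha <= 1, so the first call backtracks about 20 times (1e6 * 0.5^20 is roughly 0.95), while every later warmstarted call accepts its first trial step; that saving is exactly what the new default targets.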


