Documentation and some fixes for Manopt quasi-Newton call
mateuszbaran committed Dec 7, 2022
1 parent a3715d8 commit 1717303
Showing 7 changed files with 228 additions and 12 deletions.
3 changes: 3 additions & 0 deletions docs/Project.toml
@@ -8,6 +8,8 @@ Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
Ipopt_jll = "9cc047cb-c261-5740-88fc-0cf96f7bdcc7"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
Juniper = "2ddba703-00a4-53a7-87a5-e8b9971dde84"
Manifolds = "1cead3c2-87b3-11e9-0ccd-23c62b72b94e"
Manopt = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5"
ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78"
NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd"
Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
@@ -16,6 +18,7 @@ OptimizationCMAEvolutionStrategy = "bd407f91-200f-4536-9381-e4ba712f53f8"
OptimizationEvolutionary = "cb963754-43f6-435e-8d4b-99009ff27753"
OptimizationGCMAES = "6f0a0517-dbc2-4a7a-8a20-99ae7f27e911"
OptimizationMOI = "fd9f6733-72f4-499f-8506-86b2bdd0dea1"
OptimizationManopt = "e57b7fff-7ee7-4550-b4f0-90e9476e9fb6"
OptimizationMetaheuristics = "3aafef2f-86ae-4776-b337-85a36adf0b55"
OptimizationMultistartOptimization = "e4316d97-8bbb-4fd3-a7d8-3851d2a72823"
OptimizationNLopt = "4e6fcdb7-1186-4e1f-a706-475e75c168bb"
1 change: 1 addition & 0 deletions docs/src/index.md
@@ -45,6 +45,7 @@ packages.
| Evolutionary |||||| 🟡 |
| Flux |||||||
| GCMAES |||||||
| Manopt || 🟡 |||||
| MathOptInterface |||||| 🟡 |
| MultistartOptimization |||||||
| Metaheuristics |||||| 🟡 |
51 changes: 49 additions & 2 deletions docs/src/optimization_packages/manopt.md
@@ -18,17 +18,64 @@ import Pkg; Pkg.add("OptimizationManopt")
- `ConjugateGradientDescentOptimizer`
- `GradientDescentOptimizer`
- `NelderMeadOptimizer`
- `ParticleSwarmOptimizer`
- `QuasiNewtonOptimizer`

For more extensive documentation of all the algorithms and their options, please consult the [Manopt.jl documentation](https://manoptjl.org/stable/).

## Local Optimizer

### Local Constraint

### Derivative-Free

The Nelder-Mead optimizer can be used for local derivative-free optimization on manifolds.

```@example Manopt1
using Optimization, OptimizationManopt, Manifolds
rosenbrock(x, p) = (p[1] - x[1])^2 + p[2] * (x[2] - x[1]^2)^2
x0 = [0.0, 1.0]
p = [1.0, 100.0]
f = OptimizationFunction(rosenbrock)
opt = OptimizationManopt.NelderMeadOptimizer(Sphere(1))
prob = Optimization.OptimizationProblem(f, x0, p)
sol = solve(prob, opt)
```

### Gradient-Based

Manopt offers gradient descent, conjugate gradient descent and quasi-Newton solvers for local gradient-based optimization.

Note that the gradient returned by the user-supplied gradient function needs to be a Riemannian gradient; see for example [https://manoptjl.org/stable/functions/gradients/](https://manoptjl.org/stable/functions/gradients/).
One way to obtain a Riemannian gradient is by [projection and an (optional) Riesz representer change](https://juliamanifolds.github.io/Manifolds.jl/latest/features/differentiation.html#Manifolds.RiemannianProjectionBackend).
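
As a small standalone illustration (a sketch, not taken from the package docs; the point and gradient values below are made up), a Euclidean gradient can be turned into a Riemannian one by projection:

```julia
using Manifolds

M = Sphere(1)
x = [0.0, 1.0]        # a point on the circle
egrad = [2.0, -1.0]   # hypothetical Euclidean gradient at x

# Projection onto the tangent space at x turns the Euclidean gradient into a
# Riemannian one (for the sphere no Riesz representer change is needed, since
# it carries the metric induced from the Euclidean embedding).
rgrad = project(M, x, egrad)
```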

```@example Manopt2
using Optimization, OptimizationManopt, Manifolds
rosenbrock(x, p) = (p[1] - x[1])^2 + p[2] * (x[2] - x[1]^2)^2
function rosenbrock_grad!(storage, x, p)
storage[1] = -2.0 * (p[1] - x[1]) - 4.0 * p[2] * (x[2] - x[1]^2) * x[1]
storage[2] = 2.0 * p[2] * (x[2] - x[1]^2)
project!(Sphere(1), storage, x, storage)
end
x0 = [0.0, 1.0]
p = [1.0, 100.0]
f = OptimizationFunction(rosenbrock; grad = rosenbrock_grad!)
opt = OptimizationManopt.GradientDescentOptimizer(Sphere(1))
prob = Optimization.OptimizationProblem(f, x0, p)
sol = solve(prob, opt)
```

## Global Optimizer

### Without Constraint Equations

The particle swarm optimizer can be used for global optimization on a manifold without constraint equations. It can be especially useful on compact manifolds such as spheres or orthogonal matrices.

```@example Manopt3
using Optimization, OptimizationManopt, Manifolds
rosenbrock(x, p) = (p[1] - x[1])^2 + p[2] * (x[2] - x[1]^2)^2
x0 = [0.0, 1.0]
p = [1.0, 100.0]
f = OptimizationFunction(rosenbrock)
opt = OptimizationManopt.ParticleSwarmOptimizer(Sphere(1))
prob = Optimization.OptimizationProblem(f, x0, p)
sol = solve(prob, opt)
```
43 changes: 41 additions & 2 deletions docs/src/tutorials/constraints.md
@@ -1,10 +1,10 @@
# [Using Equality and Inequality Constraints](@id constraints)

Multiple optmization packages available with the MathOptInterface and Optim's `IPNewton` solver can handle non-linear constraints.
Multiple optimization packages available with the MathOptInterface and Optim's `IPNewton` solver can handle non-linear constraints.
Optimization.jl provides a simple interface to define the constraint as a julia function and then specify the bounds for the output
in `OptimizationFunction` to indicate if it's an equality or inequality constraint.

Let's define the rosenbrock function as our objective function and consider the below inequalities as our constraints.
Let's define the Rosenbrock function as our objective function and consider the below inequalities as our constraints.

```math
\begin{aligned}
@@ -98,3 +98,42 @@ res = zeros(2)
cons(res, sol.u, _p)
println(res)
```

## Constraints as Riemannian manifolds

Certain constraints can be efficiently handled using Riemannian optimization methods. This is the case when moving the solution on a manifold can be done quickly. See [here](https://juliamanifolds.github.io/Manifolds.jl/latest/index.html) for a (non-exhaustive) list of such manifolds, the most prominent being spheres, hyperbolic spaces, Stiefel and Grassmann manifolds, symmetric positive definite matrices, and various Lie groups.
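
As a rough illustration (a sketch, not part of the original tutorial), a few of these manifolds can be constructed directly with Manifolds.jl and then passed to a Riemannian optimizer as the constraint set:

```julia
using Manifolds

S = Sphere(2)                      # unit vectors in R^3, i.e. norm(x) == 1
H = Hyperbolic(2)                  # hyperbolic plane
St = Stiefel(5, 2)                 # 5x2 matrices with orthonormal columns
P = SymmetricPositiveDefinite(3)   # 3x3 symmetric positive definite matrices

# `rand` draws a point satisfying the corresponding constraint,
# and `is_point` checks that a candidate lies on the manifold.
x = rand(S)
is_point(S, x)
```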

As an example, let's solve the Rosenbrock optimization problem subject to just a spherical constraint. Note that the constraint isn't passed to `OptimizationFunction` but to the optimization method instead. Here we will use a [quasi-Newton optimizer based on the BFGS algorithm](https://manoptjl.org/stable/solvers/quasi_Newton/).

```@example manopt
using Optimization, Manopt, OptimizationManopt, Manifolds
rosenbrock(x, p) = (p[1] - x[1])^2 + p[2] * (x[2] - x[1]^2)^2
x0 = [0.0, 1.0]
_p = [1.0, 100.0]
function rosenbrock_grad!(storage, x, p)
# the first part can be computed using AD tools
storage[1] = -2.0 * (p[1] - x[1]) - 4.0 * p[2] * (x[2] - x[1]^2) * x[1]
storage[2] = 2.0 * p[2] * (x[2] - x[1]^2)
# projection is needed because Riemannian optimizers expect
# Riemannian gradients instead of Euclidean ones.
project!(Sphere(1), storage, x, storage)
end
optprob = OptimizationFunction(rosenbrock; grad=rosenbrock_grad!)
opt = OptimizationManopt.QuasiNewtonOptimizer(Sphere(1))
prob = OptimizationProblem(optprob, x0, _p)
sol = Optimization.solve(prob, opt)
```

Note that currently `AutoForwardDiff` can't correctly compute the required Riemannian gradient for optimization: Riemannian optimizers require Riemannian gradients, while `ForwardDiff.jl` returns ordinary Euclidean ones. Conversion from Euclidean to Riemannian gradients can be performed using the [`project`](https://juliamanifolds.github.io/ManifoldsBase.jl/stable/projections.html#Projections) function and (for certain manifolds) [`change_representer`](https://juliamanifolds.github.io/Manifolds.jl/stable/manifolds/metric.html#Manifolds.change_representer-Tuple{AbstractManifold,%20AbstractMetric,%20Any,%20Any}).
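
For instance, a Riemannian gradient on the sphere could be assembled from a ForwardDiff-computed Euclidean gradient followed by a projection. The following is a minimal sketch along the lines of the hand-written gradient above; it assumes the `rosenbrock` objective and the `Sphere(1)` manifold from the example:

```julia
using ForwardDiff, Manifolds

function riemannian_rosenbrock_grad!(storage, x, p)
    # Euclidean gradient via automatic differentiation
    ForwardDiff.gradient!(storage, y -> rosenbrock(y, p), x)
    # project onto the tangent space at x to obtain a Riemannian gradient
    project!(Sphere(1), storage, x, storage)
end
```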

Note that the constraint is correctly preserved and the convergence is quite fast.

```@example manopt
using LinearAlgebra # for `norm`
println(norm(sol.u))
println(sol.original.stop.reason)
```

It is possible to mix Riemannian and equation-based constraints, but this is currently a topic of active research. Manopt.jl offers solvers for such problems, but they are not accessible via the Optimization.jl interface yet.
15 changes: 15 additions & 0 deletions docs/src/tutorials/rosenbrock.md
@@ -126,6 +126,21 @@ sol = solve(prob, CMAES(μ =40 , λ = 100),abstol=1e-15) # -1.0 ≤ x[1], x[2]
using OptimizationBBO
prob = Optimization.OptimizationProblem(rosenbrock, x0, _p, lb=[-1.0, 0.2], ub=[0.8, 0.43])
sol = solve(prob, BBO_adaptive_de_rand_1_bin()) # -1.0 ≤ x[1] ≤ 0.8, 0.2 ≤ x[2] ≤ 0.43
## Riemannian optimization
using Manopt, Manifolds, OptimizationManopt, ForwardDiff
function rosenbrock_grad!(storage, x, p)
ForwardDiff.gradient!(storage, y -> rosenbrock(y, p), x)
project!(Manifolds.Sphere(1), storage, x, storage)
end
optprob = OptimizationFunction(rosenbrock; grad=rosenbrock_grad!)
opt = OptimizationManopt.QuasiNewtonOptimizer(Manifolds.Sphere(1))
x0 = [1.0, 0.0]
prob = OptimizationProblem(optprob, x0, _p)
sol = Optimization.solve(prob, opt)
```

And this is only a small subset of what Optimization.jl has to offer!
58 changes: 53 additions & 5 deletions lib/OptimizationManopt/src/OptimizationManopt.jl
@@ -89,8 +89,6 @@ function call_manopt_optimizer(opt::NelderMeadOptimizer,
:who_knows
end

## augmented Lagrangian method

## conjugate gradient descent

struct ConjugateGradientDescentOptimizer{Teval <: AbstractEvaluationType,
@@ -130,10 +128,60 @@ function call_manopt_optimizer(opt::ConjugateGradientDescentOptimizer{Teval},
:who_knows
end

## exact penalty method

## particle swarm

struct ParticleSwarmOptimizer{Teval <: AbstractEvaluationType,
TM <: AbstractManifold, Tretr <: AbstractRetractionMethod,
Tinvretr <: AbstractInverseRetractionMethod,
Tvt <: AbstractVectorTransportMethod} <:
AbstractManoptOptimizer
M::TM
retraction_method::Tretr
inverse_retraction_method::Tinvretr
vector_transport_method::Tvt
population_size::Int
end

function ParticleSwarmOptimizer(M::AbstractManifold;
eval::AbstractEvaluationType = MutatingEvaluation(),
population_size::Int = 100,
retraction_method::AbstractRetractionMethod = default_retraction_method(M),
inverse_retraction_method::AbstractInverseRetractionMethod = default_inverse_retraction_method(M),
vector_transport_method::AbstractVectorTransportMethod = default_vector_transport_method(M))
ParticleSwarmOptimizer{typeof(eval), typeof(M), typeof(retraction_method),
typeof(inverse_retraction_method),
typeof(vector_transport_method)}(M,
retraction_method,
inverse_retraction_method,
vector_transport_method,
population_size)
end

function call_manopt_optimizer(opt::ParticleSwarmOptimizer{Teval},
loss,
gradF,
x0,
stopping_criterion::Union{Nothing, Manopt.StoppingCriterion}) where {
Teval <:
AbstractEvaluationType
}
sckwarg = stopping_criterion_to_kwarg(stopping_criterion)
initial_population = vcat([x0], [rand(opt.M) for _ in 1:(opt.population_size - 1)])
opts = particle_swarm(opt.M,
loss;
x0 = initial_population,
n = opt.population_size,
return_options = true,
retraction_method = opt.retraction_method,
inverse_retraction_method = opt.inverse_retraction_method,
vector_transport_method = opt.vector_transport_method,
sckwarg...)
# we unwrap DebugOptions here
minimizer = Manopt.get_solver_result(opts)
return (; minimizer = minimizer, minimum = loss(opt.M, minimizer), options = opts),
:who_knows
end

## quasi Newton

struct QuasiNewtonOptimizer{Teval <: AbstractEvaluationType,
@@ -170,7 +218,7 @@ function call_manopt_optimizer(opt::QuasiNewtonOptimizer{Teval},
AbstractEvaluationType
}
sckwarg = stopping_criterion_to_kwarg(stopping_criterion)
opts = conjugate_gradient_descent(opt.M,
opts = quasi_Newton(opt.M,
loss,
gradF,
x0;
69 changes: 66 additions & 3 deletions lib/OptimizationManopt/test/runtests.jl
@@ -4,9 +4,15 @@ using Manifolds
using ForwardDiff
using Manopt
using Test
using SciMLBase

rosenbrock(x, p) = (p[1] - x[1])^2 + p[2] * (x[2] - x[1]^2)^2

function rosenbrock_grad!(storage, x, p)
storage[1] = -2.0 * (p[1] - x[1]) - 4.0 * p[2] * (x[2] - x[1]^2) * x[1]
storage[2] = 2.0 * p[2] * (x[2] - x[1]^2)
end

R2 = Euclidean(2)

@testset "Gradient descent" begin
@@ -17,10 +23,14 @@ R2 = Euclidean(2)
opt = OptimizationManopt.GradientDescentOptimizer(R2,
stepsize = stepsize)

optprob = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff())
prob = OptimizationProblem(optprob, x0, p)
optprob_forwarddiff = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff())
prob_forwarddiff = OptimizationProblem(optprob_forwarddiff, x0, p)
sol = Optimization.solve(prob_forwarddiff, opt)
@test sol.minimum < 0.2

sol = Optimization.solve(prob, opt)
optprob_grad = OptimizationFunction(rosenbrock; grad = rosenbrock_grad!)
prob_grad = OptimizationProblem(optprob_grad, x0, p)
sol = Optimization.solve(prob_grad, opt)
@test sol.minimum < 0.2
end

@@ -36,3 +46,56 @@ end
sol = Optimization.solve(prob, opt)
@test sol.minimum < 0.7
end

@testset "Conjugate gradient descent" begin
x0 = zeros(2)
p = [1.0, 100.0]

stepsize = Manopt.ArmijoLinesearch(R2)
opt = OptimizationManopt.ConjugateGradientDescentOptimizer(R2,
stepsize = stepsize)

optprob = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff())
prob = OptimizationProblem(optprob, x0, p)

sol = Optimization.solve(prob, opt)
@test sol.minimum < 0.2
end

@testset "Quasi Newton" begin
x0 = zeros(2)
p = [1.0, 100.0]

opt = OptimizationManopt.QuasiNewtonOptimizer(R2)

optprob = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff())
prob = OptimizationProblem(optprob, x0, p)

sol = Optimization.solve(prob, opt)
@test sol.minimum < 1e-16
end

@testset "Particle swarm" begin
x0 = zeros(2)
p = [1.0, 100.0]

opt = OptimizationManopt.ParticleSwarmOptimizer(R2)

optprob = OptimizationFunction(rosenbrock)
prob = OptimizationProblem(optprob, x0, p)

sol = Optimization.solve(prob, opt)
@test sol.minimum < 0.1
end

@testset "Custom constraints" begin
cons(res, x, p) = (res .= [x[1]^2 + x[2]^2, x[1] * x[2]])

x0 = zeros(2)
p = [1.0, 100.0]
opt = OptimizationManopt.GradientDescentOptimizer(R2)

optprob_cons = OptimizationFunction(rosenbrock; grad = rosenbrock_grad!, cons = cons)
prob_cons = OptimizationProblem(optprob_cons, x0, p)
@test_throws SciMLBase.IncompatibleOptimizerError Optimization.solve(prob_cons, opt)
end
