diff --git a/docs/pages.jl b/docs/pages.jl
index 0e88b8a5a..cc69fa3cb 100644
--- a/docs/pages.jl
+++ b/docs/pages.jl
@@ -24,7 +24,6 @@ pages = ["index.md",
         "BlackBoxOptim.jl" => "optimization_packages/blackboxoptim.md",
         "CMAEvolutionStrategy.jl" => "optimization_packages/cmaevolutionstrategy.md",
         "Evolutionary.jl" => "optimization_packages/evolutionary.md",
-        "Flux.jl" => "optimization_packages/flux.md",
         "GCMAES.jl" => "optimization_packages/gcmaes.md",
         "Manopt.jl" => "optimization_packages/manopt.md",
         "MathOptInterface.jl" => "optimization_packages/mathoptinterface.md",
diff --git a/docs/src/optimization_packages/flux.md b/docs/src/optimization_packages/flux.md
deleted file mode 100644
index dc3a1260a..000000000
--- a/docs/src/optimization_packages/flux.md
+++ /dev/null
@@ -1,135 +0,0 @@
-# Flux.jl
-
-## Installation: OptimizationFlux.jl
-
-To use this package, install the OptimizationFlux package:
-
-```julia
-import Pkg;
-Pkg.add("OptimizationFlux");
-```
-
-!!! warn
-
-    Flux's optimizers are soon to be deprecated by [Optimisers.jl](https://github.com/FluxML/Optimisers.jl)
-    Because of this, we recommend using the OptimizationOptimisers.jl setup instead of OptimizationFlux.jl
-
-## Local Unconstrained Optimizers
-
-  - [`Flux.Optimise.Descent`](https://fluxml.ai/Flux.jl/stable/training/optimisers/#Flux.Optimise.Descent): **Classic gradient descent optimizer with learning rate**
-
-      + `solve(problem, Descent(η))`
-
-      + `η` is the learning rate
-      + Defaults:
-
-          * `η = 0.1`
-
-  - [`Flux.Optimise.Momentum`](https://fluxml.ai/Flux.jl/stable/training/optimisers/#Flux.Optimise.Momentum): **Classic gradient descent optimizer with learning rate and momentum**
-
-      + `solve(problem, Momentum(η, ρ))`
-
-      + `η` is the learning rate
-      + `ρ` is the momentum
-      + Defaults:
-
-          * `η = 0.01`
-          * `ρ = 0.9`
-  - [`Flux.Optimise.Nesterov`](https://fluxml.ai/Flux.jl/stable/training/optimisers/#Flux.Optimise.Nesterov): **Gradient descent optimizer with learning rate and Nesterov momentum**
-
-      + `solve(problem, Nesterov(η, ρ))`
-
-      + `η` is the learning rate
-      + `ρ` is the Nesterov momentum
-      + Defaults:
-
-          * `η = 0.01`
-          * `ρ = 0.9`
-  - [`Flux.Optimise.RMSProp`](https://fluxml.ai/Flux.jl/stable/training/optimisers/#Flux.Optimise.RMSProp): **RMSProp optimizer**
-
-      + `solve(problem, RMSProp(η, ρ))`
-
-      + `η` is the learning rate
-      + `ρ` is the momentum
-      + Defaults:
-
-          * `η = 0.001`
-          * `ρ = 0.9`
-  - [`Flux.Optimise.ADAM`](https://fluxml.ai/Flux.jl/stable/training/optimisers/#Flux.Optimise.ADAM): **ADAM optimizer**
-
-      + `solve(problem, ADAM(η, β::Tuple))`
-
-      + `η` is the learning rate
-      + `β::Tuple` is the decay of momentums
-      + Defaults:
-
-          * `η = 0.001`
-          * `β::Tuple = (0.9, 0.999)`
-  - [`Flux.Optimise.RADAM`](https://fluxml.ai/Flux.jl/stable/training/optimisers/#Flux.Optimise.RADAM): **Rectified ADAM optimizer**
-
-      + `solve(problem, RADAM(η, β::Tuple))`
-
-      + `η` is the learning rate
-      + `β::Tuple` is the decay of momentums
-      + Defaults:
-
-          * `η = 0.001`
-          * `β::Tuple = (0.9, 0.999)`
-  - [`Flux.Optimise.AdaMax`](https://fluxml.ai/Flux.jl/stable/training/optimisers/#Flux.Optimise.AdaMax): **AdaMax optimizer**
-
-      + `solve(problem, AdaMax(η, β::Tuple))`
-
-      + `η` is the learning rate
-      + `β::Tuple` is the decay of momentums
-      + Defaults:
-
-          * `η = 0.001`
-          * `β::Tuple = (0.9, 0.999)`
-  - [`Flux.Optimise.ADAGRad`](https://fluxml.ai/Flux.jl/stable/training/optimisers/#Flux.Optimise.ADAGrad): **ADAGrad optimizer**
-
-      + `solve(problem, ADAGrad(η))`
-
-      + `η` is the learning rate
-      + Defaults:
-
-          * `η = 0.1`
-  - [`Flux.Optimise.ADADelta`](https://fluxml.ai/Flux.jl/stable/training/optimisers/#Flux.Optimise.ADADelta): **ADADelta optimizer**
-
-      + `solve(problem, ADADelta(ρ))`
-
-      + `ρ` is the gradient decay factor
-      + Defaults:
-
-          * `ρ = 0.9`
-  - [`Flux.Optimise.AMSGrad`](https://fluxml.ai/Flux.jl/stable/training/optimisers/#Flux.Optimise.ADAGrad): **AMSGrad optimizer**
-
-      + `solve(problem, AMSGrad(η, β::Tuple))`
-
-      + `η` is the learning rate
-      + `β::Tuple` is the decay of momentums
-      + Defaults:
-
-          * `η = 0.001`
-          * `β::Tuple = (0.9, 0.999)`
-  - [`Flux.Optimise.NADAM`](https://fluxml.ai/Flux.jl/stable/training/optimisers/#Flux.Optimise.NADAM): **Nesterov variant of the ADAM optimizer**
-
-      + `solve(problem, NADAM(η, β::Tuple))`
-
-      + `η` is the learning rate
-      + `β::Tuple` is the decay of momentums
-      + Defaults:
-
-          * `η = 0.001`
-          * `β::Tuple = (0.9, 0.999)`
-  - [`Flux.Optimise.ADAMW`](https://fluxml.ai/Flux.jl/stable/training/optimisers/#Flux.Optimise.ADAMW): **ADAMW optimizer**
-
-      + `solve(problem, ADAMW(η, β::Tuple))`
-
-      + `η` is the learning rate
-      + `β::Tuple` is the decay of momentums
-      + `decay` is the decay to weights
-      + Defaults:
-
-          * `η = 0.001`
-          * `β::Tuple = (0.9, 0.999)`
-          * `decay = 0`
diff --git a/lib/OptimizationManopt/src/OptimizationManopt.jl b/lib/OptimizationManopt/src/OptimizationManopt.jl
index 3d97cc9b9..d68792640 100644
--- a/lib/OptimizationManopt/src/OptimizationManopt.jl
+++ b/lib/OptimizationManopt/src/OptimizationManopt.jl
@@ -394,8 +394,10 @@ function SciMLBase.__solve(cache::OptimizationCache{
     local x, cur, state

     manifold = haskey(cache.solver_args, :manifold) ? cache.solver_args[:manifold] : nothing
-    gradF = haskey(cache.solver_args, :riemannian_grad) ? cache.solver_args[:riemannian_grad] : nothing
-    hessF = haskey(cache.solver_args, :riemannian_hess) ? cache.solver_args[:riemannian_hess] : nothing
+    gradF = haskey(cache.solver_args, :riemannian_grad) ?
+            cache.solver_args[:riemannian_grad] : nothing
+    hessF = haskey(cache.solver_args, :riemannian_hess) ?
+            cache.solver_args[:riemannian_hess] : nothing

     if manifold === nothing
         throw(ArgumentError("Manifold not specified in the problem for e.g. `OptimizationProblem(f, x, p; manifold = SymmetricPositiveDefinite(5))`."))
diff --git a/src/lbfgsb.jl b/src/lbfgsb.jl
index edf7364a6..2c20e25fd 100644
--- a/src/lbfgsb.jl
+++ b/src/lbfgsb.jl
@@ -171,9 +171,11 @@ function SciMLBase.__solve(cache::OptimizationCache{
         n = length(cache.u0)

         if cache.lb === nothing
-            optimizer, bounds = LBFGSB._opt_bounds(n, cache.opt.m, [-Inf for i in 1:n], [Inf for i in 1:n])
+            optimizer, bounds = LBFGSB._opt_bounds(
+                n, cache.opt.m, [-Inf for i in 1:n], [Inf for i in 1:n])
         else
-            optimizer, bounds = LBFGSB._opt_bounds(n, cache.opt.m, solver_kwargs.lb, solver_kwargs.ub)
+            optimizer, bounds = LBFGSB._opt_bounds(
+                n, cache.opt.m, solver_kwargs.lb, solver_kwargs.ub)
         end

         solver_kwargs = Base.structdiff(solver_kwargs, (; lb = nothing, ub = nothing))
@@ -182,7 +184,8 @@ function SciMLBase.__solve(cache::OptimizationCache{
             prev_eqcons .= cons_tmp[eq_inds]
             prevβ .= copy(β)

-            res = optimizer(_loss, aug_grad, θ, bounds; solver_kwargs..., m = cache.opt.m, pgtol = sqrt(ϵ), maxiter = maxiters / 100)
+            res = optimizer(_loss, aug_grad, θ, bounds; solver_kwargs...,
+                m = cache.opt.m, pgtol = sqrt(ϵ), maxiter = maxiters / 100)
             # @show res[2]
             # @show res[1]
             # @show cons_tmp
@@ -211,7 +214,8 @@ function SciMLBase.__solve(cache::OptimizationCache{
         stats = Optimization.OptimizationStats(; iterations = maxiters,
             time = 0.0, fevals = maxiters, gevals = maxiters)
         return SciMLBase.build_solution(
-            cache, cache.opt, res[2], cache.f(res[2], cache.p)[1], stats = stats, retcode = opt_ret)
+            cache, cache.opt, res[2], cache.f(res[2], cache.p)[1],
+            stats = stats, retcode = opt_ret)
     else
         _loss = function (θ)
             x = cache.f(θ, cache.p)
@@ -226,16 +230,19 @@ function SciMLBase.__solve(cache::OptimizationCache{

         n = length(cache.u0)

         if cache.lb === nothing
-            optimizer, bounds= LBFGSB._opt_bounds(n, cache.opt.m, [-Inf for i in 1:n], [Inf for i in 1:n])
+            optimizer, bounds = LBFGSB._opt_bounds(
+                n, cache.opt.m, [-Inf for i in 1:n], [Inf for i in 1:n])
         else
-            optimizer, bounds= LBFGSB._opt_bounds(n, cache.opt.m, solver_kwargs.lb, solver_kwargs.ub)
+            optimizer, bounds = LBFGSB._opt_bounds(
+                n, cache.opt.m, solver_kwargs.lb, solver_kwargs.ub)
         end

         solver_kwargs = Base.structdiff(solver_kwargs, (; lb = nothing, ub = nothing))

         t0 = time()
-        res = optimizer(_loss, cache.f.grad, cache.u0, bounds; m = cache.opt.m, solver_kwargs...)
+        res = optimizer(
+            _loss, cache.f.grad, cache.u0, bounds; m = cache.opt.m, solver_kwargs...)

         # Extract the task message from the result
         stop_reason = task_message_to_string(optimizer.task)
@@ -247,6 +254,7 @@ function SciMLBase.__solve(cache::OptimizationCache{

         stats = Optimization.OptimizationStats(; iterations = maxiters,
             time = t1 - t0, fevals = maxiters, gevals = maxiters)
-        return SciMLBase.build_solution(cache, cache.opt, res[2], res[1], stats = stats, retcode = opt_ret)
+        return SciMLBase.build_solution(cache, cache.opt, res[2], res[1], stats = stats,
+            retcode = opt_ret, original = optimizer)
     end
 end
diff --git a/src/utils.jl b/src/utils.jl
index ccb1699ee..cf219df5d 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -67,7 +67,6 @@ function check_pkg_version(pkg::String, ver::String;
            pkg_info[pkg].version > VersionNumber(ver)
 end

-
 # RetCode handling for BBO and others.
 using SciMLBase: ReturnCode

@@ -76,7 +75,7 @@ const STOP_REASON_MAP = Dict(
     r"Delta fitness .* below tolerance .*" => ReturnCode.Success,
     r"Fitness .* within tolerance .* of optimum" => ReturnCode.Success,
     r"CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL" => ReturnCode.Success,
-    r"Unrecognized stop reason: CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH" => ReturnCode.Success,
+    r"^CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR\*EPSMCH\s*$" => ReturnCode.Success,
     r"Terminated" => ReturnCode.Terminated,
     r"MaxIters|MAXITERS_EXCEED|Max number of steps .* reached" => ReturnCode.MaxIters,
     r"MaxTime|TIME_LIMIT" => ReturnCode.MaxTime,
@@ -102,11 +101,11 @@
 function deduce_retcode(stop_reason::String)
     for (pattern, retcode) in STOP_REASON_MAP
         if occursin(pattern, stop_reason)
-            return retcode
+            return retcode
         end
     end
-    @warn "Unrecognized stop reason: $stop_reason. Defaulting to ReturnCode.Failure."
-    return ReturnCode.Failure
+    @warn "Unrecognized stop reason: $stop_reason. Defaulting to ReturnCode.Default."
+    return ReturnCode.Default
 end

 # Function to deduce ReturnCode from a Symbol
@@ -141,4 +140,3 @@
         return ReturnCode.Failure
     end
 end
-
diff --git a/test/ADtests.jl b/test/ADtests.jl
index f0c67aec9..e7157b174 100644
--- a/test/ADtests.jl
+++ b/test/ADtests.jl
@@ -202,12 +202,15 @@ prob = OptimizationProblem(optf, x0)

 sol = solve(prob, Optim.BFGS())
 @test 10 * sol.objective < l1
+@test sol.retcode == ReturnCode.Success

 sol = solve(prob, Optim.Newton())
 @test 10 * sol.objective < l1
+@test sol.retcode == ReturnCode.Success

 sol = solve(prob, Optim.KrylovTrustRegion())
 @test 10 * sol.objective < l1
+@test sol.retcode == ReturnCode.Success

 optf = OptimizationFunction(rosenbrock, Optimization.AutoZygote())
 optprob = Optimization.instantiate_function(optf, x0, Optimization.AutoZygote(), nothing)
@@ -403,10 +406,12 @@ for consf in [cons, con2_c]
     prob1 = OptimizationProblem(optf1, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8],
         lcons = lcons, ucons = ucons)
     sol1 = solve(prob1, Optim.IPNewton())
+    @test sol1.retcode == ReturnCode.Success
     optf2 = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff(); cons = consf)
     prob2 = OptimizationProblem(optf2, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8],
         lcons = lcons, ucons = ucons)
     sol2 = solve(prob2, Optim.IPNewton())
+    @test sol2.retcode == ReturnCode.Success
     @test sol1.objective≈sol2.objective rtol=1e-4
     @test sol1.u ≈ sol2.u
     res = Array{Float64}(undef, length(lcons))
@@ -421,9 +426,11 @@ for consf in [cons, con2_c]
     optf1 = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(); cons = consf)
     prob1 = OptimizationProblem(optf1, [0.5, 0.5], lcons = lcons, ucons = ucons)
     sol1 = solve(prob1, Optim.IPNewton())
+    @test sol1.retcode == ReturnCode.Success
     optf2 = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff(); cons = consf)
     prob2 = OptimizationProblem(optf2, [0.5, 0.5], lcons = lcons, ucons = ucons)
     sol2 = solve(prob2, Optim.IPNewton())
+    @test sol2.retcode == ReturnCode.Success
     @test sol1.objective≈sol2.objective rtol=1e-4
     @test sol1.u≈sol2.u rtol=1e-4
     res = Array{Float64}(undef, length(lcons))
diff --git a/test/lbfgsb.jl b/test/lbfgsb.jl
index bb7afa8b4..0c2f0c20b 100644
--- a/test/lbfgsb.jl
+++ b/test/lbfgsb.jl
@@ -6,12 +6,14 @@ x0 = zeros(2)
 rosenbrock(x, p = nothing) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
 l1 = rosenbrock(x0)

-optf = OptimizationFunction(rosenbrock, AutoEnzyme())
+optf = OptimizationFunction(rosenbrock, AutoForwardDiff())
 prob = OptimizationProblem(optf, x0)
 @time res = solve(prob, Optimization.LBFGS(), maxiters = 100)
+@test res.retcode == Optimization.SciMLBase.ReturnCode.Success

-prob = OptimizationProblem(optf, x0, lb = [-1.0, -1.0], ub = [1.0, 1.0])
+prob = OptimizationProblem(optf, x0, lb = [-1.0, -1.0], ub = [1.0, 1.0])
 @time res = solve(prob, Optimization.LBFGS(), maxiters = 100)
+@test res.retcode == Optimization.SciMLBase.ReturnCode.Success

 function con2_c(res, x, p)
     res .= [x[1]^2 + x[2]^2, (x[2] * sin(x[1]) + x[1]) - 5]
@@ -22,3 +24,4 @@ prob = OptimizationProblem(optf, x0, lcons = [1.0, -Inf],
     ucons = [1.0, 0.0], lb = [-1.0, -1.0],
     ub = [1.0, 1.0])
 @time res = solve(prob, Optimization.LBFGS(), maxiters = 100)
+@test res.retcode == Optimization.SciMLBase.ReturnCode.MaxIters
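For context on the `STOP_REASON_MAP`/`deduce_retcode` changes above: the map is scanned with `occursin`, so after this patch the raw L-BFGS-B task message matches the anchored regex directly (it is no longer wrapped in an "Unrecognized stop reason: ..." prefix), and a message with no matching pattern now falls back to `ReturnCode.Default` instead of `ReturnCode.Failure`. Below is a minimal standalone sketch of that lookup behavior; it is not the package implementation, and `sketch_retcode`, `REASON_MAP`, and the `RetCode` enum are illustrative stand-ins for `deduce_retcode`, `STOP_REASON_MAP`, and `SciMLBase.ReturnCode`.

```julia
# Minimal sketch (not Optimization.jl's implementation) of the regex-based
# stop-reason -> return-code lookup shown in the src/utils.jl hunk above.
@enum RetCode Success MaxIters Default

const REASON_MAP = Dict(
    r"^CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR\*EPSMCH\s*$" => Success,
    r"MaxIters|MAXITERS_EXCEED|Max number of steps .* reached" => MaxIters)

function sketch_retcode(stop_reason::AbstractString)
    for (pattern, retcode) in REASON_MAP
        # The first pattern that occurs anywhere in the message wins.
        occursin(pattern, stop_reason) && return retcode
    end
    @warn "Unrecognized stop reason: $stop_reason. Defaulting to Default."
    return Default  # the patch changes this fallback from Failure to Default
end

sketch_retcode("CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH")  # Success
sketch_retcode("some unknown solver message")                      # Default, with a warning
```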