From 56b9f2dfb53afaee8963247135155645fc56d601 Mon Sep 17 00:00:00 2001
From: Chris Rackauckas
Date: Tue, 21 Jun 2022 08:37:34 -0400
Subject: [PATCH] Doctest the minibatch tutorial and add Optimisers.jl docs

---
 docs/Project.toml                            |   1 +
 docs/pages.jl                                |   1 +
 docs/src/optimization_packages/flux.md       |   5 +
 docs/src/optimization_packages/optimisers.md | 132 +++++++++++++++++++
 docs/src/tutorials/minibatch.md              |   2 +-
 5 files changed, 140 insertions(+), 1 deletion(-)
 create mode 100644 docs/src/optimization_packages/optimisers.md

diff --git a/docs/Project.toml b/docs/Project.toml
index b07eac66c..98ec981cc 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -2,6 +2,7 @@
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
 ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78"
 OptimizationBBO = "3e6eede4-6085-4f62-9a71-46d9bc1eb92b"
 OptimizationCMAEvolutionStrategy = "bd407f91-200f-4536-9381-e4ba712f53f8"
diff --git a/docs/pages.jl b/docs/pages.jl
index f429c427f..1dd6e3ed9 100644
--- a/docs/pages.jl
+++ b/docs/pages.jl
@@ -27,6 +27,7 @@ pages = [
         "NLopt.jl" => "optimization_packages/nlopt.md",
         "Nonconvex.jl" => "optimization_packages/nonconvex.md",
         "Optim.jl" => "optimization_packages/optim.md",
+        "Optimisers.jl" => "optimization_packages/optimisers.md",
         "QuadDIRECT.jl" => "optimization_packages/quaddirect.md"
     ],
 ]
\ No newline at end of file
diff --git a/docs/src/optimization_packages/flux.md b/docs/src/optimization_packages/flux.md
index d2919f150..825623f85 100644
--- a/docs/src/optimization_packages/flux.md
+++ b/docs/src/optimization_packages/flux.md
@@ -8,6 +8,11 @@ To use this package, install the OptimizationFlux package:
 ```julia
 import Pkg; Pkg.add("OptimizationFlux")
 ```
+!!! warning
+
+    Flux's optimizers will soon be deprecated in favor of [Optimisers.jl](https://github.com/FluxML/Optimisers.jl).
+    Because of this, we recommend using OptimizationOptimisers.jl instead of OptimizationFlux.jl.
+
 ## Local Unconstrained Optimizers
 
 - [`Flux.Optimise.Descent`](https://fluxml.ai/Flux.jl/stable/training/optimisers/#Flux.Optimise.Descent): **Classic gradient descent optimizer with learning rate**
diff --git a/docs/src/optimization_packages/optimisers.md b/docs/src/optimization_packages/optimisers.md
new file mode 100644
index 000000000..14f471740
--- /dev/null
+++ b/docs/src/optimization_packages/optimisers.md
@@ -0,0 +1,132 @@
+# Optimisers.jl
+
+## Installation: OptimizationOptimisers.jl
+
+To use this package, install the OptimizationOptimisers package:
+
+```julia
+import Pkg; Pkg.add("OptimizationOptimisers")
+```
+
+## Local Unconstrained Optimizers
+
+- [`Optimisers.Descent`](https://fluxml.ai/Optimisers.jl/dev/api/#Optimisers.Descent): **Classic gradient descent optimizer with learning rate**
+
+  * `solve(problem, Descent(η))`
+  * `η` is the learning rate
+  * Defaults:
+    * `η = 0.1`
+
+- [`Optimisers.Momentum`](https://fluxml.ai/Optimisers.jl/dev/api/#Optimisers.Momentum): **Classic gradient descent optimizer with learning rate and momentum**
+
+  * `solve(problem, Momentum(η, ρ))`
+  * `η` is the learning rate
+  * `ρ` is the momentum
+  * Defaults:
+    * `η = 0.01`
+    * `ρ = 0.9`
+
+- [`Optimisers.Nesterov`](https://fluxml.ai/Optimisers.jl/dev/api/#Optimisers.Nesterov): **Gradient descent optimizer with learning rate and Nesterov momentum**
+
+  * `solve(problem, Nesterov(η, ρ))`
+  * `η` is the learning rate
+  * `ρ` is the Nesterov momentum
+  * Defaults:
+    * `η = 0.01`
+    * `ρ = 0.9`
+
+- [`Optimisers.RMSProp`](https://fluxml.ai/Optimisers.jl/dev/api/#Optimisers.RMSProp): **RMSProp optimizer**
+
+  * `solve(problem, RMSProp(η, ρ))`
+  * `η` is the learning rate
+  * `ρ` is the momentum
+  * Defaults:
+    * `η = 0.001`
+    * `ρ = 0.9`
+
+- [`Optimisers.Adam`](https://fluxml.ai/Optimisers.jl/dev/api/#Optimisers.Adam): **Adam optimizer**
+
+  * `solve(problem, Adam(η, β::Tuple))`
+  * `η` is the learning rate
+  * `β::Tuple` is the decay of momentums
+  * Defaults:
+    * `η = 0.001`
+    * `β::Tuple = (0.9, 0.999)`
+
+- [`Optimisers.RAdam`](https://fluxml.ai/Optimisers.jl/dev/api/#Optimisers.RAdam): **Rectified Adam optimizer**
+
+  * `solve(problem, RAdam(η, β::Tuple))`
+  * `η` is the learning rate
+  * `β::Tuple` is the decay of momentums
+  * Defaults:
+    * `η = 0.001`
+    * `β::Tuple = (0.9, 0.999)`
+- [`Optimisers.OAdam`](https://fluxml.ai/Optimisers.jl/dev/api/#Optimisers.OAdam): **Optimistic Adam optimizer**
+
+  * `solve(problem, OAdam(η, β::Tuple))`
+  * `η` is the learning rate
+  * `β::Tuple` is the decay of momentums
+  * Defaults:
+    * `η = 0.001`
+    * `β::Tuple = (0.5, 0.999)`
+
+- [`Optimisers.AdaMax`](https://fluxml.ai/Optimisers.jl/dev/api/#Optimisers.AdaMax): **AdaMax optimizer**
+
+  * `solve(problem, AdaMax(η, β::Tuple))`
+  * `η` is the learning rate
+  * `β::Tuple` is the decay of momentums
+  * Defaults:
+    * `η = 0.001`
+    * `β::Tuple = (0.9, 0.999)`
+
+- [`Optimisers.ADAGrad`](https://fluxml.ai/Optimisers.jl/dev/api/#Optimisers.ADAGrad): **ADAGrad optimizer**
+
+  * `solve(problem, ADAGrad(η))`
+  * `η` is the learning rate
+  * Defaults:
+    * `η = 0.1`
+
+- [`Optimisers.ADADelta`](https://fluxml.ai/Optimisers.jl/dev/api/#Optimisers.ADADelta): **ADADelta optimizer**
+
+  * `solve(problem, ADADelta(ρ))`
+  * `ρ` is the gradient decay factor
+  * Defaults:
+    * `ρ = 0.9`
+
+- [`Optimisers.AMSGrad`](https://fluxml.ai/Optimisers.jl/dev/api/#Optimisers.AMSGrad): **AMSGrad optimizer**
+
+  * `solve(problem, AMSGrad(η, β::Tuple))`
+  * `η` is the learning rate
+  * `β::Tuple` is the decay of momentums
+  * Defaults:
+    * `η = 0.001`
+    * `β::Tuple = (0.9, 0.999)`
+
+- [`Optimisers.NAdam`](https://fluxml.ai/Optimisers.jl/dev/api/#Optimisers.NAdam): **Nesterov variant of the Adam optimizer**
+
+  * `solve(problem, NAdam(η, β::Tuple))`
+  * `η` is the learning rate
+  * `β::Tuple` is the decay of momentums
+  * Defaults:
+    * `η = 0.001`
+    * `β::Tuple = (0.9, 0.999)`
+
+- [`Optimisers.AdamW`](https://fluxml.ai/Optimisers.jl/dev/api/#Optimisers.AdamW): **AdamW optimizer**
+
+  * `solve(problem, AdamW(η, β::Tuple))`
+  * `η` is the learning rate
+  * `β::Tuple` is the decay of momentums
+  * `decay` is the decay to weights
+  * Defaults:
+    * `η = 0.001`
+    * `β::Tuple = (0.9, 0.999)`
+    * `decay = 0`
+
+- [`Optimisers.ADABelief`](https://fluxml.ai/Optimisers.jl/dev/api/#Optimisers.ADABelief): **ADABelief variant of Adam**
+
+  * `solve(problem, ADABelief(η, β::Tuple))`
+  * `η` is the learning rate
+  * `β::Tuple` is the decay of momentums
+  * Defaults:
+    * `η = 0.001`
+    * `β::Tuple = (0.9, 0.999)`
\ No newline at end of file
diff --git a/docs/src/tutorials/minibatch.md b/docs/src/tutorials/minibatch.md
index a1086f422..3a44df792 100644
--- a/docs/src/tutorials/minibatch.md
+++ b/docs/src/tutorials/minibatch.md
@@ -5,7 +5,7 @@ This example uses the OptimizationOptimisers.jl package. See the [Optimisers.jl
 page](@ref optimisers) for details on the installation and usage.
 
-```julia
+```@example
 using Flux, Optimization, OptimizationOptimisers, OrdinaryDiffEq, DiffEqSensitivity
 
 function newtons_cooling(du, u, p, t)
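For context on the new Optimisers.jl page added by this patch: every rule listed there plugs into the standard Optimization.jl `solve` workflow. The sketch below is editorial illustration, not part of the patch; it assumes OptimizationOptimisers re-exports the rule constructors as the `solve(problem, Adam(η, β::Tuple))` signatures above imply, and the Rosenbrock objective, ForwardDiff backend, learning rate, and iteration budget are arbitrary choices for demonstration.

```julia
using Optimization, OptimizationOptimisers, ForwardDiff

# Illustrative objective: the two-dimensional Rosenbrock function
rosenbrock(u, p) = (p[1] - u[1])^2 + p[2] * (u[2] - u[1]^2)^2
u0 = zeros(2)
p = [1.0, 100.0]

# Optimisers.jl rules are gradient-based, so attach an AD backend to the OptimizationFunction
optf = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff())
prob = OptimizationProblem(optf, u0, p)

# Any rule from the list can be passed as the solver; Adam is shown here.
# These rules step a fixed number of times, so a `maxiters` budget is required.
sol = solve(prob, Adam(0.05), maxiters = 1000)
```

Swapping in `Momentum(0.01, 0.9)`, `RMSProp()`, or any other rule from the page only changes the second argument to `solve`; the problem setup stays the same.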