Change default linesearch to Hager-Zhang (#172)

* Switch to Hager-Zhang linesearch
* Update docstring about linesearch
* Increase tolerance of checks
* Select different number of default iterations
* Deepcopy optimizer if using multi-threading
* Increment minor version number
* Bump Pathfinder compat entries in integration tests
* Bump Pathfinder version for docs
* Ensure size of iterator is known without evaluating
mlcolab · Dec 5, 2023 · 2f8e8df · 2f8e8df · sethaxen · Dec 5, 2023
1 parent 67b90cf
commit 2f8e8df
Show file tree

Hide file tree

Showing 9 changed files with 35 additions and 26 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "Pathfinder"
 uuid = "b1d3bc72-d0e7-4279-b92f-7fa5d6d2d454"
 authors = ["Seth Axen <[email protected]> and contributors"]
-version = "0.7.11"
+version = "0.8.0"
 
 [deps]
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"

diff --git a/docs/Project.toml b/docs/Project.toml
@@ -21,7 +21,7 @@ DynamicHMC = "3"
 ForwardDiff = "0.10"
 LogDensityProblems = "1, 2"
 LogDensityProblemsAD = "1, 2"
-Pathfinder = "0.7"
+Pathfinder = "0.8"
 StatsFuns = "0.9, 1"
 StatsPlots = "0.14, 0.15"
 TransformVariables = "0.6, 0.7, 0.8"

diff --git a/src/Pathfinder.jl b/src/Pathfinder.jl
@@ -24,14 +24,17 @@ using UnPack: @unpack
 export PathfinderResult, MultiPathfinderResult
 export pathfinder, multipathfinder
 
-# Note: we override the default history length to be shorter and the default line search
-# to be More-Thuente, which keeps the approximate inverse Hessian positive-definite
 const DEFAULT_HISTORY_LENGTH = 6
-const DEFAULT_LINE_SEARCH = LineSearches.MoreThuente()
+const DEFAULT_LINE_SEARCH = LineSearches.HagerZhang()
+const DEFAULT_LINE_SEARCH_INIT = LineSearches.InitialHagerZhang()
 const DEFAULT_NDRAWS_ELBO = 5
 
 function default_optimizer(history_length)
-    return Optim.LBFGS(; m=history_length, linesearch=DEFAULT_LINE_SEARCH)
+    return Optim.LBFGS(;
+        m=history_length,
+        linesearch=DEFAULT_LINE_SEARCH,
+        alphaguess=DEFAULT_LINE_SEARCH_INIT,
+    )
 end
 
 include("transducers.jl")

diff --git a/src/multipath.jl b/src/multipath.jl
@@ -162,26 +162,29 @@ function multipathfinder(
     logp(x) = -optim_fun.f(x, nothing)
 
     # run pathfinder independently from each starting point
-    trans = Transducers.Map() do (init_i)
+    trans = Transducers.Map() do (init_i, optimizer_i)
         return pathfinder(
             optim_fun;
             rng,
             history_length,
-            optimizer,
+            optimizer=optimizer_i,
             ndraws=ndraws_per_run,
             init=init_i,
             executor=executor_per_run,
             ndraws_elbo,
             kwargs...,
         )
     end
-    iter_sp = if executor isa Folds.ThreadedEx
-        # temporary workaround due to
-        # https://github.com/JuliaFolds2/Transducers.jl/issues/10
-        _init
-    else
-        Transducers.withprogress(_init; interval=1e-3)
-    end |> trans
+    iter_optimizers = fill(optimizer, nruns)
+    iter_sp =
+        if executor isa Folds.ThreadedEx
+            # temporary workaround due to
+            # https://github.com/JuliaFolds2/Transducers.jl/issues/10
+            # also support optimizers that store state
+            zip(_init, Iterators.map(deepcopy, iter_optimizers))
+        else
+            Transducers.withprogress(zip(_init, iter_optimizers); interval=1e-3)
+        end |> trans
     pathfinder_results = Folds.collect(iter_sp, executor)
     fit_distributions =
         pathfinder_results |> Transducers.Map(x -> x.fit_distribution) |> collect

diff --git a/src/singlepath.jl b/src/singlepath.jl
@@ -117,8 +117,9 @@ constructed using at most the previous `history_length` steps.
     inverse Hessian.
 - `optimizer`: Optimizer to be used for constructing trajectory. Can be any optimizer
     compatible with Optimization.jl, so long as it supports callbacks. Defaults to
-    `Optim.LBFGS(; m=history_length, linesearch=LineSearches.MoreThuente())`. See
-    the [Optimization.jl documentation](https://optimization.sciml.ai/stable) for details.
+    `Optim.LBFGS(; m=history_length, linesearch=LineSearches.HagerZhang(), alphaguess=LineSearches.InitialHagerZhang())`.
+    See the [Optimization.jl documentation](https://optimization.sciml.ai/stable) for
+    details.
 - `ntries::Int=1_000`: Number of times to try the optimization, restarting if it fails. Before
     every restart, a new initial point is drawn using `init_sampler`.
 - `fail_on_nonfinite::Bool=true`: If `true`, optimization fails if the log-density is a

diff --git a/test/integration/AdvancedHMC/Project.toml b/test/integration/AdvancedHMC/Project.toml
@@ -23,7 +23,7 @@ LogDensityProblems = "1, 2"
 LogDensityProblemsAD = "1"
 MCMCDiagnosticTools = "0.3"
 Optim = "1.4"
-Pathfinder = "0.7"
+Pathfinder = "0.7, 0.8"
 Statistics = "1"
 StatsFuns = "0.9, 1"
 TransformVariables = "0.6, 0.7, 0.8"

diff --git a/test/integration/DynamicHMC/Project.toml b/test/integration/DynamicHMC/Project.toml
@@ -19,7 +19,7 @@ LogDensityProblems = "1, 2"
 LogDensityProblemsAD = "1"
 MCMCDiagnosticTools = "0.3"
 Optim = "1.4"
-Pathfinder = "0.7"
+Pathfinder = "0.7, 0.8"
 Statistics = "1"
 StatsFuns = "0.9, 1"
 TransformVariables = "0.6, 0.7, 0.8"

diff --git a/test/integration/Turing/Project.toml b/test/integration/Turing/Project.toml
@@ -5,6 +5,6 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"
 
 [compat]
-Pathfinder = "0.5, 0.6, 0.7"
+Pathfinder = "0.5, 0.6, 0.7, 0.8"
 Turing = "0.24, 0.25, 0.26, 0.27, 0.28, 0.29"
 julia = "1.6"
diff --git a/test/singlepath.jl b/test/singlepath.jl
@@ -39,9 +39,9 @@ include("test_utils.jl")
                 Pathfinder.default_optimizer(Pathfinder.DEFAULT_HISTORY_LENGTH)
             fit_distribution = result.fit_distribution
             @test fit_distribution isa MvNormal
-            @test fit_distribution.μ ≈ zeros(dim)
+            @test fit_distribution.μ ≈ zeros(dim) atol = 1e-6
             @test fit_distribution.Σ isa Pathfinder.WoodburyPDMat
-            @test fit_distribution.Σ ≈ I
+            @test fit_distribution.Σ ≈ I atol = 1e-6
             @test size(fit_distribution.Σ.B) == (dim, 2) # history contains only 1 iteration
             @test result.draws isa AbstractMatrix
             @test size(result.draws) == (dim, ndraws)
@@ -97,7 +97,9 @@ include("test_utils.jl")
             Random.seed!(rng, seed)
             # less restrictive type check to work around https://github.com/mlcolab/Pathfinder.jl/issues/142
             # TODO: remove this workaround once the issue is fixed
-            result = @inferred PathfinderResult pathfinder(ℓ; rng, optimizer, ndraws_elbo, executor)
+            result = @inferred PathfinderResult pathfinder(
+                ℓ; rng, optimizer, ndraws_elbo, executor
+            )
             @test result.input === ℓ
             @test result.fit_distribution.Σ ≈ Σ rtol = 1e-1
             @test result.optimizer == optimizer
@@ -113,12 +115,12 @@ include("test_utils.jl")
             i = 0
             callback = (args...,) -> (i += 1; false)
             pathfinder(ℓ; callback)
-            @test i ≠ 6
+            @test i ≠ 4
 
             Random.seed!(42)
             i = 0
-            pathfinder(ℓ; callback, maxiters=5)
-            @test i == 6
+            pathfinder(ℓ; callback, maxiters=3)
+            @test i == 4
         end
     end
     @testset "retries" begin