diff --git a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp
index 0ab56f6d13..8b8542779a 100644
--- a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp
+++ b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp
@@ -43,7 +43,7 @@ static VectorValues gradientInPlace(const NonlinearFactorGraph& nfg,
 
 NonlinearConjugateGradientOptimizer::NonlinearConjugateGradientOptimizer(
     const NonlinearFactorGraph& graph, const Values& initialValues,
-    const Parameters& params)
+    const Parameters& params, const DirectionMethod& directionMethod)
     : Base(graph, std::unique_ptr<State>(
                       new State(initialValues, graph.error(initialValues)))),
-      params_(params) {}
+      params_(params), directionMethod_(directionMethod) {}
@@ -70,7 +70,8 @@ NonlinearConjugateGradientOptimizer::System::advance(const State& current,
 
 GaussianFactorGraph::shared_ptr NonlinearConjugateGradientOptimizer::iterate() {
   const auto [newValues, dummy] = nonlinearConjugateGradient(
-      System(graph_), state_->values, params_, true /* single iteration */);
+      System(graph_), state_->values, params_, true /* single iteration */,
+      directionMethod_);
 
   state_.reset(
       new State(newValues, graph_.error(newValues), state_->iterations + 1));
@@ -81,8 +82,8 @@ GaussianFactorGraph::shared_ptr NonlinearConjugateGradientOptimizer::iterate() {
 const Values& NonlinearConjugateGradientOptimizer::optimize() {
   // Optimize until convergence
   System system(graph_);
-  const auto [newValues, iterations] =
-      nonlinearConjugateGradient(system, state_->values, params_, false);
+  const auto [newValues, iterations] = nonlinearConjugateGradient(
+      system, state_->values, params_, false, directionMethod_);
   state_.reset(
       new State(std::move(newValues), graph_.error(newValues), iterations));
   return state_->values;
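For reference (commentary added here, not part of the patch): the header changes below define four interchangeable update rules for β. Writing g_n for the gradient at iterate n and s_{n-1} for the previous search direction, the code updates s_n = g_n + β·s_{n-1}, with β given by one of:

```latex
\beta_{FR} = \frac{g_n^\top g_n}{g_{n-1}^\top g_{n-1}}, \qquad
\beta_{PR} = \max\left(0,\; \frac{g_n^\top (g_n - g_{n-1})}{g_{n-1}^\top g_{n-1}}\right), \qquad
\beta_{HS} = \max\left(0,\; \frac{g_n^\top (g_n - g_{n-1})}{-s_{n-1}^\top (g_n - g_{n-1})}\right), \qquad
\beta_{DY} = \max\left(0,\; \frac{g_n^\top g_n}{-s_{n-1}^\top (g_n - g_{n-1})}\right).
```

Only Fletcher-Reeves omits the max(0, ·) clamp; for the other three, a clamped β = 0 restarts the method with a pure gradient step.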
diff --git a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h
index 369291faea..bd106afbed 100644
--- a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h
+++ b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h
@@ -23,6 +23,57 @@
 
 namespace gtsam {
 
+/// Fletcher-Reeves formula for β, the weight on the previous search direction.
+template <typename Gradient>
+double FletcherReeves(const Gradient &currentGradient,
+                      const Gradient &prevGradient) {
+  // Fletcher-Reeves: beta = g_n'*g_n / g_{n-1}'*g_{n-1}
+  const double beta =
+      currentGradient.dot(currentGradient) / prevGradient.dot(prevGradient);
+  return beta;
+}
+
+/// Polak-Ribiere formula for β, the weight on the previous search direction.
+template <typename Gradient>
+double PolakRibiere(const Gradient &currentGradient,
+                    const Gradient &prevGradient) {
+  // Polak-Ribiere: beta = g_n'*(g_n - g_{n-1}) / g_{n-1}'*g_{n-1}
+  const double beta =
+      std::max(0.0, currentGradient.dot(currentGradient - prevGradient) /
+                        prevGradient.dot(prevGradient));
+  return beta;
+}
+
+/// The Hestenes-Stiefel formula for β,
+/// the weight on the previous search direction.
+template <typename Gradient>
+double HestenesStiefel(const Gradient &currentGradient,
+                       const Gradient &prevGradient,
+                       const Gradient &direction) {
+  // Hestenes-Stiefel: beta = g_n'*(g_n - g_{n-1}) / (-s_{n-1}'*(g_n - g_{n-1}))
+  Gradient d = currentGradient - prevGradient;
+  const double beta = std::max(0.0, currentGradient.dot(d) / -direction.dot(d));
+  return beta;
+}
+
+/// The Dai-Yuan formula for β, the weight on the previous search direction.
+template <typename Gradient>
+double DaiYuan(const Gradient &currentGradient, const Gradient &prevGradient,
+               const Gradient &direction) {
+  // Dai-Yuan: beta = g_n'*g_n / (-s_{n-1}'*(g_n - g_{n-1}))
+  const double beta =
+      std::max(0.0, currentGradient.dot(currentGradient) /
+                        -direction.dot(currentGradient - prevGradient));
+  return beta;
+}
+
+enum class DirectionMethod {
+  FletcherReeves,
+  PolakRibiere,
+  HestenesStiefel,
+  DaiYuan
+};
+
 /** An implementation of the nonlinear CG method using the template below */
 class GTSAM_EXPORT NonlinearConjugateGradientOptimizer
     : public NonlinearOptimizer {
@@ -51,14 +102,16 @@ class GTSAM_EXPORT NonlinearConjugateGradientOptimizer
 protected:
   Parameters params_;
+  DirectionMethod directionMethod_ = DirectionMethod::PolakRibiere;
 
   const NonlinearOptimizerParams &_params() const override { return params_; }
 
 public:
   /// Constructor
-  NonlinearConjugateGradientOptimizer(const NonlinearFactorGraph &graph,
-                                      const Values &initialValues,
-                                      const Parameters &params = Parameters());
+  NonlinearConjugateGradientOptimizer(
+      const NonlinearFactorGraph &graph, const Values &initialValues,
+      const Parameters &params = Parameters(),
+      const DirectionMethod &directionMethod = DirectionMethod::PolakRibiere);
 
   /// Destructor
   ~NonlinearConjugateGradientOptimizer() override {}
@@ -140,7 +193,9 @@ double lineSearch(const S &system, const V currentValues, const W &gradient) {
 template <class S, class V>
 std::tuple<V, int> nonlinearConjugateGradient(
     const S &system, const V &initial, const NonlinearOptimizerParams &params,
-    const bool singleIteration, const bool gradientDescent = false) {
+    const bool singleIteration,
+    const DirectionMethod &directionMethod = DirectionMethod::PolakRibiere,
+    const bool gradientDescent = false) {
   // GTSAM_CONCEPT_MANIFOLD_TYPE(V)
 
   size_t iteration = 0;
@@ -177,10 +232,23 @@ std::tuple<V, int> nonlinearConjugateGradient(
     } else {
       prevGradient = currentGradient;
       currentGradient = system.gradient(currentValues);
-      // Polak-Ribiere: beta = g_n'*(g_n - g_{n-1}) / g_{n-1}'*g_{n-1}
-      const double beta =
-          std::max(0.0, currentGradient.dot(currentGradient - prevGradient) /
-                            prevGradient.dot(prevGradient));
+
+      double beta;
+      switch (directionMethod) {
+        case DirectionMethod::FletcherReeves:
+          beta = FletcherReeves(currentGradient, prevGradient);
+          break;
+        case DirectionMethod::PolakRibiere:
+          beta = PolakRibiere(currentGradient, prevGradient);
+          break;
+        case DirectionMethod::HestenesStiefel:
+          beta = HestenesStiefel(currentGradient, prevGradient, direction);
+          break;
+        case DirectionMethod::DaiYuan:
+          beta = DaiYuan(currentGradient, prevGradient, direction);
+          break;
+      }
+
       direction = currentGradient + (beta * direction);
     }
 
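A minimal sketch (not part of the patch): the β helpers above only require the Gradient type to provide dot() and operator-, so they can be exercised directly with Eigen vectors; the values here are illustrative:

```cpp
#include <Eigen/Dense>
#include <gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h>

#include <iostream>

int main() {
  Eigen::VectorXd g0(2), g1(2), s0(2);
  g0 << 1.0, -2.0;  // gradient at iterate n-1
  g1 << 0.5, -1.0;  // gradient at iterate n
  s0 << -1.0, 2.0;  // previous search direction s_{n-1}

  // PR and DY clamp to 0 for these values; FR gives 0.25, HS gives 0.5.
  std::cout << "FR: " << gtsam::FletcherReeves(g1, g0) << "\n"
            << "PR: " << gtsam::PolakRibiere(g1, g0) << "\n"
            << "HS: " << gtsam::HestenesStiefel(g1, g0, s0) << "\n"
            << "DY: " << gtsam::DaiYuan(g1, g0, s0) << "\n";

  // The conjugate direction update used in nonlinearConjugateGradient():
  Eigen::VectorXd s1 = g1 + gtsam::FletcherReeves(g1, g0) * s0;
  std::cout << "s1: " << s1.transpose() << "\n";
  return 0;
}
```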
diff --git a/gtsam/nonlinear/tests/testNonlinearConjugateGradientOptimizer.cpp b/gtsam/nonlinear/tests/testNonlinearConjugateGradientOptimizer.cpp
index 36673c7a08..f216e4a8c4 100644
--- a/gtsam/nonlinear/tests/testNonlinearConjugateGradientOptimizer.cpp
+++ b/gtsam/nonlinear/tests/testNonlinearConjugateGradientOptimizer.cpp
@@ -79,6 +79,49 @@ TEST(NonlinearConjugateGradientOptimizer, Optimize) {
   EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4);
 }
+/* ************************************************************************* */
+/// Test different direction methods
+TEST(NonlinearConjugateGradientOptimizer, DirectionMethods) {
+  const auto [graph, initialEstimate] = generateProblem();
+
+  NonlinearOptimizerParams param;
+  param.maxIterations =
+      500; /* requires a larger number of iterations to converge */
+  param.verbosity = NonlinearOptimizerParams::SILENT;
+
+  // Fletcher-Reeves
+  {
+    NonlinearConjugateGradientOptimizer optimizer(
+        graph, initialEstimate, param, DirectionMethod::FletcherReeves);
+    Values result = optimizer.optimize();
+
+    EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4);
+  }
+  // Polak-Ribiere
+  {
+    NonlinearConjugateGradientOptimizer optimizer(
+        graph, initialEstimate, param, DirectionMethod::PolakRibiere);
+    Values result = optimizer.optimize();
+
+    EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4);
+  }
+  // Hestenes-Stiefel
+  {
+    NonlinearConjugateGradientOptimizer optimizer(
+        graph, initialEstimate, param, DirectionMethod::HestenesStiefel);
+    Values result = optimizer.optimize();
+
+    EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4);
+  }
+  // Dai-Yuan
+  {
+    NonlinearConjugateGradientOptimizer optimizer(graph, initialEstimate, param,
+                                                  DirectionMethod::DaiYuan);
+    Values result = optimizer.optimize();
+
+    EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4);
+  }
+}
 
 /* ************************************************************************* */
 int main() {
   TestResult tr;
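A minimal end-to-end usage sketch (a hypothetical example, not part of the patch or the test suite), assuming a standard GTSAM build; the direction method is selected through the new fourth constructor argument:

```cpp
#include <gtsam/geometry/Pose2.h>
#include <gtsam/inference/Symbol.h>
#include <gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h>
#include <gtsam/nonlinear/NonlinearFactorGraph.h>
#include <gtsam/nonlinear/Values.h>

using namespace gtsam;

int main() {
  // Toy problem: a single prior pulls pose x0 toward the origin.
  NonlinearFactorGraph graph;
  auto noise = noiseModel::Isotropic::Sigma(3, 1.0);
  graph.addPrior(Symbol('x', 0), Pose2(0, 0, 0), noise);

  Values initial;
  initial.insert(Symbol('x', 0), Pose2(1.0, -1.0, 0.2));

  NonlinearOptimizerParams params;
  params.maxIterations = 500;  // NCG typically needs many iterations

  // Any of the four DirectionMethod values can be passed here.
  NonlinearConjugateGradientOptimizer optimizer(
      graph, initial, params, DirectionMethod::DaiYuan);
  Values result = optimizer.optimize();
  result.print("result ");
  return 0;
}
```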