Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

wip - strongwolfe #9

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 31 additions & 7 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,49 @@ Or a yali_. Whatever sinks your boat.
Docs
====

.. doxygenfunction:: numopt::functors::paraboloid
.. doxygenfunction:: numopt::functors::rosenbrock
Problems
--------
A problem defines the objective funtion that is to be minimized (unconstrained as of now). It must atleast define the evaluation function and a gradient function.

.. doxygenclass:: numopt::ProblemBase
:members:
.. doxygenclass:: numopt::ProblemAD
:members:

.. doxygenclass:: numopt::problems::Rosenbrock2dProblem
:members:
.. doxygenclass:: numopt::problems::ParaboloidProblem
:members:

.. doxygenclass:: numopt::Solver


.. doxygenclass:: numopt::ProblemAD
:members:

Functors
--------
It might not always be easy to derive your gradient vector, and even more hard to get your hessian matrix. In such situations, we can use autodiff to obtain the gradient and Hessian. The class :class:`numopt::ProblemAD` implements this functionality, as long as the desired function is defined as a template function.
The following functors serve as examples.

.. doxygenfunction:: numopt::functors::paraboloid
.. doxygenfunction:: numopt::functors::rosenbrock

Solvers
-------
As of now this is an unconstrained optimization solver, that takes a problem, and finds its minima.

.. doxygenstruct:: numopt::solver::SolverData
:members:
.. doxygenstruct:: numopt::solver::SolverSettings
:members:
.. doxygenfunction:: numopt::solver::BackTrackingLineSearch
.. doxygenclass:: numopt::Solver
:members:
.. doxygenclass:: numopt::GradientDescentSolver
:members:

Linesearch
----------

.. doxygenstruct:: numopt::solver::linesearch::LinesearchSettings
:members:
.. doxygenclass:: numopt::solver::linesearch::BackTrackingLinesearch
:members:
.. doxygenclass:: numopt::solver::linesearch::StrongWolfeLinesearch
:members:
23 changes: 12 additions & 11 deletions include/numopt/problems/paraboloidProblem.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,37 @@

#include "problem.h"


namespace numopt {
namespace problems {
/**
* Describes a simple paraboloid problem with analytic gradients and hessian.
* The minima is at T(5)
*/
*/
class ParaboloidProblem : public ProblemBase {

public:
ParaboloidProblem() {}
/** \fn operator()(const VectorX &in) const
* \brief returns function value at \a in
* \param in The vector to evaluate
* \return function value at \a in
* \param in vector to evaluate the function at
*/
double operator()(const VectorX &in) const override {
return (in.squaredNorm() + (5));
}
/**
* \brief returns the gradient vector at \a in
* \param in The vector at which gradient is computed
*/
/**
* \return a pair where the first element is the gradient vector at \a in and
* the second element is the function value at \a in
* \param in vector at which gradient is computed
*/
std::pair<VectorX, double> gradient(const VectorX &in) const override {
const VectorX grad = 2 * in.transpose();
return std::make_pair(grad, this->operator()(in));
}
/**
* \brief returns hessian matrix at \a in
* \param in The vector at which the hessian is computed
*/
* \return returns a pair, where the first element is the Hessian matrix at \a
* in and the second element is the gradient value at \a in
* \param in vector at which the hessian is computed
*/
std::pair<SparseMatrixX, VectorX> hessian(const VectorX &in) const {
SparseMatrixX H(in.rows(), in.rows());
H.setIdentity();
Expand Down
26 changes: 19 additions & 7 deletions include/numopt/solvers/gradientDescent.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once
#include <solvers/linesearch/backtrackingLineSearch.h>
#include <solvers/linesearch/strongWolfeLineSearch.h>
#include <solvers/solver.h>
#include <solvers/solverSettings.h>

Expand All @@ -20,17 +21,28 @@ class GradientDescentSolver : public Solver<_Problem> {
solverData.argmin = initValue;
double prevNorm;
solverData.min = problem()(initValue);
double alpha(1);
double alpha(1), dphi(0), dphi_im1(0);
for (unsigned iter = 0; iter < settings().maxSolverIterations; iter++) {
prevNorm = solverData.min;
const VectorX dir = direction(solverData.argmin);
const VectorX xCur = solverData.argmin;
solverData.min = //(settings().linesearchtype == solver::SolverSettings::LineSearchType::BackTracking) ?
solver::BackTrackingLineSearch(xCur, dir, problem(), solverData.argmin,
&alpha, settings().verbosity,
settings().functionTolerance,
settings().parameterTolerance,
settings().maxLineSearchIterations);
dphi = dir.dot(dir);
const double alphaCur = iter ? alpha * dphi_im1 / dphi : 1e-5;
dphi_im1 = dphi;
solverData
.min = (settings().linesearchtype ==
solver::SolverSettings::LineSearchType::BackTracking) ?
solver::linesearch::BackTrackingLinesearch<_Problem>().run(
xCur, dir, problem(), solverData.argmin, &alpha, settings(),
settings().verbosity):
solver::linesearch::StrongWolfeLinesearch<_Problem>(problem()).run(
xCur, dir, solverData.argmin, alphaCur, &alpha, settings(),
settings().verbosity);
if (settings().verbosity) {
std::cout << "Gradient Descent iteration : " << iter
<< " f(x):" << solverData.min
<< " at x_o:" << solverData.argmin << std::endl;
}
solverData.nIter = iter + 1;
solverData.paramNorm = alpha * dir.norm();
if (hasConverged(std::abs(prevNorm - solverData.min), alpha * dir.norm(),
Expand Down
86 changes: 41 additions & 45 deletions include/numopt/solvers/linesearch/backtrackingLineSearch.h
Original file line number Diff line number Diff line change
@@ -1,62 +1,58 @@
#pragma once

#include "linesearchSettings.h"
#include "solvers/solverSettings.h"
#include <base/types.h>

namespace numopt {
namespace solver {
constexpr static double LS_InitalAlpha = 0.8;
constexpr static double LS_Rho = 0.5;
constexpr static double LS_C = 0.0001;
namespace numopt::solver::linesearch {
/**
* Implements the Backtracking algorithm. (3.1)
* Implements the Backtracking algorithm. (Algorithm 3.1)
* Only ensures sufficient decrease (weak Wolfe Conditions)
*/

template <typename DerivedX, typename Evaluator>
double BackTrackingLineSearch(const Eigen::MatrixBase<DerivedX> &xcur,
const Eigen::MatrixBase<DerivedX> &dir,
const Evaluator func,
Eigen::MatrixBase<DerivedX> &xnext,
double *palpha, const unsigned verbose,
const double funcTol, const double paramTol,
const unsigned maxIterations) {
double alphak = LS_InitalAlpha;
const double initNorm = func(xcur);
double prevNorm = initNorm;

if (verbose > 1)
std::cout << "BackTracking - Init Norm : " << initNorm << std::endl;

for (unsigned iter = 0; iter < maxIterations; iter++) {
xnext = (xcur + alphak * dir).eval();
const double curNorm = func(xnext);
template <typename Evaluator> class BackTrackingLinesearch {
public:
double run(const VectorX &xcur, const VectorX &dir, const Evaluator func,
VectorX &xnext, double *palpha, const SolverSettings &settings,
const unsigned verbose) {
double alphak = LS_InitAlpha;
const double initNorm = func(xcur);
double prevNorm = initNorm;

if (verbose > 1)
std::cout << "BackTracking - Cur Norm : " << curNorm
<< " Step Size : " << alphak << std::endl;
std::cout << "BackTracking - Init Norm : " << initNorm << std::endl;

if (curNorm <= initNorm - LS_C * alphak * dir.norm()) {
if (palpha)
*palpha = alphak;
for (unsigned iter = 0; iter < settings.maxLineSearchIterations; iter++) {
xnext = (xcur + alphak * dir).eval();
const double curNorm = func(xnext);

return curNorm;
}
if (verbose > 1)
std::cout << "BackTracking - Cur Norm : " << curNorm
<< " Step Size : " << alphak << std::endl;

if ((iter == (maxIterations - 1)) ||
(std::fabs(curNorm - prevNorm) < funcTol) ||
(alphak * dir.norm() < paramTol))
break;
if (curNorm <= initNorm - LS_C1 * alphak * dir.norm()) {
if (palpha)
*palpha = alphak;

alphak = alphak * LS_Rho;
prevNorm = curNorm;
}
return curNorm;
}

if (palpha)
*palpha = 0.0;
if ((iter == (settings.maxLineSearchIterations - 1)) ||
(std::fabs(curNorm - prevNorm) < settings.functionTolerance) ||
(alphak * dir.norm() < settings.parameterTolerance))
break;

alphak = alphak * LS_Rho;
prevNorm = curNorm;
}

xnext = xcur;
if (palpha)
*palpha = 0.0;

xnext = xcur;

return initNorm;
}
};

return initNorm;
}
} // namespace solver
} // namespace numopt
} // namespace numopt::solver::linesearch
23 changes: 23 additions & 0 deletions include/numopt/solvers/linesearch/linesearchSettings.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#pragma once

namespace numopt::solver::linesearch {
constexpr static double LS_MaxAlpha = 1e5;
constexpr static double LS_InitAlpha = 0.8;
constexpr static double LS_Rho = 0.5;
constexpr static double LS_C1 = 1e-4;
constexpr static double LS_C2 = 0.9;
/**
* @brief Structure that holds tolerances & max # iterations.
*/
struct LinesearchSettings {
/** Tolerance for change in f(x) */
double functionTolerance;
/** Tolerance for change in x*/
double parameterTolerance;
/** max no. of linesearch iterations. */
unsigned maxIterations;
LinesearchSettings(double funcTol, double paramTol, unsigned maxIter)
: functionTolerance(funcTol), parameterTolerance(paramTol),
maxIterations(maxIter) {}
};
} // namespace numopt::solver::linesearch
106 changes: 106 additions & 0 deletions include/numopt/solvers/linesearch/strongWolfeLineSearch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#pragma once

#include "linesearchSettings.h"
#include "solvers/solverSettings.h"
#include <base/types.h>

namespace numopt::solver::linesearch {
/**
* Implements interpolation based linesearch algorithm (Algorithm 3.5 & 3.6)
* Ensures Strong Wolfe conditions are satisfied
*/
template <typename Evaluator> class StrongWolfeLinesearch {
public:
StrongWolfeLinesearch(const Evaluator &functor) : functor_(functor) {}
double run(const VectorX &xcur, const VectorX &dir, VectorX &xnext,
const double alpha_0, double *palpha,
const SolverSettings &settings, const unsigned verbose) {
xcur_ = xcur;
dir_ = dir;
auto phiInfo_i =getPhiInfo(std::min(1.0, alpha_0 * 1.01));
phiInfo_0_ = getPhiInfo(0);
PhiInfo phiInfo_im1 = phiInfo_0_;
PhiInfo phiInfo_amax = getPhiInfo(LS_MaxAlpha);
for (unsigned iter = 0; iter < settings.maxLineSearchIterations; iter++) {
if (check_armijo(phiInfo_i) || (iter && (phiInfo_i.phi >= phiInfo_im1.phi))) {
auto phiOptimal = zoom(phiInfo_im1, phiInfo_i);
*palpha = phiOptimal.alpha;
xnext = xcur_ + *palpha * dir_;
return functor_(xnext);
}
if (abs(phiInfo_i.dphi) <= -LS_C2 * phiInfo_0_.dphi) {
*palpha = phiInfo_i.alpha;
xnext = xcur_ + *palpha * dir_;
return phiInfo_i.phi;
}
if (phiInfo_i.dphi >= 0) {
auto phiOptimal = zoom(phiInfo_i, phiInfo_im1);
*palpha = phiOptimal.alpha;
xnext = xcur_ + *palpha * dir_;
return functor_(xnext);
}
phiInfo_im1 = phiInfo_i;
phiInfo_i = //getphiinfo(std::min(phiinfo_im1.alpha / ls_rho, ls_maxalpha));
getPhiInfo(interpolate_cubic(phiInfo_i,phiInfo_amax ));
if (verbose > 1) {
std::cout << "StrongWolfe iteration " << iter
<< " f(x) : " << phiInfo_i.phi << " alpha " << phiInfo_i.alpha
<< std::endl;
}
}
return phiInfo_0_.phi;
}

private:
struct PhiInfo {
double alpha;
double phi;
double dphi;
};

PhiInfo getPhiInfo(const double alpha) {
const auto [grad, phi] = functor_.gradient(xcur_ + alpha * dir_);
const auto dphi = grad.dot(dir_);
return {alpha, phi, dphi};
}

double interpolate_cubic(const PhiInfo low, const PhiInfo high) {
const auto d1 = low.dphi + high.dphi -
3 * (low.phi - high.phi) / (low.alpha - high.alpha);
const auto d2 = std::copysign(1.0, high.alpha - low.alpha) *
std::sqrt(d1 * d1 - high.dphi * low.dphi);
return high.alpha - (high.alpha - low.alpha) * (high.dphi + d2 - d1) /
(high.dphi - low.dphi + 2 * d2);
}

PhiInfo zoom(PhiInfo phiInfo_l, PhiInfo phiInfo_h) {
for (int j = 0; j < 10; j++) {
double alpha_j = interpolate_cubic(phiInfo_l, phiInfo_h);
PhiInfo phiInfo_j = getPhiInfo(alpha_j);
// std::cout << "Z " << phiInfo_j.phi << " " << phiInfo_j.alpha << std::endl;
if (check_armijo(phiInfo_j) || (phiInfo_j.phi >= phiInfo_l.phi)) {
phiInfo_h = phiInfo_j;
} else {
if (std::abs(phiInfo_j.dphi) <= -LS_C2 * phiInfo_0_.dphi) {
return phiInfo_j;
}
if (phiInfo_j.dphi * (phiInfo_h.alpha - phiInfo_l.alpha) >= 0) {
phiInfo_h = phiInfo_l;
}
phiInfo_l = phiInfo_j;
}
}
return phiInfo_l;
}

bool check_armijo(const PhiInfo phiInfo) const {
return (phiInfo.phi >
(phiInfo_0_.phi + LS_C1 * phiInfo.alpha * phiInfo_0_.dphi));
}

PhiInfo phiInfo_0_;
const Evaluator &functor_;
VectorX xcur_;
VectorX dir_;
};
} // namespace numopt::solver::linesearch
Loading