tniranjan · tniranjan · Dec 20, 2021 · Dec 23, 2021 · Dec 23, 2021 · Dec 23, 2021
diff --git a/docs/index.rst b/docs/index.rst
@@ -15,25 +15,49 @@ Or a yali_. Whatever sinks your boat.
 Docs
 ====
 
-.. doxygenfunction:: numopt::functors::paraboloid
-.. doxygenfunction:: numopt::functors::rosenbrock
+Problems
+--------
+A problem defines the objective funtion that is to be minimized (unconstrained as of now). It must atleast define the evaluation function and a gradient function. 
+
 .. doxygenclass:: numopt::ProblemBase
    :members:
-.. doxygenclass:: numopt::ProblemAD
-   :members:
-
 .. doxygenclass:: numopt::problems::Rosenbrock2dProblem
    :members:
 .. doxygenclass:: numopt::problems::ParaboloidProblem
    :members:
 
-.. doxygenclass:: numopt::Solver
+
+
+.. doxygenclass:: numopt::ProblemAD
    :members:
+
+Functors
+--------
+It might not always be easy to derive your gradient vector, and even more hard to get your hessian matrix. In such situations, we can use autodiff to obtain the gradient and Hessian. The class :class:`numopt::ProblemAD` implements this functionality, as long as the desired function is defined as a template function.
+The following functors serve as examples.
+
+.. doxygenfunction:: numopt::functors::paraboloid
+.. doxygenfunction:: numopt::functors::rosenbrock
+
+Solvers
+-------
+As of now this is an unconstrained optimization solver, that takes a problem, and finds its minima.
+
 .. doxygenstruct:: numopt::solver::SolverData
    :members:
 .. doxygenstruct:: numopt::solver::SolverSettings
    :members:
-.. doxygenfunction:: numopt::solver::BackTrackingLineSearch
+.. doxygenclass:: numopt::Solver
+   :members:
 .. doxygenclass:: numopt::GradientDescentSolver
    :members:
 
+Linesearch
+----------
+
+.. doxygenstruct:: numopt::solver::linesearch::LinesearchSettings
+   :members:
+.. doxygenclass:: numopt::solver::linesearch::BackTrackingLinesearch
+   :members:
+.. doxygenclass:: numopt::solver::linesearch::StrongWolfeLinesearch
+   :members:
diff --git a/include/numopt/problems/paraboloidProblem.h b/include/numopt/problems/paraboloidProblem.h
@@ -2,36 +2,37 @@
 
 #include "problem.h"
 
-
 namespace numopt {
 namespace problems {
 /**
  * Describes a simple paraboloid problem with analytic gradients and hessian.
  * The minima is at T(5)
- */ 
+ */
 class ParaboloidProblem : public ProblemBase {
 
 public:
   ParaboloidProblem() {}
   /** \fn operator()(const VectorX &in) const
-   * \brief returns function value at \a in
-   * \param in The vector to evaluate
+   * \return function value at \a in
+   * \param in vector to evaluate the function at
    */
   double operator()(const VectorX &in) const override {
     return (in.squaredNorm() + (5));
   }
-  /** 
-   * \brief returns the gradient vector at \a in
-   * \param in The vector at which gradient is computed
-   */ 
+  /**
+   * \return a pair where the first element is the gradient vector at \a in and
+   * the second element is the function value at \a in
+   * \param in vector at which gradient is computed
+   */
   std::pair<VectorX, double> gradient(const VectorX &in) const override {
     const VectorX grad = 2 * in.transpose();
     return std::make_pair(grad, this->operator()(in));
   }
   /**
-   * \brief returns hessian matrix at \a in
-   * \param in The vector at which the hessian is computed
-   */ 
+   * \return returns a pair, where the first element is the Hessian matrix at \a
+   * in and the second element is the gradient value at \a in
+   * \param in vector at which the hessian is computed
+   */
   std::pair<SparseMatrixX, VectorX> hessian(const VectorX &in) const {
     SparseMatrixX H(in.rows(), in.rows());
     H.setIdentity();

diff --git a/include/numopt/solvers/gradientDescent.h b/include/numopt/solvers/gradientDescent.h
@@ -1,5 +1,6 @@
 #pragma once
 #include <solvers/linesearch/backtrackingLineSearch.h>
+#include <solvers/linesearch/strongWolfeLineSearch.h>
 #include <solvers/solver.h>
 #include <solvers/solverSettings.h>
 
@@ -20,17 +21,28 @@ class GradientDescentSolver : public Solver<_Problem> {
     solverData.argmin = initValue;
     double prevNorm;
     solverData.min = problem()(initValue);
-    double alpha(1);
+    double alpha(1), dphi(0), dphi_im1(0);
     for (unsigned iter = 0; iter < settings().maxSolverIterations; iter++) {
       prevNorm = solverData.min;
       const VectorX dir = direction(solverData.argmin);
       const VectorX xCur = solverData.argmin;
-      solverData.min = //(settings().linesearchtype == solver::SolverSettings::LineSearchType::BackTracking) ?
-          solver::BackTrackingLineSearch(xCur, dir, problem(), solverData.argmin,
-                                   &alpha, settings().verbosity,
-                                   settings().functionTolerance,
-                                   settings().parameterTolerance,
-                                   settings().maxLineSearchIterations);
+      dphi = dir.dot(dir);
+      const double alphaCur = iter ? alpha * dphi_im1 / dphi : 1e-5;
+      dphi_im1 = dphi;
+      solverData
+          .min = (settings().linesearchtype ==
+                  solver::SolverSettings::LineSearchType::BackTracking) ?
+          solver::linesearch::BackTrackingLinesearch<_Problem>().run(
+              xCur, dir, problem(), solverData.argmin, &alpha, settings(),
+              settings().verbosity):
+          solver::linesearch::StrongWolfeLinesearch<_Problem>(problem()).run(
+              xCur, dir, solverData.argmin, alphaCur, &alpha, settings(),
+              settings().verbosity);
+      if (settings().verbosity) {
+        std::cout << "Gradient Descent iteration : " << iter
+                  << " f(x):" << solverData.min
+                  << " at x_o:" << solverData.argmin << std::endl;
+      }
       solverData.nIter = iter + 1;
       solverData.paramNorm = alpha * dir.norm();
       if (hasConverged(std::abs(prevNorm - solverData.min), alpha * dir.norm(),

diff --git a/include/numopt/solvers/linesearch/backtrackingLineSearch.h b/include/numopt/solvers/linesearch/backtrackingLineSearch.h
@@ -1,62 +1,58 @@
 #pragma once
 
+#include "linesearchSettings.h"
+#include "solvers/solverSettings.h"
 #include <base/types.h>
 
-namespace numopt {
-namespace solver {
-constexpr static double LS_InitalAlpha = 0.8;
-constexpr static double LS_Rho = 0.5;
-constexpr static double LS_C = 0.0001;
+namespace numopt::solver::linesearch {
 /**
- * Implements the Backtracking algorithm. (3.1)
+ * Implements the Backtracking algorithm. (Algorithm 3.1)
  * Only ensures sufficient decrease (weak Wolfe Conditions)
  */
 
-template <typename DerivedX, typename Evaluator>
-double BackTrackingLineSearch(const Eigen::MatrixBase<DerivedX> &xcur,
-                              const Eigen::MatrixBase<DerivedX> &dir,
-                              const Evaluator func,
-                              Eigen::MatrixBase<DerivedX> &xnext,
-                              double *palpha, const unsigned verbose,
-                              const double funcTol, const double paramTol,
-                              const unsigned maxIterations) {
-  double alphak = LS_InitalAlpha;
-  const double initNorm = func(xcur);
-  double prevNorm = initNorm;
-
-  if (verbose > 1)
-    std::cout << "BackTracking - Init Norm : " << initNorm << std::endl;
-
-  for (unsigned iter = 0; iter < maxIterations; iter++) {
-    xnext = (xcur + alphak * dir).eval();
-    const double curNorm = func(xnext);
+template <typename Evaluator> class BackTrackingLinesearch {
+public:
+  double run(const VectorX &xcur, const VectorX &dir, const Evaluator func,
+             VectorX &xnext, double *palpha, const SolverSettings &settings,
+             const unsigned verbose) {
+    double alphak = LS_InitAlpha;
+    const double initNorm = func(xcur);
+    double prevNorm = initNorm;
 
     if (verbose > 1)
-      std::cout << "BackTracking - Cur Norm : " << curNorm
-                << " Step Size : " << alphak << std::endl;
+      std::cout << "BackTracking - Init Norm : " << initNorm << std::endl;
 
-    if (curNorm <= initNorm - LS_C * alphak * dir.norm()) {
-      if (palpha)
-        *palpha = alphak;
+    for (unsigned iter = 0; iter < settings.maxLineSearchIterations; iter++) {
+      xnext = (xcur + alphak * dir).eval();
+      const double curNorm = func(xnext);
 
-      return curNorm;
-    }
+      if (verbose > 1)
+        std::cout << "BackTracking - Cur Norm : " << curNorm
+                  << " Step Size : " << alphak << std::endl;
 
-    if ((iter == (maxIterations - 1)) ||
-        (std::fabs(curNorm - prevNorm) < funcTol) ||
-        (alphak * dir.norm() < paramTol))
-      break;
+      if (curNorm <= initNorm - LS_C1 * alphak * dir.norm()) {
+        if (palpha)
+          *palpha = alphak;
 
-    alphak = alphak * LS_Rho;
-    prevNorm = curNorm;
-  }
+        return curNorm;
+      }
 
-  if (palpha)
-    *palpha = 0.0;
+      if ((iter == (settings.maxLineSearchIterations - 1)) ||
+          (std::fabs(curNorm - prevNorm) < settings.functionTolerance) ||
+          (alphak * dir.norm() < settings.parameterTolerance))
+        break;
+
+      alphak = alphak * LS_Rho;
+      prevNorm = curNorm;
+    }
 
-  xnext = xcur;
+    if (palpha)
+      *palpha = 0.0;
+
+    xnext = xcur;
+
+    return initNorm;
+  }
+};
 
-  return initNorm;
-}
-} // namespace solver
-} // namespace numopt
+} // namespace numopt::solver::linesearch
diff --git a/include/numopt/solvers/linesearch/linesearchSettings.h b/include/numopt/solvers/linesearch/linesearchSettings.h
@@ -0,0 +1,23 @@
+#pragma once
+
+namespace numopt::solver::linesearch {
+constexpr static double LS_MaxAlpha = 1e5;
+constexpr static double LS_InitAlpha = 0.8;
+constexpr static double LS_Rho = 0.5;
+constexpr static double LS_C1 = 1e-4;
+constexpr static double LS_C2 = 0.9;
+/**
+ * @brief  Structure that holds tolerances & max # iterations.
+ */
+struct LinesearchSettings {
+  /** Tolerance for change in f(x) */
+  double functionTolerance;
+  /** Tolerance for change in x*/
+  double parameterTolerance;
+  /**  max no. of linesearch iterations. */
+  unsigned maxIterations;
+  LinesearchSettings(double funcTol, double paramTol, unsigned maxIter)
+      : functionTolerance(funcTol), parameterTolerance(paramTol),
+        maxIterations(maxIter) {}
+};
+} // namespace numopt::solver::linesearch
diff --git a/include/numopt/solvers/linesearch/strongWolfeLineSearch.h b/include/numopt/solvers/linesearch/strongWolfeLineSearch.h
@@ -0,0 +1,106 @@
+#pragma once
+
+#include "linesearchSettings.h"
+#include "solvers/solverSettings.h"
+#include <base/types.h>
+
+namespace numopt::solver::linesearch {
+/**
+ * Implements interpolation based linesearch algorithm (Algorithm 3.5 & 3.6)
+ * Ensures Strong Wolfe conditions are satisfied
+ */
+template <typename Evaluator> class StrongWolfeLinesearch {
+public:
+  StrongWolfeLinesearch(const Evaluator &functor) : functor_(functor) {}
+  double run(const VectorX &xcur, const VectorX &dir, VectorX &xnext,
+             const double alpha_0, double *palpha,
+             const SolverSettings &settings, const unsigned verbose) {
+    xcur_ = xcur;
+    dir_ = dir;
+    auto phiInfo_i =getPhiInfo(std::min(1.0, alpha_0 * 1.01));
+    phiInfo_0_ = getPhiInfo(0);
+    PhiInfo phiInfo_im1 = phiInfo_0_;
+    PhiInfo phiInfo_amax = getPhiInfo(LS_MaxAlpha);
+    for (unsigned iter = 0; iter < settings.maxLineSearchIterations; iter++) {
+      if (check_armijo(phiInfo_i) || (iter && (phiInfo_i.phi >= phiInfo_im1.phi))) {
+        auto phiOptimal = zoom(phiInfo_im1, phiInfo_i);
+        *palpha = phiOptimal.alpha;
+        xnext = xcur_ + *palpha * dir_;
+        return functor_(xnext);
+      }
+      if (abs(phiInfo_i.dphi) <= -LS_C2 * phiInfo_0_.dphi) {
+        *palpha = phiInfo_i.alpha;
+        xnext = xcur_ + *palpha * dir_;
+        return phiInfo_i.phi;
+      }
+      if (phiInfo_i.dphi >= 0) {
+        auto phiOptimal = zoom(phiInfo_i, phiInfo_im1);
+        *palpha = phiOptimal.alpha;
+        xnext = xcur_ + *palpha * dir_;
+        return functor_(xnext);
+      }
+      phiInfo_im1 = phiInfo_i;
+      phiInfo_i = //getphiinfo(std::min(phiinfo_im1.alpha / ls_rho, ls_maxalpha));
+      getPhiInfo(interpolate_cubic(phiInfo_i,phiInfo_amax ));
+      if (verbose > 1) {
+        std::cout << "StrongWolfe iteration " << iter
+                  << " f(x) : " << phiInfo_i.phi << " alpha " << phiInfo_i.alpha
+                  << std::endl;
+      }
+    }
+    return phiInfo_0_.phi;
+  }
+
+private:
+  struct PhiInfo {
+    double alpha;
+    double phi;
+    double dphi;
+  };
+
+  PhiInfo getPhiInfo(const double alpha) {
+    const auto [grad, phi] = functor_.gradient(xcur_ + alpha * dir_);
+    const auto dphi = grad.dot(dir_);
+    return {alpha, phi, dphi};
+  }
+
+  double interpolate_cubic(const PhiInfo low, const PhiInfo high) {
+    const auto d1 = low.dphi + high.dphi -
+                    3 * (low.phi - high.phi) / (low.alpha - high.alpha);
+    const auto d2 = std::copysign(1.0, high.alpha - low.alpha) *
+                    std::sqrt(d1 * d1 - high.dphi * low.dphi);
+    return high.alpha - (high.alpha - low.alpha) * (high.dphi + d2 - d1) /
+                            (high.dphi - low.dphi + 2 * d2);
+  }
+
+  PhiInfo zoom(PhiInfo phiInfo_l, PhiInfo phiInfo_h) {
+    for (int j = 0; j < 10; j++) {
+      double alpha_j = interpolate_cubic(phiInfo_l, phiInfo_h);
+      PhiInfo phiInfo_j = getPhiInfo(alpha_j);
+    //   std::cout << "Z " << phiInfo_j.phi << " " << phiInfo_j.alpha << std::endl;
+      if (check_armijo(phiInfo_j) || (phiInfo_j.phi >= phiInfo_l.phi)) {
+        phiInfo_h = phiInfo_j;
+      } else {
+        if (std::abs(phiInfo_j.dphi) <= -LS_C2 * phiInfo_0_.dphi) {
+          return phiInfo_j;
+        }
+        if (phiInfo_j.dphi * (phiInfo_h.alpha - phiInfo_l.alpha) >= 0) {
+          phiInfo_h = phiInfo_l;
+        }
+        phiInfo_l = phiInfo_j;
+      }
+    }
+    return phiInfo_l;
+  }
+
+  bool check_armijo(const PhiInfo phiInfo) const {
+    return (phiInfo.phi >
+            (phiInfo_0_.phi + LS_C1 * phiInfo.alpha * phiInfo_0_.dphi));
+  }
+
+  PhiInfo phiInfo_0_;
+  const Evaluator &functor_;
+  VectorX xcur_;
+  VectorX dir_;
+};
+} // namespace numopt::solver::linesearch