Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

revert revert 2401 algebra solver newton #2579

Merged
merged 4 commits into from
Sep 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions stan/math/prim/functor/algebra_solver_adapter.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#ifndef STAN_MATH_PRIM_FUNCTOR_ALGEBRA_SOLVER_ADAPTER_HPP
#define STAN_MATH_PRIM_FUNCTOR_ALGEBRA_SOLVER_ADAPTER_HPP

#include <ostream>
#include <vector>

/**
* Adapt the non-variadic algebra_solver_newton and algebra_solver_powell
* arguemts to the variadic algebra_solver_newton_impl and
* algebra_solver_powell_impl interfaces.
*
* @tparam F type of function to adapt.
*/
template <typename F>
struct algebra_solver_adapter {
const F& f_;

explicit algebra_solver_adapter(const F& f) : f_(f) {}

template <typename T1, typename... T2>
auto operator()(const T1& x, std::ostream* msgs, T2&&... args) const {
return f_(x, args..., msgs);
}
};

#endif
55 changes: 55 additions & 0 deletions stan/math/rev/core/chainable_object.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,61 @@ auto make_chainable_ptr(T&& obj) {
return &ptr->get();
}

/**
* `unsafe_chainable_object` hold another object and is useful for connecting
* the lifetime of a specific object to the chainable stack. This class
* differs from `chainable_object` in that this class does not evaluate
* expressions.
*
* `unsafe_chainable_object` objects should only be allocated with `new`.
* `unsafe_chainable_object` objects allocated on the stack will result
* in a double free (`obj_` will get destructed once when the
* `unsafe_chainable_object` leaves scope and once when the chainable
* stack memory is recovered).
*
* @tparam T type of object to hold
*/
template <typename T>
class unsafe_chainable_object : public chainable_alloc {
private:
std::decay_t<T> obj_;

public:
/**
* Construct chainable object from another object
*
* @tparam S type of object to hold (must have the same plain type as `T`)
*/
template <typename S,
require_same_t<plain_type_t<T>, plain_type_t<S>>* = nullptr>
explicit unsafe_chainable_object(S&& obj) : obj_(std::forward<S>(obj)) {}

/**
* Return a reference to the underlying object
*
* @return reference to underlying object
*/
inline auto& get() noexcept { return obj_; }
inline const auto& get() const noexcept { return obj_; }
};

/**
* Store the given object in a `chainable_object` so it is destructed
* only when the chainable stack memory is recovered and return
* a pointer to the underlying object This function
* differs from `make_chainable_object` in that this class does not evaluate
* expressions.
*
* @tparam T type of object to hold
* @param obj object to hold
* @return pointer to object held in `chainable_object`
*/
template <typename T>
auto make_unsafe_chainable_ptr(T&& obj) {
auto ptr = new unsafe_chainable_object<T>(std::forward<T>(obj));
return &ptr->get();
}

} // namespace math
} // namespace stan
#endif
233 changes: 170 additions & 63 deletions stan/math/rev/functor/algebra_solver_newton.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@

#include <stan/math/rev/core.hpp>
#include <stan/math/rev/functor/algebra_system.hpp>
#include <stan/math/rev/functor/algebra_solver_powell.hpp>
#include <stan/math/rev/functor/kinsol_solve.hpp>
#include <stan/math/prim/err.hpp>
#include <stan/math/prim/fun/mdivide_left.hpp>
#include <stan/math/prim/fun/eval.hpp>
#include <stan/math/prim/fun/value_of.hpp>
#include <stan/math/prim/functor/algebra_solver_adapter.hpp>
#include <unsupported/Eigen/NonLinearOptimization>
#include <iostream>
#include <string>
Expand All @@ -25,53 +25,182 @@ namespace math {
* The user can also specify the scaled step size, the function
* tolerance, and the maximum number of steps.
*
* This overload handles non-autodiff parameters.
*
* @tparam F type of equation system function.
* @tparam T type of initial guess vector.
* @tparam Args types of additional parameters to the equation system functor
*
* @param[in] f Functor that evaluated the system of equations.
* @param[in] x Vector of starting values.
* @param[in] y Parameter vector for the equation system. The function
* is overloaded to treat y as a vector of doubles or of a
* a template type T.
* @param[in] dat Continuous data vector for the equation system.
* @param[in] dat_int Integer data vector for the equation system.
* @param[in, out] msgs The print stream for warning messages.
* @param[in] scaling_step_size Scaled-step stopping tolerance. If
* a Newton step is smaller than the scaling step
* tolerance, the code breaks, assuming the solver is no
* longer making significant progress (i.e. is stuck)
* @param[in] function_tolerance determines whether roots are acceptable.
* @param[in] max_num_steps maximum number of function evaluations.
* * @throw <code>std::invalid_argument</code> if x has size zero.
* @param[in] args Additional parameters to the equation system functor.
* @return theta Vector of solutions to the system of equations.
* @pre f returns finite values when passed any value of x and the given args.
* @throw <code>std::invalid_argument</code> if x has size zero.
* @throw <code>std::invalid_argument</code> if x has non-finite elements.
* @throw <code>std::invalid_argument</code> if scaled_step_size is strictly
* negative.
* @throw <code>std::invalid_argument</code> if function_tolerance is strictly
* negative.
* @throw <code>std::invalid_argument</code> if max_num_steps is not positive.
* @throw <code>std::domain_error if solver exceeds max_num_steps.
*/
template <typename F, typename T, typename... Args,
require_eigen_vector_t<T>* = nullptr,
require_all_st_arithmetic<Args...>* = nullptr>
Eigen::VectorXd algebra_solver_newton_impl(const F& f, const T& x,
std::ostream* const msgs,
const double scaling_step_size,
const double function_tolerance,
const int64_t max_num_steps,
const Args&... args) {
const auto& x_ref = to_ref(value_of(x));

check_nonzero_size("algebra_solver_newton", "initial guess", x_ref);
check_finite("algebra_solver_newton", "initial guess", x_ref);
check_nonnegative("algebra_solver_newton", "scaling_step_size",
scaling_step_size);
check_nonnegative("algebra_solver_newton", "function_tolerance",
function_tolerance);
check_positive("algebra_solver_newton", "max_num_steps", max_num_steps);

return kinsol_solve(f, x_ref, scaling_step_size, function_tolerance,
max_num_steps, 1, 10, KIN_LINESEARCH, msgs, args...);
}

/**
* Return the solution to the specified system of algebraic
* equations given an initial guess, and parameters and data,
* which get passed into the algebraic system. Use the
* KINSOL solver from the SUNDIALS suite.
*
* The user can also specify the scaled step size, the function
* tolerance, and the maximum number of steps.
*
* This overload handles var parameters.
*
* The Jacobian \(J_{xy}\) (i.e., Jacobian of unknown \(x\) w.r.t. the parameter
* \(y\)) is calculated given the solution as follows. Since
* \[
* f(x, y) = 0,
* \]
* we have (\(J_{pq}\) being the Jacobian matrix \(\tfrac {dq} {dq}\))
* \[
* - J_{fx} J_{xy} = J_{fy},
* \]
* and therefore \(J_{xy}\) can be solved from system
* \[
* - J_{fx} J_{xy} = J_{fy}.
* \]
* Let \(eta\) be the adjoint with respect to \(x\); then to calculate
* \[
* \eta J_{xy},
* \]
* we solve
* \[
* - (\eta J_{fx}^{-1}) J_{fy}.
* \]
*
* @tparam F type of equation system function.
* @tparam T type of initial guess vector.
* @tparam Args types of additional parameters to the equation system functor
*
* @param[in] f Functor that evaluated the system of equations.
* @param[in] x Vector of starting values.
* @param[in, out] msgs The print stream for warning messages.
* @param[in] scaling_step_size Scaled-step stopping tolerance. If
* a Newton step is smaller than the scaling step
* tolerance, the code breaks, assuming the solver is no
* longer making significant progress (i.e. is stuck)
* @param[in] function_tolerance determines whether roots are acceptable.
* @param[in] max_num_steps maximum number of function evaluations.
* @param[in] args Additional parameters to the equation system functor.
* @return theta Vector of solutions to the system of equations.
* @pre f returns finite values when passed any value of x and the given args.
* @throw <code>std::invalid_argument</code> if x has size zero.
* @throw <code>std::invalid_argument</code> if x has non-finite elements.
* @throw <code>std::invalid_argument</code> if y has non-finite elements.
* @throw <code>std::invalid_argument</code> if dat has non-finite elements.
* @throw <code>std::invalid_argument</code> if dat_int has non-finite elements.
* @throw <code>std::invalid_argument</code> if scaled_step_size is strictly
* negative.
* @throw <code>std::invalid_argument</code> if function_tolerance is strictly
* negative.
* @throw <code>std::invalid_argument</code> if max_num_steps is not positive.
* @throw <code>std::domain_error</code> if solver exceeds max_num_steps.
* @throw <code>std::domain_error if solver exceeds max_num_steps.
*/
template <typename F, typename T, require_eigen_vector_t<T>* = nullptr>
Eigen::VectorXd algebra_solver_newton(
const F& f, const T& x, const Eigen::VectorXd& y,
const std::vector<double>& dat, const std::vector<int>& dat_int,
std::ostream* msgs = nullptr, double scaling_step_size = 1e-3,
double function_tolerance = 1e-6,
long int max_num_steps = 200) { // NOLINT(runtime/int)
const auto& x_eval = x.eval();
algebra_solver_check(x_eval, y, dat, dat_int, function_tolerance,
max_num_steps);
check_nonnegative("algebra_solver", "scaling_step_size", scaling_step_size);

check_matching_sizes("algebra_solver", "the algebraic system's output",
value_of(f(x_eval, y, dat, dat_int, msgs)),
"the vector of unknowns, x,", x);

return kinsol_solve(f, value_of(x_eval), y, dat, dat_int, 0,
scaling_step_size, function_tolerance, max_num_steps);
template <typename F, typename T, typename... T_Args,
require_eigen_vector_t<T>* = nullptr,
require_any_st_var<T_Args...>* = nullptr>
Eigen::Matrix<var, Eigen::Dynamic, 1> algebra_solver_newton_impl(
const F& f, const T& x, std::ostream* const msgs,
const double scaling_step_size, const double function_tolerance,
const int64_t max_num_steps, const T_Args&... args) {
const auto& x_ref = to_ref(value_of(x));
auto arena_args_tuple = make_chainable_ptr(std::make_tuple(eval(args)...));
auto args_vals_tuple = apply(
[&](const auto&... args) {
return std::make_tuple(to_ref(value_of(args))...);
},
*arena_args_tuple);

check_nonzero_size("algebra_solver_newton", "initial guess", x_ref);
check_finite("algebra_solver_newton", "initial guess", x_ref);
check_nonnegative("algebra_solver_newton", "scaling_step_size",
scaling_step_size);
check_nonnegative("algebra_solver_newton", "function_tolerance",
function_tolerance);
check_positive("algebra_solver_newton", "max_num_steps", max_num_steps);

// Solve the system
Eigen::VectorXd theta_dbl = apply(
[&](const auto&... vals) {
return kinsol_solve(f, x_ref, scaling_step_size, function_tolerance,
max_num_steps, 1, 10, KIN_LINESEARCH, msgs,
vals...);
},
args_vals_tuple);

auto f_wrt_x = [&](const auto& x) {
return apply([&](const auto&... args) { return f(x, msgs, args...); },
args_vals_tuple);
};

Eigen::MatrixXd Jf_x;
Eigen::VectorXd f_x;

jacobian(f_wrt_x, theta_dbl, f_x, Jf_x);

using ret_type = Eigen::Matrix<var, Eigen::Dynamic, -1>;
arena_t<ret_type> ret = theta_dbl;
auto Jf_x_T_lu_ptr
= make_unsafe_chainable_ptr(Jf_x.transpose().partialPivLu()); // Lu

reverse_pass_callback(
[f, ret, arena_args_tuple, Jf_x_T_lu_ptr, msgs]() mutable {
Eigen::VectorXd eta = -Jf_x_T_lu_ptr->solve(ret.adj().eval());

// Contract with Jacobian of f with respect to y using a nested reverse
// autodiff pass.
{
nested_rev_autodiff rev;

Eigen::VectorXd ret_val = ret.val();
auto x_nrad_ = apply(
[&ret_val, &f, msgs](const auto&... args) {
return eval(f(ret_val, msgs, args...));
},
*arena_args_tuple);
x_nrad_.adj() = eta;
grad();
}
});

return ret_type(ret);
}

/**
Expand All @@ -83,19 +212,15 @@ Eigen::VectorXd algebra_solver_newton(
* The user can also specify the scaled step size, the function
* tolerance, and the maximum number of steps.
*
* Overload the previous definition to handle the case where y
* is a vector of parameters (var). The overload calls the
* algebraic solver defined above and builds a vari object on
* top, using the algebra_solver_vari class.
* Signature to maintain backward compatibility, will be removed
* in the future.
*
* @tparam F type of equation system function.
* @tparam T type of initial guess vector.
*
* @param[in] f Functor that evaluated the system of equations.
* @param[in] x Vector of starting values.
* @param[in] y Parameter vector for the equation system. The function
* is overloaded to treat y as a vector of doubles or of a
* a template type T.
* @param[in] y Parameter vector for the equation system.
* @param[in] dat Continuous data vector for the equation system.
* @param[in] dat_int Integer data vector for the equation system.
* @param[in, out] msgs The print stream for warning messages.
Expand All @@ -119,34 +244,16 @@ Eigen::VectorXd algebra_solver_newton(
* @throw <code>std::domain_error if solver exceeds max_num_steps.
*/
template <typename F, typename T1, typename T2,
require_all_eigen_vector_t<T1, T2>* = nullptr,
require_st_var<T2>* = nullptr>
require_all_eigen_vector_t<T1, T2>* = nullptr>
Eigen::Matrix<scalar_type_t<T2>, Eigen::Dynamic, 1> algebra_solver_newton(
const F& f, const T1& x, const T2& y, const std::vector<double>& dat,
const std::vector<int>& dat_int, std::ostream* msgs = nullptr,
double scaling_step_size = 1e-3, double function_tolerance = 1e-6,
long int max_num_steps = 200) { // NOLINT(runtime/int)

const auto& x_eval = x.eval();
const auto& y_eval = y.eval();
Eigen::VectorXd theta_dbl = algebra_solver_newton(
f, x_eval, value_of(y_eval), dat, dat_int, msgs, scaling_step_size,
function_tolerance, max_num_steps);

typedef system_functor<F, double, double, false> Fy;
typedef system_functor<F, double, double, true> Fs;
typedef hybrj_functor_solver<Fs, F, double, double> Fx;
Fx fx(Fs(), f, value_of(x_eval), value_of(y_eval), dat, dat_int, msgs);

// Construct vari
auto* vi0 = new algebra_solver_vari<Fy, F, scalar_type_t<T2>, Fx>(
Fy(), f, value_of(x_eval), y_eval, dat, dat_int, theta_dbl, fx, msgs);
Eigen::Matrix<var, Eigen::Dynamic, 1> theta(x.size());
theta(0) = var(vi0->theta_[0]);
for (int i = 1; i < x.size(); ++i)
theta(i) = var(vi0->theta_[i]);

return theta;
const std::vector<int>& dat_int, std::ostream* const msgs = nullptr,
const double scaling_step_size = 1e-3,
const double function_tolerance = 1e-6,
const long int max_num_steps = 200) { // NOLINT(runtime/int)
return algebra_solver_newton_impl(algebra_solver_adapter<F>(f), x, msgs,
scaling_step_size, function_tolerance,
max_num_steps, y, dat, dat_int);
}

} // namespace math
Expand Down
Loading