Commit ffb5ca07 authored by Vedran Miletić's avatar Vedran Miletić

[WIP] Replace TNT with Eigen and N-M search with cppoptlib

parent 6dbd80dc
The MIT License (MIT)
Copyright (c) 2014-2015 Patrick Wieschollek
Copyright (c) 2015- the respective contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
\ No newline at end of file
Name: CppOptimizationLibrary
Description: Header-only C++ optimization library using Eigen
Version: 1.0.0+20180915gitec0a32e1
URL: https://github.com/PatWie/CppNumericalSolvers
// CppNumericalSolver
#ifndef META_H
#define META_H
#include <string>
#include <iostream>
#include <Eigen/Core>
namespace cppoptlib {
/*template <typename T>
using Vector = Eigen::Matrix<T, Eigen::Dynamic, 1>;
template <typename T>
using Matrix = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>;
*/
// Verbosity of solver debug output.
enum class DebugLevel { None = 0, Low, High };
// Outcome/state of a solver run; produced by checkConvergence() below and
// stored by the solvers.
enum class Status {
NotStarted = -1, // solver has not run yet
Continue = 0, // no stopping criterion has fired
IterationLimit, // iteration budget exhausted
XDeltaTolerance, // change in parameter vector fell below threshold
FDeltaTolerance, // change in objective value fell below threshold
GradNormTolerance, // gradient norm fell below threshold
Condition, // condition number exceeded threshold
UserDefined // stopped from a user callback
};
// Operations a Nelder-Mead style simplex solver can perform in one step.
enum class SimplexOp {
  Place,
  Reflect,
  Expand,
  ContractIn,
  ContractOut,
  Shrink
};

/**
 * @brief Human-readable name of a simplex operation.
 * @param op operation to describe
 * @return lowercase name, or "unknown" for an out-of-range value
 */
inline std::string op_to_string(SimplexOp op) {
  switch (op) {
    case SimplexOp::Place:       return "place";
    case SimplexOp::Reflect:     return "reflect";
    case SimplexOp::Expand:      return "expand";
    case SimplexOp::ContractIn:  return "contract-in";
    case SimplexOp::ContractOut: return "contract-out";
    case SimplexOp::Shrink:      return "shrink";
  }
  return "unknown";
}

// Stream a simplex operation by its name. Delegates to op_to_string() so the
// op -> string mapping lives in exactly one place (it was previously
// duplicated in a second switch here, risking silent divergence).
inline std::ostream &operator<<(std::ostream &os, const SimplexOp &op) {
  return os << op_to_string(op);
}
/**
 * Bundle of solver stopping thresholds. The same type is reused to record the
 * solver's current progress, so the two can be compared field by field.
 * A threshold of zero disables the corresponding check.
 */
template<typename T>
class Criteria {
public:
    size_t iterations; //!< Maximum number of iterations
    T xDelta;          //!< Minimum change in parameter vector
    T fDelta;          //!< Minimum change in cost function
    T gradNorm;        //!< Minimum norm of gradient vector
    T condition;       //!< Maximum condition number of Hessian

    // A freshly constructed Criteria is fully zeroed (all checks disabled).
    Criteria() { reset(); }

    // Commonly useful defaults: cap the iteration count and require a small
    // gradient norm; everything else stays disabled (zero from the ctor).
    static Criteria defaults() {
        Criteria c;
        c.iterations = 10000;
        c.gradNorm = 1e-4;
        return c;
    }

    // Zero every field, disabling all convergence checks.
    void reset() {
        iterations = 0;
        xDelta = T(0);
        fDelta = T(0);
        gradNorm = T(0);
        condition = T(0);
    }

    // Pretty-print all fields, one per line.
    void print(std::ostream &os) const {
        os << "Iterations: " << iterations << std::endl;
        os << "xDelta: " << xDelta << std::endl;
        os << "fDelta: " << fDelta << std::endl;
        os << "GradNorm: " << gradNorm << std::endl;
        os << "Condition: " << condition << std::endl;
    }
};
/**
 * @brief Decide whether any stop threshold has been crossed.
 *
 * A threshold equal to zero disables the corresponding check. Checks are
 * evaluated in a fixed order, so the first one that fires determines the
 * returned status.
 *
 * @param stop    thresholds configured by the user
 * @param current measurements from the running solver
 * @return the matching Status, or Status::Continue when nothing fired
 */
template<typename T>
Status checkConvergence(const Criteria<T> &stop, const Criteria<T> &current) {
  const bool iterationsExceeded = (stop.iterations > 0) && (current.iterations > stop.iterations);
  if (iterationsExceeded) {
    return Status::IterationLimit;
  }
  const bool xChangeTooSmall = (stop.xDelta > 0) && (current.xDelta < stop.xDelta);
  if (xChangeTooSmall) {
    return Status::XDeltaTolerance;
  }
  const bool fChangeTooSmall = (stop.fDelta > 0) && (current.fDelta < stop.fDelta);
  if (fChangeTooSmall) {
    return Status::FDeltaTolerance;
  }
  const bool gradientTooSmall = (stop.gradNorm > 0) && (current.gradNorm < stop.gradNorm);
  if (gradientTooSmall) {
    return Status::GradNormTolerance;
  }
  const bool illConditioned = (stop.condition > 0) && (current.condition > stop.condition);
  if (illConditioned) {
    return Status::Condition;
  }
  return Status::Continue;
}
// Stream a human-readable explanation of a solver status. An out-of-range
// value (only reachable via a cast) streams nothing, as before.
inline std::ostream &operator<<(std::ostream &os, const Status &s) {
  const char *message = "";
  switch (s) {
    case Status::NotStarted:        message = "Solver not started."; break;
    case Status::Continue:          message = "Convergence criteria not reached."; break;
    case Status::IterationLimit:    message = "Iteration limit reached."; break;
    case Status::XDeltaTolerance:   message = "Change in parameter vector too small."; break;
    case Status::FDeltaTolerance:   message = "Change in cost function value too small."; break;
    case Status::GradNormTolerance: message = "Gradient vector norm too small."; break;
    case Status::Condition:         message = "Condition of Hessian/Covariance matrix too large."; break;
    case Status::UserDefined:       message = "Stop condition defined in the callback."; break;
  }
  return os << message;
}
// Stream insertion for Criteria<T>; simply forwards to Criteria<T>::print().
template<typename T>
std::ostream &operator<<(std::ostream &os, const Criteria<T> &c) {
c.print(os);
return os;
}
} // End namespace cppoptlib
#endif /* META_H */
#ifndef PROBLEM_H
#define PROBLEM_H
#include <array>
#include <vector>
#include <Eigen/Core>
#include "meta.h"
namespace cppoptlib {
template<typename Scalar_, int Dim_ = Eigen::Dynamic>
class Problem {
public:
static const int Dim = Dim_;
typedef Scalar_ Scalar;
using TVector = Eigen::Matrix<Scalar, Dim, 1>;
using THessian = Eigen::Matrix<Scalar, Dim, Dim>;
using TCriteria = Criteria<Scalar>;
using TIndex = typename TVector::Index;
using MatrixType = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
public:
Problem() {}
virtual ~Problem()= default;
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
virtual bool callback(const Criteria<Scalar> &state, const TVector &x) {
return true;
}
virtual bool detailed_callback(const Criteria<Scalar> &state, SimplexOp op, int index, const MatrixType &x, std::vector<Scalar> f) {
return true;
}
#pragma GCC diagnostic pop
/**
* @brief returns objective value in x
* @details [long description]
*
* @param x [description]
* @return [description]
*/
virtual Scalar value(const TVector &x) = 0;
/**
* @brief overload value for nice syntax
* @details [long description]
*
* @param x [description]
* @return [description]
*/
Scalar operator()(const TVector &x) {
return value(x);
}
/**
* @brief returns gradient in x as reference parameter
* @details should be overwritten by symbolic gradient
*
* @param grad [description]
*/
virtual void gradient(const TVector &x, TVector &grad) {
finiteGradient(x, grad);
}
/**
* @brief This computes the hessian
* @details should be overwritten by symbolic hessian, if solver relies on hessian
*/
virtual void hessian(const TVector &x, THessian &hessian) {
finiteHessian(x, hessian);
}
virtual bool checkGradient(const TVector &x, int accuracy = 3) {
// TODO: check if derived class exists:
// int(typeid(&Rosenbrock<double>::gradient) == typeid(&Problem<double>::gradient)) == 1 --> overwritten
const TIndex D = x.rows();
TVector actual_grad(D);
TVector expected_grad(D);
gradient(x, actual_grad);
finiteGradient(x, expected_grad, accuracy);
for (TIndex d = 0; d < D; ++d) {
Scalar scale = std::max(static_cast<Scalar>(std::max(fabs(actual_grad[d]), fabs(expected_grad[d]))), Scalar(1.));
if(fabs(actual_grad[d]-expected_grad[d])>1e-2 * scale)
return false;
}
return true;
}
virtual bool checkHessian(const TVector &x, int accuracy = 3) {
// TODO: check if derived class exists:
// int(typeid(&Rosenbrock<double>::gradient) == typeid(&Problem<double>::gradient)) == 1 --> overwritten
const TIndex D = x.rows();
THessian actual_hessian = THessian::Zero(D, D);
THessian expected_hessian = THessian::Zero(D, D);
hessian(x, actual_hessian);
finiteHessian(x, expected_hessian, accuracy);
for (TIndex d = 0; d < D; ++d) {
for (TIndex e = 0; e < D; ++e) {
Scalar scale = std::max(static_cast<Scalar>(std::max(fabs(actual_hessian(d, e)), fabs(expected_hessian(d, e)))), Scalar(1.));
if(fabs(actual_hessian(d, e)- expected_hessian(d, e))>1e-1 * scale)
return false;
}
}
return true;
}
void finiteGradient(const TVector &x, TVector &grad, int accuracy = 0) {
// accuracy can be 0, 1, 2, 3
const Scalar eps = 2.2204e-6;
static const std::array<std::vector<Scalar>, 4> coeff =
{ { {1, -1}, {1, -8, 8, -1}, {-1, 9, -45, 45, -9, 1}, {3, -32, 168, -672, 672, -168, 32, -3} } };
static const std::array<std::vector<Scalar>, 4> coeff2 =
{ { {1, -1}, {-2, -1, 1, 2}, {-3, -2, -1, 1, 2, 3}, {-4, -3, -2, -1, 1, 2, 3, 4} } };
static const std::array<Scalar, 4> dd = {2, 12, 60, 840};
grad.resize(x.rows());
TVector& xx = const_cast<TVector&>(x);
const int innerSteps = 2*(accuracy+1);
const Scalar ddVal = dd[accuracy]*eps;
for (TIndex d = 0; d < x.rows(); d++) {
grad[d] = 0;
for (int s = 0; s < innerSteps; ++s)
{
Scalar tmp = xx[d];
xx[d] += coeff2[accuracy][s]*eps;
grad[d] += coeff[accuracy][s]*value(xx);
xx[d] = tmp;
}
grad[d] /= ddVal;
}
}
void finiteHessian(const TVector &x, THessian &hessian, int accuracy = 0) {
const Scalar eps = std::numeric_limits<Scalar>::epsilon()*10e7;
hessian.resize(x.rows(), x.rows());
TVector& xx = const_cast<TVector&>(x);
if(accuracy == 0) {
for (TIndex i = 0; i < x.rows(); i++) {
for (TIndex j = 0; j < x.rows(); j++) {
Scalar tmpi = xx[i];
Scalar tmpj = xx[j];
Scalar f4 = value(xx);
xx[i] += eps;
xx[j] += eps;
Scalar f1 = value(xx);
xx[j] -= eps;
Scalar f2 = value(xx);
xx[j] += eps;
xx[i] -= eps;
Scalar f3 = value(xx);
hessian(i, j) = (f1 - f2 - f3 + f4) / (eps * eps);
xx[i] = tmpi;
xx[j] = tmpj;
}
}
} else {
/*
\displaystyle{{\frac{\partial^2{f}}{\partial{x}\partial{y}}}\approx
\frac{1}{600\,h^2} \left[\begin{matrix}
-63(f_{1,-2}+f_{2,-1}+f_{-2,1}+f_{-1,2})+\\
63(f_{-1,-2}+f_{-2,-1}+f_{1,2}+f_{2,1})+\\
44(f_{2,-2}+f_{-2,2}-f_{-2,-2}-f_{2,2})+\\
74(f_{-1,-1}+f_{1,1}-f_{1,-1}-f_{-1,1})
\end{matrix}\right] }
*/
for (TIndex i = 0; i < x.rows(); i++) {
for (TIndex j = 0; j < x.rows(); j++) {
Scalar tmpi = xx[i];
Scalar tmpj = xx[j];
Scalar term_1 = 0;
xx[i] = tmpi; xx[j] = tmpj; xx[i] += 1*eps; xx[j] += -2*eps; term_1 += value(xx);
xx[i] = tmpi; xx[j] = tmpj; xx[i] += 2*eps; xx[j] += -1*eps; term_1 += value(xx);
xx[i] = tmpi; xx[j] = tmpj; xx[i] += -2*eps; xx[j] += 1*eps; term_1 += value(xx);
xx[i] = tmpi; xx[j] = tmpj; xx[i] += -1*eps; xx[j] += 2*eps; term_1 += value(xx);
Scalar term_2 = 0;
xx[i] = tmpi; xx[j] = tmpj; xx[i] += -1*eps; xx[j] += -2*eps; term_2 += value(xx);
xx[i] = tmpi; xx[j] = tmpj; xx[i] += -2*eps; xx[j] += -1*eps; term_2 += value(xx);
xx[i] = tmpi; xx[j] = tmpj; xx[i] += 1*eps; xx[j] += 2*eps; term_2 += value(xx);
xx[i] = tmpi; xx[j] = tmpj; xx[i] += 2*eps; xx[j] += 1*eps; term_2 += value(xx);
Scalar term_3 = 0;
xx[i] = tmpi; xx[j] = tmpj; xx[i] += 2*eps; xx[j] += -2*eps; term_3 += value(xx);
xx[i] = tmpi; xx[j] = tmpj; xx[i] += -2*eps; xx[j] += 2*eps; term_3 += value(xx);
xx[i] = tmpi; xx[j] = tmpj; xx[i] += -2*eps; xx[j] += -2*eps; term_3 -= value(xx);
xx[i] = tmpi; xx[j] = tmpj; xx[i] += 2*eps; xx[j] += 2*eps; term_3 -= value(xx);
Scalar term_4 = 0;
xx[i] = tmpi; xx[j] = tmpj; xx[i] += -1*eps; xx[j] += -1*eps; term_4 += value(xx);
xx[i] = tmpi; xx[j] = tmpj; xx[i] += 1*eps; xx[j] += 1*eps; term_4 += value(xx);
xx[i] = tmpi; xx[j] = tmpj; xx[i] += 1*eps; xx[j] += -1*eps; term_4 -= value(xx);
xx[i] = tmpi; xx[j] = tmpj; xx[i] += -1*eps; xx[j] += 1*eps; term_4 -= value(xx);
xx[i] = tmpi;
xx[j] = tmpj;
hessian(i, j) = (-63 * term_1+63 * term_2+44 * term_3+74 * term_4)/(600.0 * eps * eps);
}
}
}
}
};
}
#endif /* PROBLEM_H */
// CppNumericalSolver
#ifndef ISOLVER_H_
#define ISOLVER_H_
#include <functional>
#include "isolver.h"
#include "../meta.h"
#include "../problem.h"
namespace cppoptlib {
// Abstract base class for all solvers; holds stopping criteria, progress,
// status, and debug settings.
template<typename ProblemType, int Ord>
class ISolver {
public:
using Scalar = typename ProblemType::Scalar;
using TVector = typename ProblemType::TVector;
using THessian = typename ProblemType::THessian;
using TCriteria = typename ProblemType::TCriteria;
protected:
// Solver order (NelderMeadSolver uses 0); presumably the number of
// derivatives the solver requires -- TODO confirm against other solvers.
const int order_ = Ord;
TCriteria m_stop, m_current; // configured thresholds / current progress
Status m_status = Status::NotStarted;
DebugLevel m_debug = DebugLevel::None;
public:
virtual ~ISolver() = default;
// Default construction: Criteria::defaults() thresholds (iteration cap and
// gradient-norm tolerance), zeroed progress.
ISolver() {
m_stop = TCriteria::defaults();
m_current.reset();
}
// Construct with user-supplied stopping criteria.
ISolver(const TCriteria &s) {
m_stop = s;
m_current.reset();
}
void setStopCriteria(const TCriteria &s) { m_stop = s; }
// Progress of the current/last run (not the configured thresholds).
const TCriteria &criteria() { return m_current; }
const Status &status() { return m_status; }
void setDebug(const DebugLevel &d) { m_debug = d; }
/**
 * @brief minimize an objective function (optionally using its gradient/hessian)
 * @details this is just the abstract interface; concrete solvers implement it
 *
 * @param objFunc problem supplying value() and, depending on the solver,
 *                gradient()/hessian() and the stop callbacks
 * @param x0 starting point; overwritten with the best point found
 */
virtual void minimize(ProblemType &objFunc, TVector &x0) = 0;
};
} /* namespace cppoptlib */
#endif /* ISOLVER_H_ */
// CppNumericalSolver
#ifndef NELDERMEADSOLVER_H_
#define NELDERMEADSOLVER_H_
#include <cmath>
#include <Eigen/Core>
#include "isolver.h"
#include "../meta.h"
namespace cppoptlib {
template<typename ProblemType>
class NelderMeadSolver : public ISolver<ProblemType, 0> {
public:
using Superclass = ISolver<ProblemType, 0>;
using typename Superclass::Scalar;
using typename Superclass::TVector;
using MatrixType = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
MatrixType x0; //!< Simplex vertices, one column per vertex (DIM x DIM+1)
SimplexOp lastOp = SimplexOp::Place; //!< Last simplex operation applied by minimize()
Status stop_condition; //!< Why the last minimize() run stopped
bool initialSimplexCreated = false; //!< Never set internally; presumably set by a caller that pre-fills x0 -- TODO confirm
/**
 * @brief Build the starting simplex around a point.
 *
 * Column 0 is the start point itself; column c (c >= 1) is the start point
 * with coordinate c-1 perturbed by 5% (or set to 0.00025 when that coordinate
 * is zero), giving DIM+1 vertices.
 */
MatrixType makeInitialSimplex(TVector &x) {
  const size_t DIM = x.rows();
  MatrixType simplex = MatrixType::Zero(DIM, DIM + 1);
  for (int c = 0; c < int(DIM) + 1; ++c) {
    simplex.col(c) = x;
    if (c > 0) {
      const int r = c - 1;
      simplex(r, c) = (x(r) == 0) ? 0.00025 : (1 + 0.05) * x(r);
    }
  }
  return simplex;
}
/**
 * @brief Minimize objFunc with the Nelder-Mead downhill-simplex method.
 * @details Derivative-free: only objFunc's value() and the two callbacks are
 * used. The simplex lives in the member x0 (one column per vertex); the
 * index[] array holds vertex ids sorted by objective value, so index[0] is
 * the best vertex and index[DIM] the worst.
 *
 * @param objFunc objective to minimize
 * @param x starting point on entry; best vertex found on exit
 */
void minimize(ProblemType &objFunc, TVector &x) {
// Standard Nelder-Mead coefficients: reflection, expansion, contraction.
const Scalar rho = 1.; // rho > 0
const Scalar xi = 2.; // xi > max(rho, 1)
const Scalar gam = 0.5; // 0 < gam < 1
const size_t DIM = x.rows();
// create initial simplex unless the caller supplied one in x0
if (not initialSimplexCreated) {
x0 = makeInitialSimplex(x);
}
// compute function values at all DIM+1 vertices and sort vertex ids by value
std::vector<Scalar> f; f.resize(DIM + 1);
std::vector<int> index; index.resize(DIM + 1);
for (int i = 0; i < int(DIM) + 1; ++i) {
f[i] = objFunc(static_cast<TVector >(x0.col(i)));
index[i] = i;
}
sort(index.begin(), index.end(), [&](int a, int b)-> bool { return f[a] < f[b]; });
int iter = 0;
// iteration budget scales with the problem dimension
const int maxIter = this->m_stop.iterations * DIM;
while (
objFunc.callback(this->m_current, x0.col(index[0])) and
(iter < maxIter)
) {
// conv-check: largest spread of f-values (max1) and of coordinates (max2)
// relative to the best vertex
Scalar max1 = fabs(f[index[1]] - f[index[0]]);
Scalar max2 = (x0.col(index[1]) - x0.col(index[0]) ).array().abs().maxCoeff();
for (int i = 2; i < int(DIM) + 1; ++i) {
Scalar tmp1 = fabs(f[index[i]] - f[index[0]]);
if (tmp1 > max1)
max1 = tmp1;
Scalar tmp2 = (x0.col(index[i]) - x0.col(index[0]) ).array().abs().maxCoeff();
if (tmp2 > max2)
max2 = tmp2;
}
// tolerances derived from the floating-point spacing around the best value/point
const Scalar tt1 = std::max<Scalar>(Scalar(1.e-04), 10 * std::nextafter(f[index[0]], std::numeric_limits<Scalar>::epsilon()) - f[index[0]]);
const Scalar tt2 = std::max<Scalar>(Scalar(1.e-04), 10 * (std::nextafter(x0.col(index[0]).maxCoeff(), std::numeric_limits<Scalar>::epsilon())
- x0.col(index[0]).maxCoeff()));
// User-defined stopping criteria
this->m_current.iterations = iter;
this->m_current.fDelta = max1;
this->m_current.xDelta = max2;
stop_condition = checkConvergence(this->m_stop, this->m_current);
if (this->m_stop.iterations != 0 and stop_condition != Status::Continue) {
break;
}
// Allow stopping in the callback. This callback gets the correct current
// state unlike the simple one in while(), which gets the previous state.
if (objFunc.detailed_callback(this->m_current, lastOp, index[0], x0, f) == false) {
stop_condition = Status::UserDefined;
break;
}
// max(||x - shift(x) ||_inf ) <= tol,
if (max1 <= tt1) {
// values too similar
if (max2 <= tt2) {
stop_condition = Status::FDeltaTolerance;
break;
}
}
//////////////////////////
// midpoint of the simplex opposite the worst point
TVector x_bar = TVector::Zero(DIM);
for (int i = 0; i < int(DIM); ++i) {
x_bar += x0.col(index[i]);
}
x_bar /= Scalar(DIM);
// Compute the reflection point
const TVector x_r = ( 1. + rho ) * x_bar - rho * x0.col(index[DIM]);
const Scalar f_r = objFunc(x_r);
lastOp = SimplexOp::Reflect;
if (f_r < f[index[0]]) {
// reflected point beats the current best: try going further
// the expansion point
const TVector x_e = ( 1. + rho * xi ) * x_bar - rho * xi * x0.col(index[DIM]);
const Scalar f_e = objFunc(x_e);
if ( f_e < f_r ) {
// expand
lastOp = SimplexOp::Expand;
x0.col(index[DIM]) = x_e;
f[index[DIM]] = f_e;
} else {
// reflect
lastOp = SimplexOp::Reflect;
x0.col(index[DIM]) = x_r;
f[index[DIM]] = f_r;
}
} else {
if ( f_r < f[index[DIM - 1]] ) {
// reflected point beats the second-worst vertex: accept it
x0.col(index[DIM]) = x_r;
f[index[DIM]] = f_r;
} else {
// contraction
if (f_r < f[index[DIM]]) {
// outside contraction: between the centroid and the reflected point
const TVector x_c = (1 + rho * gam) * x_bar - rho * gam * x0.col(index[DIM]);
const Scalar f_c = objFunc(x_c);
if ( f_c <= f_r ) {
// outside
x0.col(index[DIM]) = x_c;
f[index[DIM]] = f_c;
lastOp = SimplexOp::ContractOut;
} else {
shrink(x0, index, f, objFunc);
lastOp = SimplexOp::Shrink;
}
} else {
// inside contraction: between the centroid and the worst vertex
const TVector x_c = ( 1 - gam ) * x_bar + gam * x0.col(index[DIM]);
const Scalar f_c = objFunc(x_c);
if (f_c < f[index[DIM]]) {
x0.col(index[DIM]) = x_c;
f[index[DIM]] = f_c;
lastOp = SimplexOp::ContractIn;
} else {
shrink(x0, index, f, objFunc);
lastOp = SimplexOp::Shrink;
}
}
}
}
// re-sort vertex ids by objective value for the next iteration
sort(index.begin(), index.end(), [&](int a, int b)-> bool { return f[a] < f[b]; });
iter++;
if (iter >= maxIter) {
stop_condition = Status::IterationLimit;
}
else {
stop_condition = Status::UserDefined; // if stopped in the callback in while()
}
} // while loop
// report the last result
objFunc.detailed_callback(this->m_current, lastOp, index[0], x0, f);
x = x0.col(index[0]);
}
/**
 * @brief Shrink step: pull every vertex halfway toward the best vertex.
 *
 * The best vertex (index[0]) is re-evaluated, then each remaining vertex is
 * replaced by its midpoint with the best vertex and re-evaluated.
 */
void shrink(MatrixType &x, std::vector<int> &index, std::vector<Scalar> &f, ProblemType &objFunc) {
  const Scalar sigma = 0.5; // shrink factor, 0 < sigma < 1
  const int numVertices = x.rows();
  f[index[0]] = objFunc(x.col(index[0]));
  for (int v = 1; v <= numVertices; ++v) {
    x.col(index[v]) = sigma * x.col(index[v]) + (1. - sigma) * x.col(index[0]);
    f[index[v]] = objFunc(x.col(index[v]));
  }
}
// Need our own checker here to get rid of the gradient test used in other solvers
template<typename T