Skip to content

Commit

Permalink
start adding box optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
chriselrod committed Sep 10, 2023
1 parent aab7313 commit 287fe25
Show file tree
Hide file tree
Showing 4 changed files with 385 additions and 19 deletions.
283 changes: 283 additions & 0 deletions include/Math/BoxOpt.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,283 @@
#pragma once
#include "Math/Array.hpp"
#include "Math/Constructors.hpp"
#include "Math/Dual.hpp"
#include "Math/Exp.hpp"
#include "Math/LinearAlgebra.hpp"
#include "Math/Math.hpp"
#include "Math/Vector.hpp"
#include "Utilities/Allocators.hpp"
#include <cstddef>
#include <cstdint>

namespace poly::math {

constexpr double EXTREME = 8.0;

class BoxTransformView {
public:
constexpr BoxTransformView(double *f64, int32_t *i32, unsigned ntotal,
unsigned nunfixed)
: f64{f64}, i32{i32}, Ntotal{ntotal}, Nunfixed{nunfixed} {}
[[nodiscard]] constexpr auto size() const -> unsigned { return Ntotal; }
[[nodiscard]] constexpr auto numUnfixed() const -> unsigned {
return Nunfixed;
}
constexpr auto operator()(const AbstractVector auto &x, ptrdiff_t i) const
-> utils::eltype_t<decltype(x)> {
invariant(i < Ntotal);
int j = getInds()[i];
double off = offs()[i];
if (j < 0) return off;
return scales()[i] * x[j] + off;
}
[[nodiscard]] constexpr auto view() const -> BoxTransformView {
return *this;
}

protected:
/// Ntotal offsets
/// Ntotal scales
double *f64;
/// Ntotal indices
/// Ntotal lower bounds
/// Ntotal upper bounds
int32_t *i32;
unsigned Ntotal;
unsigned Nunfixed;

[[nodiscard]] constexpr auto getInds() const -> PtrVector<int32_t> {
return {i32, Ntotal};
}
[[nodiscard]] constexpr auto getLowerBounds() const -> PtrVector<int32_t> {
return {i32 + Ntotal, Ntotal};
}
[[nodiscard]] constexpr auto getUpperBounds() const -> PtrVector<int32_t> {
return {i32 + ptrdiff_t(2) * Ntotal, Ntotal};
}
[[nodiscard]] constexpr auto offs() const -> PtrVector<double> {
return {f64, Ntotal};
}
[[nodiscard]] constexpr auto scales() const -> PtrVector<double> {
return {f64 + Ntotal, Ntotal};
}
[[nodiscard]] constexpr auto getInds() -> MutPtrVector<int32_t> {
return {i32, Ntotal};
}
[[nodiscard]] constexpr auto getLowerBounds() -> MutPtrVector<int32_t> {
return {i32 + Ntotal, Ntotal};
}
[[nodiscard]] constexpr auto getUpperBounds() -> MutPtrVector<int32_t> {
return {i32 + ptrdiff_t(2) * Ntotal, Ntotal};
}
[[nodiscard]] constexpr auto offs() -> MutPtrVector<double> {
return {f64, Ntotal};
}
[[nodiscard]] constexpr auto scales() -> MutPtrVector<double> {
return {f64 + Ntotal, Ntotal};
}
static constexpr auto scaleOff(int32_t lb, int32_t ub)
-> std::pair<double, double> {
#if __cplusplus >= 202202L
// constexpr std::fma requires c++23
constexpr double slb = sigmoid(-EXTREME);
constexpr double sub = sigmoid(EXTREME);
constexpr double scale = 1.0 / (sub - slb);
#else
double slb = sigmoid(-EXTREME);
double sub = sigmoid(EXTREME);
double scale = 1.0 / (sub - slb);
#endif
double s = scale * (ub - lb), o = lb - slb * s;
return {s, o};
}
};
class BoxTransform : public BoxTransformView {

public:
constexpr BoxTransform(unsigned ntotal, int32_t lb, int32_t ub)
: BoxTransformView{new double[long(2) * ntotal],
new int32_t[long(3) * ntotal], ntotal, ntotal} {
invariant(lb < ub);
auto [s, o] = scaleOff(lb, ub);
for (int32_t i = 0; i < int32_t(ntotal); ++i) {
getInds()[i] = i;
getLowerBounds()[i] = lb;
getUpperBounds()[i] = ub;
scales()[i] = s;
offs()[i] = o;
}
}
constexpr void increaseLowerBound(ptrdiff_t idx, int32_t lb) {
invariant(idx < Ntotal);
invariant(lb > getLowerBounds()[idx]);
int32_t ub = getUpperBounds()[idx];
invariant(lb <= ub);
getLowerBounds()[idx] = lb;
double newScale, newOff;
if (lb == ub) {
--Nunfixed;
getInds()[idx] = -1;
// we remove a fixed, so we must now decrement all following inds
for (ptrdiff_t i = idx; ++i < Ntotal;) --getInds()[i];
newScale = 0.0;
newOff = lb;
} else std::tie(newScale, newOff) = scaleOff(lb, ub);
scales()[idx] = newScale;
offs()[idx] = newOff;
}
constexpr void decreaseUpperBound(ptrdiff_t idx, int32_t ub) {
invariant(idx < Ntotal);
invariant(ub < getUpperBounds()[idx]);
int32_t lb = getLowerBounds()[idx];
invariant(lb <= ub);
getUpperBounds()[idx] = ub;
double newScale, newOff;
if (lb == ub) {
--Nunfixed;
getInds()[idx] = -1;
// we remove a fixed, so we must now decrement all following inds
for (ptrdiff_t i = idx; ++i < Ntotal;) --getInds()[i];
newScale = 0.0;
newOff = lb;
} else std::tie(newScale, newOff) = scaleOff(lb, ub);
scales()[idx] = newScale;
offs()[idx] = newOff;
}
constexpr ~BoxTransform() {
delete[] f64;
delete[] i32;
}
constexpr BoxTransform(BoxTransform &&other) noexcept
: BoxTransformView{other.f64, other.i32, other.Ntotal, other.Nunfixed} {
other.f64 = nullptr;
other.i32 = nullptr;
}
constexpr BoxTransform(const BoxTransform &other)
: BoxTransformView{new double[long(2) * other.Ntotal],
new int32_t[long(3) * other.Ntotal], other.Ntotal,
other.Ntotal} {
std::copy_n(other.f64, 2 * Ntotal, f64);
std::copy_n(other.i32, 3 * Ntotal, i32);
}
};

template <AbstractVector V> struct BoxTransformVector {
using value_type = utils::eltype_t<V>;
static_assert(Trivial<V>);
V v;
BoxTransformView btv;

[[nodiscard]] constexpr auto size() const -> ptrdiff_t { return btv.size(); }
constexpr auto operator[](ptrdiff_t i) const -> value_type {
return btv(v, i);
}
[[nodiscard]] constexpr auto view() const -> BoxTransformVector {
return *this;
}
};

static_assert(AbstractVector<BoxTransformVector<PtrVector<double>>>);

template <typename F> struct BoxCall {
[[no_unique_address]] F f;
BoxTransformView transform;
constexpr auto operator()(const AbstractVector auto &x) const
-> utils::eltype_t<decltype(x)> {
invariant(x.size() == transform.numUnfixed());
return f(BoxTransformVector{x.view(), transform}, transform);
}
};
template <typename F> consteval auto isboxcall(BoxCall<F>) -> bool {
return true;
}
consteval auto isboxcall(auto) -> bool { return false; }

constexpr auto minimize(utils::Arena<> *alloc, MutPtrVector<double> x,
const auto &f) -> double {
constexpr bool constrained = isboxcall(f);
constexpr double tol = 1e-8;
constexpr double tol2 = tol * tol;
constexpr double c = 0.5;
constexpr double tau = 0.5;
double alpha = 1.0, fx;
ptrdiff_t L = x.size();
auto scope = alloc->scope();
auto *data = alloc->allocate<double>(3 * L);
MutPtrVector<double> xnew{data, L}, xtmp{data + L, L}, dir{data + 2 * L, L},
xcur{x};
// MutPtrVector<double> dir{vector<double>(alloc, L)};
// MutPtrVector<double> xnew{vector<double>(alloc, L)}, xold{x};
HessianResultCore hr{alloc, unsigned(L)};
for (ptrdiff_t n = 0; n < 1000; ++n) {
fx = hessian(hr, xcur, f);
if (hr.gradient().norm2() < tol2) break;
LDL::ldiv<true>(hr.hessian(), dir, hr.gradient());
if (dir.norm2() < tol2) break;
double t = 0.0;
for (ptrdiff_t i = 0; i < L; ++i) {
t += hr.gradient()[i] * dir[i];
double xi = xcur[i] - alpha * dir[i];
if constexpr (constrained) xi = std::clamp(xi, -EXTREME, EXTREME);
xnew[i] = xi;
}
t *= c;
double fxnew = f(xnew);
bool cond = (fx - fxnew) >= alpha * t;
bool dobreak = false;
if (cond) {
// success; we try to grow alpha, if we're not already optimal
if (((fx - fxnew) <= tol) || (norm2(xcur - xnew) <= tol2)) {
std::swap(xcur, xnew);
break;
}
for (;;) {
double alphanew = alpha * (1 / tau);
for (ptrdiff_t i = 0; i < L; ++i) {

double xi = xcur[i] - alphanew * dir[i];
if constexpr (constrained) xi = std::clamp(xi, -EXTREME, EXTREME);
xtmp[i] = xi;
}
double fxtmp = f(xtmp);
if ((fx - fxtmp) < alphanew * t) break;
std::swap(xtmp, xnew); // we keep xtmp as new best
alpha = alphanew;
fxnew = fxtmp;
if ((fx - fxtmp) <= tol) {
dobreak = true;
break;
}
}
} else {
// failure, we shrink alpha
for (;;) {
alpha *= tau;
for (ptrdiff_t i = 0; i < L; ++i) {

double xi = xcur[i] - alpha * dir[i];
if constexpr (constrained) xi = std::clamp(xi, -EXTREME, EXTREME);
xnew[i] = xi;
}
fxnew = f(xnew);
if ((fx - fxnew) < alpha * t) continue;
if (((fx - fxnew) <= tol) || (norm2(xcur - xnew) <= tol2)) {
dobreak = true;
break;
}
}
}
fx = fxnew;
std::swap(xcur, xnew);
if (dobreak) break;
}
if (x.data() != xcur.data()) x << xcur;
return fx;
}

constexpr auto minimize(PtrVector<double> x, BoxTransformView trf,
const auto &f) -> MutPtrVector<double> {
return minimize(x, BoxCall<decltype(f)>{f, trf});
}

} // namespace poly::math
64 changes: 50 additions & 14 deletions include/Math/Dual.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,19 +172,35 @@ class GradientResult {
return grad;
}
};
class HessianResult {
double x;
class HessianResultCore {
double *ptr;
unsigned dim;

public:
[[nodiscard]] constexpr auto value() const -> double { return x; }
[[nodiscard]] constexpr auto gradient() const -> MutPtrVector<double> {
return {ptr, dim};
}
[[nodiscard]] constexpr auto hessian() const -> MutSquarePtrMatrix<double> {
return {ptr + dim, dim};
}
constexpr HessianResultCore(utils::Arena<> *alloc, unsigned d)
: ptr{alloc->allocate<double>(size_t(d) * (d + 1))}, dim{d} {}
};
class HessianResult : public HessianResultCore {
double x{};

public:
[[nodiscard]] constexpr auto value() -> double & { return x; }
[[nodiscard]] constexpr auto value() const -> double { return x; }

constexpr HessianResult(utils::Arena<> *alloc, unsigned d)
: HessianResultCore{alloc, d} {}

template <size_t I> constexpr auto get() const {
if constexpr (I == 0) return x;
else if constexpr (I == 1) return gradient();
else return hessian();
}
};

template <ptrdiff_t N, AbstractVector T> struct DualVector {
Expand Down Expand Up @@ -240,18 +256,18 @@ constexpr auto extractDualValRecurse(const Dual<T, N> &x) {
return extractDualValRecurse(x.value());
}

template <MatrixDimension S>
constexpr auto hessian(MutPtrVector<double> grad, MutArray<double, S> hess,
PtrVector<double> x, const auto &f, auto update)
-> double {
/// fills the lower triangle of the hessian
constexpr auto hessian(HessianResultCore hr, PtrVector<double> x, const auto &f,
auto update) -> double {
constexpr ptrdiff_t Ui = 8;
constexpr ptrdiff_t Uj = 2;
using D = Dual<double, Ui>;
using DD = Dual<D, Uj>;
ptrdiff_t N = x.size();
MutPtrVector<double> grad = hr.gradient();
MutSquarePtrMatrix<double> hess = hr.hessian();
invariant(N == grad.size());
invariant(N == hess.numCol());
invariant(N == hess.numRow());
for (ptrdiff_t j = 0;; j += Uj) {
bool jbr = j + Uj >= N;
for (ptrdiff_t i = 0;; i += Ui) {
Expand All @@ -273,14 +289,34 @@ constexpr auto hessian(MutPtrVector<double> grad, MutArray<double, S> hess,
}
}
}
constexpr auto hessian(utils::Arena<> *arena, PtrVector<double> x,
const auto &f) {
ptrdiff_t N = x.size();
MutPtrVector<double> grad = vector<double>(arena, N);
MutSquarePtrMatrix<double> hess = matrix<double>(arena, N);
constexpr auto hessian(HessianResultCore hr, PtrVector<double> x, const auto &f)
-> double {
Assign assign{};
return std::make_tuple(hessian(grad, hess, x, f, assign), grad, hess);
return hessian(hr, x, f, assign);
}

constexpr auto hessian(utils::Arena<> *arena, PtrVector<double> x,
const auto &f) -> HessianResult {
unsigned N = x.size();
HessianResult hr{arena, N};
hr.value() = hessian(hr, x, f);
return hr;
}
static_assert(MatrixDimension<SquareDims>);

} // namespace poly::math
namespace std {
template <> struct tuple_size<poly::math::HessianResult> {
static constexpr size_t value = 3;
};
template <> struct tuple_element<size_t(0), poly::math::HessianResult> {
using type = double;
};
template <> struct tuple_element<size_t(1), poly::math::HessianResult> {
using type = poly::math::MutPtrVector<double>;
};
template <> struct tuple_element<size_t(2), poly::math::HessianResult> {
using type = poly::math::MutSquarePtrMatrix<double>;
};

} // namespace std
Loading

0 comments on commit 287fe25

Please sign in to comment.