diff --git a/include/Math/BoxOpt.hpp b/include/Math/BoxOpt.hpp
new file mode 100644
index 0000000..7934c75
--- /dev/null
+++ b/include/Math/BoxOpt.hpp
@@ -0,0 +1,283 @@
+#pragma once
+#include "Math/Array.hpp"
+#include "Math/Constructors.hpp"
+#include "Math/Dual.hpp"
+#include "Math/Exp.hpp"
+#include "Math/LinearAlgebra.hpp"
+#include "Math/Math.hpp"
+#include "Math/Vector.hpp"
+#include "Utilities/Allocators.hpp"
+#include <cstddef>
+#include <cstdint>
+
+namespace poly::math {
+
+constexpr double EXTREME = 8.0;
+
+class BoxTransformView {
+public:
+  constexpr BoxTransformView(double *f64, int32_t *i32, unsigned ntotal,
+                             unsigned nunfixed)
+    : f64{f64}, i32{i32}, Ntotal{ntotal}, Nunfixed{nunfixed} {}
+  [[nodiscard]] constexpr auto size() const -> unsigned { return Ntotal; }
+  [[nodiscard]] constexpr auto numUnfixed() const -> unsigned {
+    return Nunfixed;
+  }
+  constexpr auto operator()(const AbstractVector auto &x, ptrdiff_t i) const
+    -> utils::eltype_t<decltype(x)> {
+    invariant(i < Ntotal);
+    int j = getInds()[i];
+    double off = offs()[i];
+    if (j < 0) return off;
+    return scales()[i] * x[j] + off;
+  }
+  [[nodiscard]] constexpr auto view() const -> BoxTransformView {
+    return *this;
+  }
+
+protected:
+  /// Ntotal offsets
+  /// Ntotal scales
+  double *f64;
+  /// Ntotal indices
+  /// Ntotal lower bounds
+  /// Ntotal upper bounds
+  int32_t *i32;
+  unsigned Ntotal;
+  unsigned Nunfixed;
+
+  [[nodiscard]] constexpr auto getInds() const -> PtrVector<int32_t> {
+    return {i32, Ntotal};
+  }
+  [[nodiscard]] constexpr auto getLowerBounds() const -> PtrVector<int32_t> {
+    return {i32 + Ntotal, Ntotal};
+  }
+  [[nodiscard]] constexpr auto getUpperBounds() const -> PtrVector<int32_t> {
+    return {i32 + ptrdiff_t(2) * Ntotal, Ntotal};
+  }
+  [[nodiscard]] constexpr auto offs() const -> PtrVector<double> {
+    return {f64, Ntotal};
+  }
+  [[nodiscard]] constexpr auto scales() const -> PtrVector<double> {
+    return {f64 + Ntotal, Ntotal};
+  }
+  [[nodiscard]] constexpr auto getInds() -> MutPtrVector<int32_t> {
+    return {i32, Ntotal};
+  }
+  [[nodiscard]] constexpr auto getLowerBounds() -> MutPtrVector<int32_t> {
+    return {i32 + Ntotal, Ntotal};
+  }
+  [[nodiscard]] constexpr auto getUpperBounds() -> MutPtrVector<int32_t> {
+    return {i32 + ptrdiff_t(2) * Ntotal, Ntotal};
+  }
+  [[nodiscard]] constexpr auto offs() -> MutPtrVector<double> {
+    return {f64, Ntotal};
+  }
+  [[nodiscard]] constexpr auto scales() -> MutPtrVector<double> {
+    return {f64 + Ntotal, Ntotal};
+  }
+  static constexpr auto scaleOff(int32_t lb, int32_t ub)
+    -> std::pair<double, double> {
+#if __cplusplus >= 202202L
+    // constexpr std::fma requires c++23
+    constexpr double slb = sigmoid(-EXTREME);
+    constexpr double sub = sigmoid(EXTREME);
+    constexpr double scale = 1.0 / (sub - slb);
+#else
+    double slb = sigmoid(-EXTREME);
+    double sub = sigmoid(EXTREME);
+    double scale = 1.0 / (sub - slb);
+#endif
+    double s = scale * (ub - lb), o = lb - slb * s;
+    return {s, o};
+  }
+};
+class BoxTransform : public BoxTransformView {
+
+public:
+  constexpr BoxTransform(unsigned ntotal, int32_t lb, int32_t ub)
+    : BoxTransformView{new double[long(2) * ntotal],
+                       new int32_t[long(3) * ntotal], ntotal, ntotal} {
+    invariant(lb < ub);
+    auto [s, o] = scaleOff(lb, ub);
+    for (int32_t i = 0; i < int32_t(ntotal); ++i) {
+      getInds()[i] = i;
+      getLowerBounds()[i] = lb;
+      getUpperBounds()[i] = ub;
+      scales()[i] = s;
+      offs()[i] = o;
+    }
+  }
+  constexpr void increaseLowerBound(ptrdiff_t idx, int32_t lb) {
+    invariant(idx < Ntotal);
+    invariant(lb > getLowerBounds()[idx]);
+    int32_t ub = getUpperBounds()[idx];
+    invariant(lb <= ub);
+    getLowerBounds()[idx] = lb;
+    double newScale, newOff;
+    if (lb == ub) {
+      --Nunfixed;
+      getInds()[idx] = -1;
+      // we remove a fixed, so we must now decrement all following inds
+      for (ptrdiff_t i = idx; ++i < Ntotal;) --getInds()[i];
+      newScale = 0.0;
+      newOff = lb;
+    } else std::tie(newScale, newOff) = scaleOff(lb, ub);
+    scales()[idx] = newScale;
+    offs()[idx] = newOff;
+  }
+  constexpr void decreaseUpperBound(ptrdiff_t idx, int32_t ub) {
+    invariant(idx < Ntotal);
+    invariant(ub < getUpperBounds()[idx]);
+    int32_t lb = getLowerBounds()[idx];
+    invariant(lb <= ub);
+    getUpperBounds()[idx] = ub;
+    double newScale, newOff;
+    if (lb == ub) {
+      --Nunfixed;
+      getInds()[idx] = -1;
+      // we remove a fixed, so we must now decrement all following inds
+      for (ptrdiff_t i = idx; ++i < Ntotal;) --getInds()[i];
+      newScale = 0.0;
+      newOff = lb;
+    } else std::tie(newScale, newOff) = scaleOff(lb, ub);
+    scales()[idx] = newScale;
+    offs()[idx] = newOff;
+  }
+  constexpr ~BoxTransform() {
+    delete[] f64;
+    delete[] i32;
+  }
+  constexpr BoxTransform(BoxTransform &&other) noexcept
+    : BoxTransformView{other.f64, other.i32, other.Ntotal, other.Nunfixed} {
+    other.f64 = nullptr;
+    other.i32 = nullptr;
+  }
+  constexpr BoxTransform(const BoxTransform &other)
+    : BoxTransformView{new double[long(2) * other.Ntotal],
+                       new int32_t[long(3) * other.Ntotal], other.Ntotal,
+                       other.Ntotal} {
+    std::copy_n(other.f64, 2 * Ntotal, f64);
+    std::copy_n(other.i32, 3 * Ntotal, i32);
+  }
+};
+
+template <AbstractVector V> struct BoxTransformVector {
+  using value_type = utils::eltype_t<V>;
+  static_assert(Trivial<V>);
+  V v;
+  BoxTransformView btv;
+
+  [[nodiscard]] constexpr auto size() const -> ptrdiff_t { return btv.size(); }
+  constexpr auto operator[](ptrdiff_t i) const -> value_type {
+    return btv(v, i);
+  }
+  [[nodiscard]] constexpr auto view() const -> BoxTransformVector {
+    return *this;
+  }
+};
+
+static_assert(AbstractVector<BoxTransformVector<PtrVector<double>>>);
+
+template <typename F> struct BoxCall {
+  [[no_unique_address]] F f;
+  BoxTransformView transform;
+  constexpr auto operator()(const AbstractVector auto &x) const
+    -> utils::eltype_t<decltype(x)> {
+    invariant(x.size() == transform.numUnfixed());
+    return f(BoxTransformVector{x.view(), transform}, transform);
+  }
+};
+template <typename F> consteval auto isboxcall(BoxCall<F>) -> bool {
+  return true;
+}
+consteval auto isboxcall(auto) -> bool { return false; }
+
+constexpr auto minimize(utils::Arena<> *alloc, MutPtrVector<double> x,
+                        const auto &f) -> double {
+  constexpr bool constrained = isboxcall(f);
+  constexpr double tol = 1e-8;
+  constexpr double tol2 = tol * tol;
+  constexpr double c = 0.5;
+  constexpr double tau = 0.5;
+  double alpha = 1.0, fx;
+  ptrdiff_t L = x.size();
+  auto scope = alloc->scope();
+  auto *data = alloc->allocate<double>(3 * L);
+  MutPtrVector<double> xnew{data, L}, xtmp{data + L, L}, dir{data + 2 * L, L},
+    xcur{x};
+  // MutPtrVector<double> dir{vector<double>(alloc, L)};
+  // MutPtrVector<double> xnew{vector<double>(alloc, L)}, xold{x};
+  HessianResultCore hr{alloc, unsigned(L)};
+  for (ptrdiff_t n = 0; n < 1000; ++n) {
+    fx = hessian(hr, xcur, f);
+    if (hr.gradient().norm2() < tol2) break;
+    LDL::ldiv<true>(hr.hessian(), dir, hr.gradient());
+    if (dir.norm2() < tol2) break;
+    double t = 0.0;
+    for (ptrdiff_t i = 0; i < L; ++i) {
+      t += hr.gradient()[i] * dir[i];
+      double xi = xcur[i] - alpha * dir[i];
+      if constexpr (constrained) xi = std::clamp(xi, -EXTREME, EXTREME);
+      xnew[i] = xi;
+    }
+    t *= c;
+    double fxnew = f(xnew);
+    bool cond = (fx - fxnew) >= alpha * t;
+    bool dobreak = false;
+    if (cond) {
+      // success; we try to grow alpha, if we're not already optimal
+      if (((fx - fxnew) <= tol) || (norm2(xcur - xnew) <= tol2)) {
+        std::swap(xcur, xnew);
+        break;
+      }
+      for (;;) {
+        double alphanew = alpha * (1 / tau);
+        for (ptrdiff_t i = 0; i < L; ++i) {
+
+          double xi = xcur[i] - alphanew * dir[i];
+          if constexpr (constrained) xi = std::clamp(xi, -EXTREME, EXTREME);
+          xtmp[i] = xi;
+        }
+        double fxtmp = f(xtmp);
+        if ((fx - fxtmp) < alphanew * t) break;
+        std::swap(xtmp, xnew); // we keep xtmp as new best
+        alpha = alphanew;
+        fxnew = fxtmp;
+        if ((fx - fxtmp) <= tol) {
+          dobreak = true;
+          break;
+        }
+      }
+    } else {
+      // failure, we shrink alpha
+      for (;;) {
+        alpha *= tau;
+        for (ptrdiff_t i = 0; i < L; ++i) {
+
+          double xi = xcur[i] - alpha * dir[i];
+          if constexpr (constrained) xi = std::clamp(xi, -EXTREME, EXTREME);
+          xnew[i] = xi;
+        }
+        fxnew = f(xnew);
+        if ((fx - fxnew) < alpha * t) continue;
+        if (((fx - fxnew) <= tol) || (norm2(xcur - xnew) <= tol2)) {
+          dobreak = true;
+          break;
+        }
+      }
+    }
+    fx = fxnew;
+    std::swap(xcur, xnew);
+    if (dobreak) break;
+  }
+  if (x.data() != xcur.data()) x << xcur;
+  return fx;
+}
+
+constexpr auto minimize(PtrVector<double> x, BoxTransformView trf,
+                        const auto &f) -> MutPtrVector<double> {
+  return minimize(x, BoxCall<decltype(f)>{f, trf});
+}
+
+} // namespace poly::math
diff --git a/include/Math/Dual.hpp b/include/Math/Dual.hpp
index 75584a9..93fef04 100644
--- a/include/Math/Dual.hpp
+++ b/include/Math/Dual.hpp
@@ -172,19 +172,35 @@ class GradientResult {
     return grad;
   }
 };
-class HessianResult {
-  double x;
+class HessianResultCore {
   double *ptr;
   unsigned dim;
 
 public:
-  [[nodiscard]] constexpr auto value() const -> double { return x; }
   [[nodiscard]] constexpr auto gradient() const -> MutPtrVector<double> {
     return {ptr, dim};
   }
   [[nodiscard]] constexpr auto hessian() const -> MutSquarePtrMatrix<double> {
     return {ptr + dim, dim};
   }
+  constexpr HessianResultCore(utils::Arena<> *alloc, unsigned d)
+    : ptr{alloc->allocate<double>(size_t(d) * (d + 1))}, dim{d} {}
+};
+class HessianResult : public HessianResultCore {
+  double x{};
+
+public:
+  [[nodiscard]] constexpr auto value() -> double & { return x; }
+  [[nodiscard]] constexpr auto value() const -> double { return x; }
+
+  constexpr HessianResult(utils::Arena<> *alloc, unsigned d)
+    : HessianResultCore{alloc, d} {}
+
+  template <size_t I> constexpr auto get() const {
+    if constexpr (I == 0) return x;
+    else if constexpr (I == 1) return gradient();
+    else return hessian();
+  }
 };
 
 template <ptrdiff_t N, AbstractVector T> struct DualVector {
@@ -240,18 +256,18 @@ constexpr auto extractDualValRecurse(const Dual<T, N> &x) {
   return extractDualValRecurse(x.value());
 }
 
-template <MatrixDimension S>
-constexpr auto hessian(MutPtrVector<double> grad, MutArray<double, S> hess,
-                       PtrVector<double> x, const auto &f, auto update)
-  -> double {
+/// fills the lower triangle of the hessian
+constexpr auto hessian(HessianResultCore hr, PtrVector<double> x, const auto &f,
+                       auto update) -> double {
   constexpr ptrdiff_t Ui = 8;
   constexpr ptrdiff_t Uj = 2;
   using D = Dual<double, Ui>;
   using DD = Dual<D, Uj>;
   ptrdiff_t N = x.size();
+  MutPtrVector<double> grad = hr.gradient();
+  MutSquarePtrMatrix<double> hess = hr.hessian();
   invariant(N == grad.size());
   invariant(N == hess.numCol());
-  invariant(N == hess.numRow());
   for (ptrdiff_t j = 0;; j += Uj) {
     bool jbr = j + Uj >= N;
     for (ptrdiff_t i = 0;; i += Ui) {
@@ -273,14 +289,34 @@ constexpr auto hessian(MutPtrVector<double> grad, MutArray<double, S> hess,
     }
   }
 }
-constexpr auto hessian(utils::Arena<> *arena, PtrVector<double> x,
-                       const auto &f) {
-  ptrdiff_t N = x.size();
-  MutPtrVector<double> grad = vector<double>(arena, N);
-  MutSquarePtrMatrix<double> hess = matrix<double>(arena, N);
+constexpr auto hessian(HessianResultCore hr, PtrVector<double> x, const auto &f)
+  -> double {
   Assign assign{};
-  return std::make_tuple(hessian(grad, hess, x, f, assign), grad, hess);
+  return hessian(hr, x, f, assign);
+}
+
+constexpr auto hessian(utils::Arena<> *arena, PtrVector<double> x,
+                       const auto &f) -> HessianResult {
+  unsigned N = x.size();
+  HessianResult hr{arena, N};
+  hr.value() = hessian(hr, x, f);
+  return hr;
 }
 static_assert(MatrixDimension<SquareDims>);
 
 } // namespace poly::math
+namespace std {
+template <> struct tuple_size<poly::math::HessianResult> {
+  static constexpr size_t value = 3;
+};
+template <> struct tuple_element<size_t(0), poly::math::HessianResult> {
+  using type = double;
+};
+template <> struct tuple_element<size_t(1), poly::math::HessianResult> {
+  using type = poly::math::MutPtrVector<double>;
+};
+template <> struct tuple_element<size_t(2), poly::math::HessianResult> {
+  using type = poly::math::MutSquarePtrMatrix<double>;
+};
+
+} // namespace std
diff --git a/include/Math/LinearAlgebra.hpp b/include/Math/LinearAlgebra.hpp
index 59baaff..72a8861 100644
--- a/include/Math/LinearAlgebra.hpp
+++ b/include/Math/LinearAlgebra.hpp
@@ -290,17 +290,48 @@ template <typename T> class Fact {
   constexpr Fact(MutSquarePtrMatrix<T> A) : fact{A} {};
 
   constexpr void ldiv(MutPtrMatrix<T> rhs) {
-    auto [M, N] = rhs.size();
-    invariant(ptrdiff_t(fact.numRow()), ptrdiff_t(M));
+    ptrdiff_t M = ptrdiff_t(rhs.numRow());
+    invariant(ptrdiff_t(fact.numRow()), M);
     // LD^-1L' x = rhs
     // L y = rhs // L is UnitLowerTriangular
     for (ptrdiff_t m = 0; m < M; ++m)
-      for (ptrdiff_t k = 0; k < m; ++k) rhs(m, _) -= fact(m, k) * rhs(k, _);
+      rhs(m, _) -= rhs(_(0, m), _).transpose() * fact(m, _(0, m));
+
     // D^-1 L' x = y
     // L' x = D y
-    for (ptrdiff_t m = ptrdiff_t(M); m--;) {
+    for (ptrdiff_t m = M; m--;) {
       rhs(m, _) *= fact(m, m);
-      for (ptrdiff_t k = m + 1; k < M; ++k) rhs(m, _) -= fact(k, m) * rhs(k, _);
+      rhs(m, _) -= rhs(_(m + 1, M), _).transpose() * fact(_(m + 1, M), m);
+    }
+  }
+  constexpr void ldiv(MutPtrVector<T> rhs) {
+    ptrdiff_t M = rhs.size();
+    invariant(ptrdiff_t(fact.numRow()), M);
+    // LD^-1L' x = rhs
+    // L y = rhs // L is UnitLowerTriangular
+    for (ptrdiff_t m = 0; m < M; ++m)
+      rhs[m] -= fact(m, _(0, m)).transpose() * rhs[_(0, m)];
+    // D^-1 L' x = y
+    // L' x = D y
+    for (ptrdiff_t m = M; m--;) {
+      rhs[m] *= fact(m, m);
+      rhs[m] -= fact(_(m + 1, M), m).transpose() * rhs[_(m + 1, M)];
+    }
+  }
+  constexpr void ldiv(MutPtrVector<T> dst, TrivialVec auto src) {
+    ptrdiff_t M = dst.size();
+    invariant(M == src.size());
+    invariant(ptrdiff_t(fact.numRow()), M);
+    // LD^-1L' x = rhs
+    // L y = rhs // L is UnitLowerTriangular
+    for (ptrdiff_t m = 0; m < M; ++m)
+      dst[m] = src[m] - fact(m, _(0, m)).transpose() * dst[_(0, m)];
+
+    // D^-1 L' x = y
+    // L' x = D y
+    for (ptrdiff_t m = M; m--;) {
+      dst[m] *= fact(m, m);
+      dst[m] -= fact(_(m + 1, M), m).transpose() * dst[_(m + 1, M)];
     }
   }
 };
@@ -327,6 +358,15 @@ template <bool ForcePD = false, typename T>
 constexpr void ldiv(MutSquarePtrMatrix<T> A, MutPtrMatrix<T> B) {
   factorize<ForcePD>(A).ldiv(B);
 }
+template <bool ForcePD = false, typename T>
+constexpr void ldiv(MutSquarePtrMatrix<T> A, MutPtrVector<T> B) {
+  factorize<ForcePD>(A).ldiv(B);
+}
+template <bool ForcePD = false, typename T>
+constexpr void ldiv(MutSquarePtrMatrix<T> A, MutPtrVector<T> B,
+                    TrivialVec auto C) {
+  factorize<ForcePD>(A).ldiv(B, C);
+}
 
 } // namespace LDL
 } // namespace poly::math
diff --git a/include/Math/Math.hpp b/include/Math/Math.hpp
index c4c8675..13e57bb 100644
--- a/include/Math/Math.hpp
+++ b/include/Math/Math.hpp
@@ -62,6 +62,10 @@ concept TrivialCompatibile = Trivial<T> && Compatible<T, C>;
 template <typename T>
 concept TrivialVecOrMat = Trivial<T> && VecOrMat<T>;
 template <typename T>
+concept TrivialVec = Trivial<T> && AbstractVector<T>;
+template <typename T>
+concept TrivialMat = Trivial<T> && AbstractMatrix<T>;
+template <typename T>
 concept TrivialDataMatrix = Trivial<T> && DataMatrix<T>;
 
 //   // TODO: binary func invocable trait?
@@ -267,6 +271,9 @@ template <class T, class S> constexpr auto view(const Array<T, S> &x) {
   return x;
 }
 constexpr auto transpose(const auto &a) { return Transpose{view(a)}; }
+template <typename T> constexpr auto transpose(const Transpose<T> &a) -> T {
+  return a.transpose();
+}
 
 template <TrivialVecOrMat C, Trivial A, Trivial B>
 struct Select : public AbstractSelect<C, A, B> {