fract4d · edyoung · May 31, 2020 · May 23, 2020 · May 23, 2020 · May 23, 2020
diff --git a/doc/mathlibs.md b/doc/mathlibs.md
@@ -0,0 +1,40 @@
+
+# Notes on Math Libraries
+
+I went through the list of all arbitrary precision math libs at https://en.wikipedia.org/wiki/List_of_arbitrary-precision_arithmetic_software to see which might work best for Gnofract 4D.
+
+**Criteria**:
+- Fast
+- Supports floating point
+- Target numbers up to 4000 bits (?)
+- Need trig and other elementary functions
+
+
+**Library** | **Suitable** | **Notes**
+--- | --- | ---
+[Boost](https://www.boost.org/doc/libs/1_73_0/libs/multiprecision/doc/html/boost_multiprecision/intro.html) | Investigate | C++ wrapper/front-end for several libs (" GMP, MPFR, MPIR, MPC, TomMath")
+[TTMath](https://www.ttmath.org/) | Not now | Appears unmaintained 1person project
+[LibBF](https://bellard.org/libbf/benchmark.html) | No | 'slower than GMP for less than 1e9 digits'
+[GMP]() | Maybe | For floats, provides mpf but suggests using mpfr instead
+[MPFR](https://www.mpfr.org/) | Yes | We have a sample already
+[CLN](https://ginac.de/CLN/cln.html) | Maybe | Says 'using GNU MP (internally) can provide quite a boost'
+[ARPREC](http://crd-legacy.lbl.gov/~dhbailey/mpdist/) | No | Appears FORTRAN-focused
+[MAPM](https://github.com/LuaDist/mapm) | No | Appears unmaintained
+[CORE](https://cs.nyu.edu/exact/core_pages/index.html) | No | Academically-focused, all about exact computation
+[LEDA](https://www.algorithmic-solutions.com/index.php/products/leda-for-c)  | No | Proprietary
+[CGAL](https://www.cgal.org/) | No | Seems mostly focused on geometry, couldn't even find arbitrary precision math on web site
+[MPIR](http://mpir.org/#about) | Yes | GMP Fork. Info seems a bit sparse.
+[FLINT](http://www.flintlib.org/) | Maybe | Looks like it uses MPFR internally
+[Arb](http://arblib.org/) | No | Believe the 'ball arithmetic' approach will be slower
+
+
+
+# Boost
+
+Boost seems a strong contender. It provides a whole set of different math libraries via
+a family of templated types, including a 'quad' type (2x a regular double) and more. Since the number of bits is part of the template we have to recompile in order to go deeper but that doesn't seem too hard. Installation is a bit of an issue - either we have to redist the relevant headers or require users to install libbost-all-dev in 
+order to run.
+
+
+
+
diff --git a/examples/README.md b/examples/README.md
@@ -161,4 +161,26 @@ Execute:
 ```
 examples/cpp/mp_mandelbrot.sh
 ```
-Then you should see a new file under `examples/output`.
+Then you should see a new file under `examples/output`.
+
+### Benchmark: Creating a simple mandelbrot using a custom formula with MPFR
+In this example we use [google microbenchmark support library] to get some feedback about calculations performance with [mpfr library](https://www.mpfr.org/).
+Execute:
+```
+examples/cpp/mp_benchmark.sh
+```
+Check the output, it should be something like:
+```shell
+Run on (4 X 2300 MHz CPU s)
+CPU Caches:
+  L1 Data 32 KiB (x4)
+  L1 Instruction 32 KiB (x4)
+  L2 Unified 256 KiB (x4)
+  L3 Unified 6144 KiB (x4)
+Load Average: 1.02, 0.55, 0.38
+***WARNING*** Library was built as DEBUG. Timings may be affected.
+----------------------------------------------------------------------
+Benchmark                            Time             CPU   Iterations
+----------------------------------------------------------------------
+BM_fractal/min_time:120.000       5414 ms         5396 ms           31
+```
diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt
@@ -35,6 +35,9 @@ target_link_libraries(example ${CMAKE_DL_LIBS})
 
 add_definitions(-DTHREADS -D_REENTRANT -DPNG_ENABLED -DJPG_ENABLED)
 
+find_package(benchmark REQUIRED)
+target_link_libraries(example benchmark::benchmark)
+
 # FRACT_STDLIB
 
 add_library(fract_stdlib SHARED

diff --git a/examples/cpp/Dockerfile b/examples/cpp/Dockerfile
@@ -9,6 +9,14 @@ RUN apt-get install -y libmpfr-dev libgmp-dev
 
 WORKDIR /src
 
+# set up benchmark library
+RUN git clone https://github.com/google/benchmark.git && \
+	git clone https://github.com/google/googletest.git benchmark/googletest && \
+	cd benchmark && \
+	mkdir build && cd build && \
+	cmake ../ && make && make install
+
+
 ARG main_source
 ARG formula_source
 ARG gmp_support

diff --git a/examples/cpp/benchmark.cpp b/examples/cpp/benchmark.cpp
@@ -0,0 +1,183 @@
+#include <cstdlib>
+#include <dlfcn.h>
+#include <cstdio>
+#include <fcntl.h>
+#include <unistd.h>
+#include <new>
+#include <memory>
+#include <algorithm>
+#include <chrono>
+
+#include "pf.h"
+
+#include "model/colormap.h"
+#include "model/image.h"
+#include "model/enums.h"
+#include "model/imagewriter.h"
+
+#include "model/fractfunc.h"
+#include "model/vectors.h"
+
+#include "benchmark/benchmark.h"
+
+#define MAX_ITERATIONS 1000
+
+constexpr double pos_params[N_PARAMS] {
+    0.0, 0.0, 0.0, 0.0, // X Y Z W
+    4.0, // Size or zoom
+    0.0, 0.0, 0.0, 0.0, 0.0, 0.0 // XY XZ XW YZ YW ZW planes (4D stuff)
+};
+
+// So that we don't do a bunch of file I/O during the benchmark, load the 
+// formula and other objects first and cache them across multiple calls.
+// NB: we don't free them at all, and this is not thread-safe.
+static pf_obj * GetPointFunc() {
+    static pf_obj *instance;
+    if(!instance) {
+        void *fract_stdlib_handle = dlopen("./fract_stdlib.so", RTLD_GLOBAL | RTLD_NOW);
+        if (!fract_stdlib_handle) {                    
+            fprintf(stderr, "Error loading libfract_stdlib: %s", dlerror());
+            throw std::exception();
+        }
+
+        // load formula lib
+        void *lib_handle = dlopen("./formula.so", RTLD_NOW);
+        if (!lib_handle)
+        {
+            fprintf(stderr, "Error loading formula: %s", dlerror());
+            throw std::exception();
+        }
+        pf_obj *(*pfn)(void);
+        pfn = reinterpret_cast<pf_obj * (*)(void)>(dlsym(lib_handle, "pf_new"));
+        if (!pfn)
+        {
+            fprintf(stderr, "Error loading formula symbols: %s", dlerror());
+            throw std::exception();
+        }
+        instance = pfn();
+    }
+    return instance;
+}
+
+static void inner_fractal(pf_obj *pf_handle, int precision) {    
+    // formula params: [0, 4.0, 0.0, 1.0, 4.0, 0.0, 1.0, precision]
+    int param_len = 8;
+    auto params{std::make_unique<s_param []>(param_len)};
+    params[0].t = INT;
+    params[0].intval = 0;
+    params[1].t = FLOAT;
+    params[1].doubleval = 4.0;
+    params[2].t = FLOAT;
+    params[2].doubleval = 0.0;
+    params[3].t = FLOAT;
+    params[3].doubleval = 1.0;
+    params[4].t = FLOAT;
+    params[4].doubleval = 4.0;
+    params[5].t = FLOAT;
+    params[5].doubleval = 0.0;
+    params[6].t = FLOAT;
+    params[6].doubleval = 1.0;
+    params[7].t = INT;
+    params[7].intval = precision;
+
+    // initialize the point function with the params
+    pf_handle->vtbl->init(pf_handle, const_cast<double *>(pos_params), params.get(), param_len);
+
+    // create the colormap with 3 colors
+    std::unique_ptr<ListColorMap> cmap{new (std::nothrow) ListColorMap{}};
+    cmap->init(3);
+    cmap->set(0, 0.0, 0, 0, 0, 255);
+    cmap->set(1, 0.004, 255, 255, 255, 255);
+    cmap->set(2, 1.0, 255, 255, 255, 255);
+
+    // create the image (logic representation)
+    auto im{std::make_unique<image>()};
+    im->set_resolution(640, 480, -1, -1);
+
+    // calculate the 4D vectors: topleft and deltas (x, y)
+    dvec4 center = dvec4(
+        pos_params[XCENTER], pos_params[YCENTER],
+        pos_params[ZCENTER], pos_params[WCENTER]
+    );
+    dmat4 rot_matrix = rotated_matrix(const_cast<double *>(pos_params));
+    rot_matrix = rot_matrix / im->totalXres();
+    dvec4 deltax = rot_matrix[VX];
+    dvec4 deltay = rot_matrix[VY];
+    dvec4 delta_aa_x = deltax / 2.0;
+    dvec4 delta_aa_y = deltay / 2.0;
+    dvec4 topleft = center - deltax * im->totalXres() / 2.0 - deltay * im->totalYres() / 2.0;
+    topleft += delta_aa_x + delta_aa_y;
+
+    const auto w = im->Xres();
+    const auto h = im->Yres();
+    // we put these variables out of the loop scope to use its previous value
+    int iters_taken = 0;
+    int min_period_iters = 0;
+    // now we calculate every pixel (for a 2D image projection in a single tile)
+    for (auto x = 0; x < w; x++) {
+        for (auto y = 0; y < h; y++) {
+            // calculate the position in 4D (x, y, z, w)
+            dvec4 pos = topleft + x * deltax + y * deltay;
+            // run the formula
+            double dist = 0.0;
+            int fate = 0;
+            int solid = 0;
+            int direct_color = 0;
+            double colors[4] = {0.0};
+            int inside = 0;
+            if (iters_taken == -1) { // we got inside the last time so we'll probably do it again
+                min_period_iters = 0;
+            } else {
+                min_period_iters = std::min(min_period_iters + 10, MAX_ITERATIONS);
+            }
+            pf_handle->vtbl->calc(
+                pf_handle,
+                pos.n,
+                MAX_ITERATIONS,
+                -1, // wrap param
+                min_period_iters, // iters to perform before checking the period tolerance
+                1.0E-9, // period tolerance
+                x, y, 0, // x, y and aa: these values are not needed in the formula but required as arguments for debugging purposes
+                &iters_taken, &fate, &dist, &solid,
+                &direct_color, &colors[0]);
+            // process the formula output and get the value from colormap
+            rgba_t color{};
+            if (fate & FATE_INSIDE) {
+                iters_taken = -1;
+                inside = 1;
+            }
+            if (direct_color) {
+                color = cmap->lookup_with_dca(solid, inside, colors);
+                fate |= FATE_DIRECT;
+            } else {
+                color = cmap->lookup_with_transfer(dist, solid, inside);
+            }
+            if (solid)
+            {
+                fate |= FATE_SOLID;
+            }
+            // this is only needed for optimization and zooming
+            im->setIter(x, y, iters_taken);
+            im->setFate(x, y, 0, fate);
+            im->setIndex(x, y, 0, dist);
+            // put the pixel color into the image buffer position
+            im->put(x, y, color);
+        }
+    }
+}
+
+static void BM_fractal(benchmark::State& state) {
+    pf_obj *pf_handle = GetPointFunc();
+
+    for (auto _ : state) {
+        inner_fractal(pf_handle, state.range(0));
+    }
+}
+
+// run benchmark
+// - report time per iteration in milliseconds
+// - for at least 2 minutes
+// - measure real time not CPU time for main thread
+BENCHMARK(BM_fractal)->Arg(64)->Arg(128)->Arg(432)->Unit(benchmark::kMillisecond)->MinTime(120.0)->UseRealTime(); 
+
+BENCHMARK_MAIN();
diff --git a/examples/cpp/mp_benchmark.sh b/examples/cpp/mp_benchmark.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+set -e
+
+docker build . -f examples/cpp/Dockerfile -t mp_mandelbrot:1.0.0 \
+    --build-arg main_source=benchmark.cpp \
+    --build-arg formula_source=examples/cpp/mp_formula.cpp \
+    --build-arg gmp_support=1
+
+# disable CPU frequency scaling during test for hopefully more consistent results
+if [ -x "$(command -v cpupower)" ]; then
+    sudo cpupower frequency-set --governor performance
+fi
+
+docker run --rm -it -v ${PWD}/examples/output:/src/output mp_mandelbrot:1.0.0
+
+if [ -x "$(command -v cpupower)" ]; then
+    sudo cpupower frequency-set --governor powersave
+fi
diff --git a/examples/cpp/mp_double.h b/examples/cpp/mp_double.h
@@ -39,7 +39,7 @@ class MpDouble final {
         mpfr_clear(value);
     }
 
-    static void setPreccisionInBits(int bits) {
+    static void setPrecisionInBits(int bits) {
         mpfr_set_default_prec(bits);
     }
 

diff --git a/examples/cpp/mp_formula.cpp b/examples/cpp/mp_formula.cpp
@@ -27,14 +27,20 @@ static void pf_init(
     int nparams)
 {
     pf_real *pfo = (pf_real *)p_stub;
-    if (nparams > PF_MAXPARAMS)
-    {
+    if (nparams > PF_MAXPARAMS) {
         nparams = PF_MAXPARAMS;
     }
-    for (int i = 0; i < nparams; ++i)
-    {
+    for (int i = 0; i < nparams; ++i) {
         pfo->p[i] = params[i];
     }
+
+    if(nparams > 7) {
+        // HACK: set the desired precision to parameter 7 for benchmarking
+        MpDouble::setPrecisionInBits(params[7].intval);
+    } else {
+        MpDouble::setPrecisionInBits(432);
+    }
+
 }
 
 static void pf_calc(
@@ -240,7 +246,6 @@ static struct s_pf_vtable vtbl =
 
 pf_obj *pf_new()
 {
-    MpDouble::setPreccisionInBits(432);
     pf_real *p = new (std::nothrow) pf_real;
     if (!p)
         return NULL;