NRN_ENABLE_DIGEST and NRN_ENABLE_ARCH_INDEP_EXP_POW (#3135)

* Selected messages from old digest-debug branch Print digest of cvode f(y,t) and solvex(b,...) cmake -DNRN_DIGEST=ON nrn_digest() starts the accumulation of cvode digest info. nrn_digest("filename") prints accumulated cvode digest info. filename format is: message threadid index t digest where digest is the first 16 hex characters of the SHA1 hash of the double* array indicated by the message. -DNRN_ENABLE_ARCH_INDEP_EXP_POW=ON (default OFF) Provides h.use_exp_pow_precision(style) style = 0 means use normal machine IEEE precision for exp(x) and pow(x,y) style = 1 means use 53 bit mpfr style = 2 means use IEEE but truncate to 32 bit precision. sundials uses hoc_pow. digest format has a f(y,t) call count. * nrn_digest(tid, i) will print the details of the i'th digest item of thread tid. * sundial RPowerR uses hoc_pow. nrn_digest(tid, i, "abort") calls abort() on reaching index i of thread tid * Documentation for nrn_digest and use_exp_pow_precision
neuronsimulator · Oct 22, 2024 · 510b9bc · 510b9bc
1 parent ceedb15
commit 510b9bc
Show file tree

Hide file tree

Showing 18 changed files with 444 additions and 17 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -85,6 +85,13 @@ option(
   NRN_ENABLE_PERFORMANCE_TESTS
   "Enable tests that measure performance. These are known to be unreliable when run on busy/oversubscribed machines such as CI runners."
   ${NRN_ENABLE_PERFORMANCE_TESTS_DEFAULT})
+option(NRN_ENABLE_DIGEST
+       "Provides nrn_digest function for debugging cross platform floating result differences."
+       ${NRN_ENABLE_DIGEST_DEFAULT})
+option(
+  NRN_ENABLE_ARCH_INDEP_EXP_POW
+  "Provides use_exp_pow_precision(style) function so that exp and pow produce same results on all platforms"
+  ${NRN_ENABLE_ARCH_INDEP_EXP_POW_DEFAULT})
 # This can be helpful in very specific CI build configurations, where ccache is used *and* different
 # CI builds are built under different directories.
 option(NRN_AVOID_ABSOLUTE_PATHS
@@ -1056,6 +1063,10 @@ if(NRN_ENABLE_PROFILING)
     message(STATUS "  Caliper     | ${caliper_DIR}")
   endif()
 endif()
+if(NRN_ENABLE_DIGEST OR NRN_ENABLE_ARCH_INDEP_EXP_POW)
+  # Report both floating point debugging options whenever either one is enabled. The condition
+  # must use the real option names; a misspelled name is an undefined variable and is always false.
+  message(STATUS "NRN_ENABLE_DIGEST | ${NRN_ENABLE_DIGEST}")
+  message(STATUS "NRN_ENABLE_ARCH_INDEP_EXP_POW | ${NRN_ENABLE_ARCH_INDEP_EXP_POW}")
+endif()
 message(STATUS "--------------+--------------------------------------------------------------")
 message(STATUS " See documentation : https://www.neuron.yale.edu/neuron/")
 message(STATUS "--------------+--------------------------------------------------------------")

diff --git a/cmake/BuildOptionDefaults.cmake b/cmake/BuildOptionDefaults.cmake
@@ -29,6 +29,8 @@ set(NRN_AVOID_ABSOLUTE_PATHS_DEFAULT OFF)
 set(NRN_NMODL_CXX_FLAGS_DEFAULT "-O0")
 set(NRN_SANITIZERS_DEFAULT "")
 set(NRN_ENABLE_MATH_OPT_DEFAULT OFF)
+set(NRN_ENABLE_DIGEST_DEFAULT OFF)
+set(NRN_ENABLE_ARCH_INDEP_EXP_POW_DEFAULT OFF)
 
 # Some distributions may set the prefix. To avoid errors, unset it
 set(NRN_PYTHON_DYNAMIC_DEFAULT "")

diff --git a/cmake_nrnconf.h.in b/cmake_nrnconf.h.in
@@ -1,5 +1,12 @@
 #pragma once
 
+/* Define to 1 to enable debugging with SHA1 hashes of data */
+#cmakedefine01 NRN_ENABLE_DIGEST
+
+/* Define to 1 to allow selection of architecture independent */
+/* 53 bit double precision exp and pow computed with mpfr */
+#cmakedefine01 NRN_ENABLE_ARCH_INDEP_EXP_POW
+
 /* Define if building universal (internal helper macro) */
 #cmakedefine AC_APPLE_UNIVERSAL_BUILD @AC_APPLE_UNIVERSAL_BUILD@
 

diff --git a/docs/cmake_doc/options.rst b/docs/cmake_doc/options.rst
@@ -674,3 +674,23 @@ NRN_ENABLE_MATH_OPT:BOOL=OFF
 
   Note: Compilers like Intel, NVHPC, Cray etc enable such optimisations
   by default.
+
+NRN_ENABLE_DIGEST:BOOL=OFF
+------------------------------
+  Provides \ :func:`nrn_digest` function for debugging cross platform floating
+  point result differences.
+
+  Requires libcrypto
+
+NRN_ENABLE_ARCH_INDEP_EXP_POW:BOOL=OFF
+--------------------------------------
+  Provides \ :func:`use_exp_pow_precision` function so that exp and pow produce
+  same results on all platforms.
+
+  Requires mpfr (multiple precision floating-point computation), e.g.
+  ``sudo apt install libmpfr-dev``
+
+  To get platform independent floating point results with clang,
+  also consider using
+  ``-DCMAKE_C_FLAGS="-ffp-contract=off" -DCMAKE_CXX_FLAGS="-ffp-contract=off"``
+  or, alternatively, ``-fp-model=strict``
diff --git a/docs/python/programming/internals.rst b/docs/python/programming/internals.rst
@@ -261,3 +261,115 @@ Miscellaneous
         the variable from its interpreter name. Not needed by or useful for the user; returns 1.0 on
         success.
 
+----
+
+Debugging
+~~~~~~~~~~~
+
+.. function:: nrn_digest
+
+    Syntax:
+        ``h.nrn_digest()``
+
+        ``h.nrn_digest(tid, i)``
+
+        ``h.nrn_digest(tid, i, "abort")``
+
+        ``h.nrn_digest(filename)``
+
+    Description:
+        Available when configured with the cmake option ``-DNRN_ENABLE_DIGEST=ON``
+
+        If the same simulation gives different results on different machines,
+        this function can help isolate the statement that generates the
+        first difference during the simulation.
+        :meth:`ParallelContext.prcellstate` is generally the better tool, but in rare
+        situations, nrn_digest can be very helpful.
+
+        The first three forms begin digest gathering. The last form
+        prints the gathered digest information to the filename.
+        With just the two ``tid, i`` arguments, the i'th gathered item of the
+        tid thread (for single thread simulations, use ``tid = 0``) is printed
+        to the terminal, along with the individual values of the array
+        for that digest item. With the third ``"abort"`` argument, the
+        i'th gathered item is printed and ``abort()`` is called (dropping
+        into gdb if that is being used so that one can observe the backtrace).
+
+        Lines are inserted into the digest by calling the C function declared
+        in ``src/oc/nrndigest.h``:
+
+        ``void nrn_digest_dbl_array(const char* msg, int tid, double t, double* array, size_t sz);``
+
+        At the moment, such lines are present in ``src/nrncvode/occvode.cpp``
+        to instrument the cvode callbacks that compute ``y' = f(y, t)`` and the
+        approximate jacobian matrix solver ``M*x = b``, i.e., in part
+
+        .. code-block::
+
+            #include "nrndigest.h"
+            ...
+            void Cvode::fun_thread(neuron::model_sorted_token const& sorted_token,
+                       double tt,
+                       double* y,
+                       double* ydot,
+                       NrnThread* nt) {
+                CvodeThreadData& z = CTD(nt->id);
+            #if NRN_DIGEST
+                if (nrn_digest_) {
+                    nrn_digest_dbl_array("y", nt->id, tt, y, z.nvsize_);
+                }
+            #endif
+            ...
+            #if NRN_DIGEST
+                if (nrn_digest_ && ydot) {
+                    nrn_digest_dbl_array("ydot", nt->id, tt, ydot, z.nvsize_);
+                }
+            #endif
+
+        Note: when manually adding such lines, the conditional compilation and
+        nrn\_digest\_ test are not needed. The arguments to
+        ``nrn_digest_dbl_array`` determine the line added to the digest.
+        The 5th arg is the size of the 4th arg double array. The double array
+        is processed by SHA1 and the first 16 hex digits are appended to the line.
+        An example of the first few lines of output in a digest file is
+
+        .. code-block::
+
+            tid=0 size=1344
+            y 0 0 0 e1f6a372856b45e6
+            y 0 1 0 e1f6a372856b45e6
+            ydot 0 2 0 523c9694c335e458
+            y 0 3 4.7121609153871379e-09 fabb4bc469447404
+            ydot 0 4 4.7121609153871379e-09 60bcff174645fc29
+
+        The first line is thread id and number of lines for that thread.
+        Other thread groups, if any, follow the end of each thread group.
+        The digest lines consist of the message, thread id, line identifier
+        (starting from 0 for each group), double value of the 3rd arg, and
+        hash of the array.
+
+----
+
+.. function:: use_exp_pow_precision
+
+    Syntax:
+        ``h.use_exp_pow_precision(istyle)``
+
+    Description:
+        Works when configured with the cmake option
+        ``-DNRN_ENABLE_ARCH_INDEP_EXP_POW=ON`` and otherwise does nothing.
+
+        * istyle = 1
+            All calls to :func:`exp` and :func:`pow` as well as their use
+            internally, in mod files, and by cvode, are computed on mac, linux,
+            windows so that double precision floating point results are
+            cross platform consistent. (Makes use of a
+            multiple precision floating-point computation library.)
+
+        * istyle = 2
+            exp and pow are rounded to 32 bits of mantissa
+
+        * istyle = 0
+            Default.
+            exp and pow calculated natively (cross platform values can have
+            round off error differences.)
+
+            When using clang (eg. on a mac) cross platform floating point
+            identity is often attainable with  C and C++ flag option
+            ``"-ffp-contract=off"``.
diff --git a/src/nocmodl/nocpout.cpp b/src/nocmodl/nocpout.cpp
@@ -256,6 +256,8 @@ void parout() {
 \n#if !NRNGPU\
 \n#undef exp\
 \n#define exp hoc_Exp\
+\n#undef pow\
+\n#define pow hoc_pow\
 \n#endif\n\
 ");
     if (protect_include_) {

diff --git a/src/nrncvode/occvode.cpp b/src/nrncvode/occvode.cpp
@@ -12,6 +12,7 @@
 #include "vrecitem.h"
 #include "membfunc.h"
 #include "nonvintblock.h"
+#include "nrndigest.h"
 
 #include <cerrno>
 #include <numeric>
@@ -323,7 +324,7 @@ void Cvode::new_no_cap_memb(CvodeThreadData& z, NrnThread* _nt) {
                 }
             }
         }
-        assert(ncm->ml.size() == n);
+        assert(ncm->ml.size() == std::size_t(n));
     }
 }
 
@@ -456,7 +457,7 @@ extern void nrn_extra_scatter_gather(int, int);
 
 void Cvode::scatter_y(neuron::model_sorted_token const& sorted_token, double* y, int tid) {
     CvodeThreadData& z = CTD(tid);
-    assert(z.nonvint_extra_offset_ == z.pv_.size());
+    assert(std::size_t(z.nonvint_extra_offset_) == z.pv_.size());
     for (int i = 0; i < z.nonvint_extra_offset_; ++i) {
         // TODO: understand why this wasn't needed before
         if (z.pv_[i]) {
@@ -494,7 +495,7 @@ void Cvode::gather_y(N_Vector y) {
 void Cvode::gather_y(double* y, int tid) {
     CvodeThreadData& z = CTD(tid);
     nrn_extra_scatter_gather(1, tid);
-    assert(z.nonvint_extra_offset_ == z.pv_.size());
+    assert(std::size_t(z.nonvint_extra_offset_) == z.pv_.size());
     for (int i = 0; i < z.nonvint_extra_offset_; ++i) {
         // TODO: understand why this wasn't needed before
         if (z.pv_[i]) {
@@ -565,6 +566,12 @@ int Cvode::solvex_thread(neuron::model_sorted_token const& sorted_token,
     if (z.nvsize_ == 0) {
         return 0;
     }
+#if NRN_DIGEST
+    if (nrn_digest_) {
+        nrn_digest_dbl_array("solvex enter b", nt->id, t_, b, z.nvsize_);
+        nrn_digest_dbl_array("solvex enter y", nt->id, t_, y, z.nvsize_);
+    }
+#endif
     lhs(sorted_token, nt);  // special version for cvode.
     scatter_ydot(b, nt->id);
     if (z.cmlcap_) {
@@ -597,6 +604,11 @@ int Cvode::solvex_thread(neuron::model_sorted_token const& sorted_token,
     // printf("\texit b\n");
     // for (i=0; i < neq_; ++i) { printf("\t\t%d %g\n", i, b[i]);}
     nrn_nonvint_block_ode_solve(z.nvsize_, b, y, nt->id);
+#if NRN_DIGEST
+    if (nrn_digest_) {
+        nrn_digest_dbl_array("solvex leave b", nt->id, t_, b, z.nvsize_);
+    }
+#endif
     return 0;
 }
 
@@ -670,9 +682,20 @@ void Cvode::fun_thread(neuron::model_sorted_token const& sorted_token,
                        double* ydot,
                        NrnThread* nt) {
     CvodeThreadData& z = CTD(nt->id);
+#if NRN_DIGEST
+    if (nrn_digest_) {
+        nrn_digest_dbl_array("y", nt->id, tt, y, z.nvsize_);
+    }
+#endif
     fun_thread_transfer_part1(sorted_token, tt, y, nt);
     nrn_nonvint_block_ode_fun(z.nvsize_, y, ydot, nt->id);
     fun_thread_transfer_part2(sorted_token, ydot, nt);
+
+#if NRN_DIGEST
+    if (nrn_digest_ && ydot) {
+        nrn_digest_dbl_array("ydot", nt->id, tt, ydot, z.nvsize_);
+    }
+#endif
 }
 
 void Cvode::fun_thread_transfer_part1(neuron::model_sorted_token const& sorted_token,
@@ -740,7 +763,6 @@ void Cvode::fun_thread_transfer_part2(neuron::model_sorted_token const& sorted_t
 }
 
 void Cvode::fun_thread_ms_part1(double tt, double* y, NrnThread* nt) {
-    CvodeThreadData& z = ctd_[nt->id];
     nt->_t = tt;
 
     // fix this!!!
@@ -1001,7 +1023,6 @@ void Cvode::error_weights(double* pd) {
 
 void Cvode::acor(double* pd) {
     int i, id;
-    NrnThread* nt;
     for (id = 0; id < nctd_; ++id) {
         CvodeThreadData& z = ctd_[id];
         double* s = n_vector_data(acorvec(), id);

diff --git a/src/nrniv/CMakeLists.txt b/src/nrniv/CMakeLists.txt
@@ -414,6 +414,26 @@ endif()
 if(NRN_ENABLE_THREADS)
   target_link_libraries(nrniv_lib Threads::Threads)
 endif()
+
+if(NRN_ENABLE_DIGEST)
+  if(NRN_MACOS_BUILD AND NOT DEFINED OPENSSL_ROOT_DIR AND NOT DEFINED ENV{OPENSSL_ROOT_DIR})
+    # openssl/sha.h after `brew install openssl`: use the version independent opt/ prefix.
+    foreach(nrn_openssl_prefix /opt/homebrew/opt/openssl@3 /usr/local/opt/openssl@3)
+      if(EXISTS "${nrn_openssl_prefix}")
+        set(OPENSSL_ROOT_DIR "${nrn_openssl_prefix}")
+        break()
+      endif()
+    endforeach()
+  endif()
+  find_package(OpenSSL REQUIRED COMPONENTS Crypto)
+  target_link_libraries(nrniv_lib OpenSSL::Crypto)
+endif()
+
+if(NRN_ENABLE_ARCH_INDEP_EXP_POW)
+  # Test the option by name: if(${VAR}) expands the value first and then may dereference it again.
+  # mpfr is linked into nrniv_lib when this option is ON; configuration fails if it is not found.
+  find_library(LIB_MPFR mpfr REQUIRED)
+  target_link_libraries(nrniv_lib "${LIB_MPFR}")
+endif()
+
 if(NRN_WINDOWS_BUILD)
   target_link_libraries(nrniv_lib ${TERMCAP_LIBRARIES} ${Readline_LIBRARY})
 else()

diff --git a/src/nrniv/nmodlrandom.cpp b/src/nrniv/nmodlrandom.cpp
@@ -100,12 +100,8 @@ static void nmodlrandom_destruct(void* v) {
 }
 
 void NMODLRandom_reg() {
-    class2oc("NMODLRandom",
-             nmodlrandom_cons,
-             nmodlrandom_destruct,
-             members,
-             retobj_members,
-             nullptr);
+    class2oc(
+        "NMODLRandom", nmodlrandom_cons, nmodlrandom_destruct, members, retobj_members, nullptr);
     if (!nmodlrandom_sym) {
         nmodlrandom_sym = hoc_lookup("NMODLRandom");
         assert(nmodlrandom_sym);

diff --git a/src/nrniv/nrnmenu.cpp b/src/nrniv/nrnmenu.cpp
@@ -1129,8 +1129,7 @@ static Member_ret_obj_func mt_retobj_members[] = {{"pp_begin", mt_pp_begin},
                                                   {0, 0}};
 static Member_ret_str_func mt_retstr_func[] = {{"code", mt_code}, {"file", mt_file}, {0, 0}};
 void MechanismType_reg() {
-    class2oc(
-        "MechanismType", mt_cons, mt_destruct, mt_members, mt_retobj_members, mt_retstr_func);
+    class2oc("MechanismType", mt_cons, mt_destruct, mt_members, mt_retobj_members, mt_retstr_func);
     mt_class_sym_ = hoc_lookup("MechanismType");
 }
 

diff --git a/src/nrnpython/CMakeLists.txt b/src/nrnpython/CMakeLists.txt
@@ -74,7 +74,8 @@ else()
     target_link_libraries(nrnpython ${NRN_DEFAULT_PYTHON_LIBRARIES})
   endif()
   target_link_libraries(nrnpython fmt::fmt)
-  target_include_directories(nrnpython SYSTEM PUBLIC ${PROJECT_SOURCE_DIR}/${NRN_3RDPARTY_DIR}/eigen)
+  target_include_directories(nrnpython SYSTEM
+                             PUBLIC ${PROJECT_SOURCE_DIR}/${NRN_3RDPARTY_DIR}/eigen)
   target_include_directories(nrnpython PUBLIC ${PROJECT_BINARY_DIR}/src/nrniv/oc_generated)
   make_nanobind_target(nanobind ${NRN_DEFAULT_PYTHON_INCLUDES})
   target_link_libraries(nrnpython nanobind)