From 510b9bc94592e77d282d28921db483e25a1400f6 Mon Sep 17 00:00:00 2001 From: nrnhines Date: Mon, 21 Oct 2024 20:50:56 -0400 Subject: [PATCH] NRN_ENABLE_DIGEST and NRN_ENABLE_ARCH_INDEP_EXP_POW (#3135) * Selected messages from old digest-debug branch Print digest of cvode f(y,t) and solvex(b,...) cmake -DNRN_DIGEST=ON nrn_digest() starts the accumulation of cvode digest info. nrn_digest("filename") prints accumulated cvode digest info. filename format is: message threadid index t digest where digest is the first 16 hex characters of the SHA1 hash of the double* array indicated by the message. -DNRN_ENABLE_ARCH_INDEP_EXP_POW=ON (default OFF) Provides h.use_exp_pow_precision(style) style = 0 means use normal machine IEEE precision for exp(x) and pow(x,y) style = 1 means use 53 bit mpfr style = 2 means use IEEE but truncate to 32 bit precision. sundials uses hoc_pow. digest format has a f(y,t) call count. * nrn_digest(tid, i) will print the details of the i'th digest item of thread tid. * sundial RPowerR uses hoc_pow. nrn_digest(tid, i, "abort") calls abort() on reaching index i of thread tid * Documentation for nrn_digest and use_exp_pow_precision --- CMakeLists.txt | 11 +++ cmake/BuildOptionDefaults.cmake | 2 + cmake_nrnconf.h.in | 7 ++ docs/cmake_doc/options.rst | 20 +++++ docs/python/programming/internals.rst | 112 +++++++++++++++++++++++++ src/nocmodl/nocpout.cpp | 2 + src/nrncvode/occvode.cpp | 31 +++++-- src/nrniv/CMakeLists.txt | 20 +++++ src/nrniv/nmodlrandom.cpp | 8 +- src/nrniv/nrnmenu.cpp | 3 +- src/nrnpython/CMakeLists.txt | 3 +- src/oc/debug.cpp | 92 +++++++++++++++++++++ src/oc/hoc_init.cpp | 4 + src/oc/math.cpp | 114 +++++++++++++++++++++++++- src/oc/nrndigest.h | 19 +++++ src/oc/oc_ansi.h | 1 + src/oc/ocfunc.h | 6 ++ src/sundials/shared/sundialsmath.c | 6 +- 18 files changed, 444 insertions(+), 17 deletions(-) create mode 100644 src/oc/nrndigest.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 87d423a7b4..57bd453036 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -85,6 +85,13 @@ option( NRN_ENABLE_PERFORMANCE_TESTS "Enable tests that measure performance. These are known to be unreliable when run on busy/oversubscribed machines such as CI runners." ${NRN_ENABLE_PERFORMANCE_TESTS_DEFAULT}) +option(NRN_ENABLE_DIGEST + "Provides nrn_digest function for debugging cross platform floating result differences." + ${NRN_ENABLE_DIGEST_DEFAULT}) +option( + NRN_ENABLE_ARCH_INDEP_EXP_POW + "Provides use_exp_pow_precision(style) function so that exp and pow produce same results on all platforms" + ${NRN_ENABLE_ARCH_INDEP_EXP_POW_DEFAULT}) # This can be helpful in very specific CI build configurations, where ccache is used *and* different # CI builds are built under different directories. option(NRN_AVOID_ABSOLUTE_PATHS @@ -1056,6 +1063,10 @@ if(NRN_ENABLE_PROFILING) message(STATUS " Caliper | ${caliper_DIR}") endif() endif() +if(NRN_ENABLE_DIGEST OR NRN_ARCH_INDEP_EXP_POW) + message(STATUS "NRN_ENABLE_DIGEST | ${NRN_ENABLE_DIGEST}") + message(STATUS "NRN_ENABLE_ARCH_INDEP_EXP_POW | ${NRN_ENABLE_ARCH_INDEP_EXP_POW}") +endif() message(STATUS "--------------+--------------------------------------------------------------") message(STATUS " See documentation : https://www.neuron.yale.edu/neuron/") message(STATUS "--------------+--------------------------------------------------------------") diff --git a/cmake/BuildOptionDefaults.cmake b/cmake/BuildOptionDefaults.cmake index e3e1f3dd63..b43338e5ac 100644 --- a/cmake/BuildOptionDefaults.cmake +++ b/cmake/BuildOptionDefaults.cmake @@ -29,6 +29,8 @@ set(NRN_AVOID_ABSOLUTE_PATHS_DEFAULT OFF) set(NRN_NMODL_CXX_FLAGS_DEFAULT "-O0") set(NRN_SANITIZERS_DEFAULT "") set(NRN_ENABLE_MATH_OPT_DEFAULT OFF) +set(NRN_ENABLE_DIGEST_DEFAULT OFF) +set(NRN_ENABLE_ARCH_INDEP_EXP_POW_DEFAULT OFF) # Some distributions may set the prefix. To avoid errors, unset it set(NRN_PYTHON_DYNAMIC_DEFAULT "") diff --git a/cmake_nrnconf.h.in b/cmake_nrnconf.h.in index 5bbd435916..3c7851c6d5 100644 --- a/cmake_nrnconf.h.in +++ b/cmake_nrnconf.h.in @@ -1,5 +1,12 @@ #pragma once +/* Define to one if want to debug using sha1 hashes of data */ +#cmakedefine01 NRN_ENABLE_DIGEST + +/* Define to one if want to allow selection of architecture independent */ +/* 53 bit double precision of exp and pow from mpfr */ +#cmakedefine01 NRN_ENABLE_ARCH_INDEP_EXP_POW + /* Define if building universal (internal helper macro) */ #cmakedefine AC_APPLE_UNIVERSAL_BUILD @AC_APPLE_UNIVERSAL_BUILD@ diff --git a/docs/cmake_doc/options.rst b/docs/cmake_doc/options.rst index 087b477ab1..fcb296682a 100644 --- a/docs/cmake_doc/options.rst +++ b/docs/cmake_doc/options.rst @@ -674,3 +674,23 @@ NRN_ENABLE_MATH_OPT:BOOL=OFF Note: Compilers like Intel, NVHPC, Cray etc enable such optimisations by default. + +NRN_ENABLE_DIGEST:BOOL=OFF +------------------------------ + Provides \ :func:`nrn_digest` function for debugging cross platform floating + result differences. + + Requires libcrypto + +NRN_ENABLE_ARCH_INDEP_EXP_POW:BOOL=OFF +--------------------------------- + Provides \ :func:`use_exp_pow_precision` function so that exp and pow produce + same results on all platforms. + + Requires mpfr (multiple precision floating-point computation). eg. + ``sudo apt install libmpfr-dev`` + + To get platform independent floating point results with clang, + also consider using + ``-DCMAKE_C_FLAGS="-ffp-contract=off" -DCMAKE_CXX_FLAGS="-ffp-contract=off"`` + or, alternatively, ``"-fp-model=strict`` diff --git a/docs/python/programming/internals.rst b/docs/python/programming/internals.rst index fab28b96e0..5f6096bae6 100755 --- a/docs/python/programming/internals.rst +++ b/docs/python/programming/internals.rst @@ -261,3 +261,115 @@ Miscellaneous the variable from its interpreter name. Not needed by or useful for the user; returns 1.0 on success. +---- + +Debugging +~~~~~~~~~~~ + +.. function:: nrn_digest + + Syntax: + ``h.nrn_digest()`` + + ``h.nrn_digest(tid, i)`` + + ``h.nrn_digest(tid, i, "abort")`` + + ``h.nrn_digest(filename)`` + + Description: + Available when configured with the cmake option ``-DNRN_ENABLE_DIGEST=ON`` + + If the same simulation gives different results on different machines, + this function can help isolate the statement that generates the + first difference during the simulation. + I think :meth:`ParallelContext.prcellstate` is generally better, but in rare + situations, nrn_digest can be very helpful. + + The first three forms begin digest gathering. The last form + prints the gathered digest information to the filename. + With just the two ``tid, i`` arguments, the i gathered item of the + tid thread is printed (for single thread simulations, use ``tid = 0``), + to the terminal as well as the individual values of the array + for that digest item. With the third ``"abort"`` argument, the + ith gathered item is printed and ``abort()`` is called (dropping + into gdb if that is being used so that one can observe the backtrace). + + Lines are inserted into the digest by calling the C function declared + in ``src/oc/nrndigest.h``. + ``void nrn_digest_dbl_array(const char* msg, int tid, double t, double* array, size_t sz);`` + at the moment, such lines are present in ``src/nrncvode/occvode.cpp`` + to instrument the cvode callbacks that compute ``y' = f(y, t)`` and the + approximate jacobian matrix solver ``M*x = b``. I.e in part + + .. code-block:: + + #include "nrndigest.h" + ... + void Cvode::fun_thread(neuron::model_sorted_token const& sorted_token, + double tt, + double* y, + double* ydot, + NrnThread* nt) { + CvodeThreadData& z = CTD(nt->id); + #if NRN_DIGEST + if (nrn_digest_) { + nrn_digest_dbl_array("y", nt->id, tt, y, z.nvsize_); + } + #endif + ... + #if NRN_DIGEST + if (nrn_digest_ && ydot) { + nrn_digest_dbl_array("ydot", nt->id, tt, ydot, z.nvsize_); + } + #endif + + Note: when manually adding such lines, the conditional compilation and + nrn\_digest\_ test are not needed. The arguments to + ``nrn_digest_dbl_array`` determine the line added to the digest. + The 5th arg is the size of the 4th arg double array. The double array + is processed by SHA1 and the first 16 hex digits are appended to the line. + An example of the first few lines of output in a digest file is + .. code-block:: + + tid=0 size=1344 + y 0 0 0 e1f6a372856b45e6 + y 0 1 0 e1f6a372856b45e6 + ydot 0 2 0 523c9694c335e458 + y 0 3 4.7121609153871379e-09 fabb4bc469447404 + ydot 0 4 4.7121609153871379e-09 60bcff174645fc29 + + The first line is thread id and number of lines for that thread. + Other thread groups, if any, follow the end of each thread group. + The digest lines consist of thread id, line identifier (start from 0 + for each group), double value of the 3rd arg, hash of the array. + +---- + +.. function:: use_exp_pow_precision + + Syntax: + ``h.use_exp_pow_precision(istyle)`` + + Description: + Works when configured with the cmake option + ``-DNRN_ENABLE_ARCH_INDEP_EXP_POW=ON`` and otherwise does nothing. + + * istyle = 1 + All calls to :func:`exp` and :func:`pow` as well as their use + internally, in mod files, and by cvode, are computed on mac, linux, + windows so that double precision floating point results are + cross platform consistent. (Makes use of a + multiple precision floating-point computation library.) + + * istyle = 2 + exp and pow are rounded to 32 bits of mantissa + + * istyle = 0 + Default. + exp and pow calcualted natively (cross platform values can have + round off error differences.) + + When using clang (eg. on a mac) cross platform floating point + identity is often attainable with C and C++ flag option + ``"-ffp-contract=off"``. diff --git a/src/nocmodl/nocpout.cpp b/src/nocmodl/nocpout.cpp index 92c18d6b2d..c11d63e24d 100644 --- a/src/nocmodl/nocpout.cpp +++ b/src/nocmodl/nocpout.cpp @@ -256,6 +256,8 @@ void parout() { \n#if !NRNGPU\ \n#undef exp\ \n#define exp hoc_Exp\ +\n#undef pow\ +\n#define pow hoc_pow\ \n#endif\n\ "); if (protect_include_) { diff --git a/src/nrncvode/occvode.cpp b/src/nrncvode/occvode.cpp index 67e24d1c87..a2f6bd24eb 100644 --- a/src/nrncvode/occvode.cpp +++ b/src/nrncvode/occvode.cpp @@ -12,6 +12,7 @@ #include "vrecitem.h" #include "membfunc.h" #include "nonvintblock.h" +#include "nrndigest.h" #include #include @@ -323,7 +324,7 @@ void Cvode::new_no_cap_memb(CvodeThreadData& z, NrnThread* _nt) { } } } - assert(ncm->ml.size() == n); + assert(ncm->ml.size() == std::size_t(n)); } } @@ -456,7 +457,7 @@ extern void nrn_extra_scatter_gather(int, int); void Cvode::scatter_y(neuron::model_sorted_token const& sorted_token, double* y, int tid) { CvodeThreadData& z = CTD(tid); - assert(z.nonvint_extra_offset_ == z.pv_.size()); + assert(std::size_t(z.nonvint_extra_offset_) == z.pv_.size()); for (int i = 0; i < z.nonvint_extra_offset_; ++i) { // TODO: understand why this wasn't needed before if (z.pv_[i]) { @@ -494,7 +495,7 @@ void Cvode::gather_y(N_Vector y) { void Cvode::gather_y(double* y, int tid) { CvodeThreadData& z = CTD(tid); nrn_extra_scatter_gather(1, tid); - assert(z.nonvint_extra_offset_ == z.pv_.size()); + assert(std::size_t(z.nonvint_extra_offset_) == z.pv_.size()); for (int i = 0; i < z.nonvint_extra_offset_; ++i) { // TODO: understand why this wasn't needed before if (z.pv_[i]) { @@ -565,6 +566,12 @@ int Cvode::solvex_thread(neuron::model_sorted_token const& sorted_token, if (z.nvsize_ == 0) { return 0; } +#if NRN_DIGEST + if (nrn_digest_) { + nrn_digest_dbl_array("solvex enter b", nt->id, t_, b, z.nvsize_); + nrn_digest_dbl_array("solvex enter y", nt->id, t_, y, z.nvsize_); + } +#endif lhs(sorted_token, nt); // special version for cvode. scatter_ydot(b, nt->id); if (z.cmlcap_) { @@ -597,6 +604,11 @@ int Cvode::solvex_thread(neuron::model_sorted_token const& sorted_token, // printf("\texit b\n"); // for (i=0; i < neq_; ++i) { printf("\t\t%d %g\n", i, b[i]);} nrn_nonvint_block_ode_solve(z.nvsize_, b, y, nt->id); +#if NRN_DIGEST + if (nrn_digest_) { + nrn_digest_dbl_array("solvex leave b", nt->id, t_, b, z.nvsize_); + } +#endif return 0; } @@ -670,9 +682,20 @@ void Cvode::fun_thread(neuron::model_sorted_token const& sorted_token, double* ydot, NrnThread* nt) { CvodeThreadData& z = CTD(nt->id); +#if NRN_DIGEST + if (nrn_digest_) { + nrn_digest_dbl_array("y", nt->id, tt, y, z.nvsize_); + } +#endif fun_thread_transfer_part1(sorted_token, tt, y, nt); nrn_nonvint_block_ode_fun(z.nvsize_, y, ydot, nt->id); fun_thread_transfer_part2(sorted_token, ydot, nt); + +#if NRN_DIGEST + if (nrn_digest_ && ydot) { + nrn_digest_dbl_array("ydot", nt->id, tt, ydot, z.nvsize_); + } +#endif } void Cvode::fun_thread_transfer_part1(neuron::model_sorted_token const& sorted_token, @@ -740,7 +763,6 @@ void Cvode::fun_thread_transfer_part2(neuron::model_sorted_token const& sorted_t } void Cvode::fun_thread_ms_part1(double tt, double* y, NrnThread* nt) { - CvodeThreadData& z = ctd_[nt->id]; nt->_t = tt; // fix this!!! @@ -1001,7 +1023,6 @@ void Cvode::error_weights(double* pd) { void Cvode::acor(double* pd) { int i, id; - NrnThread* nt; for (id = 0; id < nctd_; ++id) { CvodeThreadData& z = ctd_[id]; double* s = n_vector_data(acorvec(), id); diff --git a/src/nrniv/CMakeLists.txt b/src/nrniv/CMakeLists.txt index f1f5830dde..0f9cd2564a 100644 --- a/src/nrniv/CMakeLists.txt +++ b/src/nrniv/CMakeLists.txt @@ -414,6 +414,26 @@ endif() if(NRN_ENABLE_THREADS) target_link_libraries(nrniv_lib Threads::Threads) endif() + +if(${NRN_ENABLE_DIGEST}) + if(NRN_MACOS_BUILD) + # where to get openssl/sha.h after brew install openssl + set_property( + SOURCE ${NRN_OC_SRC_DIR}/debug.cpp + APPEND + PROPERTY INCLUDE_DIRECTORIES /opt/homebrew/Cellar/openssl@3/3.1.0/include) + find_library(LIB_CRYPTO crypto PATHS /opt/homebrew/Cellar/openssl@3/3.1.0/lib REQUIRED) + target_link_libraries(nrniv_lib ${LIB_CRYPTO}) + else() + target_link_libraries(nrniv_lib crypto) + endif() +endif() + +if(${NRN_ENABLE_ARCH_INDEP_EXP_POW}) + find_library(LIB_MPFR mpfr REQUIRED) + target_link_libraries(nrniv_lib ${LIB_MPFR}) +endif() + if(NRN_WINDOWS_BUILD) target_link_libraries(nrniv_lib ${TERMCAP_LIBRARIES} ${Readline_LIBRARY}) else() diff --git a/src/nrniv/nmodlrandom.cpp b/src/nrniv/nmodlrandom.cpp index 66be7a9dc1..88806ddde9 100644 --- a/src/nrniv/nmodlrandom.cpp +++ b/src/nrniv/nmodlrandom.cpp @@ -100,12 +100,8 @@ static void nmodlrandom_destruct(void* v) { } void NMODLRandom_reg() { - class2oc("NMODLRandom", - nmodlrandom_cons, - nmodlrandom_destruct, - members, - retobj_members, - nullptr); + class2oc( + "NMODLRandom", nmodlrandom_cons, nmodlrandom_destruct, members, retobj_members, nullptr); if (!nmodlrandom_sym) { nmodlrandom_sym = hoc_lookup("NMODLRandom"); assert(nmodlrandom_sym); diff --git a/src/nrniv/nrnmenu.cpp b/src/nrniv/nrnmenu.cpp index f4103033cb..0f8e87f750 100644 --- a/src/nrniv/nrnmenu.cpp +++ b/src/nrniv/nrnmenu.cpp @@ -1129,8 +1129,7 @@ static Member_ret_obj_func mt_retobj_members[] = {{"pp_begin", mt_pp_begin}, {0, 0}}; static Member_ret_str_func mt_retstr_func[] = {{"code", mt_code}, {"file", mt_file}, {0, 0}}; void MechanismType_reg() { - class2oc( - "MechanismType", mt_cons, mt_destruct, mt_members, mt_retobj_members, mt_retstr_func); + class2oc("MechanismType", mt_cons, mt_destruct, mt_members, mt_retobj_members, mt_retstr_func); mt_class_sym_ = hoc_lookup("MechanismType"); } diff --git a/src/nrnpython/CMakeLists.txt b/src/nrnpython/CMakeLists.txt index c857e3c37b..0996b99c04 100644 --- a/src/nrnpython/CMakeLists.txt +++ b/src/nrnpython/CMakeLists.txt @@ -74,7 +74,8 @@ else() target_link_libraries(nrnpython ${NRN_DEFAULT_PYTHON_LIBRARIES}) endif() target_link_libraries(nrnpython fmt::fmt) - target_include_directories(nrnpython SYSTEM PUBLIC ${PROJECT_SOURCE_DIR}/${NRN_3RDPARTY_DIR}/eigen) + target_include_directories(nrnpython SYSTEM + PUBLIC ${PROJECT_SOURCE_DIR}/${NRN_3RDPARTY_DIR}/eigen) target_include_directories(nrnpython PUBLIC ${PROJECT_BINARY_DIR}/src/nrniv/oc_generated) make_nanobind_target(nanobind ${NRN_DEFAULT_PYTHON_INCLUDES}) target_link_libraries(nrnpython nanobind) diff --git a/src/oc/debug.cpp b/src/oc/debug.cpp index 2f8aeb5626..c9e0bb9988 100644 --- a/src/oc/debug.cpp +++ b/src/oc/debug.cpp @@ -1,12 +1,21 @@ #include <../../nrnconf.h> /* /local/src/master/nrn/src/oc/debug.cpp,v 1.7 1996/04/09 16:39:14 hines Exp */ + #include "hocdec.h" #include "code.h" #include "equation.h" +#include "multicore.h" #include #include "utils/logger.hpp" +#include "nrndigest.h" +#if NRN_DIGEST +#include +#include +#include +#endif + int hoc_zzdebug; #define prcod(c1, c2) else if (p->pf == c1) Printf("%p %p %s", fmt::ptr(p), fmt::ptr(p->pf), c2) @@ -144,3 +153,86 @@ void debugzz(Inst* p) { } #endif /*OCSMALL*/ } + +#if NRN_DIGEST + +int nrn_digest_; +static std::vector> digest; // nthread string vectors +static std::vector digest_cnt; // nthread counts. +static int nrn_digest_print_item_ = -1; +static int nrn_digest_print_tid_ = 0; +static bool nrn_digest_abort_ = false; + +void nrn_digest() { + if (ifarg(1) && hoc_is_str_arg(1)) { + // print the digest to the file and turn off accumulation + const char* fname = gargstr(1); + FILE* f = fopen(fname, "w"); + if (!f) { + hoc_execerr_ext("Could not open %s for writing", fname); + } + + int tid = 0; + for (auto& d: digest) { + fprintf(f, "tid=%d size=%zd\n", tid, digest[tid].size()); + for (auto& s: d) { + fprintf(f, "%s\n", s.c_str()); + } + tid++; + } + fclose(f); + nrn_digest_ = 0; + } else { // start accumulating digest info + nrn_digest_ = 1; + nrn_digest_print_item_ = -1; + nrn_digest_print_tid_ = 0; + if (ifarg(2)) { + nrn_digest_print_tid_ = int(chkarg(1, 0., nrn_nthread - 1)); + nrn_digest_print_item_ = int(chkarg(2, 0., 1e9)); + } + nrn_digest_abort_ = (ifarg(3) && strcmp(gargstr(3), "abort") == 0); + } + size_t size = digest.size() ? digest[0].size() : 0; + digest.clear(); // in any case, start over. + digest.resize(nrn_nthread); + digest_cnt.clear(); + digest_cnt.resize(nrn_nthread); + hoc_ret(); + hoc_pushx(double(size)); +} + +void nrn_digest_dbl_array(const char* msg, int tid, double t, double* array, size_t sz) { + if (!nrn_digest_) { + return; + } + unsigned char md[SHA_DIGEST_LENGTH]; + size_t n = sz * sizeof(double); + unsigned char* d = (unsigned char*) array; + SHA1(d, n, md); + + std::string s(msg); + char buf[100]; + int ix = int(digest_cnt[tid]); + digest_cnt[tid]++; + sprintf(buf, " %d %d %.17g ", tid, ix, t); + s += buf; + + for (int i = 0; i < 8; ++i) { + sprintf(buf, "%02x", (int) md[i]); + s += buf; + } + + digest[tid].push_back(s); + + if (nrn_digest_print_item_ == ix && nrn_digest_print_tid_ == tid) { + printf("ZZ %s\n", s.c_str()); + if (nrn_digest_abort_) { + abort(); + } + for (size_t i = 0; i < sz; ++i) { + printf("Z %zd %.20g\n", i, array[i]); + } + } +} + +#endif // NRN_DIGEST diff --git a/src/oc/hoc_init.cpp b/src/oc/hoc_init.cpp index bfcd5a9342..e8bd07f46c 100644 --- a/src/oc/hoc_init.cpp +++ b/src/oc/hoc_init.cpp @@ -195,6 +195,10 @@ static struct { /* Builtin functions with multiple or variable args */ #if defined(WIN32) {"WinExec", hoc_win_exec}, #endif +#if NRN_DIGEST + {"nrn_digest", nrn_digest}, +#endif + {"use_exp_pow_precision", hoc_use_exp_pow_precision}, {0, 0}}; static struct { /* functions that return a string */ diff --git a/src/oc/math.cpp b/src/oc/math.cpp index a568087679..2f2321fcf7 100644 --- a/src/oc/math.cpp +++ b/src/oc/math.cpp @@ -9,12 +9,16 @@ #include "nrnmpiuse.h" #include "ocfunc.h" +#include "nrnassrt.h" #include #include #include #include +#if NRN_ARCH_INDEP_EXP_POW +#include +#endif #define EPS hoc_epsilon #define MAXERRCOUNT 5 @@ -61,6 +65,109 @@ double hoc_Log10(double x) { return errcheck(log10(x), "log10"); } +#if NRN_ARCH_INDEP_EXP_POW + +static double accuracy32(double val) { + int ex; + double mant = frexp(val, &ex); + // round to about 32 bits after . + double prec = 4294967296.0; + double result = mant * prec; + result = round(result); + result /= prec; + result = ldexp(result, ex); + return result; +} + +static double pow_arch_indep(double x, double y) { + mpfr_prec_t prec = 53; + mpfr_rnd_t rnd = MPFR_RNDN; + + mpfr_t x_, y_; + mpfr_init2(x_, prec); + mpfr_init2(y_, prec); + + mpfr_set_d(x_, x, rnd); + mpfr_set_d(y_, y, rnd); + + mpfr_pow(x_, x_, y_, rnd); + + double r = mpfr_get_d(x_, rnd); + + mpfr_clear(x_); + mpfr_clear(y_); + + return r; +} + +static double exp_arch_indep(double x) { + mpfr_prec_t prec = 53; + mpfr_rnd_t rnd = MPFR_RNDN; + + mpfr_t x_; + mpfr_init2(x_, prec); + mpfr_set_d(x_, x, rnd); + mpfr_exp(x_, x_, rnd); + double r = mpfr_get_d(x_, rnd); + mpfr_clear(x_); + return r; +} + +static double pow_precision32(double x, double y) { + return accuracy32(pow(x, y)); +} + +static double exp_precision32(double x) { + return accuracy32(exp(x)); +} + +static double (*pow_ptr)(double x, double y) = pow; +static double (*pow_ieee_ptr)(double x, + double y) = pow; // avoid error! Maybe because of c++ overloading. +static double (*exp_ptr)(double x) = exp; + +int nrn_use_exp_pow_precision(int style) { + if (style == 0) { // default IEEE + pow_ptr = pow; + exp_ptr = exp; + } else if (style == 1) { // 53 bit mpfr + pow_ptr = pow_arch_indep; + exp_ptr = exp_arch_indep; + } else if (style == 2) { // 32 bit truncation + pow_ptr = pow_precision32; + exp_ptr = exp_precision32; + } + return style; +} + +#endif // NRN_ARCH_INDEP_EXP_POW + +void hoc_use_exp_pow_precision() { + int style = 0; + if (ifarg(1)) { + style = chkarg(1, 0.0, 2.0); + } +#if NRN_ARCH_INDEP_EXP_POW + style = nrn_use_exp_pow_precision(style); +#else + style = 0; +#endif // NRN_ARCH_INDEP_EXP_POW + hoc_ret(); + hoc_pushx(double(style)); +} + +// Try to overcome difference between linux and windows. +// by rounding mantissa to 32 bit accuracy or using +// hopefully arch independent mpfr for exp and pow. + +double hoc_pow(double x, double y) { +#if NRN_ARCH_INDEP_EXP_POW + return (*pow_ptr)(x, y); +#else + return pow(x, y); +#endif +} + /* used by nmodl and other c, c++ code */ double hoc_Exp(double x) { if (x < -700.) { @@ -78,7 +185,12 @@ double hoc_Exp(double x) { } return exp(700.); } + +#if NRN_ARCH_INDEP_EXP_POW + return (*exp_ptr)(x); +#else return exp(x); +#endif } /* used by interpreter */ @@ -103,7 +215,7 @@ double hoc_Sqrt(double x) { double hoc_Pow(double x, double y) { clear_fe_except(); - return errcheck(pow(x, y), "exponentiation"); + return errcheck(hoc_pow(x, y), "exponentiation"); } double hoc_integer(double x) { diff --git a/src/oc/nrndigest.h b/src/oc/nrndigest.h new file mode 100644 index 0000000000..0709460d0c --- /dev/null +++ b/src/oc/nrndigest.h @@ -0,0 +1,19 @@ + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define NRN_DIGEST NRN_ENABLE_DIGEST + +#if NRN_DIGEST +extern int nrn_digest_; // debugging differences on different machines. +extern void nrn_digest_dbl_array(const char* msg, int tid, double t, double* array, size_t sz); +#endif + +#ifdef __cplusplus +} +#endif diff --git a/src/oc/oc_ansi.h b/src/oc/oc_ansi.h index 0b95fc8732..70466b93bb 100644 --- a/src/oc/oc_ansi.h +++ b/src/oc/oc_ansi.h @@ -100,6 +100,7 @@ void install_vector_method(const char*, double (*)(void*)); int vector_arg_px(int i, double** p); double hoc_Exp(double); +extern "C" double hoc_pow(double, double); int hoc_is_tempobj_arg(int narg); std::FILE* hoc_obj_file_arg(int i); void hoc_reg_nmodl_text(int type, const char* txt); diff --git a/src/oc/ocfunc.h b/src/oc/ocfunc.h index b30006011a..67bf910d86 100644 --- a/src/oc/ocfunc.h +++ b/src/oc/ocfunc.h @@ -38,6 +38,12 @@ extern void hoc_Setcolor(void); extern void hoc_init_space(void); extern void hoc_install_hoc_obj(void); extern void nrn_feenableexcept(void); + +#if NRN_DIGEST +extern void nrn_digest(); +#endif +extern void hoc_use_exp_pow_precision(); + void hoc_coreneuron_handle(); void hoc_get_config_key(); void hoc_get_config_val(); diff --git a/src/sundials/shared/sundialsmath.c b/src/sundials/shared/sundialsmath.c index 972d3d82f1..84fb7e555e 100755 --- a/src/sundials/shared/sundialsmath.c +++ b/src/sundials/shared/sundialsmath.c @@ -39,12 +39,14 @@ realtype RPowerI(realtype base, int exponent) return(prod); } +extern double hoc_pow(double, double); + realtype RPowerR(realtype base, realtype exponent) { if (base <= ZERO) return(ZERO); #if defined(SUNDIALS_USE_GENERIC_MATH) - return((realtype) pow((double) base, (double) exponent)); + return((realtype) hoc_pow((double) base, (double) exponent)); #elif defined(SUNDIALS_SINGLE_PRECISION) return(powf(base, exponent)); #elif defined(SUNDIALS_EXTENDED_PRECISION) @@ -79,7 +81,7 @@ realtype RAbs(realtype x) realtype RPower2(realtype x) { #if defined(SUNDIALS_USE_GENERIC_MATH) - return((realtype) pow((double) x, 2.0)); + return((realtype) hoc_pow((double) x, 2.0)); #elif defined(SUNDIALS_SINGLE_PRECISION) return(powf(x, TWO)); #elif defined(SUNDIALS_EXTENDED_PRECISION)