Merge pull request #35 from nakatamaho/v2.0

V2.0
nakatamaho · Jul 22, 2022 · bc22cf0 · bc22cf0
2 parents 9d0988d + 0baf910
commit bc22cf0
Show file tree

Hide file tree

Showing 937 changed files with 42,778 additions and 43,406 deletions.
diff --git a/ChangeLog b/ChangeLog
@@ -1,3 +1,11 @@
+2022/07/22    Nakata Maho <[email protected]> 2.0 branch
+            * Fix iMparam2stage.
+            * Fix issues for test programs; stack overflows, possible memory leaks and memory accesses.
+            * Extensive tests and fixes for mingw64; we provide DLLs for libqd and mpblas.
+            * Correct exponent ranges for GMP, MPFR and each OS.
+            * Update test results for aarch64-unknown-linux-gnu, x86_64-apple-darwin20.6.0,
+              x86_64-pc-linux-gnu, x86_64-pc-linux-gnu_inteloneapi and x86_64-w64-mingw32.
+
 2022/06/21    Nakata Maho <[email protected]> 2.0 branch
             * Enable Rgejsv, Cgejsv, Rgesvj and Cgesvj for dd/qd by changing Rlamch("O") to approx.
               1e-16 * Rlamch("O").

diff --git a/include/mplapack_config.h b/include/mplapack_config.h
@@ -54,15 +54,13 @@
 #define USE64BITINT
 
 #ifdef USE64BITINT
-    #if defined __APPLE__ //workaround for int64_t is long long which contradicts with GMP. However, sizeof(long long)==sizeof(long).
-        typedef long int mplapackint;
-    #elif defined _WIN32  //workaround for Windows, and cannot be fixed. int64_t is long long and support GMP version is not straightfoward.
+    #if defined _WIN32  //workaround for Windows. long int is 32bit and int64_t is long long. Supporting GMP version is not straightfoward.
         typedef long int mplapackint;
+    #elif defined __APPLE__ //on apple int64_t doesn't work, but it long works and it is 8 bytes.
+        typedef long mplapackint;
     #else
         typedef int64_t mplapackint;
     #endif
-#else
-    typedef int32_t mplapackint;
 #endif
 
 typedef mplapackint mplapacklogical;

diff --git a/mpfrc++/mpcomplex.h b/mpfrc++/mpcomplex.h
@@ -241,14 +241,13 @@ class mpcomplex {
 };
 
 #if defined ___MPLAPACK_MPLAPACK_INIT___
-mpc_rnd_t mpfr::mpcomplex::default_rnd = MPFR_RNDN; //must be initialized at mpblas/reference/mplapackinit.cpp
+mpc_rnd_t mpfr::mpcomplex::default_rnd = MPFR_RNDN; // must be initialized at mpblas/reference/mplapackinit.cpp
 mp_prec_t mpfr::mpcomplex::default_real_prec = ___MPREAL_DEFAULT_PRECISION___;
 mp_prec_t mpfr::mpcomplex::default_imag_prec = ___MPREAL_DEFAULT_PRECISION___;
 int mpfr::mpcomplex::default_base = 2;
 int mpfr::mpcomplex::double_bits = -1;
 #endif
 
-
 //+ addition
 const mpcomplex operator+(const mpcomplex &a, const mpcomplex &b);
 const mpcomplex operator+(const mpcomplex &a, const std::complex<double> b);
@@ -747,8 +746,36 @@ inline const mpcomplex operator*(const mpreal &a, const mpcomplex &b) {
 
 // / division
 inline mpcomplex &mpcomplex::operator/=(const mpcomplex &a) {
+// mpc doesn't consider overflow/underflow for devision.
+// usually, they don't occur as exponent range is 4bytes in MPFR, and 8bytes GMP, respectively.
+// however, for Windows, whose long is int = 4bytes, we have to consider overflow and underflow.
+#if defined _WIN32
+    mpcomplex tmp(*this);
+    mpreal abr, abi, ratio, den;
+    if ((abr = a.real()) < 0.)
+        abr = -abr;
+    if ((abi = a.imag()) < 0.)
+        abi = -abi;
+    if (abr <= abi) {
+        if (abi == 0) {
+            if (tmp.imag() != 0 || tmp.real() != 0)
+                abi = 1.;
+            (*this) = mpcomplex(abi / abr, abi / abr);
+            return (*this);
+        }
+        ratio = a.real() / a.imag();
+        den = a.imag() * (1.0 + ratio * ratio);
+        (*this) = mpcomplex((tmp.real() * ratio + tmp.imag()) / den, (tmp.imag() * ratio - tmp.real()) / den);
+    } else {
+        ratio = a.imag() / a.real();
+        den = a.real() * (1.0 + ratio * ratio);
+        (*this) = mpcomplex((tmp.real() + tmp.imag() * ratio) / den, (tmp.imag() - tmp.real() * ratio) / den);
+    }
+    return (*this);
+#else
     mpc_div(mpc, mpc, a.mpc, default_rnd);
     return *this;
+#endif
 }
 
 inline mpcomplex &mpcomplex::operator/=(const mpc_t a) {

diff --git a/mplapack/reference/Rlamch.cpp b/mplapack/reference/Rlamch.cpp
@@ -232,8 +232,11 @@ REAL RlamchS_gmp(void) {
     REAL sfmin;
     REAL one = 1.0;
     unsigned long exp2;
-    // XXX
-    exp2 = 0xffffffff;
+#if defined _WIN32 //exponent investigated by MPFR
+    exp2 = 0x3FFFFFFF;
+#else
+    exp2 = 0x3FFFFFFFFFFFFFFFL;
+#endif
     mpf_div_2exp(sfmin.get_mpf_t(), one.get_mpf_t(), exp2);
     return sfmin;
 
@@ -309,8 +312,11 @@ REAL RlamchM_gmp(void) {
     unsigned long exp2;
     REAL tmp;
     REAL uflowmin, one = 1.0;
-    // XXX
-    exp2 = 0xffffffff;
+#if defined _WIN32 //exponent investigated by MPFR
+    exp2 = 0x3FFFFFFF;
+#else
+    exp2 = 0x3FFFFFFFFFFFFFFFL; //exponent investigated by MPFR
+#endif
     tmp = exp2;
     return -tmp;
 
@@ -341,8 +347,11 @@ REAL RlamchU_gmp(void) {
     REAL underflowmin;
     REAL one = 1.0;
     unsigned long exp2;
-    // XXX    exp2 = (1UL << (mp_bits_per_limb - 8)) - 1; // 6 seems to be the smallest on amd64 but for safty
-    exp2 = 0xffffffff;
+#if defined _WIN32
+    exp2 = 0x3FFFFFFF;
+#else
+    exp2 = 0x3FFFFFFFFFFFFFFFL;
+#endif
     mpf_div_2exp(underflowmin.get_mpf_t(), one.get_mpf_t(), exp2);
     return underflowmin;
 }
@@ -352,9 +361,11 @@ REAL RlamchU_gmp(void) {
 REAL RlamchL_gmp(void) {
     REAL maxexp;
     unsigned long exp2;
-    exp2 = (1UL << (mp_bits_per_limb - 8)) - 1; // 6 seems to be the smallest on amd64 but for safty
-                                                // XXX
-    maxexp = 0xffffffff;
+#if defined _WIN32
+    exp2 = 0x3FFFFFFF;
+#else
+    exp2 = 0x3FFFFFFFFFFFFFFFL;
+#endif
     return maxexp;
 }
 
@@ -364,10 +375,12 @@ REAL RlamchO_gmp(void) {
     REAL overflowmax;
     REAL one = 1.0;
     unsigned long exp2;
-    // XXX    exp2 = (1UL << (mp_bits_per_limb - 8)) - 1; // 6 seems to be the smallest on amd64 but for safty
-    exp2 = 0xffffffff;
+#if defined _WIN32
+    exp2 = 0x3FFFFFFF;
+#else
+    exp2 = 0x3FFFFFFFFFFFFFFFL;
+#endif
     mpf_mul_2exp(overflowmax.get_mpf_t(), one.get_mpf_t(), exp2);
-
     return overflowmax;
 }
 

diff --git a/mplapack/reference/Rlaruv.cpp b/mplapack/reference/Rlaruv.cpp
@@ -33,17 +33,25 @@
 
 #if defined ___MPLAPACK_BUILD_WITH_MPFR___
 gmp_randstate_t ___random_mplapack_mpfr_state;
-void __attribute__((constructor)) ___mplapack_mpfr_initialize(void);
-void ___mplapack_mpfr_initialize(void) { gmp_randinit_default(___random_mplapack_mpfr_state); } // this is gmp_randinit_mt
-void __attribute__((destructor)) mplapack_mpfr_finalize(void);
-void mplapack_mpfr_finalize(void) { gmp_randclear(___random_mplapack_mpfr_state); } // this is gmp_randinit_mt
+void __attribute__((constructor)) ___mplapack_Rlaruv_mpfr_initialize(void) {
+    gmp_randinit_default(___random_mplapack_mpfr_state);
+    gmp_randseed_ui(___random_mplapack_mpfr_state, (unsigned long int)time(NULL));
+}
+void __attribute__((destructor)) ___mplapack_Rlaruv_mpfr_finalize(void) {
+    gmp_randclear(___random_mplapack_mpfr_state);
+}
 #endif
 
 #if defined ___MPLAPACK_BUILD_WITH_GMP___
 gmp_randstate_t ___random_mplapack_gmp_state;
 gmp_randclass ___random_mplapack_gmp(gmp_randinit_default);
-void __attribute__((constructor)) mplapack_gmp_initialize(void);
-void mplapack_Rlaruv_gmp_initialize(void) { ___random_mplapack_gmp.seed((unsigned long int)time(NULL)); } // XXX better initializaition req'ed
+void __attribute__((constructor)) ___mplapack_Rlaruv_gmp_initialize(void) {
+    gmp_randinit_default(___random_mplapack_gmp_state);
+    gmp_randseed_ui(___random_mplapack_gmp_state, (unsigned long int)time(NULL));
+}
+void __attribute__((destructor)) ___mplapack_Rlaruv_gmp_finalize(void) {
+    gmp_randclear(___random_mplapack_gmp_state);
+}
 #endif
 
 void Rlaruv(INTEGER *iseed, INTEGER const n, REAL *x) {

diff --git a/mplapack/test/compare/common/Rlamch.test.cpp b/mplapack/test/compare/common/Rlamch.test.cpp
@@ -74,7 +74,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <blas.h>
 #include <lapack.h>
 
-#define VERBOSE_TEST
+//#define VERBOSE_TEST
 
 /* dlamch result on FreeBSD 6/i386 + Lapack 3.1.1
   Epsilon                      =   1.110223024625157E-016
@@ -92,6 +92,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #if defined ___MPLAPACK_BUILD_WITH_MPFR___
 void Rlamch_mpfr_test() {
     REAL tmp, tmp2;
+#if defined VERBOSE_TEST
+    REAL one = 1.0;
+    REAL two = 2.0;
+    REAL tmp3, tmp4, tmp5, tmp6;
+#endif
+
     tmp = Rlamch_mpfr("E");
 
     printf("Rlamch E: Epsilon                      ");
@@ -106,19 +112,18 @@ void Rlamch_mpfr_test() {
     printf("Rlamch S: Safe minimum                 ");
     printnum(tmp);
     printf("\n");
-
-    /*
-        REAL tmp3 = tmp / 2.0;
-        REAL tmp4 = 1.0 / (tmp);
-        REAL tmp5 = 1.0 / (tmp3);
-        REAL tmp6 = 2.225073858507201e-308;
-        mpfr_printf("%72.64Rb\n", mpfr_ptr(tmp));
-        mpfr_printf("%72.64Rb should be smaller than above\n", mpfr_ptr(tmp3));
-        mpfr_printf("%72.64Rb\n", mpfr_ptr(tmp4));
-        mpfr_printf("%72.64Rb should overflow\n", mpfr_ptr(tmp5));
-        mpfr_printf("%72.64Rb just a ref.\n", mpfr_ptr(tmp6));
-    */
-
+#if defined VERBOSE_TEST
+    tmp3 = tmp / 2.0;
+    tmp4 = 1.0 / (tmp);
+    tmp5 = 1.0 / (tmp3);
+    tmp6 = 2.225073858507201e-308;
+    mpfr_printf("%72.64Rb\n", mpfr_ptr(tmp));
+    mpfr_printf("%72.64Rb safmin/2; should be smaller than above\n", mpfr_ptr(tmp3));
+    mpfr_printf("%72.64Rb 1/safmin, this should not be inf\n", mpfr_ptr(tmp4));
+    mpfr_printf("%72.64Rb 1/(safmin/2); inf it's okay\n", mpfr_ptr(tmp5));
+    mpfr_printf("%72.64Re same as above but in decimal digits\n", mpfr_ptr(tmp5));
+    mpfr_printf("%72.64Rb safe minimum of double\n", mpfr_ptr(tmp6));
+#endif
     tmp = Rlamch_mpfr("B");
     printf("Rlamch B: Base                         ");
     printnum(tmp);
@@ -143,40 +148,40 @@ void Rlamch_mpfr_test() {
     printf("Rlamch M: Minimum exponent:            ");
     printnum(tmp);
     printf("\n");
+#if defined VERBOSE_TEST
+    tmp3 = mul_2si(one, (long)tmp);
+    tmp4 = (one - Rlamch_mpfr("E")) * mul_2si(one, (long)tmp); // fill out sig. dig. by 1.
+    tmp5 = tmp4 / two;
+    mpfr_printf("%.512Rb 2^minexponent: fill out by 1: default 512bits\n", mpfr_ptr(tmp4));
+    mpfr_printf("%.76Re 2^minexponent: fill out by 1: in decimal digits\n", mpfr_ptr(tmp4));
+    mpfr_printf("%.512Rb divided by two; abrupt underflow by MPFR\n", mpfr_ptr(tmp5));
 
-    /*
-        REAL tmp3, tmp4, tmp5;
-        REAL one = 1.0;
-        tmp3= mul_2si(one, (long)tmp);
-        tmp4= (1.0 - Rlamch_mpfr("E")) * mul_2si(one, (long)tmp); //fill out sig. dig. by 1.
-        tmp5= tmp4 / 2.0;
-        tmp5= tmp5 * 2.0;
-        mpfr_printf("%72.64Rb 2^(minexp)\n", mpfr_ptr(tmp3));
-        mpfr_printf("%72.64Rb fill out by 1\n", mpfr_ptr(tmp4));
-        mpfr_printf("%72.64Rb abrupt undeflow MPFR; gradual underflow: lost one bit\n", mpfr_ptr(tmp5));
-    */
+    tmp4 = (one - Rlamch_mpfr("E")) * mul_2si(one, (long)tmp + 1); // fill out sig. dig. by 1.
+    tmp5 = tmp4 / two;
+    mpfr_printf("%.512Rb 2^(minexponent+1): fill out by 1: default 512bits\n", mpfr_ptr(tmp4));
+    mpfr_printf("%.76Re 2^(minexponent+1): fill out by 1: in decimal digits\n", mpfr_ptr(tmp4));
+    mpfr_printf("%.512Rb divided by two; do not underflow\n", mpfr_ptr(tmp5));
+#endif
     //
     tmp = Rlamch_mpfr("U");
     printf("Rlamch U: Underflow threshold          ");
     printnum(tmp);
     printf("\n");
-    /*
-        mpfr_printf("%72.64Rb\n", mpfr_ptr(tmp));
-    */
-
+#if defined VERBOSE_TEST
+    tmp2 = tmp / two;
+    mpfr_printf("%.512Rb underflow threshold: default 512bits\n", mpfr_ptr(tmp));
+    mpfr_printf("%.512Rb underflow threshold/2: default 512bits\n", mpfr_ptr(tmp2));
+#endif
     tmp = Rlamch_mpfr("L");
     printf("Rlamch L: Largest exponent             ");
     printnum(tmp);
     printf("\n");
-
-    /*
-        REAL tmp3, tmp4;
-        REAL one = 1.0;
+#if defined VERBOSE_TEST
         tmp3= mul_2si(one, (long)tmp);
-        tmp4 = tmp3*2.0;
+        tmp4 = tmp3 * 2.0;
         mpfr_printf("%72.64Rb 2^(largeexp)\n", mpfr_ptr(tmp3));
         mpfr_printf("%72.64Rb should overflow\n", mpfr_ptr(tmp4));
-    */
+#endif
 
     tmp = Rlamch_mpfr("O");
     printf("Rlamch O: Overflow threshold           ");

diff --git a/mplapack/test/eig/results/aarch64-unknown-linux-gnu/log.xeigtstC__Float128.Cec.in b/mplapack/test/eig/results/aarch64-unknown-linux-gnu/log.xeigtstC__Float128.Cec.in
@@ -13,5 +13,5 @@
 
 
  End of tests
- Total time used =         2.00 seconds
+ Total time used =         1.00 seconds
 
diff --git a/mplapack/test/eig/results/aarch64-unknown-linux-gnu/log.xeigtstC__Float128.Ced.in b/mplapack/test/eig/results/aarch64-unknown-linux-gnu/log.xeigtstC__Float128.Ced.in
@@ -117,5 +117,5 @@ The following parameter values will be used:
 
 
  End of tests
- Total time used =        43.00 seconds
+ Total time used =        39.00 seconds
 
diff --git a/mplapack/test/eig/results/aarch64-unknown-linux-gnu/log.xeigtstC__Float128.Cgd.in b/mplapack/test/eig/results/aarch64-unknown-linux-gnu/log.xeigtstC__Float128.Cgd.in
@@ -151,5 +151,5 @@ The following parameter values will be used:
 
 
  End of tests
- Total time used =        45.00 seconds
+ Total time used =        39.00 seconds
 
diff --git a/mplapack/test/eig/results/aarch64-unknown-linux-gnu/log.xeigtstC__Float128.Csg.in b/mplapack/test/eig/results/aarch64-unknown-linux-gnu/log.xeigtstC__Float128.Csg.in
@@ -33,5 +33,5 @@ The following parameter values will be used:
 
 
  End of tests
- Total time used =        83.00 seconds
+ Total time used =        80.00 seconds
 
diff --git a/mplapack/test/eig/results/aarch64-unknown-linux-gnu/log.xeigtstC__Float128.csd.in b/mplapack/test/eig/results/aarch64-unknown-linux-gnu/log.xeigtstC__Float128.csd.in
@@ -23,5 +23,5 @@ The following parameter values will be used:
 
 
  End of tests
- Total time used =         6.00 seconds
+ Total time used =         4.00 seconds
 
diff --git a/mplapack/test/eig/results/aarch64-unknown-linux-gnu/log.xeigtstC__Float128.glm.in b/mplapack/test/eig/results/aarch64-unknown-linux-gnu/log.xeigtstC__Float128.glm.in
@@ -20,5 +20,5 @@ The following parameter values will be used:
 
 
  End of tests
- Total time used =         3.00 seconds
+ Total time used =         2.00 seconds
 
diff --git a/mplapack/test/eig/results/aarch64-unknown-linux-gnu/log.xeigtstC__Float128.nep.in b/mplapack/test/eig/results/aarch64-unknown-linux-gnu/log.xeigtstC__Float128.nep.in
@@ -49,5 +49,5 @@ The following parameter values will be used:
 
 
  End of tests
- Total time used =        93.00 seconds
+ Total time used =        83.00 seconds