diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..00a27188
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,11 @@
+test : # run the test targets of the java, purec and simd subdirectories
+	cd java && $(MAKE) test
+	cd purec && $(MAKE) test
+	cd simd && $(MAKE) testsse2 testavx
+
+clean : # remove editor backups here, then run "clean" in every subdirectory
+	rm -f *~
+	cd java && $(MAKE) clean
+	cd purec && $(MAKE) clean
+	cd simd && $(MAKE) clean
+	cd tester && $(MAKE) clean
diff --git a/README b/README
new file mode 100644
index 00000000..c7bb3124
--- /dev/null
+++ b/README
@@ -0,0 +1,58 @@
+In this library, functions for evaluating some elementary functions
+are implemented. The algorithm is intended for efficient evaluation
+utilizing SIMD instruction sets like SSE or AVX, but it is also fast
+using usual scalar operations.
+
+The package contains a few directories, each of which holds an
+implementation in the corresponding language. You can run "make test"
+in order to test the functions in each directory.
+
+The software is in the public domain. You can use the software without any
+obligation.
+
+
+Author : Naoki Shibata
+
+Main download page : http://shibatch.sourceforge.net/
+
+
+
+History
+
+2.80 Added support for ARM NEON. Added higher accuracy single
+precision functions : sinf_u1, cosf_u1, sincosf_u1, tanf_u1, asinf_u1,
+acosf_u1, atanf_u1, atan2f_u1, logf_u1, and cbrtf_u1.
+
+2.70 Added higher accuracy functions : sin_u1, cos_u1, sincos_u1,
+tan_u1, asin_u1, acos_u1, atan_u1, atan2_u1, log_u1, and
+cbrt_u1. These functions evaluate the corresponding function with at
+most 1 ulp of error.
+
+2.60 Added the remaining single precision functions : powf, sinhf,
+coshf, tanhf, exp2f, exp10f, log10f, log1pf. Added support for FMA4
+(for AMD Bulldozer). Added more test cases. Fixed minor bugs (which
+degraded accuracy in some rare cases).
+
+2.50 Added support for AVX2. SLEEF now compiles with ICC.
+
+2.40 Fixed incorrect denormal/nonnumber handling in ldexp, ldexpf,
+sinf and cosf. Removed support for Go language.
+
+2.31 Added sincosf.
+
+2.30 Added single precision functions : sinf, cosf, tanf, asinf,
+acosf, atanf, logf, expf, atan2f and cbrtf.
+
+2.20 Added exp2, exp10, expm1, log10, log1p, and cbrt.
+
+2.10 asin() and acos() are back. Added ilogb() and ldexp(). Added
+hyperbolic functions.  Eliminated dependency on frexp, ldexp, fabs,
+isnan and isinf.
+
+2.00 All of the algorithms have been updated. Both accuracy and speed
+are improved since version 1.10. Denormal number handling is also
+improved.
+
+1.10 AVX support is added. Accuracy tester is added.
+
+1.00 Initial release
diff --git a/java/IUT.java b/java/IUT.java
new file mode 100644
index 00000000..adbfefa2
--- /dev/null
+++ b/java/IUT.java
@@ -0,0 +1,296 @@
+import java.io.*;
+
+import org.naokishibata.sleef.*;
+
+public class IUT {
+    static long hexToLong(String s) { // decodes bare hex digits (either case) into a raw 64-bit pattern
+	long ret = 0;
+	for(int i=0;i<s.length();i++) {
+	    int d = Character.digit(s.charAt(i), 16); // -1 when the character is not a hex digit
+	    if (d < 0) throw new NumberFormatException("Invalid hex string: \"" + s + "\"");
+	    ret = (ret << 4) | d; // low nibble is empty after the shift, so OR equals the former add
+	}
+	return ret;
+    }
+
+    static String longToHex(long l) {
+	// Collect the nibbles of l least-significant first, then reverse them;
+	// the do/while form yields "0" for zero without a separate check.
+	StringBuilder digits = new StringBuilder();
+	do {
+	    digits.append(Character.forDigit((int)(l & 0xf), 16));
+	    l >>>= 4; // unsigned shift, so values with the top bit set still reach zero
+	} while(l != 0);
+	return digits.reverse().toString();
+    }
+
+    public static void main(String[] args) throws Exception { // command loop: one "<name> <hex args>" request per stdin line
+	LineNumberReader lnr = new LineNumberReader(new InputStreamReader(System.in));
+
+	for(;;) {
+	    String s = lnr.readLine();
+	    if (s == null) break; // end of input
+
+	    if (s.startsWith("atan2 ")) {
+		String[] a = s.split(" "); // a[0] is the command name, a[1..] the hex-encoded argument bits
+		long y = hexToLong(a[1]);
+		long x = hexToLong(a[2]);
+		double d = FastMath.atan2(Double.longBitsToDouble(y), Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d))); // reply is the result's raw bits in hex
+	    } else if (s.startsWith("pow ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		long y = hexToLong(a[2]);
+		double d = FastMath.pow(Double.longBitsToDouble(x), Double.longBitsToDouble(y));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("sincos ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		FastMath.double2 d2 = FastMath.sincos(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d2.x)) + " " + longToHex(Double.doubleToRawLongBits(d2.y))); // two values: d2.x then d2.y
+	    } else if (s.startsWith("sin ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.sin(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("cos ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.cos(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("tan ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.tan(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("asin ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.asin(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("acos ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.acos(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("atan ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.atan(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("log ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.log(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("exp ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.exp(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("sinh ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.sinh(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("cosh ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.cosh(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("tanh ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.tanh(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("asinh ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.asinh(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("acosh ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.acosh(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("atanh ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.atanh(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("sqrt ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.sqrt(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("cbrt ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.cbrt(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("exp2 ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.exp2(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("exp10 ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.exp10(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("expm1 ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.expm1(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("log10 ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.log10(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("log1p ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		double d = FastMath.log1p(Double.longBitsToDouble(x));
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("ldexp ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]), y = hexToLong(a[2]);
+		double d = FastMath.ldexp(Double.longBitsToDouble(x), (int)Double.longBitsToDouble(y)); // second operand is decoded as a double, then truncated to int
+		System.out.println(longToHex(Double.doubleToRawLongBits(d)));
+	    } else if (s.startsWith("sinf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.sinf(Float.intBitsToFloat((int)x)); // float commands keep only the low 32 bits of the parsed value
+		System.out.println(longToHex(Float.floatToRawIntBits(d))); // NOTE(review): the int is widened to long, so a set sign bit prints with a leading ffffffff
+	    } else if (s.startsWith("cosf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.cosf(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("sincosf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		FastMath.float2 d2 = FastMath.sincosf(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d2.x)) + " " + longToHex(Float.floatToRawIntBits(d2.y)));
+	    } else if (s.startsWith("tanf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.tanf(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("asinf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.asinf(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("acosf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.acosf(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("atanf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.atanf(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("logf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.logf(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("expf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.expf(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("cbrtf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.cbrtf(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("atan2f ")) {
+		String[] a = s.split(" ");
+		long y = hexToLong(a[1]);
+		long x = hexToLong(a[2]);
+		float d = FastMath.atan2f(Float.intBitsToFloat((int)y), Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("ldexpf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		long y = hexToLong(a[2]);
+		float d = FastMath.ldexpf(Float.intBitsToFloat((int)x), (int)Float.intBitsToFloat((int)y)); // second operand is decoded as a float, then truncated to int
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("powf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		long y = hexToLong(a[2]);
+		float d = FastMath.powf(Float.intBitsToFloat((int)x), Float.intBitsToFloat((int)y));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("sinhf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.sinhf(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("coshf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.coshf(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("tanhf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.tanhf(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("asinhf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.asinhf(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("acoshf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.acoshf(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("atanhf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.atanhf(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("exp2f ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.exp2f(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("exp10f ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.exp10f(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("expm1f ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.expm1f(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("log10f ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.log10f(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("log1pf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = FastMath.log1pf(Float.intBitsToFloat((int)x));
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else if (s.startsWith("sqrtf ")) {
+		String[] a = s.split(" ");
+		long x = hexToLong(a[1]);
+		float d = (float)Math.sqrt(Float.intBitsToFloat((int)x)); // sqrtf goes through Math.sqrt rather than a FastMath method
+		System.out.println(longToHex(Float.floatToRawIntBits(d)));
+	    } else {
+		break; // an unrecognised command ends the loop
+	    }
+
+	    System.out.flush(); // flush after every reply
+	}
+    }
+}
diff --git a/java/Makefile b/java/Makefile
new file mode 100644
index 00000000..81a1c344
--- /dev/null
+++ b/java/Makefile
@@ -0,0 +1,21 @@
+IUT.class : IUT.java org/naokishibata/sleef/FastMath.java # compile the stdin/stdout test driver
+	javac IUT.java
+
+doc : org/naokishibata/sleef/FastMath.java # generate API documentation into ./javadoc
+	javadoc -d ./javadoc -subpackages org.naokishibata
+
+../tester/tester : # built by the Makefile in ../tester
+	cd ../tester && $(MAKE) tester
+
+../tester/testersp : # built by the Makefile in ../tester
+	cd ../tester && $(MAKE) testersp
+
+test : IUT.class ../tester/tester ../tester/testersp # run both testers with "java -ea IUT" as the command under test
+	../tester/tester java -ea IUT
+	../tester/testersp java -ea IUT
+
+clean : # remove backups, class files and generated javadoc
+	rm -f *~ IUT.class
+	rm -rf javadoc
+	find org -name "*.class" -exec rm {} ";"
+	find org -name "*~" -exec rm {} ";"
diff --git a/java/org/naokishibata/examples/FastMathTest.java b/java/org/naokishibata/examples/FastMathTest.java
new file mode 100644
index 00000000..d8840161
--- /dev/null
+++ b/java/org/naokishibata/examples/FastMathTest.java
@@ -0,0 +1,2036 @@
+package org.naokishibata.examples;
+
+import static org.naokishibata.sleef.FastMath.*;
+
+/** A class to perform correctness and speed tests for FastMath class
+ *
+ * @author Naoki Shibata
+ */
+public class FastMathTest {
+    static boolean isnan(double d) { return Double.isNaN(d); } // true only when d is NaN
+
+    static boolean cmpDenorm(double x, double y) {
+	// True when x and y are both NaN, the same infinity, or both finite (finite values are not compared).
+	boolean xFinite = !isnan(x) && !Double.isInfinite(x);
+	boolean yFinite = !isnan(y) && !Double.isInfinite(y);
+	if (xFinite || yFinite) return xFinite && yFinite;
+	return (isnan(x) && isnan(y)) || x == y; // only NaN and infinities reach this line
+    }
+
+    /** Perform correctness and speed tests. The accuracy is checked
+     * by comparing results with the standard math library. Note that
+     * calculation by the standard math library also has error, and
+     * the reported error is basically 1 + the calculation error by
+     * the FastMath methods.
+     */
+    public static void main(String[] args) throws Exception {
+	System.out.println();
+
+	//
+
+	System.out.println("Denormal test atan2(y, x)");
+	System.out.println();
+
+	System.out.print("If y is +0 and x is -0, +pi is returned ... ");
+	System.out.println((atan2(+0.0, -0.0) == Math.PI) ? "OK" : "NG");
+	//System.out.println(atan2(+0.0, -0.0));
+
+	System.out.print("If y is -0 and x is -0, -pi is returned ... ");
+	System.out.println((atan2(-0.0, -0.0) == -Math.PI) ? "OK" : "NG");
+	//System.out.println(atan2(-0.0, -0.0));
+
+	System.out.print("If y is +0 and x is +0, +0 is returned ... ");
+	System.out.println(isPlusZero(atan2(+0.0, +0.0)) ? "OK" : "NG");
+	//System.out.println(atan2(+0.0, +0.0));
+
+	System.out.print("If y is -0 and x is +0, -0 is returned ... ");
+	System.out.println(isMinusZero(atan2(-0.0, +0.0)) ? "OK" : "NG");
+	//System.out.println(atan2(-0.0, +0.0));
+
+	System.out.print("If y is positive infinity and x is negative infinity, +3*pi/4 is returned ... ");
+	System.out.println((atan2(Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY) == 3*Math.PI/4) ? "OK" : "NG");
+	//System.out.println(atan2(Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY));
+
+	System.out.print("If y is negative infinity and x is negative infinity, -3*pi/4 is returned ... ");
+	System.out.println((atan2(Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY) == -3*Math.PI/4) ? "OK" : "NG");
+	//System.out.println(atan2(Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY));
+
+	System.out.print("If y is positive infinity and x is positive infinity, +pi/4 is returned ... ");
+	System.out.println((atan2(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY) == Math.PI/4) ? "OK" : "NG");
+	//System.out.println(atan2(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY));
+
+	System.out.print("If y is negative infinity and x is positive infinity, -pi/4 is returned ... ");
+	System.out.println((atan2(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY) == -Math.PI/4) ? "OK" : "NG");
+	//System.out.println(atan2(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY));
+
+	{
+	    System.out.print("If y is +0 and x is less than 0, +pi is returned ... ");
+
+	    double[] ya = { +0.0 };
+	    double[] xa = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (atan2(ya[j], xa[i]) != Math.PI) {
+			System.out.print("[atan2(" + ya[j] + ", " + xa[i] + ") = " + atan2(ya[j], xa[i]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If y is -0 and x is less than 0, -pi is returned ... ");
+
+
+	    double[] ya = { -0.0 };
+	    double[] xa = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (atan2(ya[j], xa[i]) != -Math.PI) {
+			System.out.print("[atan2(" + ya[j] + ", " + xa[i] + ") = " + atan2(ya[j], xa[i]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If y is less than 0 and x is 0, -pi/2 is returned ... ");
+
+	    double[] ya = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+	    double[] xa = { +0.0, -0.0 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (atan2(ya[j], xa[i]) != -Math.PI/2) {
+			System.out.print("[atan2(" + ya[j] + ", " + xa[i] + ") = " + atan2(ya[j], xa[i]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If y is greater than 0 and x is 0, pi/2 is returned ... ");
+
+
+	    double[] ya = { 100000.5, 100000, 3, 2.5, 2, 1.5, 1.0, 0.5 };
+	    double[] xa = { +0.0, -0.0 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (atan2(ya[j], xa[i]) != Math.PI/2) {
+			System.out.print("[atan2(" + ya[j] + ", " + xa[i] + ") = " + atan2(ya[j], xa[i]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If y is greater than 0 and x is -0, pi/2 is returned ... ");
+
+	    double[] ya = { 100000.5, 100000, 3, 2.5, 2, 1.5, 1.0, 0.5 };
+	    double[] xa = { -0.0 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (atan2(ya[j], xa[i]) != Math.PI/2) {
+			System.out.print("[atan2(" + ya[j] + ", " + xa[i] + ") = " + atan2(ya[j], xa[i]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If y is positive infinity, and x is finite, pi/2 is returned ... ");
+
+	    double[] ya = { Double.POSITIVE_INFINITY };
+	    double[] xa = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (atan2(ya[j], xa[i]) != Math.PI/2) {
+			System.out.print("[atan2(" + ya[j] + ", " + xa[i] + ") = " + atan2(ya[j], xa[i]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If y is negative infinity, and x is finite, -pi/2 is returned ... ");
+
+	    double[] ya = { Double.NEGATIVE_INFINITY };
+	    double[] xa = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (atan2(ya[j], xa[i]) != -Math.PI/2) {
+			System.out.print("[atan2(" + ya[j] + ", " + xa[i] + ") = " + atan2(ya[j], xa[i]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If y is a finite value greater than 0, and x is negative infinity, +pi is returned ... ");
+
+	    double[] ya = { 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+	    double[] xa = { Double.NEGATIVE_INFINITY };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (atan2(ya[j], xa[i]) != Math.PI) {
+			System.out.print("[atan2(" + ya[j] + ", " + xa[i] + ") = " + atan2(ya[j], xa[i]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If y is a finite value less than 0, and x is negative infinity, -pi is returned ... ");
+
+	    double[] ya = { -0.5, -1.5, -2.0, -2.5, -3.0, -100000, -100000.5 };
+	    double[] xa = { Double.NEGATIVE_INFINITY };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (atan2(ya[j], xa[i]) != -Math.PI) {
+			System.out.print("[atan2(" + ya[j] + ", " + xa[i] + ") = " + atan2(ya[j], xa[i]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If y is a finite value greater than 0, and x is positive infinity, +0 is returned ... ");
+
+	    double[] ya = { 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+	    double[] xa = { Double.POSITIVE_INFINITY };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (!isPlusZero(atan2(ya[j], xa[i]))) {
+			System.out.print("[atan2(" + ya[j] + ", " + xa[i] + ") = " + atan2(ya[j], xa[i]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If y is a finite value less than 0, and x is positive infinity, -0 is returned ... ");
+
+	    double[] ya = { -0.5, -1.5, -2.0, -2.5, -3.0, -100000, -100000.5 };
+	    double[] xa = { Double.POSITIVE_INFINITY };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (!isMinusZero(atan2(ya[j], xa[i]))) {
+			System.out.print("[atan2(" + ya[j] + ", " + xa[i] + ") = " + atan2(ya[j], xa[i]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If x is NaN, a NaN is returned ... ");
+
+	    double[] ya = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5, Double.NaN };
+	    double[] xa = { Double.NaN };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (!Double.isNaN(atan2(ya[j], xa[i]))) {
+			System.out.print("[atan2(" + ya[j] + ", " + xa[i] + ") = " + atan2(ya[j], xa[i]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If y is a NaN, the result is a NaN ... ");
+
+	    double[] ya = { Double.NaN };
+	    double[] xa = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5, Double.NaN };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (!Double.isNaN(atan2(ya[j], xa[i]))) {
+			System.out.print("[atan2(" + ya[j] + ", " + xa[i] + ") = " + atan2(ya[j], xa[i]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	System.out.println();
+	System.out.println("end of atan2 denormal test");
+	System.out.println();
+
+	//
+
+	System.out.println("Denormal test pow(x, y)");
+	System.out.println();
+
+	//System.out.print("If the result overflows, a range error occurs, and the functions return HUGE_VAL with the mathematically correct sign ... ");
+	//System.out.print("If result underflows, and is not representable, a range error occurs, and 0.0 is returned ... ");
+
+	System.out.print("If x is +1 and y is a NaN, the result is 1.0 ... ");
+	System.out.println(pow(1, Double.NaN) == 1.0 ? "OK" : "NG");
+
+	System.out.print("If y is 0 and x is a NaN, the result is 1.0 ... ");
+	System.out.println(pow(Double.NaN, 0) == 1.0 ? "OK" : "NG");
+
+	System.out.print("If x is -1, and y is positive infinity, the result is 1.0 ... ");
+	System.out.println(pow(-1, Double.POSITIVE_INFINITY) == 1.0 ? "OK" : "NG");
+
+	System.out.print("If x is -1, and y is negative infinity, the result is 1.0 ... ");
+	System.out.println(pow(-1, Double.NEGATIVE_INFINITY) == 1.0 ? "OK" : "NG");
+
+	{
+	    System.out.print("If x is a finite value less than 0, and y is a finite non-integer, a NaN is returned ... ");
+
+	    double[] xa = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+	    double[] ya = { -100000.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 100000.5 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (!Double.isNaN(pow(xa[i], ya[j]))) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If x is a NaN, the result is a NaN ... ");
+
+	    double[] xa = { Double.NaN };
+	    double[] ya = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (!Double.isNaN(pow(xa[i], ya[j]))) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If y is a NaN, the result is a NaN ... ");
+
+	    double[] xa = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+	    double[] ya = { Double.NaN };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (!Double.isNaN(pow(xa[i], ya[j]))) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If x is +0, and y is an odd integer greater than 0, the result is +0 ... ");
+
+	    double[] xa = { +0.0 };
+	    double[] ya = { 1, 3, 5, 7, 100001 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (!isPlusZero(pow(xa[i], ya[j]))) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If x is -0, and y is an odd integer greater than 0, the result is -0 ... ");
+
+	    double[] xa = { -0.0 };
+	    double[] ya = { 1, 3, 5, 7, 100001 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (!isMinusZero(pow(xa[i], ya[j]))) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If x is 0, and y greater than 0 and not an odd integer, the result is +0 ... ");
+
+	    double[] xa = { +0.0, -0.0 };
+	    double[] ya = { 0.5, 1.5, 2.0, 2.5, 4.0, 100000, 100000.5 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (!isPlusZero(pow(xa[i], ya[j]))) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If the absolute value of x is less than 1, and y is negative infinity, the result is positive infinity ... ");
+
+	    double[] xa = { -0.999, -0.5, -0.0, +0.0, +0.5, +0.999 };
+	    double[] ya = { Double.NEGATIVE_INFINITY };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (pow(xa[i], ya[j]) != Double.POSITIVE_INFINITY) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If the absolute value of x is greater than 1, and y is negative infinity, the result is +0 ... ");
+
+	    double[] xa = { -100000.5, -100000, -3, -2.5, -2, -1.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+	    double[] ya = { Double.NEGATIVE_INFINITY };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (!isPlusZero(pow(xa[i], ya[j]))) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If the absolute value of x is less than 1, and y is positive infinity, the result is +0 ... ");
+
+	    double[] xa = { -0.999, -0.5, -0.0, +0.0, +0.5, +0.999 };
+	    double[] ya = { Double.POSITIVE_INFINITY };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (!isPlusZero(pow(xa[i], ya[j]))) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If the absolute value of x is greater than 1, and y is positive infinity, the result is positive infinity ... ");
+
+	    double[] xa = { -100000.5, -100000, -3, -2.5, -2, -1.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+	    double[] ya = { Double.POSITIVE_INFINITY };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (pow(xa[i], ya[j]) != Double.POSITIVE_INFINITY) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If x is negative infinity, and y is an odd integer less than 0, the result is -0 ... ");
+
+	    double[] xa = { Double.NEGATIVE_INFINITY };
+	    double[] ya = { -100001, -5, -3, -1 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (!isMinusZero(pow(xa[i], ya[j]))) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If x is negative infinity, and y less than 0 and not an odd integer, the result is +0 ... ");
+
+	    double[] xa = { Double.NEGATIVE_INFINITY };
+	    double[] ya = { -100000.5, -100000, -4, -2.5, -2, -1.5, -0.5 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (!isPlusZero(pow(xa[i], ya[j]))) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If x is negative infinity, and y is an odd integer greater than 0, the result is negative infinity ... ");
+
+	    double[] xa = { Double.NEGATIVE_INFINITY };
+	    double[] ya = { 1, 3, 5, 7, 100001 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (pow(xa[i], ya[j]) != Double.NEGATIVE_INFINITY) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If x is negative infinity, and y greater than 0 and not an odd integer, the result is positive infinity ... ");
+
+	    double[] xa = { Double.NEGATIVE_INFINITY };
+	    double[] ya = { 0.5, 1.5, 2, 2.5, 3.5, 4, 100000, 100000.5 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (pow(xa[i], ya[j]) != Double.POSITIVE_INFINITY) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If x is positive infinity, and y less than 0, the result is +0 ... ");
+
+	    double[] xa = { Double.POSITIVE_INFINITY };
+	    double[] ya = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (!isPlusZero(pow(xa[i], ya[j]))) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If x is positive infinity, and y greater than 0, the result is positive infinity ... ");
+
+	    double[] xa = { Double.POSITIVE_INFINITY };
+	    double[] ya = { 0.5, 1, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (pow(xa[i], ya[j]) != Double.POSITIVE_INFINITY) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If x is +0, and y is an odd integer less than 0, +HUGE_VAL is returned ... ");
+
+	    double[] xa = { +0.0 };
+	    double[] ya = { -100001, -5, -3, -1 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (pow(xa[i], ya[j]) != Double.POSITIVE_INFINITY) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If x is -0, and y is an odd integer less than 0, -HUGE_VAL is returned ... ");
+
+	    double[] xa = { -0.0 };
+	    double[] ya = { -100001, -5, -3, -1 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (pow(xa[i], ya[j]) != Double.NEGATIVE_INFINITY) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If x is 0, and y is less than 0 and not an odd integer, +HUGE_VAL is returned ... ");
+
+	    double[] xa = { +0.0, -0.0 };
+	    double[] ya = { -100000.5, -100000, -4, -2.5, -2, -1.5, -0.5 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (pow(xa[i], ya[j]) != Double.POSITIVE_INFINITY) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("If the result overflows, the functions return HUGE_VAL with the mathematically correct sign ... ");
+
+	    double[] xa = { 1000, -1000 };
+	    double[] ya = { 1000, 1000.5, 1001 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		for(int j=0;j<ya.length && success;j++) {
+		    if (!cmpDenorm(pow(xa[i], ya[j]), Math.pow(xa[i], ya[j]))) {
+			System.out.print("[x = " + xa[i] + ", y = " + ya[j] + ", pow(x,y) = " + pow(xa[i], ya[j]) + "] ");
+			success = false;
+			break;
+		    }
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	System.out.println();
+	System.out.println("End of pow denormal test");
+	System.out.println();
+	
+	//
+
+	{
+	    System.out.print("sin denormal test ... ");
+
+	    double[] xa = { Double.NaN, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		if (!cmpDenorm(sin(xa[i]), Math.sin(xa[i]))) {
+		    System.out.print("[x = " + xa[i] + ", func(x) = " + sin(xa[i]) + "] ");
+		    success = false;
+		    break;
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("sin in sincos denormal test ... ");
+
+	    double[] xa = { Double.NaN, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		double2 q = sincos(xa[i]);
+		if (!cmpDenorm(q.x, Math.sin(xa[i]))) {
+		    System.out.print("[x = " + xa[i] + ", func(x) = " + q.x + "] ");
+		    success = false;
+		    break;
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("cos denormal test ... ");
+
+	    double[] xa = { Double.NaN, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		if (!cmpDenorm(cos(xa[i]), Math.cos(xa[i]))) {
+		    System.out.print("[x = " + xa[i] + ", func(x) = " + cos(xa[i]) + "] ");
+		    success = false;
+		    break;
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("cos in sincos denormal test ... ");
+
+	    double[] xa = { Double.NaN, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		double2 q = sincos(xa[i]);
+		if (!cmpDenorm(q.y, Math.cos(xa[i]))) {
+		    System.out.print("[x = " + xa[i] + ", func(x) = " + q.y + "] ");
+		    success = false;
+		    break;
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("tan denormal test ... ");
+
+	    double[] xa = { Double.NaN, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, Math.PI/2, -Math.PI/2 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		if (!cmpDenorm(tan(xa[i]), Math.tan(xa[i]))) {
+		    System.out.print("[x = " + xa[i] + ", func(x) = " + tan(xa[i]) + "] ");
+		    success = false;
+		    break;
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("asin denormal test ... ");
+
+	    double[] xa = { Double.NaN, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, 2, -2 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		if (!cmpDenorm(asin(xa[i]), Math.asin(xa[i]))) {
+		    System.out.print("[x = " + xa[i] + ", func(x) = " + asin(xa[i]) + "] ");
+		    success = false;
+		    break;
+		}
+		//System.out.print("[x = " + xa[i] + ", func(x) = " + asin(xa[i]) + ", correct = " + Math.asin(xa[i]) + "] ");
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("acos denormal test ... ");
+
+	    double[] xa = { Double.NaN, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, 2, -2 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		if (!cmpDenorm(acos(xa[i]), Math.acos(xa[i]))) {
+		    System.out.print("[x = " + xa[i] + ", func(x) = " + acos(xa[i]) + "] ");
+		    success = false;
+		    break;
+		}
+		//System.out.print("[x = " + xa[i] + ", func(x) = " + acos(xa[i]) + ", correct = " + Math.acos(xa[i]) + "] ");
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("atan denormal test ... ");
+
+	    double[] xa = { Double.NaN, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		if (!cmpDenorm(atan(xa[i]), Math.atan(xa[i]))) {
+		    System.out.print("[x = " + xa[i] + ", func(x) = " + atan(xa[i]) + "] ");
+		    success = false;
+		    break;
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("log denormal test ... ");
+
+	    double[] xa = { Double.NaN, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, 0, -1 };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		if (!cmpDenorm(log(xa[i]), Math.log(xa[i]))) {
+		    System.out.print("[x = " + xa[i] + ", func(x) = " + log(xa[i]) + "] ");
+		    success = false;
+		    break;
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	{
+	    System.out.print("exp denormal test ... ");
+
+	    double[] xa = { Double.NaN, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY };
+
+	    boolean success = true;
+	    for(int i=0;i<xa.length && success;i++) {
+		if (!cmpDenorm(exp(xa[i]), Math.exp(xa[i]))) {
+		    System.out.print("[x = " + xa[i] + ", func(x) = " + exp(xa[i]) + "] ");
+		    success = false;
+		    break;
+		}
+	    }
+
+	    System.out.println(success ? "OK" : "NG");
+	}
+
+	//
+
+	System.out.println();
+	System.out.println("Accuracy test (max error in ulp)");
+	System.out.println();
+
+	double max;
+
+	max = 0;
+
+	for(double d = -10;d < 10;d += 0.000001) {
+	    double q = sin(d);
+	    double c = Math.sin(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	for(double d = -10000;d < 10000;d += 0.001) {
+	    double q = sin(d);
+	    double c = Math.sin(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("sin : " + max);
+
+	max = 0;
+
+	for(double d = -10;d < 10;d += 0.000001) {
+	    double q = cos(d);
+	    double c = Math.cos(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	for(double d = -10000;d < 10000;d += 0.001) {
+	    double q = cos(d);
+	    double c = Math.cos(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("cos : " + max);
+
+	max = 0;
+
+	for(double d = -10;d < 10;d += 0.000001) {
+	    double2 q = sincos(d);
+	    double c = Math.sin(d);
+	    double u = Math.abs((q.x - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	for(double d = -10000;d < 10000;d += 0.001) {
+	    double2 q = sincos(d);
+	    double c = Math.sin(d);
+	    double u = Math.abs((q.x - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("sin in sincos : " + max);
+
+	max = 0;
+
+	for(double d = -10;d < 10;d += 0.000001) {
+	    double2 q = sincos(d);
+	    double c = Math.cos(d);
+	    double u = Math.abs((q.y - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	for(double d = -10000;d < 10000;d += 0.001) {
+	    double2 q = sincos(d);
+	    double c = Math.cos(d);
+	    double u = Math.abs((q.y - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("cos in sincos : " + max);
+
+	max = 0;
+
+	for(double d = -10;d < 10;d += 0.000001) {
+	    double q = tan(d);
+	    double c = Math.tan(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	for(double d = -10000;d < 10000;d += 0.001) {
+	    double q = tan(d);
+	    double c = Math.tan(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("tan : " + max);
+
+	max = 0;
+
+	for(double d = -1;d < 1;d += 0.0000001) {
+	    double q = asin(d);
+	    double c = Math.asin(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("asin : " + max);
+
+	max = 0;
+
+	for(double d = -1;d < 1;d += 0.0000001) {
+	    double q = acos(d);
+	    double c = Math.acos(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("acos : " + max);
+
+	max = 0;
+
+	for(double d = -10;d < 10;d += 0.000001) {
+	    double q = atan(d);
+	    double c = Math.atan(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	for(double d = -10000;d < 10000;d += 0.001) {
+	    double q = atan(d);
+	    double c = Math.atan(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("atan : " + max);
+
+	max = 0;
+
+	for(double d = 0.001;d < 10;d += 0.000001) {
+	    double q = log(d);
+	    double c = Math.log(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	for(double d = 0.001;d < 100000;d += 0.01) {
+	    double q = log(d);
+	    double c = Math.log(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("log : " + max);
+
+	max = 0;
+
+	for(double d = -10;d < 10;d += 0.000001) {
+	    double q = exp(d);
+	    double c = Math.exp(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	for(double d = -700;d < 700;d += 0.0001) {
+	    double q = exp(d);
+	    double c = Math.exp(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("exp : " + max);
+
+	//
+
+	max = 0;
+
+	for(double y = -10;y < 10;y += 0.01) {
+	    for(double x = -10;x < 10;x += 0.01) {
+		double q = atan2(y, x);
+		double c = Math.atan2(y, x);
+		double u = Math.abs((q - c) / Math.ulp(c));
+		max = max(max, u);
+	    }
+	}
+
+	for(double y = -1000;y < 1000;y += 1.01) {
+	    for(double x = -1000;x < 1000;x += 1.01) {
+		double q = atan2(y, x);
+		double c = Math.atan2(y, x);
+		double u = Math.abs((q - c) / Math.ulp(c));
+		max = max(max, u);
+	    }
+	}
+
+	System.out.println("atan2 : " + max);
+
+	max = 0;
+
+	for(double y = 0;y < 100;y += 0.05) {
+	    for(double x = -100;x < 100;x += 0.05) {
+		double q = pow(x, y);
+		double c = Math.pow(x, y);
+		double u = Math.abs((q - c) / Math.ulp(c));
+		max = max(max, u);
+	    }
+	}
+
+	System.out.println("pow : " + max);
+
+	//
+
+	max = 0;
+
+	for(double d = -700;d < 700;d += 0.00001) {
+	    double q = sinh(d);
+	    double c = Math.sinh(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("sinh : " + max);
+
+	//
+
+	max = 0;
+
+	for(double d = -700;d < 700;d += 0.00001) {
+	    double q = cosh(d);
+	    double c = Math.cosh(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("cosh : " + max);
+
+	//
+
+	max = 0;
+
+	for(double d = -700;d < 700;d += 0.00001) {
+	    double q = tanh(d);
+	    double c = Math.tanh(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("tanh : " + max);
+
+	//
+
+	max = 0;
+
+	for(double d = 0;d < 10000;d += 0.001) {
+	    double q = sqrt(d);
+	    double c = Math.sqrt(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("sqrt : " + max);
+
+	//
+
+	max = 0;
+
+	for(double d = -10000;d < 10000;d += 0.001) {
+	    double q = cbrt(d);
+	    double c = Math.cbrt(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("cbrt : " + max);
+
+	/*
+	
+	max = 0;
+
+	for(double d = -700;d < 700;d += 0.0002) {
+	    double q = asinh(d);
+	    double c = Math.asinh(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("asinh : " + max);
+
+	//
+
+	max = 0;
+
+	for(double d = 1;d < 700;d += 0.0001) {
+	    double q = acosh(d);
+	    double c = Math.acosh(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("acosh : " + max);
+
+	//
+
+	max = 0;
+
+	for(double d = -700;d < 700;d += 0.0002) {
+	    double q = atanh(d);
+	    double c = Math.atanh(d);
+	    double u = Math.abs((q - c) / Math.ulp(c));
+	    max = max(max, u);
+	}
+
+	System.out.println("atanh : " + max);
+
+	*/
+
+	System.out.println();
+	System.out.println("Speed test");
+	System.out.println();
+
+	double total = 0;
+
+	{
+	    // Benchmark: ten Math.sin calls per step of d; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		sum += Math.sin(d + 0);
+		sum += Math.sin(d + 1);
+		sum += Math.sin(d + 2);
+		sum += Math.sin(d + 3);
+		sum += Math.sin(d + 4);
+		sum += Math.sin(d + 5);
+		sum += Math.sin(d + 6);
+		sum += Math.sin(d + 7);
+		sum += Math.sin(d + 8);
+		sum += Math.sin(d + 9);
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("sin standard library ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten sin calls per step of d; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		sum += sin(d + 0);
+		sum += sin(d + 1);
+		sum += sin(d + 2);
+		sum += sin(d + 3);
+		sum += sin(d + 4);
+		sum += sin(d + 5);
+		sum += sin(d + 6);
+		sum += sin(d + 7);
+		sum += sin(d + 8);
+		sum += sin(d + 9);
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("sin sleef ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten Math.cos calls per step of d; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		sum += Math.cos(d + 0);
+		sum += Math.cos(d + 1);
+		sum += Math.cos(d + 2);
+		sum += Math.cos(d + 3);
+		sum += Math.cos(d + 4);
+		sum += Math.cos(d + 5);
+		sum += Math.cos(d + 6);
+		sum += Math.cos(d + 7);
+		sum += Math.cos(d + 8);
+		sum += Math.cos(d + 9);
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("cos standard library ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten cos calls per step of d; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		sum += cos(d + 0);
+		sum += cos(d + 1);
+		sum += cos(d + 2);
+		sum += cos(d + 3);
+		sum += cos(d + 4);
+		sum += cos(d + 5);
+		sum += cos(d + 6);
+		sum += cos(d + 7);
+		sum += cos(d + 8);
+		sum += cos(d + 9);
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("cos sleef ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten Math.tan calls per step of d; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		sum += Math.tan(d + 0);
+		sum += Math.tan(d + 1);
+		sum += Math.tan(d + 2);
+		sum += Math.tan(d + 3);
+		sum += Math.tan(d + 4);
+		sum += Math.tan(d + 5);
+		sum += Math.tan(d + 6);
+		sum += Math.tan(d + 7);
+		sum += Math.tan(d + 8);
+		sum += Math.tan(d + 9);
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("tan standard library ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten tan calls per step of d; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		sum += tan(d + 0);
+		sum += tan(d + 1);
+		sum += tan(d + 2);
+		sum += tan(d + 3);
+		sum += tan(d + 4);
+		sum += tan(d + 5);
+		sum += tan(d + 6);
+		sum += tan(d + 7);
+		sum += tan(d + 8);
+		sum += tan(d + 9);
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("tan sleef ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten Math.sin + Math.cos pairs per step of d; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		sum += Math.sin(d + 0); sum += Math.cos(d + 0);
+		sum += Math.sin(d + 1); sum += Math.cos(d + 1);
+		sum += Math.sin(d + 2); sum += Math.cos(d + 2);
+		sum += Math.sin(d + 3); sum += Math.cos(d + 3);
+		sum += Math.sin(d + 4); sum += Math.cos(d + 4);
+		sum += Math.sin(d + 5); sum += Math.cos(d + 5);
+		sum += Math.sin(d + 6); sum += Math.cos(d + 6);
+		sum += Math.sin(d + 7); sum += Math.cos(d + 7);
+		sum += Math.sin(d + 8); sum += Math.cos(d + 8);
+		sum += Math.sin(d + 9); sum += Math.cos(d + 9);
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("sin + cos, standard library ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten sincos calls per step of d, summing both components; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		double2 r;
+
+		r = sincos(d + 0); sum += r.x + r.y;
+		r = sincos(d + 1); sum += r.x + r.y;
+		r = sincos(d + 2); sum += r.x + r.y;
+		r = sincos(d + 3); sum += r.x + r.y;
+		r = sincos(d + 4); sum += r.x + r.y;
+		r = sincos(d + 5); sum += r.x + r.y;
+		r = sincos(d + 6); sum += r.x + r.y;
+		r = sincos(d + 7); sum += r.x + r.y;
+		r = sincos(d + 8); sum += r.x + r.y;
+		r = sincos(d + 9); sum += r.x + r.y;
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("sincos sleef ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten Math.asin calls per step of d; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = -1;d < 0;d += 0.0000005) {
+		sum += Math.asin(d + 0.0);
+		sum += Math.asin(d + 0.1);
+		sum += Math.asin(d + 0.2);
+		sum += Math.asin(d + 0.3);
+		sum += Math.asin(d + 0.4);
+		sum += Math.asin(d + 0.5);
+		sum += Math.asin(d + 0.6);
+		sum += Math.asin(d + 0.7);
+		sum += Math.asin(d + 0.8);
+		sum += Math.asin(d + 0.9);
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("asin standard library ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten asin calls per step of d; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = -1;d < 0;d += 0.0000005) {
+		sum += asin(d + 0.0);
+		sum += asin(d + 0.1);
+		sum += asin(d + 0.2);
+		sum += asin(d + 0.3);
+		sum += asin(d + 0.4);
+		sum += asin(d + 0.5);
+		sum += asin(d + 0.6);
+		sum += asin(d + 0.7);
+		sum += asin(d + 0.8);
+		sum += asin(d + 0.9);
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("asin sleef ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten Math.acos calls per step of d; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = -1;d < 0;d += 0.0000005) {
+		sum += Math.acos(d + 0.0);
+		sum += Math.acos(d + 0.1);
+		sum += Math.acos(d + 0.2);
+		sum += Math.acos(d + 0.3);
+		sum += Math.acos(d + 0.4);
+		sum += Math.acos(d + 0.5);
+		sum += Math.acos(d + 0.6);
+		sum += Math.acos(d + 0.7);
+		sum += Math.acos(d + 0.8);
+		sum += Math.acos(d + 0.9);
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("acos standard library ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten acos calls per step of d; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = -1;d < 0;d += 0.0000005) {
+		sum += acos(d + 0.0);
+		sum += acos(d + 0.1);
+		sum += acos(d + 0.2);
+		sum += acos(d + 0.3);
+		sum += acos(d + 0.4);
+		sum += acos(d + 0.5);
+		sum += acos(d + 0.6);
+		sum += acos(d + 0.7);
+		sum += acos(d + 0.8);
+		sum += acos(d + 0.9);
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("acos sleef ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten Math.atan calls per step of d; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		sum += Math.atan(d + 0);
+		sum += Math.atan(d + 1);
+		sum += Math.atan(d + 2);
+		sum += Math.atan(d + 3);
+		sum += Math.atan(d + 4);
+		sum += Math.atan(d + 5);
+		sum += Math.atan(d + 6);
+		sum += Math.atan(d + 7);
+		sum += Math.atan(d + 8);
+		sum += Math.atan(d + 9);
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("atan standard library ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten atan calls per step of d; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		sum += atan(d + 0);
+		sum += atan(d + 1);
+		sum += atan(d + 2);
+		sum += atan(d + 3);
+		sum += atan(d + 4);
+		sum += atan(d + 5);
+		sum += atan(d + 6);
+		sum += atan(d + 7);
+		sum += atan(d + 8);
+		sum += atan(d + 9);
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("atan sleef ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten Math.atan2 calls per (y, x) grid point; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double y = -10;y < 10;y += 0.01) {
+		for(double x = -10;x < 10;x += 0.01) {
+		    sum += Math.atan2(y + 0, x);
+		    sum += Math.atan2(y + 1, x);
+		    sum += Math.atan2(y + 2, x);
+		    sum += Math.atan2(y + 3, x);
+		    sum += Math.atan2(y + 4, x);
+		    sum += Math.atan2(y + 5, x);
+		    sum += Math.atan2(y + 6, x);
+		    sum += Math.atan2(y + 7, x);
+		    sum += Math.atan2(y + 8, x);
+		    sum += Math.atan2(y + 9, x);
+		}
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("atan2 standard library ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten atan2 calls per (y, x) grid point; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double y = -10;y < 10;y += 0.01) {
+		for(double x = -10;x < 10;x += 0.01) {
+		    sum += atan2(y + 0, x);
+		    sum += atan2(y + 1, x);
+		    sum += atan2(y + 2, x);
+		    sum += atan2(y + 3, x);
+		    sum += atan2(y + 4, x);
+		    sum += atan2(y + 5, x);
+		    sum += atan2(y + 6, x);
+		    sum += atan2(y + 7, x);
+		    sum += atan2(y + 8, x);
+		    sum += atan2(y + 9, x);
+		}
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("atan2 sleef ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten Math.log calls per step of d; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = 0.001;d < 10;d += 0.000001) {
+		sum += Math.log(d + 0);
+		sum += Math.log(d + 1);
+		sum += Math.log(d + 2);
+		sum += Math.log(d + 3);
+		sum += Math.log(d + 4);
+		sum += Math.log(d + 5);
+		sum += Math.log(d + 6);
+		sum += Math.log(d + 7);
+		sum += Math.log(d + 8);
+		sum += Math.log(d + 9);
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("log standard library ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten log calls per step of d; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = 0.001;d < 10;d += 0.000001) {
+		sum += log(d + 0);
+		sum += log(d + 1);
+		sum += log(d + 2);
+		sum += log(d + 3);
+		sum += log(d + 4);
+		sum += log(d + 5);
+		sum += log(d + 6);
+		sum += log(d + 7);
+		sum += log(d + 8);
+		sum += log(d + 9);
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("log sleef ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten Math.exp calls per step of d; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		sum += Math.exp(d + 0);
+		sum += Math.exp(d + 1);
+		sum += Math.exp(d + 2);
+		sum += Math.exp(d + 3);
+		sum += Math.exp(d + 4);
+		sum += Math.exp(d + 5);
+		sum += Math.exp(d + 6);
+		sum += Math.exp(d + 7);
+		sum += Math.exp(d + 8);
+		sum += Math.exp(d + 9);
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("exp standard library ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten exp calls per step of d; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		sum += exp(d + 0);
+		sum += exp(d + 1);
+		sum += exp(d + 2);
+		sum += exp(d + 3);
+		sum += exp(d + 4);
+		sum += exp(d + 5);
+		sum += exp(d + 6);
+		sum += exp(d + 7);
+		sum += exp(d + 8);
+		sum += exp(d + 9);
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("exp sleef ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    // Benchmark: ten Math.pow calls per (y, x) grid point; elapsed time is printed in milliseconds.
+	    long start = System.nanoTime(); // monotonic; currentTimeMillis is wall-clock and may jump
+	    double sum = 0;
+	    for(double y = 0;y < 100;y += 0.05) {
+		for(double x = 0.001;x < 100;x += 0.05) {
+		    sum += Math.pow(x + 0, y);
+		    sum += Math.pow(x + 1, y);
+		    sum += Math.pow(x + 2, y);
+		    sum += Math.pow(x + 3, y);
+		    sum += Math.pow(x + 4, y);
+		    sum += Math.pow(x + 5, y);
+		    sum += Math.pow(x + 6, y);
+		    sum += Math.pow(x + 7, y);
+		    sum += Math.pow(x + 8, y);
+		    sum += Math.pow(x + 9, y);
+		}
+	    }
+
+	    long finish = System.nanoTime();
+	    // nanoTime differences are in nanoseconds; scale so the report stays in milliseconds.
+	    System.out.println("pow standard library ... " + ((finish - start) / 1000000L));
+	    // Fold the result into the enclosing total; presumably this keeps the loop from being optimized away.
+	    total += sum;
+	}
+
+	{
+	    long start = System.currentTimeMillis();
+
+	    double sum = 0;
+	    for(double y = 0;y < 100;y += 0.05) {
+		for(double x = 0.001;x < 100;x += 0.05) {
+		    sum += pow(x + 0, y);
+		    sum += pow(x + 1, y);
+		    sum += pow(x + 2, y);
+		    sum += pow(x + 3, y);
+		    sum += pow(x + 4, y);
+		    sum += pow(x + 5, y);
+		    sum += pow(x + 6, y);
+		    sum += pow(x + 7, y);
+		    sum += pow(x + 8, y);
+		    sum += pow(x + 9, y);
+		}
+	    }
+
+	    long finish = System.currentTimeMillis();
+
+	    System.out.println("pow sleef ... " + (finish - start));
+
+	    total += sum;
+	}
+
+	{
+	    long start = System.currentTimeMillis();
+
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		sum += Math.sinh(d + 0);
+		sum += Math.sinh(d + 1);
+		sum += Math.sinh(d + 2);
+		sum += Math.sinh(d + 3);
+		sum += Math.sinh(d + 4);
+		sum += Math.sinh(d + 5);
+		sum += Math.sinh(d + 6);
+		sum += Math.sinh(d + 7);
+		sum += Math.sinh(d + 8);
+		sum += Math.sinh(d + 9);
+	    }
+
+	    long finish = System.currentTimeMillis();
+
+	    System.out.println("sinh standard library ... " + (finish - start));
+
+	    total += sum;
+	}
+
+	{
+	    long start = System.currentTimeMillis();
+
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		sum += sinh(d + 0);
+		sum += sinh(d + 1);
+		sum += sinh(d + 2);
+		sum += sinh(d + 3);
+		sum += sinh(d + 4);
+		sum += sinh(d + 5);
+		sum += sinh(d + 6);
+		sum += sinh(d + 7);
+		sum += sinh(d + 8);
+		sum += sinh(d + 9);
+	    }
+
+	    long finish = System.currentTimeMillis();
+
+	    System.out.println("sinh sleef ... " + (finish - start));
+
+	    total += sum;
+	}
+
+	{
+	    long start = System.currentTimeMillis();
+
+	    double sum = 0;
+	    for(double d = 0;d < 10;d += 0.000005) {
+		sum += Math.cosh(d + 0);
+		sum += Math.cosh(d + 1);
+		sum += Math.cosh(d + 2);
+		sum += Math.cosh(d + 3);
+		sum += Math.cosh(d + 4);
+		sum += Math.cosh(d + 5);
+		sum += Math.cosh(d + 6);
+		sum += Math.cosh(d + 7);
+		sum += Math.cosh(d + 8);
+		sum += Math.cosh(d + 9);
+	    }
+
+	    long finish = System.currentTimeMillis();
+
+	    System.out.println("cosh standard library ... " + (finish - start));
+
+	    total += sum;
+	}
+
+	{
+	    long start = System.currentTimeMillis();
+
+	    double sum = 0;
+	    for(double d = 0;d < 10;d += 0.000005) {
+		sum += cosh(d + 0);
+		sum += cosh(d + 1);
+		sum += cosh(d + 2);
+		sum += cosh(d + 3);
+		sum += cosh(d + 4);
+		sum += cosh(d + 5);
+		sum += cosh(d + 6);
+		sum += cosh(d + 7);
+		sum += cosh(d + 8);
+		sum += cosh(d + 9);
+	    }
+
+	    long finish = System.currentTimeMillis();
+
+	    System.out.println("cosh sleef ... " + (finish - start));
+
+	    total += sum;
+	}
+
+	{
+	    long start = System.currentTimeMillis();
+
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		sum += Math.tanh(d + 0);
+		sum += Math.tanh(d + 1);
+		sum += Math.tanh(d + 2);
+		sum += Math.tanh(d + 3);
+		sum += Math.tanh(d + 4);
+		sum += Math.tanh(d + 5);
+		sum += Math.tanh(d + 6);
+		sum += Math.tanh(d + 7);
+		sum += Math.tanh(d + 8);
+		sum += Math.tanh(d + 9);
+	    }
+
+	    long finish = System.currentTimeMillis();
+
+	    System.out.println("tanh standard library ... " + (finish - start));
+
+	    total += sum;
+	}
+
+	{
+	    long start = System.currentTimeMillis();
+
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		sum += tanh(d + 0);
+		sum += tanh(d + 1);
+		sum += tanh(d + 2);
+		sum += tanh(d + 3);
+		sum += tanh(d + 4);
+		sum += tanh(d + 5);
+		sum += tanh(d + 6);
+		sum += tanh(d + 7);
+		sum += tanh(d + 8);
+		sum += tanh(d + 9);
+	    }
+
+	    long finish = System.currentTimeMillis();
+
+	    System.out.println("tanh sleef ... " + (finish - start));
+
+	    total += sum;
+	}
+
+	{
+	    long start = System.currentTimeMillis();
+
+	    double sum = 0;
+	    for(double d = 0;d < 10;d += 0.000005) {
+		sum += Math.sqrt(d + 0);
+		sum += Math.sqrt(d + 1);
+		sum += Math.sqrt(d + 2);
+		sum += Math.sqrt(d + 3);
+		sum += Math.sqrt(d + 4);
+		sum += Math.sqrt(d + 5);
+		sum += Math.sqrt(d + 6);
+		sum += Math.sqrt(d + 7);
+		sum += Math.sqrt(d + 8);
+		sum += Math.sqrt(d + 9);
+	    }
+
+	    long finish = System.currentTimeMillis();
+
+	    System.out.println("sqrt standard library ... " + (finish - start));
+
+	    total += sum;
+	}
+
+	{
+	    long start = System.currentTimeMillis();
+
+	    double sum = 0;
+	    for(double d = 0;d < 10;d += 0.000005) {
+		sum += sqrt(d + 0);
+		sum += sqrt(d + 1);
+		sum += sqrt(d + 2);
+		sum += sqrt(d + 3);
+		sum += sqrt(d + 4);
+		sum += sqrt(d + 5);
+		sum += sqrt(d + 6);
+		sum += sqrt(d + 7);
+		sum += sqrt(d + 8);
+		sum += sqrt(d + 9);
+	    }
+
+	    long finish = System.currentTimeMillis();
+
+	    System.out.println("sqrt sleef ... " + (finish - start));
+
+	    total += sum;
+	}
+
+	{
+	    long start = System.currentTimeMillis();
+
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		sum += Math.cbrt(d + 0);
+		sum += Math.cbrt(d + 1);
+		sum += Math.cbrt(d + 2);
+		sum += Math.cbrt(d + 3);
+		sum += Math.cbrt(d + 4);
+		sum += Math.cbrt(d + 5);
+		sum += Math.cbrt(d + 6);
+		sum += Math.cbrt(d + 7);
+		sum += Math.cbrt(d + 8);
+		sum += Math.cbrt(d + 9);
+	    }
+
+	    long finish = System.currentTimeMillis();
+
+	    System.out.println("cbrt standard library ... " + (finish - start));
+
+	    total += sum;
+	}
+
+	{
+	    long start = System.currentTimeMillis();
+
+	    double sum = 0;
+	    for(double d = -10;d < 10;d += 0.00001) {
+		sum += cbrt(d + 0);
+		sum += cbrt(d + 1);
+		sum += cbrt(d + 2);
+		sum += cbrt(d + 3);
+		sum += cbrt(d + 4);
+		sum += cbrt(d + 5);
+		sum += cbrt(d + 6);
+		sum += cbrt(d + 7);
+		sum += cbrt(d + 8);
+		sum += cbrt(d + 9);
+	    }
+
+	    long finish = System.currentTimeMillis();
+
+	    System.out.println("cbrt sleef ... " + (finish - start));
+
+	    total += sum;
+	}
+
+	System.out.println();
+	System.out.println("A meaningless value ... " + total);
+    }
+}
diff --git a/java/org/naokishibata/sleef/FastMath.java b/java/org/naokishibata/sleef/FastMath.java
new file mode 100644
index 00000000..6937eb9f
--- /dev/null
+++ b/java/org/naokishibata/sleef/FastMath.java
@@ -0,0 +1,1877 @@
+package org.naokishibata.sleef;
+
+/**
+ * FastMath class is a Java implementation of the <a
+ * href="http://freecode.com/projects/sleef">SLEEF</a>
+ * library. Some of the methods can be used as substitutions of the
+ * corresponding methods in Math class. They have slightly less
+ * accuracy, and some methods are faster compared to those methods in
+ * Math class. Please note that the methods in the standard Math class
+ * are JNI methods, and the SLEEF library is specialized for SIMD
+ * operations.
+ */
+public class FastMath {
+    /** The base of the natural logarithms; same value as {@link Math#E}. Declared final so it cannot be reassigned. */
+    public static final double E = Math.E;
+    /** The ratio of a circle's circumference to its diameter; same value as {@link Math#PI}. Declared final so it cannot be reassigned. */
+    public static final double PI = Math.PI;
+
+    public static double abs(double a) { return Math.abs(a); }
+    public static float abs(float a) { return Math.abs(a); }
+    public static int abs(int a) { return Math.abs(a); }
+    public static long abs(long a) { return Math.abs(a); }
+
+    public static double ceil(double a) { return Math.ceil(a); }
+    public static double floor(double a) { return Math.floor(a); }
+
+    //
+
+    static double upper(double d) {
+	long l = Double.doubleToRawLongBits(d);
+	return Double.longBitsToDouble(l & 0xfffffffff8000000L);
+    }
+
+    /** Multiply-add: returns x * y + z using an ordinary multiplication followed by an ordinary addition. */
+    static double mla(double x, double y, double z) {
+	double product = x * y;
+	return product + z;
+    }
+
+    /** Returns x multiplied by +1 or -1, where the factor carries the sign of y. */
+    static double mulsign(double x, double y) {
+	double unit = Math.copySign(1, y);
+	return unit * x;
+    }
+
+    //
+
+    /**
+       Returns the absolute value of the argument
+    */
+    public static double fabs(double d) { return Math.copySign(d, 1); }
+
+    /**
+       Returns the larger value of the two arguments. The result is
+       undefined if denormal numbers are given.
+    */
+    public static double max(double x, double y) { return x > y ? x : y; }
+
+    /**
+       Checks if the argument is a NaN or not.
+    */
+    public static boolean isnan(double d) { return d != d; }
+
+    /**
+       Checks if the argument is either positive infinity or negative infinity.
+    */
+    public static boolean isinf(double d) { return fabs(d) == Double.POSITIVE_INFINITY; }
+
+    /** Tells whether the argument is positive infinity. */
+    static boolean ispinf(double d) {
+	return Double.POSITIVE_INFINITY == d;
+    }
+    /** Tells whether the argument is negative infinity. */
+    static boolean isminf(double d) {
+	return Double.NEGATIVE_INFINITY == d;
+    }
+
+    /**
+       Returns the integer value that is closest to the argument. The
+       result is undefined if a denormal number is given.
+    */
+    public static double rint(double x) { return x < 0 ? (int)(x - 0.5) : (int)(x + 0.5); }
+
+    /**
+       Returns the result of multiplying the floating-point number x
+       by 2 raised to the power q
+
+       @param x the value to scale
+       @param q the power of two to scale by
+       @return x multiplied by 2 raised to the power q
+    */
+    public static double ldexp(double x, int q) {
+	// m is 0 when q is non-negative and -1 when q is negative (arithmetic shift of the sign bit).
+	int m = q >> 31;
+	// Derive from q a coarse exponent that is a multiple of 128 ...
+	m = (((m + q) >> 9) - m) << 7;
+	// ... and keep in q what is left after removing four times that coarse exponent.
+	q = q - (m << 2);
+	// Add the exponent bias 0x3ff and clamp to the range of the 11-bit exponent field.
+	m += 0x3ff;
+	m = m < 0     ? 0     : m;
+	m = m > 0x7ff ? 0x7ff : m;
+	// u is the double whose raw bits contain only the exponent field m; it is applied four times.
+	double u = Double.longBitsToDouble(((long)m) << 52);
+	x = x * u * u * u * u;
+	// Apply the remaining part of the exponent with one more multiplication.
+	u = Double.longBitsToDouble(((long)(q + 0x3ff)) << 52);
+	return x * u;
+    }
+
+    /**
+       Builds a double directly from its bit pattern by placing
+       q + 0x3ff in the exponent field and leaving all other bits
+       zero. No range check is applied to q.
+    */
+    static double pow2i(int q) {
+	long exponentField = q + 0x3ff;
+	return Double.longBitsToDouble(exponentField << 52);
+    }
+
+    static int ilogbp1(double d) {
+	boolean m = d < 4.9090934652977266E-91;
+	d = m ? 2.037035976334486E90 * d : d;
+	int q = (int)(Double.doubleToRawLongBits(d) >> 52) & 0x7ff;
+	q = m ? q - (300 + 0x03fe) : q - 0x03fe;
+	return q;
+    }
+
+    /**
+       Returns the exponent part of the argument as a signed integer.
+       Zero yields -2147483648 and either infinity yields 2147483647.
+    */
+    public static int ilogb(double d) {
+	if (d == Double.POSITIVE_INFINITY || d == Double.NEGATIVE_INFINITY) {
+	    return 2147483647;
+	}
+	if (d == 0) {
+	    return -2147483648;
+	}
+	return ilogbp1(fabs(d)) - 1;
+    }
+
+    //
+
+    static boolean cmpDenorm(double x, double y) {
+	if (isnan(x) && isnan(y)) return true;
+	if (x == Double.POSITIVE_INFINITY && y == Double.POSITIVE_INFINITY) return true;
+	if (x == Double.NEGATIVE_INFINITY && y == Double.NEGATIVE_INFINITY) return true;
+	if (!isnan(x) && !isnan(y) && !isinf(x) && !isinf(y)) return true;
+	return false;
+    }
+
+    /**
+       Checks if the argument is +0.
+    */
+    public static boolean isPlusZero(double x) { return x == 0 && Math.copySign(1, x) == 1; }
+
+    /**
+       Checks if the argument is -0.
+    */
+    public static boolean isMinusZero(double x) { return x == 0 && Math.copySign(1, x) == -1; }
+
+    static double sign(double d) { return Math.copySign(1, d); }
+
+    //
+
+    /**
+       This class represents a vector of two double values.
+    */
+    public static class double2 {
+	public double x, y;
+	public double2() {}
+	public double2(double x, double y) { this.x = x; this.y = y; }
+
+	public String toString() {
+	    return "(double2:" + x + " + " + y + ")";
+	}
+    }
+
+    static double2 ddnormalize_d2_d2(double2 t) {
+	double2 s = new double2();
+
+	s.x = t.x + t.y;
+	s.y = t.x - s.x + t.y;
+
+	return s;
+    }
+
+    /** Multiplies both components of d by s and returns them as a new pair. */
+    static double2 ddscale_d2_d2_d(double2 d, double s) {
+	return new double2(d.x * s, d.y * s);
+    }
+
+    static double2 ddadd2_d2_d_d(double x, double y) {
+	double2 r = new double2();
+
+	r.x = x + y;
+	double v = r.x - x;
+	r.y = (x - (r.x - v)) + (y - v);
+
+	return r;
+    }
+
+    static double2 ddadd_d2_d2_d(double2 x, double y) {
+	// |x| >= |y|
+
+	double2 r = new double2();
+
+	//assert(isnan(x.x) || isnan(y) || fabs(x.x) >= fabs(y));
+
+	r.x = x.x + y;
+	r.y = x.x - r.x + y + x.y;
+
+	return r;
+    }
+
+    static double2 ddadd2_d2_d2_d(double2 x, double y) {
+	// |x| >= |y|
+
+	double2 r = new double2();
+
+	r.x  = x.x + y;
+	double v = r.x - x.x;
+	r.y = (x.x - (r.x - v)) + (y - v);
+	r.y += x.y;
+
+	return r;
+    }
+
+    static double2 ddadd_d2_d_d2(double x, double2 y) {
+	// |x| >= |y|
+
+	double2 r = new double2();
+
+	//assert(isnan(x) || isnan(y.x) || fabs(x) >= fabs(y.x));
+
+	r.x = x + y.x;
+	r.y = x - r.x + y.x + y.y;
+
+	return r;
+    }
+
+    static double2 ddadd_d2_d2_d2(double2 x, double2 y) {
+	// |x| >= |y|
+
+	double2 r = new double2();
+
+	//assert(isnan(x.x) || isinf(x.x) || isnan(y.x) || isinf(y.x) || fabs(x.x) >= fabs(y.x)) : "x.x = " + x.x + ", y.x = " + y.x;
+
+	r.x = x.x + y.x;
+	r.y = x.x - r.x + y.x + x.y + y.y;
+
+	return r;
+    }
+
+    static double2 ddadd2_d2_d2_d2(double2 x, double2 y) {
+	double2 r = new double2();
+
+	r.x  = x.x + y.x;
+	double v = r.x - x.x;
+	r.y = (x.x - (r.x - v)) + (y.x - v);
+	r.y += x.y + y.y;
+
+	return r;
+    }
+
+    static double2 ddsub_d2_d2_d2(double2 x, double2 y) {
+	// |x| >= |y|
+
+	double2 r = new double2();
+
+	r.x = x.x - y.x;
+	r.y = x.x - r.x - y.x + x.y - y.y;
+
+	return r;
+    }
+
+    /**
+       Divides the pair n by the pair d and returns the quotient as
+       a pair. The leading component is n.x times the reciprocal of
+       d.x; the trailing component collects correction terms.
+    */
+    static double2 dddiv_d2_d2_d2(double2 n, double2 d) {
+	// t approximates 1 / d.x; d.x, t and n.x are each split with upper() into a high part and the remainder.
+	double t = 1.0 / d.x;
+	double dh  = upper(d.x), dl  = d.x - dh;
+	double th  = upper(t  ), tl  = t   - th;
+	double nhh = upper(n.x), nhl = n.x - nhh;
+
+	double2 q = new double2();
+
+	q.x = n.x * t;
+
+	// First line: n.x * t rebuilt from the split parts, minus q.x.
+	// Second line: q.x times (1 - d.x * t), with d.x * t rebuilt from the split parts.
+	double u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl +
+	    q.x * (1 - dh * th - dh * tl - dl * th - dl * tl);
+
+	// Add the contribution of the trailing components n.y and d.y.
+	q.y = t * (n.y - q.x * d.y) + u;
+
+	return q;
+    }
+
+    /**
+       Multiplies two doubles into a pair: x holds the rounded
+       product and y a correction term built from the split operands.
+    */
+    static double2 ddmul_d2_d_d(double x, double y) {
+	// Split each operand with upper() into a high part and the remainder.
+	double xh = upper(x), xl = x - xh;
+	double yh = upper(y), yl = y - yh;
+	double2 r = new double2();
+
+	r.x = x * y;
+	// Sum of the four partial products minus the rounded product.
+	r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl;
+
+	return r;
+    }
+
+    /**
+       Multiplies the pair x by the double y and returns the product
+       as a pair.
+    */
+    static double2 ddmul_d2_d2_d(double2 x, double y) {
+	// Split x.x and y with upper() into a high part and the remainder.
+	double xh = upper(x.x), xl = x.x - xh;
+	double yh = upper(y  ), yl = y   - yh;
+	double2 r = new double2();
+
+	r.x = x.x * y;
+	// Partial products minus the rounded product, plus the contribution of the trailing component x.y.
+	r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y;
+
+	return r;
+    }
+
+    /**
+       Multiplies the pair x by the pair y and returns the product
+       as a pair. The product x.y * y.y is not included.
+    */
+    static double2 ddmul_d2_d2_d2(double2 x, double2 y) {
+	// Split both leading components with upper() into a high part and the remainder.
+	double xh = upper(x.x), xl = x.x - xh;
+	double yh = upper(y.x), yl = y.x - yh;
+	double2 r = new double2();
+
+	r.x = x.x * y.x;
+	// Partial products minus the rounded product, plus the two cross terms with the trailing components.
+	r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x;
+
+	return r;
+    }
+
+    /**
+       Squares the pair x and returns the result as a pair. The
+       product x.y * x.y is not included.
+    */
+    static double2 ddsqu_d2_d2(double2 x) {
+	// Split the leading component with upper() into a high part and the remainder.
+	double xh = upper(x.x), xl = x.x - xh;
+	double2 r = new double2();
+
+	r.x = x.x * x.x;
+	// Partial products of the split value minus the rounded square, plus twice x.x * x.y.
+	r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y);
+
+	return r;
+    }
+
+    /**
+       Returns the reciprocal of the double d as a pair: x is 1.0 / d
+       and y a correction term.
+    */
+    static double2 ddrec_d2_d(double d) {
+	double t = 1.0 / d;
+	// Split d and t with upper() into a high part and the remainder.
+	double dh = upper(d), dl = d - dh;
+	double th = upper(t), tl = t - th;
+	double2 q = new double2();
+
+	q.x = t;
+	// t times (1 - d * t), with d * t rebuilt from the split parts.
+	q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl);
+
+	return q;
+    }
+
+    /**
+       Returns the reciprocal of the pair d as a pair: x is 1.0 / d.x
+       and y a correction term that also accounts for d.y.
+    */
+    static double2 ddrec_d2_d2(double2 d) {
+	double t = 1.0 / d.x;
+	// Split d.x and t with upper() into a high part and the remainder.
+	double dh = upper(d.x), dl = d.x - dh;
+	double th = upper(t  ), tl = t   - th;
+	double2 q = new double2();
+	
+	q.x = t;
+	// t times (1 - d.x * t - d.y * t), with d.x * t rebuilt from the split parts.
+	q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl - d.y * t);
+
+	return q;
+    }
+
+    static double2 ddsqrt_d2_d2(double2 d) {
+	double t = Math.sqrt(d.x + d.y);
+	return ddscale_d2_d2_d(ddmul_d2_d2_d2(ddadd2_d2_d2_d2(d, ddmul_d2_d_d(t, t)), ddrec_d2_d(t)), 0.5);
+    }
+
+    //
+
+    /**
+       Arc tangent kernel used by atan2, asin and acos. It does not
+       handle special values itself; atan2 deals with infinities,
+       zeros and NaN separately.
+    */
+    static double atan2k(double y, double x) {
+	double s, t, u;
+	int q = 0;
+
+	// q counts multiples of pi/2 that are added back at the end.
+	if (x < 0) { x = -x; q = -2; }
+	// When y exceeds x, exchange the two (negating the new y) so that the quotient below has magnitude at most 1.
+	if (y > x) { t = x; x = y; y = -t; q += 1; }
+
+	s = y / x;
+	t = s * s;
+
+	// Polynomial in t = s*s, evaluated with Horner's scheme.
+	u = -1.88796008463073496563746e-05;
+	u = u * t + (0.000209850076645816976906797);
+	u = u * t + (-0.00110611831486672482563471);
+	u = u * t + (0.00370026744188713119232403);
+	u = u * t + (-0.00889896195887655491740809);
+	u = u * t + (0.016599329773529201970117);
+	u = u * t + (-0.0254517624932312641616861);
+	u = u * t + (0.0337852580001353069993897);
+	u = u * t + (-0.0407629191276836500001934);
+	u = u * t + (0.0466667150077840625632675);
+	u = u * t + (-0.0523674852303482457616113);
+	u = u * t + (0.0587666392926673580854313);
+	u = u * t + (-0.0666573579361080525984562);
+	u = u * t + (0.0769219538311769618355029);
+	u = u * t + (-0.090908995008245008229153);
+	u = u * t + (0.111111105648261418443745);
+	u = u * t + (-0.14285714266771329383765);
+	u = u * t + (0.199999999996591265594148);
+	u = u * t + (-0.333333333333311110369124);
+
+	// s + s * t * u, then add q quarter turns.
+	t = u * t * s + s;
+	t = q * (Math.PI/2) + t;
+
+	return t;
+    }
+
+    /**
+       Computes the arc tangent of y/x in radians, choosing the
+       quadrant of the result from the signs of both arguments. The
+       results may have maximum error of 2 ulps. NaN is returned when
+       either argument is NaN.
+    */
+    public static double atan2(double y, double x) {
+	if (isnan(x) || isnan(y)) {
+	    return Double.NaN;
+	}
+
+	final boolean xInfinite = isinf(x);
+	double angle = mulsign(atan2k(fabs(y), x), x);
+
+	// Later special cases override earlier ones, so their order matters.
+	if (xInfinite || x == 0) {
+	    angle = Math.PI/2 - (xInfinite ? (sign(x) * (Math.PI  /2)) : 0);
+	}
+	if (isinf(y)) {
+	    angle = Math.PI/2 - (xInfinite ? (sign(x) * (Math.PI*1/4)) : 0);
+	}
+	if (y == 0) {
+	    angle = (sign(x) == -1 ? Math.PI : 0);
+	}
+
+	return mulsign(angle, y);
+    }
+
+    /**
+       Computes the arc sine of the argument in radians. The return
+       value is in the range [-pi/2, pi/2]. The results may have
+       maximum error of 3 ulps.
+    */
+    public static double asin(double d) {
+	final double adjacent = Math.sqrt((1+d)*(1-d));
+	final double angle = atan2k(fabs(d), adjacent);
+	return mulsign(angle, d);
+    }
+
+    /**
+       Computes the arc cosine of the argument in radians. The
+       return value is in the range [0, pi]. The results may have
+       maximum error of 3 ulps.
+    */
+    public static double acos(double d) {
+	final double opposite = Math.sqrt((1+d)*(1-d));
+	final double angle = mulsign(atan2k(opposite, fabs(d)), d);
+	// Negative arguments are shifted by pi.
+	final double shift = d < 0 ? Math.PI : 0;
+	return angle + shift;
+    }
+
+    /**
+       Returns the arc tangent of an angle. The results may have
+       maximum error of 2 ulps.
+
+       @param s the value whose arc tangent is wanted
+       @return the arc tangent of s
+    */
+    public static double atan(double s) {
+	double t, u;
+	// Bit 1 of q records a negated argument, bit 0 records that the reciprocal was taken.
+	int q = 0;
+
+	if (s < 0) { s = -s; q = 2; }
+	if (s > 1) { s = 1.0 / s; q |= 1; }
+
+	t = s * s;
+
+	// Polynomial in t = s*s, evaluated with Horner's scheme (same coefficients as atan2k).
+	u = -1.88796008463073496563746e-05;
+	u = u * t + (0.000209850076645816976906797);
+	u = u * t + (-0.00110611831486672482563471);
+	u = u * t + (0.00370026744188713119232403);
+	u = u * t + (-0.00889896195887655491740809);
+	u = u * t + (0.016599329773529201970117);
+	u = u * t + (-0.0254517624932312641616861);
+	u = u * t + (0.0337852580001353069993897);
+	u = u * t + (-0.0407629191276836500001934);
+	u = u * t + (0.0466667150077840625632675);
+	u = u * t + (-0.0523674852303482457616113);
+	u = u * t + (0.0587666392926673580854313);
+	u = u * t + (-0.0666573579361080525984562);
+	u = u * t + (0.0769219538311769618355029);
+	u = u * t + (-0.090908995008245008229153);
+	u = u * t + (0.111111105648261418443745);
+	u = u * t + (-0.14285714266771329383765);
+	u = u * t + (0.199999999996591265594148);
+	u = u * t + (-0.333333333333311110369124);
+
+	t = s + s * (t * u);
+
+	// Undo the reciprocal (the constant approximates pi/2), then undo the negation.
+	if ((q & 1) != 0) t = 1.570796326794896557998982 - t;
+	if ((q & 2) != 0) t = -t;
+
+	return t;
+    }
+
+    // PI4_A through PI4_D add up to approximately pi/4, in decreasing magnitude. The trigonometric
+    // methods subtract multiples of these pieces one after another from their argument.
+    private static final double PI4_A = 0.78539816290140151978;
+    private static final double PI4_B = 4.9604678871439933374e-10;
+    private static final double PI4_C = 1.1258708853173288931e-18;
+    private static final double PI4_D = 1.7607799325916000908e-27;
+
+    // Approximately 1/pi.
+    private static final double M_1_PI = 0.3183098861837906715377675267450287;
+
+    /**
+       Returns the trigonometric sine of an angle. The results may
+       have maximum error of 2 ulps.
+
+       @param d the angle
+       @return the sine of d
+    */
+    public static double sin(double d) {
+	int q;
+	double u, s;
+
+	// q is d / pi rounded to the nearest integer (halves away from zero).
+	u = d * M_1_PI;
+	q = (int)(u < 0 ? u - 0.5 : u + 0.5);
+
+	// Subtract q * pi from d, one piece of the split constant at a time.
+	d = mla(q, -PI4_A*4, d);
+	d = mla(q, -PI4_B*4, d);
+	d = mla(q, -PI4_C*4, d);
+	d = mla(q, -PI4_D*4, d);
+
+	// An odd q flips the sign of the reduced argument.
+	if ((q & 1) != 0) d = -d;
+
+	s = d * d;
+
+	// Polynomial in s = d*d.
+	u = -7.97255955009037868891952e-18;
+	u = mla(u, s, 2.81009972710863200091251e-15);
+	u = mla(u, s, -7.64712219118158833288484e-13);
+	u = mla(u, s, 1.60590430605664501629054e-10);
+	u = mla(u, s, -2.50521083763502045810755e-08);
+	u = mla(u, s, 2.75573192239198747630416e-06);
+	u = mla(u, s, -0.000198412698412696162806809);
+	u = mla(u, s, 0.00833333333333332974823815);
+	u = mla(u, s, -0.166666666666666657414808);
+
+	// d + s * u * d
+	u = mla(s, u * d, d);
+
+	return u;
+    }
+
+    /**
+       Returns the trigonometric cosine of an angle. The results may
+       have maximum error of 2 ulps.
+
+       @param d the angle
+       @return the cosine of d
+    */
+    public static double cos(double d) {
+	int q;
+	double u, s;
+
+	// q is always odd: one plus twice the rounded value of d / pi - 0.5.
+	q = 1 + 2*(int)rint(d * M_1_PI - 0.5);
+
+	// Subtract q * pi/2 from d, one piece of the split constant at a time.
+	d = mla(q, -PI4_A*2, d);
+	d = mla(q, -PI4_B*2, d);
+	d = mla(q, -PI4_C*2, d);
+	d = mla(q, -PI4_D*2, d);
+
+	// Bit 1 of q decides the sign of the reduced argument.
+	if ((q & 2) == 0) d = -d;
+
+	s = d * d;
+
+	// Polynomial in s = d*d (same coefficients as in sin).
+	u = -7.97255955009037868891952e-18;
+	u = mla(u, s, 2.81009972710863200091251e-15);
+	u = mla(u, s, -7.64712219118158833288484e-13);
+	u = mla(u, s, 1.60590430605664501629054e-10);
+	u = mla(u, s, -2.50521083763502045810755e-08);
+	u = mla(u, s, 2.75573192239198747630416e-06);
+	u = mla(u, s, -0.000198412698412696162806809);
+	u = mla(u, s, 0.00833333333333332974823815);
+	u = mla(u, s, -0.166666666666666657414808);
+
+	// d + s * u * d
+	u = mla(s, u * d, d);
+
+	return u;
+    }
+
+    /**
+       Returns the trigonometric sine and cosine of an angle at a
+       time. The sine and cosine of an argument is returned by the x
+       and y field of the return value, respectively. The results may
+       have maximum error of 2 ulps.
+
+       @param d the angle
+       @return a pair with the sine in x and the cosine in y; both are NaN when d is infinite
+    */
+    public static double2 sincos(double d) {
+	int q;
+	double u, s, t;
+        double2 r = new double2();
+
+	// q is d / (pi/2) rounded to the nearest integer.
+	q = (int)rint(d * (2 * M_1_PI));
+
+	s = d;
+
+	// Subtract q * pi/2 from the argument, one piece of the split constant at a time.
+	s = mla(-q, PI4_A*2, s);
+	s = mla(-q, PI4_B*2, s);
+	s = mla(-q, PI4_C*2, s);
+	s = mla(-q, PI4_D*2, s);
+
+	// t keeps the reduced argument; s becomes its square.
+	t = s;
+
+	s = s * s;
+
+	// First polynomial: gives r.x = t + u * s * t.
+	u = 1.58938307283228937328511e-10;
+	u = mla(u, s, -2.50506943502539773349318e-08);
+	u = mla(u, s, 2.75573131776846360512547e-06);
+	u = mla(u, s, -0.000198412698278911770864914);
+	u = mla(u, s, 0.0083333333333191845961746);
+	u = mla(u, s, -0.166666666666666130709393);
+	u = u * s * t;
+
+	r.x = t + u;
+
+	// Second polynomial: gives r.y = u * s + 1.
+	u = -1.13615350239097429531523e-11;
+	u = mla(u, s, 2.08757471207040055479366e-09);
+	u = mla(u, s, -2.75573144028847567498567e-07);
+	u = mla(u, s, 2.48015872890001867311915e-05);
+	u = mla(u, s, -0.00138888888888714019282329);
+	u = mla(u, s, 0.0416666666666665519592062);
+	u = mla(u, s, -0.5);
+
+	r.y = u * s + 1;
+
+	// Depending on q, swap the two results and flip their signs.
+	if ((q & 1) != 0) { s = r.y; r.y = r.x; r.x = s; }
+	if ((q & 2) != 0) { r.x = -r.x; }
+	if (((q+1) & 2) != 0) { r.y = -r.y; }
+
+	if (isinf(d)) { r.x = r.y = Double.NaN; }
+
+	return r;
+    }
+
+    /**
+       Returns the trigonometric tangent of an angle. The results may
+       have maximum error of 3 ulps.
+
+       @param d the angle
+       @return the tangent of d, or NaN when d is infinite
+    */
+    public static double tan(double d) {
+	int q;
+	double u, s, x;
+
+	// q is d / (pi/2) rounded to the nearest integer.
+	q = (int)rint(d * (2 * M_1_PI));
+
+	// Subtract q * pi/2 from d, one piece of the split constant at a time.
+	x = mla(q, -PI4_A*2, d);
+	x = mla(q, -PI4_B*2, x);
+	x = mla(q, -PI4_C*2, x);
+	x = mla(q, -PI4_D*2, x);
+
+	s = x * x;
+
+	// For odd q the reduced argument is negated here and the result is inverted below.
+	if ((q & 1) != 0) x = -x;
+
+	// Polynomial in s = x*x.
+	u = 1.01419718511083373224408e-05;
+	u = mla(u, s, -2.59519791585924697698614e-05);
+	u = mla(u, s, 5.23388081915899855325186e-05);
+	u = mla(u, s, -3.05033014433946488225616e-05);
+	u = mla(u, s, 7.14707504084242744267497e-05);
+	u = mla(u, s, 8.09674518280159187045078e-05);
+	u = mla(u, s, 0.000244884931879331847054404);
+	u = mla(u, s, 0.000588505168743587154904506);
+	u = mla(u, s, 0.00145612788922812427978848);
+	u = mla(u, s, 0.00359208743836906619142924);
+	u = mla(u, s, 0.00886323944362401618113356);
+	u = mla(u, s, 0.0218694882853846389592078);
+	u = mla(u, s, 0.0539682539781298417636002);
+	u = mla(u, s, 0.133333333333125941821962);
+	u = mla(u, s, 0.333333333333334980164153);
+
+	// x + s * u * x
+	u = mla(s, u * x, x);
+
+	if ((q & 1) != 0) u = 1.0 / u;
+
+	if (isinf(d)) u = Double.NaN;
+
+	return u;
+    }
+
+    //
+
+    // L2U and L2L add up to approximately ln 2; L2U is the large piece and L2L the small remainder.
+    private static final double L2U = .69314718055966295651160180568695068359375;
+    private static final double L2L = .28235290563031577122588448175013436025525412068e-12;
+    // Approximately 1 / ln 2.
+    private static final double R_LN2 = 1.442695040888963407359924681001892137426645954152985934135449406931;
+
+    /**
+       Returns the natural logarithm of the argument. The results may
+       have maximum error of 3 ulps.
+
+       @param d the argument
+       @return the natural logarithm of d; positive infinity for positive infinity,
+               NaN for negative arguments and negative infinity for zero
+    */
+    public static double log(double d) {
+	double x, x2, t, m;
+	int e, i;
+
+	// Pick an exponent e and scale d by 2^-e to obtain m.
+	e = ilogbp1(d * 0.7071);
+	m = ldexp(d, -e);
+
+	x = (m-1) / (m+1);
+	x2 = x * x;
+
+	// Polynomial in x2 = x*x.
+	t = 0.148197055177935105296783;
+	t = mla(t, x2, 0.153108178020442575739679);
+	t = mla(t, x2, 0.181837339521549679055568);
+	t = mla(t, x2, 0.22222194152736701733275);
+	t = mla(t, x2, 0.285714288030134544449368);
+	t = mla(t, x2, 0.399999999989941956712869);
+	t = mla(t, x2, 0.666666666666685503450651);
+	t = mla(t, x2, 2);
+
+	// Add e times a constant that approximates ln 2.
+	x = x * t + 0.693147180559945286226764 * e;
+
+	// Special cases; later lines override earlier ones.
+	if (ispinf(d)) x = Double.POSITIVE_INFINITY;
+	if (d < 0) x = Double.NaN;
+	if (d == 0) x = Double.NEGATIVE_INFINITY;
+
+	return x;
+    }
+
+    /**
+       Returns the value of e raised to the power of the argument. The
+       results may have maximum error of 1 ulps.
+
+       @param d the exponent
+       @return e raised to the power d; zero for negative infinity
+    */
+    public static double exp(double d) {
+	// q is d / ln 2 rounded to the nearest integer.
+	int q = (int)rint(d * R_LN2);
+	double s, u;
+
+	// Subtract q * ln 2 from d in two steps, large piece first.
+	s = mla(q, -L2U, d);
+	s = mla(q, -L2L, s);
+
+	// Polynomial in s.
+	u = 2.08860621107283687536341e-09;
+	u = mla(u, s, 2.51112930892876518610661e-08);
+	u = mla(u, s, 2.75573911234900471893338e-07);
+	u = mla(u, s, 2.75572362911928827629423e-06);
+	u = mla(u, s, 2.4801587159235472998791e-05);
+	u = mla(u, s, 0.000198412698960509205564975);
+	u = mla(u, s, 0.00138888888889774492207962);
+	u = mla(u, s, 0.00833333333331652721664984);
+	u = mla(u, s, 0.0416666666666665047591422);
+	u = mla(u, s, 0.166666666666666851703837);
+	u = mla(u, s, 0.5);
+
+	// 1 + s + s*s*u, then scale by 2^q.
+	u = s * s * u + s + 1;
+	u = ldexp(u, q);
+
+	if (isminf(d)) u = 0;
+
+	return u;
+    }
+
+    /**
+       Logarithm kernel that returns its result as a pair. It
+       follows the same steps as log, carried out with the pair
+       helpers, and has no special-case handling of its own.
+    */
+    static double2 logk(double d) {
+	double2 x, x2;
+	double m, t;
+	int e;
+
+	// Pick an exponent e and scale d by 2^-e to obtain m.
+	e = ilogbp1(d * 0.7071);
+	m = ldexp(d, -e);
+
+	// x = (m - 1) / (m + 1) and its square, both as pairs.
+	x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m));
+	x2 = ddsqu_d2_d2(x);
+
+	// Polynomial in the leading component of x2.
+	t = 0.134601987501262130076155;
+	t = mla(t, x2.x, 0.132248509032032670243288);
+	t = mla(t, x2.x, 0.153883458318096079652524);
+	t = mla(t, x2.x, 0.181817427573705403298686);
+	t = mla(t, x2.x, 0.222222231326187414840781);
+	t = mla(t, x2.x, 0.285714285651261412873718);
+	t = mla(t, x2.x, 0.400000000000222439910458);
+	t = mla(t, x2.x, 0.666666666666666371239645);
+
+	// e times a two-part constant approximating ln 2, plus 2*x, plus x2*x*t.
+	return ddadd2_d2_d2_d2(ddmul_d2_d2_d(new double2(0.693147180559945286226764, 2.319046813846299558417771e-17), e),
+			       ddadd2_d2_d2_d2(ddscale_d2_d2_d(x, 2), ddmul_d2_d2_d(ddmul_d2_d2_d2(x2, x), t)));
+    }
+
+    /**
+       Exponential kernel that takes its argument as a pair and
+       returns a plain double. It has no special-case handling of
+       its own.
+    */
+    static double expk(double2 d) {
+	// q is (d.x + d.y) / ln 2 rounded to the nearest integer.
+	int q = (int)rint((d.x + d.y) * R_LN2);
+	double2 s, t;
+	double u;
+
+	// Subtract q * ln 2 from d in two steps, large piece first.
+	s = ddadd2_d2_d2_d(d, -q * L2U);
+	s = ddadd2_d2_d2_d(s, -q * L2L);
+
+	s = ddnormalize_d2_d2(s);
+
+	// Polynomial in the leading component of s.
+	u = 2.51069683420950419527139e-08;
+	u = mla(u, s.x, 2.76286166770270649116855e-07);
+	u = mla(u, s.x, 2.75572496725023574143864e-06);
+	u = mla(u, s.x, 2.48014973989819794114153e-05);
+	u = mla(u, s.x, 0.000198412698809069797676111);
+	u = mla(u, s.x, 0.0013888888939977128960529);
+	u = mla(u, s.x, 0.00833333333332371417601081);
+	u = mla(u, s.x, 0.0416666666665409524128449);
+	u = mla(u, s.x, 0.166666666666666740681535);
+	u = mla(u, s.x, 0.500000000000000999200722);
+
+	// t = s + s*s*u
+	t = ddadd_d2_d2_d2(s, ddmul_d2_d2_d(ddsqu_d2_d2(s), u));
+
+	// t = 1 + t
+	t = ddadd_d2_d_d2(1, t);
+
+	// Collapse the pair to one double and scale by 2^q.
+	return ldexp(t.x + t.y, q);
+    }
+
+    /**
+       Returns the value of the first argument raised to the power of
+       the second argument. The results may have maximum error of 1
+       ulps.
+
+       Whether y is an integer, and whether it is odd, is decided
+       without narrowing y to int. Integral exponents whose magnitude
+       exceeds the int range are therefore classified correctly, and
+       a negative base raised to such an exponent no longer produces
+       NaN.
+
+       @param x the base
+       @param y the exponent
+       @return x raised to the power y
+    */
+    public static double pow(double x, double y) {
+	// y is integral when it is finite and equal to its own floor; NaN fails the comparison.
+	boolean yisint = !isinf(y) && y == Math.floor(y);
+	// Every double of magnitude 2^53 or more is an even integer, so only smaller values can be
+	// odd, and for those the conversion to long is exact.
+	boolean yisodd = yisint && fabs(y) < 9007199254740992.0 && (1 & (long)y) != 0;
+
+	// Main path: exp(y * log|x|), evaluated with the pair kernels.
+	double result = expk(ddmul_d2_d2_d(logk(fabs(x)), y));
+
+	result = isnan(result) ? Double.POSITIVE_INFINITY : result;
+	// Negative base: NaN for a non-integer exponent, otherwise the sign follows the parity of y.
+	result *=  (x >= 0 ? 1 : (!yisint ? Double.NaN : (yisodd ? -1 : 1)));
+
+	// Special cases; later lines override earlier ones.
+	double efx = mulsign(fabs(x) - 1, y);
+	if (isinf(y)) result = efx < 0 ? 0.0 : (efx == 0 ? 1.0 : Double.POSITIVE_INFINITY);
+	if (isinf(x) || x == 0) result = (yisodd ? sign(x) : 1) * ((x == 0 ? -y : y) < 0 ? 0 : Double.POSITIVE_INFINITY);
+	if (isnan(x) || isnan(y)) result = Double.NaN;
+	if (y == 0 || x == 1) result = 1;
+
+	return result;
+    }
+
+    /**
+       Exponential kernel that takes its argument as a pair and
+       returns the result as a pair. It follows the same steps as
+       expk but scales both components by pow2i(q) at the end.
+    */
+    static double2 expk2(double2 d) {
+	// q is (d.x + d.y) / ln 2 rounded to the nearest integer.
+	int q = (int)rint((d.x + d.y) * R_LN2);
+	double2 s, t;
+	double u;
+
+	// Subtract q * ln 2 from d in two steps, large piece first.
+	s = ddadd2_d2_d2_d(d, q * -L2U);
+	s = ddadd2_d2_d2_d(s, q * -L2L);
+
+	s = ddnormalize_d2_d2(s);
+
+	// Polynomial in the leading component of s.
+	u = 2.51069683420950419527139e-08;
+	u = mla(u, s.x, 2.76286166770270649116855e-07);
+	u = mla(u, s.x, 2.75572496725023574143864e-06);
+	u = mla(u, s.x, 2.48014973989819794114153e-05);
+	u = mla(u, s.x, 0.000198412698809069797676111);
+	u = mla(u, s.x, 0.0013888888939977128960529);
+	u = mla(u, s.x, 0.00833333333332371417601081);
+	u = mla(u, s.x, 0.0416666666665409524128449);
+	u = mla(u, s.x, 0.166666666666666740681535);
+	u = mla(u, s.x, 0.500000000000000999200722);
+
+	// t = s + s*s*u
+	t = ddadd_d2_d2_d2(s, ddmul_d2_d2_d(ddsqu_d2_d2(s), u));
+
+	// t = 1 + t, then scale both components by pow2i(q).
+	t = ddadd_d2_d_d2(1, t);
+	return ddscale_d2_d2_d(t, pow2i(q));
+    }
+
+    /**
+       Computes the hyperbolic sine of x. The results may have
+       maximum error of 2 ulps. NaN is returned for a NaN argument.
+    */
+    public static double sinh(double x) {
+	if (isnan(x)) {
+	    return Double.NaN;
+	}
+
+	// (e^|x| - 1/e^|x|) / 2, evaluated with the pair helpers.
+	final double2 grown = expk2(new double2(fabs(x), 0));
+	final double2 difference = ddsub_d2_d2_d2(grown, ddrec_d2_d2(grown));
+	double magnitude = (difference.x + difference.y) * 0.5;
+
+	if (abs(x) > 710 || isnan(magnitude)) {
+	    magnitude = Double.POSITIVE_INFINITY;
+	}
+
+	return mulsign(magnitude, x);
+    }
+
+    /**
+       Computes the hyperbolic cosine of x. The results may have
+       maximum error of 2 ulps. NaN is returned for a NaN argument.
+    */
+    public static double cosh(double x) {
+	if (isnan(x)) {
+	    return Double.NaN;
+	}
+
+	// (e^|x| + 1/e^|x|) / 2, evaluated with the pair helpers.
+	final double2 grown = expk2(new double2(fabs(x), 0));
+	final double2 total = ddadd_d2_d2_d2(grown, ddrec_d2_d2(grown));
+	final double value = (total.x + total.y) * 0.5;
+
+	if (abs(x) > 710 || isnan(value)) {
+	    return Double.POSITIVE_INFINITY;
+	}
+	return value;
+    }
+
+    /**
+       Computes the hyperbolic tangent of x. The results may have
+       maximum error of 2 ulps. NaN is returned for a NaN argument.
+    */
+    public static double tanh(double x) {
+	if (isnan(x)) {
+	    return Double.NaN;
+	}
+
+	// (e^|x| - e^-|x|) / (e^|x| + e^-|x|), evaluated with the pair helpers.
+	final double2 grown = expk2(new double2(fabs(x), 0));
+	final double2 shrunk = dddiv_d2_d2_d2(new double2(1, 0), grown);
+	final double2 numerator = ddadd2_d2_d2_d2(grown, ddscale_d2_d2_d(shrunk, -1));
+	final double2 denominator = ddadd2_d2_d2_d2(grown, shrunk);
+	final double2 ratio = dddiv_d2_d2_d2(numerator, denominator);
+	double magnitude = ratio.x + ratio.y;
+
+	if (abs(x) > 18.714973875 || isnan(magnitude)) {
+	    magnitude = 1.0;
+	}
+
+	return mulsign(magnitude, x);
+    }
+
+    /**
+       Logarithm kernel that takes its argument as a pair and
+       returns the result as a pair. It follows the same steps as
+       logk but scales the pair with pow2i instead of ldexp.
+    */
+    static double2 logk2(double2 d) {
+	double2 x, x2, m;
+	double t;
+	int e;
+
+	// Pick an exponent e from the leading component and scale the whole pair by pow2i(-e).
+	e = ilogbp1(d.x * 0.7071);
+	m = ddscale_d2_d2_d(d, pow2i(-e));
+
+	// x = (m - 1) / (m + 1) and its square, both as pairs.
+	x = dddiv_d2_d2_d2(ddadd2_d2_d2_d(m, -1), ddadd2_d2_d2_d(m, 1));
+	x2 = ddsqu_d2_d2(x);
+
+	// Polynomial in the leading component of x2 (same coefficients as logk).
+	t = 0.134601987501262130076155;
+	t = mla(t, x2.x, 0.132248509032032670243288);
+	t = mla(t, x2.x, 0.153883458318096079652524);
+	t = mla(t, x2.x, 0.181817427573705403298686);
+	t = mla(t, x2.x, 0.222222231326187414840781);
+	t = mla(t, x2.x, 0.285714285651261412873718);
+	t = mla(t, x2.x, 0.400000000000222439910458);
+	t = mla(t, x2.x, 0.666666666666666371239645);
+
+	// e times a two-part constant approximating ln 2, plus 2*x, plus x2*x*t.
+	return ddadd2_d2_d2_d2(ddmul_d2_d2_d(new double2(0.693147180559945286226764, 2.319046813846299558417771e-17), e),
+			       ddadd2_d2_d2_d2(ddscale_d2_d2_d(x, 2), ddmul_d2_d2_d(ddmul_d2_d2_d2(x2, x), t)));
+    }
+
+    /**
+       Computes the inverse hyperbolic sine of x. The results may
+       have maximum error of 2 ulps. NaN is returned for a NaN
+       argument.
+    */
+    public static double asinh(double x) {
+	if (isnan(x)) {
+	    return Double.NaN;
+	}
+
+	// log(sqrt(|x|*|x| + 1) + |x|), evaluated with the pair helpers.
+	final double a = fabs(x);
+	final double2 squarePlusOne = ddadd2_d2_d2_d(ddmul_d2_d_d(a, a),  1);
+	final double2 logArgument = ddadd2_d2_d2_d(ddsqrt_d2_d2(squarePlusOne), a);
+	final double2 logarithm = logk2(logArgument);
+	double magnitude = logarithm.x + logarithm.y;
+
+	if (isinf(x) || isnan(magnitude)) {
+	    magnitude = Double.POSITIVE_INFINITY;
+	}
+
+	return mulsign(magnitude, x);
+    }
+
+    /**
+       Computes the inverse hyperbolic cosine of x. The results may
+       have maximum error of 2 ulps. NaN is returned for NaN and for
+       arguments below 1; exactly 1 yields 0.
+    */
+    public static double acosh(double x) {
+	if (isnan(x) || x < 1.0) {
+	    return Double.NaN;
+	}
+	if (x == 1.0) {
+	    return 0.0;
+	}
+
+	// log(sqrt(x*x - 1) + x), evaluated with the pair helpers.
+	final double2 squareMinusOne = ddadd2_d2_d2_d(ddmul_d2_d_d(x, x), -1);
+	final double2 logArgument = ddadd2_d2_d2_d(ddsqrt_d2_d2(squareMinusOne), x);
+	final double2 logarithm = logk2(logArgument);
+	final double value = logarithm.x + logarithm.y;
+
+	if (isinf(x) || isnan(value)) {
+	    return Double.POSITIVE_INFINITY;
+	}
+	return value;
+    }
+
+    /**
+       Returns the inverse hyperbolic tangent of x. The results may
+       have maximum error of 2 ulps.
+    */
+    public static double atanh(double x) {
+	double ax = fabs(x);
+
+	// logk2((1 + ax) / (1 - ax)); halved below.
+	double2 lg = logk2(dddiv_d2_d2_d2(ddadd2_d2_d_d(1, ax), ddadd2_d2_d_d(1, -ax)));
+
+	double r;
+	if (ax > 1.0) {
+	    r = Double.NaN;
+	} else if (ax == 1.0) {
+	    r = Double.POSITIVE_INFINITY;
+	} else {
+	    r = (lg.x + lg.y) * 0.5;
+	}
+
+	if (isinf(x) || isnan(r)) {
+	    r = Double.NaN;
+	}
+	r = mulsign(r, x);
+
+	return isnan(x) ? Double.NaN : r;
+    }
+
+    /**
+       This function performs a fused multiply-accumulate
+       operation. This function computes x*y+z, with a single
+       rounding. This implementation gives the exact result unless an
+       overflow occurs.
+    */
+    public static double fma(double x, double y, double z) {
+	double xh = Double.longBitsToDouble((Double.doubleToRawLongBits(x) + 0x4000000) & 0xfffffffff8000000L), xl = x - xh; // xh: bit pattern of x plus 0x4000000 with the low 27 bits cleared; xl: what is left of x
+	double yh = Double.longBitsToDouble((Double.doubleToRawLongBits(y) + 0x4000000) & 0xfffffffff8000000L), yl = y - yh; // same split applied to y
+
+	double h = x * y; // rounded product
+	double l = xh * yh - h + xl * yh + xh * yl + xl * yl; // low-order part of x*y rebuilt from the split halves
+
+	double h2, l2, v;
+
+	h2 = h + z; // rounded sum of product and addend
+	v = h2 - h;
+	l2 = (h - (h2 - v)) + (z - v) + l; // rounding error of h + z, plus the product's low-order part
+
+	return h2 + l2; // final combination of high and low parts
+    }
+
+    /**
+       This function returns the square root of the argument.  This
+       implementation gives the correctly rounded result (at most 0.5
+       ulp of error).
+    */
+    public static double sqrt(double d) {
+	double q = 1; // factor applied to the result; stays 1 unless d is pre-scaled below
+
+	if (d < 8.636168555094445E-78) { // small d: scale it up before iterating ...
+	    d *= 1.157920892373162E77;
+	    q = 2.9387358770557188E-39; // ... and undo it on the result (q * q * 1.157920892373162E77 is about 1)
+	}
+
+	// http://en.wikipedia.org/wiki/Fast_inverse_square_root
+	double x = Double.longBitsToDouble(0x5fe6ec85e7de30daL - (Double.doubleToRawLongBits(d + 1e-320) >> 1)); // starting guess for 1/sqrt(d) taken from the bit pattern
+
+	x = x * (1.5 - 0.5 * d * x * x); // three refinement steps for the reciprocal square root
+	x = x * (1.5 - 0.5 * d * x * x);
+	x = x * (1.5 - 0.5 * d * x * x);
+
+	x = fma(d * x, d * x, -d) * (x * -0.5) + d * x; // d*x estimates sqrt(d); the fma term (d*x)^2 - d drives a final correction
+
+	return d == Double.POSITIVE_INFINITY ? Double.POSITIVE_INFINITY : x * q; // +infinity is passed through explicitly
+    }
+
+    /**
+       This function returns the cube root of the argument. The
+       results may have maximum error of 2 ulps.
+    */
+    public static double cbrt(double d) {
+	double x, y, q = 1.0;
+	int e, r;
+
+	// Take the exponent from the magnitude so that it cannot depend on how
+	// ilogbp1 treats negative arguments; the sign is reapplied through q below.
+	e = ilogbp1(fabs(d));
+	d = ldexp(d, -e);
+
+	// q collects the factor removed from d: 2^(1/3) or 2^(2/3) for the
+	// remainder of e modulo 3, times a power of two for the quotient. The
+	// +6144 / -2048 offsets keep the integer division and remainder on
+	// non-negative values.
+	r = (e + 6144) % 3;
+	q = (r == 1) ? 1.2599210498948731647672106 : q;
+	q = (r == 2) ? 1.5874010519681994747517056 : q;
+	q = ldexp(q, (e + 6144) / 3 - 2048);
+
+	q = mulsign(q, d);
+	d = fabs(d);
+
+	// Polynomial starting value in the reduced argument.
+	x = -0.640245898480692909870982;
+	x = x * d + 2.96155103020039511818595;
+	x = x * d + -5.73353060922947843636166;
+	x = x * d + 6.03990368989458747961407;
+	x = x * d + -3.85841935510444988821632;
+	x = x * d + 2.2307275302496609725722;
+
+	// One refinement of x, then d*x*x with a final correction, scaled by q.
+	y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0);
+	y = d * x * x;
+	y = (y - (2.0 / 3.0) * y * (y * x - 1)) * q;
+
+	return y;
+    }
+
+    /**
+       Returns the value of 2 raised to the power of the argument. The
+       results may have maximum error of 1 ulp.
+    */
+    public static double exp2(double a) {
+	// a * ln 2 as a two-word product, then the exponential kernel.
+	double u = expk(ddmul_d2_d2_d(new double2(0.69314718055994528623, 2.3190468138462995584e-17), a));
+	// 2^a is still finite for every a below 1024 (Double.MAX_VALUE is just
+	// under 2^1024), so only force infinity from 1024 upwards.
+	// NOTE(review): relies on expk returning the finite value for
+	// 1023 < a < 1024 -- confirm with the tester.
+	if (a >= 1024) u = Double.POSITIVE_INFINITY;
+	if (isminf(a)) u = 0;
+	return u;
+    }
+
+    /**
+       Returns the value of 10 raised to the power of the
+       argument. The results may have maximum error of 1 ulp.
+    */
+    public static double exp10(double a) {
+	// a * ln 10 as a two-word product, then the exponential kernel.
+	double u = expk(ddmul_d2_d2_d(new double2(2.3025850929940459011, -2.1707562233822493508e-16), a));
+	// 10^a stays finite up to log10(Double.MAX_VALUE), about 308.2547, so the
+	// forced overflow starts there rather than at 308.
+	// NOTE(review): relies on expk returning the finite value for
+	// 308 < a <= 308.2547 -- confirm with the tester.
+	if (a > 308.25471555991671) u = Double.POSITIVE_INFINITY;
+	if (isminf(a)) u = 0;
+	return u;
+    }
+
+    /**
+       Returns a value equivalent to exp(a)-1. The result is accurate
+       even when the value of a is close to zero. The results may have
+       maximum error of 1 ulp.
+    */
+    public static double expm1(double a) {
+	double2 d = ddadd2_d2_d2_d(expk2(new double2(a, 0)), -1.0); // two-word result of expk2(a), minus 1
+	double x = d.x + d.y;
+	if (a > 700) x = Double.POSITIVE_INFINITY; // NOTE(review): exp(a) - 1 is still finite up to about a = 709.78, so this returns infinity early; a tighter bound depends on expk2 -- confirm
+	if (a < -0.36043653389117156089696070315825181539851971360337e+2) x = -1; // below about -36.04 the result is returned as exactly -1
+	return x;
+    }
+
+    /**
+       Returns the base 10 logarithm of the argument. The results may
+       have maximum error of 1 ulp.
+    */
+    public static double log10(double a) {
+	// logk(a) times a two-word constant (about 1 / ln 10).
+	double2 ln = logk(a);
+	double2 scaled = ddmul_d2_d2_d2(ln, new double2(0.43429448190325176116, 6.6494347733425473126e-17));
+	double r = scaled.x + scaled.y;
+
+	// The special cases are mutually exclusive; zero, negative, +infinity.
+	if (a == 0) {
+	    return -Double.POSITIVE_INFINITY;
+	}
+	if (a < 0) {
+	    return Double.NaN;
+	}
+	if (ispinf(a)) {
+	    return Double.POSITIVE_INFINITY;
+	}
+	return r;
+    }
+
+    /**
+       Returns a value equivalent to log(1+a). The result is accurate
+       even when the value of a is close to zero. The results may have
+       maximum error of 1 ulp.
+    */
+    public static double log1p(double a) {
+	// a + 1 is formed as a two-word sum before the logarithm kernel.
+	double2 lg = logk2(ddadd2_d2_d_d(a, 1));
+	double r = lg.x + lg.y;
+
+	// The special cases are mutually exclusive; -1, below -1, +infinity.
+	if (a == -1) {
+	    return -Double.POSITIVE_INFINITY;
+	}
+	if (a < -1) {
+	    return Double.NaN;
+	}
+	if (ispinf(a)) {
+	    return Double.POSITIVE_INFINITY;
+	}
+	return r;
+    }
+
+    //
+
+    /**
+       This class represents a vector of two float values.
+    */
+    public static class float2 {
+	/** The two components; df(h, l) stores its h argument in x and its l argument in y. */
+	public float x, y;
+	/** Creates a pair with both components zero. */
+	public float2() {}
+	/** Creates a pair from the given components. */
+	public float2(float x, float y) { this.x = x; this.y = y; }
+
+	/** Returns a debugging representation of the form "(float2:x + y)". */
+	@Override
+	public String toString() {
+	    return "(float2:" + x + " + " + y + ")";
+	}
+    }
+
+    private static final float PI4_Af = 0.78515625f; // PI4_Af..PI4_Df: four pieces whose sum approximates pi/4; the trig functions subtract multiples of them one piece at a time
+    private static final float PI4_Bf = 0.00024187564849853515625f;
+    private static final float PI4_Cf = 3.7747668102383613586e-08f;
+    private static final float PI4_Df = 1.2816720341285448015e-12f;
+
+    private static final float L2Uf = 0.693145751953125f; // L2Uf + L2Lf approximates ln 2; subtracted in two steps by expf, expkf and expk2f
+    private static final float L2Lf = 1.428606765330187045e-06f;
+
+    private static final float R_LN2f = 1.442695040888963407359924681001892137426645954152985934135449406931f; // 1 / ln 2
+    private static final float M_PIf = ((float)Math.PI); // pi rounded to float
+
+    private static final float INFINITYf = Float.POSITIVE_INFINITY; // shorthand for positive infinity
+    private static final float NANf = Float.NaN; // shorthand for NaN
+
+    private static float mlaf(float x, float y, float z) { return x * y + z; } // multiply-add: x*y + z as two float operations
+    private static float mulsignf(float x, float y) { return (float)(Math.copySign(1, y) * x); } // x multiplied by +1 or -1, taken from the sign of y
+    private static float signf(float d) { return (float)Math.copySign(1, d); } // +1 or -1 carrying the sign of d
+
+    private static float sqrtf(float f) { return (float)Math.sqrt(f); } // square root via Math.sqrt, narrowed to float
+
+    private static float fabsf(float d) { return (float)Math.copySign(d, 1); } // magnitude of d (sign forced positive)
+    private static float maxf(float x, float y) {
+	// Returns x only when it compares strictly greater; otherwise y.
+	if (x > y) {
+	    return x;
+	}
+	return y;
+    }
+    private static boolean isnanf(float d) { return d != d; } // true only for NaN, the one value that differs from itself
+    private static boolean isinff(float d) { return fabsf(d) == Float.POSITIVE_INFINITY; } // true for +infinity and -infinity; uses the float helper fabsf like the other single-precision code
+
+    private static boolean ispinff(float d) { return d == Float.POSITIVE_INFINITY; } // true only for +infinity
+    private static boolean isminff(float d) { return d == Float.NEGATIVE_INFINITY; } // true only for -infinity
+    private static float rintf(float x) {
+	// Round to the nearest integer by adding or subtracting one half and
+	// truncating through an int cast.
+	int rounded;
+	if (x < 0) {
+	    rounded = (int)(x - 0.5f);
+	} else {
+	    rounded = (int)(x + 0.5f);
+	}
+	return rounded;
+    }
+
+    static int floatToRawIntBits(float d) { return Float.floatToRawIntBits(d); } // thin wrapper around Float.floatToRawIntBits
+    static float intBitsToFloat(int i) { return Float.intBitsToFloat(i); } // thin wrapper around Float.intBitsToFloat
+
+    static int ilogbp1f(float d) {
+	// Values of very small magnitude are multiplied by 2^64 before the
+	// exponent field is read, and 64 is subtracted again afterwards. The
+	// decision is made on |d|: comparing d itself would also select every
+	// negative number, and a large negative d would then overflow to
+	// -infinity and yield a wrong exponent.
+	boolean m = fabsf(d) < 5.421010862427522E-20f;
+	d = m ? 1.8446744073709552E19f * d : d;
+	// Bits 23..30 hold the biased exponent; the mask also drops the sign bit.
+	int q = (floatToRawIntBits(d) >> 23) & 0xff;
+	q = m ? q - (64 + 0x7e) : q - 0x7e;
+	return q;
+    }
+
+    static float pow2if(int q) {
+	// Place q plus the bias 0x7f in the exponent field, mantissa zero.
+	int biased = q + 0x7f;
+	return intBitsToFloat(biased << 23);
+    }
+
+    public static float ldexpf(float x, int q) { // scales x by a power of two with exponent q, split over several multiplications
+	float u;
+	int m;
+	m = q >> 31; // 0 for q >= 0, -1 for q < 0
+	m = (((m + q) >> 6) - m) << 4; // coarse part of the exponent, a multiple of 16
+	q = q - (m << 2); // remainder once the coarse part has been applied four times
+	m += 127; // add the bias 127
+	m = m <   0 ?   0 : m; // clamp the biased exponent to 0..255
+	m = m > 255 ? 255 : m;
+	u = intBitsToFloat(((int)m) << 23); // float whose exponent field is m and whose mantissa bits are zero
+	x = x * u * u * u * u; // apply the coarse factor four times
+	u = intBitsToFloat(((int)(q + 0x7f)) << 23); // same construction for the remaining exponent
+	return x * u;
+    }
+
+    static float upperf(float d) { // d with the low 12 bits of its bit pattern cleared; the mul/div/rec helpers use it to split operands
+	return intBitsToFloat(floatToRawIntBits(d) & 0xfffff000);
+    }
+
+    static float2 df(float h, float l) {
+	// Builds a pair with h in the x field and l in the y field.
+	return new float2(h, l);
+    }
+
+    static float2 dfnormalize_f2_f2(float2 t) { // re-splits a pair so that x holds the rounded sum of both parts
+	float2 s = new float2();
+
+	s.x = t.x + t.y; // rounded sum
+	s.y = t.x - s.x + t.y; // what the rounded sum left out
+
+	return s;
+    }
+
+    static float2 dfscale_f2_f2_f(float2 d, float s) {
+	// Multiplies both parts of the pair by s.
+	return new float2(d.x * s, d.y * s);
+    }
+
+    static float2 dfadd2_f2_f_f(float x, float y) { // sum of two floats as a pair: rounded sum plus its rounding error
+	float2 r = new float2();
+
+	r.x = x + y; // rounded sum
+	float v = r.x - x; // the part of y that made it into the sum
+	r.y = (x - (r.x - v)) + (y - v); // error term built from both operands
+
+	return r;
+    }
+
+    static float2 dfadd_f2_f2_f(float2 x, float y) { // pair + float, shorter form that relies on the stated ordering
+	// |x| >= |y|
+
+	float2 r = new float2();
+
+	r.x = x.x + y; // rounded sum of the high part and y
+	r.y = x.x - r.x + y + x.y; // error of that sum plus the low part of x
+
+	return r;
+    }
+
+    static float2 dfadd2_f2_f2_f(float2 x, float y) { // pair + float using the v-based error term
+	// |x| >= |y|  (NOTE(review): the error term below has the same form as dfadd2_f2_f_f, which states no ordering requirement; this precondition may be a copy-paste leftover -- confirm)
+
+	float2 r = new float2();
+
+	r.x  = x.x + y; // rounded sum of the high part and y
+	float v = r.x - x.x; // the part of y that made it into the sum
+	r.y = (x.x - (r.x - v)) + (y - v); // error term built from both operands
+	r.y += x.y; // fold in the low part of x
+
+	return r;
+    }
+
+    static float2 dfadd_f2_f_f2(float x, float2 y) { // float + pair, shorter form that relies on the stated ordering
+	// |x| >= |y|
+
+	float2 r = new float2();
+
+	r.x = x + y.x; // rounded sum of x and the high part of y
+	r.y = x - r.x + y.x + y.y; // error of that sum plus the low part of y
+
+	return r;
+    }
+
+    static float2 dfadd_f2_f2_f2(float2 x, float2 y) { // pair + pair, shorter form that relies on the stated ordering
+	// |x| >= |y|
+
+	float2 r = new float2();
+
+	r.x = x.x + y.x; // rounded sum of the high parts
+	r.y = x.x - r.x + y.x + x.y + y.y; // error of that sum plus both low parts
+
+	return r;
+    }
+
+    static float2 dfadd2_f2_f2_f2(float2 x, float2 y) { // pair + pair using the v-based error term
+	float2 r = new float2();
+
+	r.x  = x.x + y.x; // rounded sum of the high parts
+	float v = r.x - x.x; // the part of y.x that made it into the sum
+	r.y = (x.x - (r.x - v)) + (y.x - v); // error term built from both high parts
+	r.y += x.y + y.y; // fold in both low parts
+
+	return r;
+    }
+
+    static float2 dfsub_f2_f2_f2(float2 x, float2 y) { // pair - pair, shorter form that relies on the stated ordering
+	// |x| >= |y|
+
+	float2 r = new float2();
+
+	r.x = x.x - y.x; // rounded difference of the high parts
+	r.y = x.x - r.x - y.x + x.y - y.y; // error of that difference plus the difference of the low parts
+
+	return r;
+    }
+
+    static float2 dfdiv_f2_f2_f2(float2 n, float2 d) { // quotient n / d of two pairs, returned as a pair
+	float t = 1.0f / d.x; // reciprocal of the divisor's high part
+	float dh  = upperf(d.x), dl  = d.x - dh; // split d.x, t and n.x into upper and lower pieces
+	float th  = upperf(t  ), tl  = t   - th;
+	float nhh = upperf(n.x), nhl = n.x - nhh;
+
+	float2 q = new float2();
+
+	q.x = n.x * t; // leading quotient
+
+	float u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl + // error of the product n.x * t, from the split pieces ...
+	    q.x * (1 - dh * th - dh * tl - dl * th - dl * tl); // ... plus q.x times the residual 1 - d.x * t
+
+	q.y = t * (n.y - q.x * d.y) + u; // contribution of the low parts, plus the error term
+
+	return q;
+    }
+
+    static float2 dfmul_f2_f_f(float x, float y) { // product of two floats as a pair: rounded product plus its low-order part
+	float xh = upperf(x), xl = x - xh; // split both operands into upper and lower pieces
+	float yh = upperf(y), yl = y - yh;
+	float2 r = new float2();
+
+	r.x = x * y; // rounded product
+	r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl; // low-order part rebuilt from the split pieces
+
+	return r;
+    }
+
+    static float2 dfmul_f2_f2_f(float2 x, float y) { // pair times float, returned as a pair
+	float xh = upperf(x.x), xl = x.x - xh; // split x.x and y into upper and lower pieces
+	float yh = upperf(y  ), yl = y   - yh;
+	float2 r = new float2();
+
+	r.x = x.x * y; // rounded product of the high part and y
+	r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y; // low-order part of that product plus the low part of x times y
+
+	return r;
+    }
+
+    static float2 dfmul_f2_f2_f2(float2 x, float2 y) { // pair times pair, returned as a pair
+	float xh = upperf(x.x), xl = x.x - xh; // split both high parts into upper and lower pieces
+	float yh = upperf(y.x), yl = y.x - yh;
+	float2 r = new float2();
+
+	r.x = x.x * y.x; // rounded product of the high parts
+	r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x; // low-order part of that product plus the two cross terms (x.y * y.y is not included)
+
+	return r;
+    }
+
+    static float2 dfsqu_f2_f2(float2 x) { // square of a pair, returned as a pair
+	float xh = upperf(x.x), xl = x.x - xh; // split the high part into upper and lower pieces
+	float2 r = new float2();
+
+	r.x = x.x * x.x; // rounded square of the high part
+	r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y); // low-order part of that square plus 2 * x.x * x.y
+
+	return r;
+    }
+
+    static float2 dfrec_f2_f(float d) { // reciprocal of a float, returned as a pair
+	float t = 1.0f / d; // rounded reciprocal
+	float dh = upperf(d), dl = d - dh; // split d and t into upper and lower pieces
+	float th = upperf(t), tl = t - th;
+	float2 q = new float2();
+
+	q.x = t;
+	q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl); // t times the residual 1 - d * t
+
+	return q;
+    }
+
+    static float2 dfrec_f2_f2(float2 d) { // reciprocal of a pair, returned as a pair
+	float t = 1.0f / d.x; // rounded reciprocal of the high part
+	float dh = upperf(d.x), dl = d.x - dh; // split d.x and t into upper and lower pieces
+	float th = upperf(t  ), tl = t   - th;
+	float2 q = new float2();
+
+	q.x = t;
+	q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl - d.y * t); // t times the residual 1 - (d.x + d.y) * t
+
+	return q;
+    }
+
+    static float2 dfsqrt_f2_f2(float2 d) { // square root of a pair, returned as a pair
+	float t = sqrtf(d.x + d.y); // single-precision estimate from the collapsed value
+	return dfscale_f2_f2_f(dfmul_f2_f2_f2(dfadd2_f2_f2_f2(d, dfmul_f2_f_f(t, t)), dfrec_f2_f(t)), 0.5f); // (d + t*t) * (1/t) * 0.5, one correction step around t
+    }
+
+    /**
+       This function returns the cube root of the argument in single
+       precision. The results may have maximum error of 2 ulps.
+    */
+    public static float cbrtf(float d) {
+	float x, y, q = 1.0f;
+	int e, r;
+
+	// Pass the magnitude so the exponent is independent of the sign of d; the
+	// sign is reapplied through q below. With the signed value, negative
+	// inputs of large magnitude produced a wrong exponent.
+	e = ilogbp1f(fabsf(d));
+	d = ldexpf(d, -e);
+
+	// q collects the factor removed from d: 2^(1/3) or 2^(2/3) for the
+	// remainder of e modulo 3, times a power of two for the quotient. The
+	// +6144 / -2048 offsets keep the integer division and remainder on
+	// non-negative values.
+	r = (e + 6144) % 3;
+	q = (r == 1) ? 1.2599210498948731647672106f : q;
+	q = (r == 2) ? 1.5874010519681994747517056f : q;
+	q = ldexpf(q, (e + 6144) / 3 - 2048);
+
+	q = mulsignf(q, d);
+	d = fabsf(d);
+
+	// Polynomial starting value in the reduced argument.
+	x = -0.601564466953277587890625f;
+	x = mlaf(x, d, 2.8208892345428466796875f);
+	x = mlaf(x, d, -5.532182216644287109375f);
+	x = mlaf(x, d, 5.898262500762939453125f);
+	x = mlaf(x, d, -3.8095417022705078125f);
+	x = mlaf(x, d, 2.2241256237030029296875f);
+
+	// d*x*x with one correction step, scaled back by q.
+	y = d * x * x;
+	y = (y - (2.0f / 3.0f) * y * (y * x - 1.0f)) * q;
+
+	return y;
+    }
+
+    /**
+       Returns the trigonometric sine of an angle in single
+       precision. The results may have maximum error of 3 ulps.
+    */
+    public static float sinf(float d) {
+	int q;
+	float u, s;
+
+	q = (int)rintf(d * (float)M_1_PI); // nearest integer to d * M_1_PI (M_1_PI is defined elsewhere; presumably 1/pi)
+
+	d = mlaf(q, -PI4_Af*4, d); // subtract q times four times each PI4 piece, one piece at a time
+	d = mlaf(q, -PI4_Bf*4, d);
+	d = mlaf(q, -PI4_Cf*4, d);
+	d = mlaf(q, -PI4_Df*4, d);
+
+	s = d * d;
+
+	if ((q & 1) != 0) d = -d; // odd q flips the sign
+
+	u = 2.6083159809786593541503e-06f; // polynomial in s, evaluated by Horner's rule
+	u = mlaf(u, s, -0.0001981069071916863322258f);
+	u = mlaf(u, s, 0.00833307858556509017944336f);
+	u = mlaf(u, s, -0.166666597127914428710938f);
+
+	u = mlaf(s, u * d, d); // d + d * s * polynomial
+
+	if (isinff(d)) { u = NANf; } // note: d is the reduced argument at this point
+
+	return u;
+    }
+
+    /**
+       Returns the trigonometric cosine of an angle in single
+       precision. The results may have maximum error of 3 ulps.
+    */
+    public static float cosf(float d) {
+	int q;
+	float u, s;
+
+	q = 1 + 2*(int)rintf(d * (float)M_1_PI - 0.5f); // always an odd integer
+
+	d = mlaf(q, -PI4_Af*2, d); // subtract q times twice each PI4 piece, one piece at a time
+	d = mlaf(q, -PI4_Bf*2, d);
+	d = mlaf(q, -PI4_Cf*2, d);
+	d = mlaf(q, -PI4_Df*2, d);
+
+	s = d * d;
+
+	if ((q & 2) == 0) d = -d; // sign chosen from bit 1 of q
+
+	u = 2.6083159809786593541503e-06f; // same polynomial in s as sinf
+	u = mlaf(u, s, -0.0001981069071916863322258f);
+	u = mlaf(u, s, 0.00833307858556509017944336f);
+	u = mlaf(u, s, -0.166666597127914428710938f);
+
+	u = mlaf(s, u * d, d); // d + d * s * polynomial
+
+	if (isinff(d)) { u = NANf; } // note: d is the reduced argument at this point
+
+	return u;
+    }
+
+    /**
+       Returns both the trigonometric sine and cosine of an angle in
+       single precision with one call. The sine and cosine of the
+       argument are returned in the x and y fields of the return value,
+       respectively. The results may have maximum error of 3 ulps.
+    */
+    public static float2 sincosf(float d) {
+	int q;
+	float u, s, t;
+	float2 r = new float2();
+
+	q = (int)rintf(d * ((float)(2 * M_1_PI))); // nearest integer to d * 2 * M_1_PI (M_1_PI is defined elsewhere)
+
+	s = d;
+
+	s = mlaf(q, -PI4_Af*2, s); // subtract q times twice each PI4 piece, one piece at a time
+	s = mlaf(q, -PI4_Bf*2, s);
+	s = mlaf(q, -PI4_Cf*2, s);
+	s = mlaf(q, -PI4_Df*2, s);
+
+	t = s; // reduced argument
+
+	s = s * s; // its square
+
+	u = -0.000195169282960705459117889f; // first polynomial in s, odd series in t
+	u = mlaf(u, s, 0.00833215750753879547119141f);
+	u = mlaf(u, s, -0.166666537523269653320312f);
+	u = u * s * t;
+
+	r.x = t + u;
+
+	u = -2.71811842367242206819355e-07f; // second polynomial in s, even series
+	u = mlaf(u, s, 2.47990446951007470488548e-05f);
+	u = mlaf(u, s, -0.00138888787478208541870117f);
+	u = mlaf(u, s, 0.0416666641831398010253906f);
+	u = mlaf(u, s, -0.5f);
+
+	r.y = u * s + 1;
+
+	if ((q & 1) != 0) { s = r.y; r.y = r.x; r.x = s; } // odd q: swap the two results (s is reused as scratch)
+	if ((q & 2) != 0) { r.x = -r.x; } // sign of the x result from bit 1 of q
+	if (((q+1) & 2) != 0) { r.y = -r.y; } // sign of the y result from bit 1 of q + 1
+
+	if (isinff(d)) { r.x = r.y = NANf; } // infinite input gives NaN in both fields
+
+	return r;
+    }
+
+    /**
+       Returns the trigonometric tangent of an angle in single
+       precision. The results may have maximum error of 4 ulps.
+    */
+    public static float tanf(float d) {
+	int q;
+	float u, s, x;
+
+	q = (int)rintf(d * (float)(2 * M_1_PI)); // nearest integer to d * 2 * M_1_PI (M_1_PI is defined elsewhere)
+
+	x = d;
+
+	x = mlaf(q, -PI4_Af*2, x); // subtract q times twice each PI4 piece, one piece at a time
+	x = mlaf(q, -PI4_Bf*2, x);
+	x = mlaf(q, -PI4_Cf*2, x);
+	x = mlaf(q, -PI4_Df*2, x);
+
+	s = x * x;
+
+	if ((q & 1) != 0) x = -x; // odd q: negate here and take the reciprocal below
+
+	u = 0.00927245803177356719970703f; // polynomial in s, evaluated by Horner's rule
+	u = mlaf(u, s, 0.00331984995864331722259521f);
+	u = mlaf(u, s, 0.0242998078465461730957031f);
+	u = mlaf(u, s, 0.0534495301544666290283203f);
+	u = mlaf(u, s, 0.133383005857467651367188f);
+	u = mlaf(u, s, 0.333331853151321411132812f);
+
+	u = mlaf(s, u * x, x); // x + x * s * polynomial
+
+	if ((q & 1) != 0) u = 1.0f / u;
+
+	if (isinff(d)) u = NANf; // infinite input gives NaN
+
+	return u;
+    }
+
+    /**
+       Returns the arc tangent of the argument in single precision. The
+       results may have maximum error of 3 ulps.
+    */
+    public static float atanf(float s) {
+	float t, u;
+	int q = 0; // bit 1: input was negative; bit 0: reciprocal was taken
+
+	if (s < 0) { s = -s; q = 2; } // work on the magnitude
+	if (s > 1) { s = 1.0f / s; q |= 1; } // bring the argument into 0..1
+
+	t = s * s;
+
+	u = 0.00282363896258175373077393f; // polynomial in t, evaluated by Horner's rule
+	u = mlaf(u, t, -0.0159569028764963150024414f);
+	u = mlaf(u, t, 0.0425049886107444763183594f);
+	u = mlaf(u, t, -0.0748900920152664184570312f);
+	u = mlaf(u, t, 0.106347933411598205566406f);
+	u = mlaf(u, t, -0.142027363181114196777344f);
+	u = mlaf(u, t, 0.199926957488059997558594f);
+	u = mlaf(u, t, -0.333331018686294555664062f);
+
+	t = s + s * (t * u); // s + s^3 * polynomial
+
+	if ((q & 1) != 0) t = 1.570796326794896557998982f - t; // undo the reciprocal: pi/2 - t
+	if ((q & 2) != 0) t = -t; // restore the sign
+
+	return t;
+    }
+
+    private static float atan2kf(float y, float x) { // arc tangent kernel shared by atan2f, asinf and acosf
+	float s, t, u;
+	int q = 0; // number of quarter turns (multiples of pi/2) added at the end
+
+	if (x < 0) { x = -x; q = -2; } // negative x: negate it and start at -2 quarter turns
+	if (y > x) { t = x; x = y; y = -t; q += 1; } // larger y: swap so the quotient below stays within -1..1, one more quarter turn
+
+	s = y / x;
+	t = s * s;
+
+	u = 0.00282363896258175373077393f; // same polynomial in t as atanf
+	u = mlaf(u, t, -0.0159569028764963150024414f);
+	u = mlaf(u, t, 0.0425049886107444763183594f);
+	u = mlaf(u, t, -0.0748900920152664184570312f);
+	u = mlaf(u, t, 0.106347933411598205566406f);
+	u = mlaf(u, t, -0.142027363181114196777344f);
+	u = mlaf(u, t, 0.199926957488059997558594f);
+	u = mlaf(u, t, -0.333331018686294555664062f);
+
+	t = u * t * s + s; // s + s^3 * polynomial
+	t = q * (float)(M_PIf/2) + t; // add q quarter turns
+
+	return t;
+    }
+
+    /**
+       This method calculates the arc tangent of y/x in single
+       precision. It uses the signs of the two arguments to determine
+       the quadrant of the result. The results may have maximum error
+       of 3 ulps.
+    */
+    public static float atan2f(float y, float x) {
+	float r = atan2kf(fabsf(y), x); // kernel on |y|; the sign of y is applied in the return statement
+
+	r = mulsignf(r, x);
+	if (isinff(x) || x == 0) r = M_PIf/2 - (isinff(x) ? (signf(x) * (float)(M_PIf  /2)) : 0); // x infinite: 0 or pi by sign of x; x zero: pi/2
+	if (isinff(y)          ) r = M_PIf/2 - (isinff(x) ? (signf(x) * (float)(M_PIf*1/4)) : 0); // y infinite: pi/2, or pi/4 / 3*pi/4 when x is infinite too
+	if (              y == 0) r = (signf(x) == -1 ? M_PIf : 0); // y zero: pi when x carries a negative sign, else 0
+
+	return isnanf(x) || isnanf(y) ? NANf : mulsignf(r, y); // NaN in, NaN out; otherwise give the result the sign of y
+    }
+
+    /**
+       This method calculates the arc sine of x in single
+       precision. The results may have maximum error of 3 ulps.
+    */
+    public static float asinf(float d) {
+	return mulsignf(atan2kf(fabsf(d), sqrtf((1.0f+d)*(1.0f-d))), d);
+    }
+
+    /**
+       This method calculates the arc cosine of x in single
+       precision. The results may have maximum error of 3 ulps.
+    */
+    public static float acosf(float d) {
+	return mulsignf(atan2kf(sqrtf((1.0f+d)*(1.0f-d)), fabsf(d)), d) + (d < 0 ? (float)M_PIf : 0.0f);
+    }
+
+    /**
+       Returns the natural logarithm of the argument in single
+       precision. The results may have maximum error of 3 ulps.
+    */
+    public static float logf(float d) {
+	float x, x2, t, m;
+	int e;
+
+	e = ilogbp1f(d * 0.7071f); // exponent derived from d scaled by 0.7071
+	m = ldexpf(d, -e); // d with that exponent removed
+
+	x = (m-1.0f) / (m+1.0f);
+	x2 = x * x;
+
+	t = 0.2371599674224853515625f; // polynomial in x2, evaluated by Horner's rule
+	t = mlaf(t, x2, 0.285279005765914916992188f);
+	t = mlaf(t, x2, 0.400005519390106201171875f);
+	t = mlaf(t, x2, 0.666666567325592041015625f);
+	t = mlaf(t, x2, 2.0f);
+
+	x = x * t + 0.693147180559945286226764f * e; // series term plus e times ln 2
+
+	if (isinff(d)) x = INFINITYf; // special cases; later lines override earlier ones
+	if (d < 0) x = NANf;
+	if (d == 0) x = -INFINITYf;
+
+	return x;
+    }
+
+    /**
+       Returns the value of e raised to the power of the argument in
+       single precision. The results may have maximum error of 1 ulp.
+    */
+    public static float expf(float d) {
+	int q = (int)rintf(d * R_LN2f); // nearest integer to d / ln 2
+	float s, u;
+
+	s = mlaf(q, -L2Uf, d); // s = d - q * ln 2, subtracted in two pieces
+	s = mlaf(q, -L2Lf, s);
+
+	u = 0.00136324646882712841033936f; // polynomial in s, evaluated by Horner's rule
+	u = mlaf(u, s, 0.00836596917361021041870117f);
+	u = mlaf(u, s, 0.0416710823774337768554688f);
+	u = mlaf(u, s, 0.166665524244308471679688f);
+	u = mlaf(u, s, 0.499999850988388061523438f);
+
+	u = s * s * u + s + 1.0f; // 1 + s + s^2 * polynomial
+	u = ldexpf(u, q); // scale by the power of two for q
+
+	if (isminff(d)) u = 0; // -infinity maps to 0
+
+	return u;
+    }
+
+    static float expkf(float2 d) { // exponential kernel taking a pair and returning a float; used by powf, exp2f and exp10f
+	int q = (int)rintf((d.x + d.y) * R_LN2f); // nearest integer to the collapsed argument / ln 2
+	float2 s, t;
+	float u;
+
+	s = dfadd2_f2_f2_f(d, q * -L2Uf); // s = d - q * ln 2, subtracted in two pieces
+	s = dfadd2_f2_f2_f(s, q * -L2Lf);
+
+	s = dfnormalize_f2_f2(s);
+
+	u = 0.00136324646882712841033936f; // polynomial in the high part of s (same coefficients as expf)
+	u = mlaf(u, s.x, 0.00836596917361021041870117f);
+	u = mlaf(u, s.x, 0.0416710823774337768554688f);
+	u = mlaf(u, s.x, 0.166665524244308471679688f);
+	u = mlaf(u, s.x, 0.499999850988388061523438f);
+
+	t = dfadd_f2_f2_f2(s, dfmul_f2_f2_f(dfsqu_f2_f2(s), u)); // s + s^2 * polynomial
+
+	t = dfadd_f2_f_f2(1, t); // 1 + s + s^2 * polynomial
+	return ldexpf(t.x + t.y, q); // collapse and scale by the power of two for q
+    }
+
+    static float2 logkf(float d) { // logarithm kernel taking a float and returning a pair; used by powf and log10f
+	float2 x, x2;
+	float m, t;
+	int e;
+
+	e = ilogbp1f(d * 0.7071f); // exponent derived from d scaled by 0.7071
+	m = ldexpf(d, -e); // d with that exponent removed
+
+	x = dfdiv_f2_f2_f2(dfadd2_f2_f_f(-1, m), dfadd2_f2_f_f(1, m)); // x = (m - 1) / (m + 1)
+	x2 = dfsqu_f2_f2(x);
+
+	t = 0.2371599674224853515625f; // polynomial in x2.x (high part only)
+	t = mlaf(t, x2.x, 0.285279005765914916992188f);
+	t = mlaf(t, x2.x, 0.400005519390106201171875f);
+	t = mlaf(t, x2.x, 0.666666567325592041015625f);
+
+	return dfadd2_f2_f2_f2(dfmul_f2_f2_f(df(0.69314718246459960938f, -1.904654323148236017e-09f), e), // e times a two-part constant (about ln 2) ...
+			       dfadd2_f2_f2_f2(dfscale_f2_f2_f(x, 2), dfmul_f2_f2_f(dfmul_f2_f2_f2(x2, x), t))); // ... plus 2*x + x^3 * t
+    }
+
+    /**
+       Returns x raised to the power of y in single precision. The
+       magnitude is computed as expkf(logkf(|x|) * y); sign and special
+       cases are patched in afterwards.
+    */
+    public static float powf(float x, float y) {
+	// A float-to-int cast saturates once |y| reaches 2^31, so integrality and
+	// parity cannot be read from (int)y there. Every finite float of that
+	// magnitude is an even integer.
+	boolean ybig = fabsf(y) >= 2147483648.0f;
+	boolean yisint = ybig ? !isinff(y) : (int)y == y;
+	boolean yisodd = !ybig && yisint && (1 & (int)y) != 0;
+
+	float result = expkf(dfmul_f2_f2_f(logkf(fabsf(x)), y));
+
+	// A NaN from the kernel is treated as overflow.
+	result = isnanf(result) ? INFINITYf : result;
+	// Negative base: NaN for a non-integer exponent, otherwise sign by parity.
+	result *=  (x >= 0 ? 1 : (!yisint ? NANf : (yisodd ? -1 : 1)));
+
+	// Infinite exponent: 0, 1 or infinity, decided by |x| - 1 with the sign of y.
+	float efx = mulsignf(fabsf(x) - 1, y);
+	if (isinff(y)) result = efx < 0 ? 0.0f : (efx == 0 ? 1.0f : INFINITYf);
+	// Infinite or zero base.
+	if (isinff(x) || x == 0) result = (yisodd ? signf(x) : 1) * ((x == 0 ? -y : y) < 0 ? 0 : INFINITYf);
+	if (isnanf(x) || isnanf(y)) result = NANf;
+	// These two cases override everything above, including NaN operands.
+	if (y == 0 || x == 1) result = 1;
+
+	return result;
+    }
+
+    static float2 expk2f(float2 d) { // exponential kernel taking and returning a pair; used by sinhf, coshf, tanhf and expm1f
+	int q = (int)rintf((d.x + d.y) * R_LN2f); // nearest integer to the collapsed argument / ln 2
+	float2 s, t;
+	float u;
+
+	s = dfadd2_f2_f2_f(d, q * -L2Uf); // s = d - q * ln 2, subtracted in two pieces
+	s = dfadd2_f2_f2_f(s, q * -L2Lf);
+
+	s = dfnormalize_f2_f2(s);
+
+	u = 0.00136324646882712841033936f; // polynomial in the high part of s (same coefficients as expf)
+	u = mlaf(u, s.x, 0.00836596917361021041870117f);
+	u = mlaf(u, s.x, 0.0416710823774337768554688f);
+	u = mlaf(u, s.x, 0.166665524244308471679688f);
+	u = mlaf(u, s.x, 0.499999850988388061523438f);
+
+	t = dfadd_f2_f2_f2(s, dfmul_f2_f2_f(dfsqu_f2_f2(s), u)); // s + s^2 * polynomial
+
+	t = dfadd_f2_f_f2(1, t); // 1 + s + s^2 * polynomial
+	return dfscale_f2_f2_f(t, pow2if(q)); // scale both parts by pow2if(q); unlike expkf, the result stays a pair
+    }
+
+    /**
+       Returns the hyperbolic sine of x in single precision.
+    */
+    public static float sinhf(float x) {
+	float ax = fabsf(x);
+
+	// (e - 1/e) / 2 with e = expk2f(|x|), kept as a pair until the end.
+	float2 e = expk2f(df(ax, 0));
+	float2 diff = dfsub_f2_f2_f2(e, dfrec_f2_f2(e));
+	float r = (diff.x + diff.y) * 0.5f;
+
+	if (ax > 89 || isnanf(r)) {
+	    r = INFINITYf;
+	}
+	r = mulsignf(r, x);
+
+	return isnanf(x) ? NANf : r;
+    }
+
+    /**
+       Returns the hyperbolic cosine of x in single precision.
+    */
+    public static float coshf(float x) {
+	float ax = fabsf(x);
+
+	// (e + 1/e) / 2 with e = expk2f(|x|), kept as a pair until the end.
+	float2 e = expk2f(df(ax, 0));
+	float2 sum = dfadd_f2_f2_f2(e, dfrec_f2_f2(e));
+	float r = (sum.x + sum.y) * 0.5f;
+
+	if (ax > 89 || isnanf(r)) {
+	    r = INFINITYf;
+	}
+
+	return isnanf(x) ? NANf : r;
+    }
+
+    /**
+       Returns the hyperbolic tangent of x in single precision.
+    */
+    public static float tanhf(float x) {
+	float ax = fabsf(x);
+
+	// (e - 1/e) / (e + 1/e) with e = expk2f(|x|).
+	float2 ePos = expk2f(df(ax, 0));
+	float2 eNeg = dfdiv_f2_f2_f2(df(1, 0), ePos);
+	float2 numer = dfadd2_f2_f2_f2(ePos, dfscale_f2_f2_f(eNeg, -1));
+	float2 denom = dfadd2_f2_f2_f2(ePos, eNeg);
+	float2 ratio = dfdiv_f2_f2_f2(numer, denom);
+	float r = ratio.x + ratio.y;
+
+	// Past the cutoff, or when the quotient came out as NaN, the magnitude is 1.
+	if (ax > 8.664339742f || isnanf(r)) {
+	    r = 1.0f;
+	}
+	r = mulsignf(r, x);
+
+	return isnanf(x) ? NANf : r;
+    }
+
+    static float2 logk2f(float2 d) { // logarithm kernel taking and returning a pair; used by asinhf, acoshf, atanhf and log1pf
+	float2 x, x2, m;
+	float t;
+	int e;
+
+	e = ilogbp1f(d.x * 0.7071f); // exponent derived from the high part scaled by 0.7071
+	m = dfscale_f2_f2_f(d, pow2if(-e)); // d with that exponent removed from both parts
+
+	x = dfdiv_f2_f2_f2(dfadd2_f2_f2_f(m, -1), dfadd2_f2_f2_f(m, 1)); // x = (m - 1) / (m + 1)
+	x2 = dfsqu_f2_f2(x);
+
+	t = 0.2371599674224853515625f; // polynomial in x2.x (high part only)
+	t = mlaf(t, x2.x, 0.285279005765914916992188f);
+	t = mlaf(t, x2.x, 0.400005519390106201171875f);
+	t = mlaf(t, x2.x, 0.666666567325592041015625f);
+
+	return dfadd2_f2_f2_f2(dfmul_f2_f2_f(df(0.69314718246459960938f, -1.904654323148236017e-09f), e), // e times a two-part constant (about ln 2) ...
+			       dfadd2_f2_f2_f2(dfscale_f2_f2_f(x, 2), dfmul_f2_f2_f(dfmul_f2_f2_f2(x2, x), t))); // ... plus 2*x + x^3 * t
+    }
+
+    /**
+       Returns the inverse hyperbolic sine of x in single precision.
+    */
+    public static float asinhf(float x) {
+	float ax = fabsf(x);
+
+	// logk2f(sqrt(ax*ax + 1) + ax), built up from pair intermediates.
+	float2 squared = dfmul_f2_f_f(ax, ax);
+	float2 root = dfsqrt_f2_f2(dfadd2_f2_f2_f(squared,  1));
+	float2 lg = logk2f(dfadd2_f2_f2_f(root, ax));
+	float r = lg.x + lg.y;
+
+	if (isinff(x) || isnanf(r)) {
+	    r = INFINITYf;
+	}
+	r = mulsignf(r, x);
+
+	return isnanf(x) ? NANf : r;
+    }
+
+    /**
+       Returns the inverse hyperbolic cosine of x in single precision.
+    */
+    public static float acoshf(float x) {
+	// logk2f(sqrt(x*x - 1) + x), built up from pair intermediates.
+	float2 squared = dfmul_f2_f_f(x, x);
+	float2 root = dfsqrt_f2_f2(dfadd2_f2_f2_f(squared, -1));
+	float2 lg = logk2f(dfadd2_f2_f2_f(root, x));
+	float r = lg.x + lg.y;
+
+	// Special cases, applied in the same order as before (later ones win).
+	if (isinff(x) || isnanf(r)) {
+	    r = INFINITYf;
+	}
+	if (x == 1.0f) {
+	    r = 0.0f;
+	}
+	if (x < 1.0f) {
+	    r = NANf;
+	}
+	if (isnanf(x)) {
+	    r = NANf;
+	}
+
+	return r;
+    }
+
+    /**
+       Returns the inverse hyperbolic tangent of x in single precision.
+    */
+    public static float atanhf(float x) {
+	float ax = fabsf(x);
+
+	// logk2f((1 + ax) / (1 - ax)); halved below.
+	float2 lg = logk2f(dfdiv_f2_f2_f2(dfadd2_f2_f_f(1, ax), dfadd2_f2_f_f(1, -ax)));
+
+	float r;
+	if (ax > 1.0) {
+	    r = NANf;
+	} else if (ax == 1.0) {
+	    r = INFINITYf;
+	} else {
+	    r = (lg.x + lg.y) * 0.5f;
+	}
+
+	if (isinff(x) || isnanf(r)) {
+	    r = NANf;
+	}
+	r = mulsignf(r, x);
+
+	return isnanf(x) ? NANf : r;
+    }
+
+    public static float exp2f(float a) {
+	float u = expkf(dfmul_f2_f2_f(df(0.69314718246459960938f, -1.904654323148236017e-09f), a));
+	if (ispinff(a)) u = INFINITYf;
+	if (isminff(a)) u = 0;
+	return u;
+    }
+
+    public static float exp10f(float a) {
+	float u = expkf(dfmul_f2_f2_f(df(2.3025851249694824219f, -3.1975436520781386207e-08f), a));
+	if (ispinff(a)) u = INFINITYf;
+	if (isminff(a)) u = 0;
+	return u;
+    }
+
+    /**
+       Returns a value equivalent to exp(a)-1 in single precision.
+    */
+    public static float expm1f(float a) {
+	// The subtraction is done on the pair result of expk2f.
+	float2 shifted = dfadd2_f2_f2_f(expk2f(df(a, 0)), -1.0f);
+	float r = shifted.x + shifted.y;
+
+	// The two cutoffs cannot both apply.
+	if (a < -0.15942385152878742116596338793538061065739925620174e+2f) {
+	    return -1;
+	}
+	if (a > 88.0f) {
+	    return INFINITYf;
+	}
+	return r;
+    }
+
+    /**
+       Returns the base 10 logarithm of the argument in single precision.
+    */
+    public static float log10f(float a) {
+	// logkf(a) times a two-part constant (about 1 / ln 10).
+	float2 ln = logkf(a);
+	float2 scaled = dfmul_f2_f2_f2(ln, df(0.43429449200630187988f, -1.0103050118726031315e-08f));
+	float r = scaled.x + scaled.y;
+
+	// Zero first, then any negative value (including -infinity), then +infinity.
+	if (a == 0) {
+	    return -INFINITYf;
+	}
+	if (a < 0) {
+	    return NANf;
+	}
+	if (isinff(a)) {
+	    return INFINITYf;
+	}
+	return r;
+    }
+
+    /**
+       Returns a value equivalent to log(1+a) in single precision.
+    */
+    public static float log1pf(float a) {
+	// a + 1 is formed as a pair before the logarithm kernel.
+	float2 lg = logk2f(dfadd2_f2_f_f(a, 1));
+	float r = lg.x + lg.y;
+
+	// -1 first, then anything below it (including -infinity), then +infinity.
+	if (a == -1) {
+	    return -INFINITYf;
+	}
+	if (a < -1) {
+	    return NANf;
+	}
+	if (isinff(a)) {
+	    return INFINITYf;
+	}
+	return r;
+    }
+}
diff --git a/purec/Makefile b/purec/Makefile
new file mode 100644
index 00000000..3dc6012e
--- /dev/null
+++ b/purec/Makefile
@@ -0,0 +1,25 @@
+CC=gcc
+
+iut : sleefdp.c sleefsp.c iut.c  # implementation-under-test binary driven by the testers
+	$(CC) -Wall -DNDEBUG sleefdp.c sleefsp.c iut.c -o iut -lm
+
+../tester/tester :  # tester binaries are built by the sibling directory's makefile
+	cd ../tester && $(MAKE) tester
+
+../tester/testeru1 :
+	cd ../tester && $(MAKE) testeru1
+
+../tester/testersp :
+	cd ../tester && $(MAKE) testersp
+
+../tester/testerspu1 :
+	cd ../tester && $(MAKE) testerspu1
+
+test : iut ../tester/tester ../tester/testeru1 ../tester/testersp ../tester/testerspu1  # run all four testers against ./iut
+	../tester/tester ./iut
+	../tester/testeru1 ./iut
+	../tester/testersp ./iut
+	../tester/testerspu1 ./iut
+
+clean :
+	rm -f *~ *.o iut
diff --git a/purec/Makefile.icc b/purec/Makefile.icc
new file mode 100644
index 00000000..8d583734
--- /dev/null
+++ b/purec/Makefile.icc
@@ -0,0 +1,19 @@
+CC=/opt/intel/bin/icc
+
+iut : sleefdp.c sleefsp.c iut.c  # implementation-under-test binary, built with icc
+	$(CC) -Wall -fp-model precise sleefdp.c sleefsp.c iut.c -o iut -lm
+
+../tester/tester :  # tester binaries are built by the sibling directory's makefile
+	cd ../tester && $(MAKE) tester
+
+../tester/testersp :
+	cd ../tester && $(MAKE) testersp
+
+test : iut ../tester/tester  # double-precision tester
+	../tester/tester ./iut
+
+testsp : iut ../tester/testersp  # single-precision tester
+	../tester/testersp ./iut
+
+clean :
+	rm -f *~ *.o iut
diff --git a/purec/iut.c b/purec/iut.c
new file mode 100644
index 00000000..89c30b5e
--- /dev/null
+++ b/purec/iut.c
@@ -0,0 +1,452 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <math.h>
+#include <inttypes.h>
+
+#include <unistd.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#include "sleef.h"
+
+// Read one line from fd into buf, one byte at a time, keeping the trailing '\n' when it fits.
+// At most cnt-1 bytes are stored and buf is NUL-terminated directly after the last stored byte.
+// Returns the number of bytes stored, -1 if cnt < 1, or read()'s own result (0 on EOF, -1 on error).
+int readln(int fd, char *buf, int cnt) {
+  int i, rcnt = 0;
+  if (cnt < 1) return -1;
+
+  while(cnt >= 2) {
+    i = read(fd, buf, 1);
+    if (i != 1) return i; // EOF or error: a partially read line is not terminated or reported
+
+    rcnt++;
+    cnt--;
+    // Step past the byte just stored; stop once the newline itself has been stored.
+    if (*buf++ == '\n') break;
+  }
+
+  *buf = '\0'; // buf already points one past the last stored byte, also when the line was truncated
+  return rcnt;
+}
+
+// Return 1 when str begins with prefix (an empty prefix always matches), 0 otherwise.
+int startsWith(char *str, char *prefix) {
+  return !strncmp(str, prefix, strlen(prefix));
+}
+// Reinterpret the 64 bits of u as a double: a bit-for-bit copy, not a numeric
+// conversion. main() receives operands as hex bit patterns, so every pattern
+// (NaNs, infinities, signed zeros) is passed through unchanged.
+double u2d(uint64_t u) {
+  double value;
+  memcpy(&value, &u, sizeof value);
+  return value;
+}
+
+// Return the raw 64-bit pattern of d: a bit-for-bit copy, not a numeric
+// conversion. main() uses it to print results as hex so that every value is
+// reported exactly.
+uint64_t d2u(double d) {
+  uint64_t bits;
+  memcpy(&bits, &d, sizeof bits);
+  return bits;
+}
+
+// Reinterpret the 32 bits of u as a float: a bit-for-bit copy, not a numeric
+// conversion. main() receives single-precision operands as hex bit patterns
+// and decodes them with this helper.
+float u2f(uint32_t u) {
+  float value;
+  memcpy(&value, &u, sizeof value);
+  return value;
+}
+
+// Return the raw 32-bit pattern of d: a bit-for-bit copy, not a numeric
+// conversion. main() uses it to print single-precision results as hex so that
+// every value is reported exactly.
+uint32_t f2u(float d) {
+  uint32_t bits;
+  memcpy(&bits, &d, sizeof bits);
+  return bits;
+}
+
+#define BUFSIZE 1024
+
+int main(int argc, char **argv) {
+  char buf[BUFSIZE];
+  // Command loop: each stdin line is "<name> <hex bits>..."; the named function is applied to the
+  // decoded operand(s) and the result is written to stdout as hex bits, one reply line per command.
+  // The loop ends on EOF or read error, on an unknown command, or when the operands fail to parse.
+  for(;;) {
+    if (readln(STDIN_FILENO, buf, BUFSIZE-1) < 1) break;
+
+    // Every sscanf result is checked so that a malformed operand ends the session instead of
+    // passing an uninitialised value to the function under test.
+    if (startsWith(buf, "sin ")) {
+      uint64_t u;
+      if (sscanf(buf, "sin %" SCNx64, &u) != 1) break;
+      u = d2u(xsin(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "sin_u1 ")) {
+      uint64_t u;
+      if (sscanf(buf, "sin_u1 %" SCNx64, &u) != 1) break;
+      u = d2u(xsin_u1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "cos ")) {
+      uint64_t u;
+      if (sscanf(buf, "cos %" SCNx64, &u) != 1) break;
+      u = d2u(xcos(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "cos_u1 ")) {
+      uint64_t u;
+      if (sscanf(buf, "cos_u1 %" SCNx64, &u) != 1) break;
+      u = d2u(xcos_u1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "sincos ")) {
+      uint64_t u;
+      if (sscanf(buf, "sincos %" SCNx64, &u) != 1) break;
+      double2 x = xsincos(u2d(u));
+      printf("%" PRIx64 " %" PRIx64 "\n", d2u(x.x), d2u(x.y));
+    } else if (startsWith(buf, "sincos_u1 ")) {
+      uint64_t u;
+      if (sscanf(buf, "sincos_u1 %" SCNx64, &u) != 1) break;
+      double2 x = xsincos_u1(u2d(u));
+      printf("%" PRIx64 " %" PRIx64 "\n", d2u(x.x), d2u(x.y));
+    } else if (startsWith(buf, "tan ")) {
+      uint64_t u;
+      if (sscanf(buf, "tan %" SCNx64, &u) != 1) break;
+      u = d2u(xtan(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "tan_u1 ")) {
+      uint64_t u;
+      if (sscanf(buf, "tan_u1 %" SCNx64, &u) != 1) break;
+      u = d2u(xtan_u1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "asin ")) {
+      uint64_t u;
+      if (sscanf(buf, "asin %" SCNx64, &u) != 1) break;
+      u = d2u(xasin(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "acos ")) {
+      uint64_t u;
+      if (sscanf(buf, "acos %" SCNx64, &u) != 1) break;
+      u = d2u(xacos(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "atan ")) {
+      uint64_t u;
+      if (sscanf(buf, "atan %" SCNx64, &u) != 1) break;
+      u = d2u(xatan(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "log ")) {
+      uint64_t u;
+      if (sscanf(buf, "log %" SCNx64, &u) != 1) break;
+      u = d2u(xlog(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "exp ")) {
+      uint64_t u;
+      if (sscanf(buf, "exp %" SCNx64, &u) != 1) break;
+      u = d2u(xexp(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "atan2 ")) {
+      uint64_t u, v;
+      if (sscanf(buf, "atan2 %" SCNx64 " %" SCNx64, &u, &v) != 2) break;
+      u = d2u(xatan2(u2d(u), u2d(v)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "asin_u1 ")) {
+      uint64_t u;
+      if (sscanf(buf, "asin_u1 %" SCNx64, &u) != 1) break;
+      u = d2u(xasin_u1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "acos_u1 ")) {
+      uint64_t u;
+      if (sscanf(buf, "acos_u1 %" SCNx64, &u) != 1) break;
+      u = d2u(xacos_u1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "atan_u1 ")) {
+      uint64_t u;
+      if (sscanf(buf, "atan_u1 %" SCNx64, &u) != 1) break;
+      u = d2u(xatan_u1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "atan2_u1 ")) {
+      uint64_t u, v;
+      if (sscanf(buf, "atan2_u1 %" SCNx64 " %" SCNx64, &u, &v) != 2) break;
+      u = d2u(xatan2_u1(u2d(u), u2d(v)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "log_u1 ")) {
+      uint64_t u;
+      if (sscanf(buf, "log_u1 %" SCNx64, &u) != 1) break;
+      u = d2u(xlog_u1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "pow ")) {
+      uint64_t u, v;
+      if (sscanf(buf, "pow %" SCNx64 " %" SCNx64, &u, &v) != 2) break;
+      u = d2u(xpow(u2d(u), u2d(v)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "sinh ")) {
+      uint64_t u;
+      if (sscanf(buf, "sinh %" SCNx64, &u) != 1) break;
+      u = d2u(xsinh(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "cosh ")) {
+      uint64_t u;
+      if (sscanf(buf, "cosh %" SCNx64, &u) != 1) break;
+      u = d2u(xcosh(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "tanh ")) {
+      uint64_t u;
+      if (sscanf(buf, "tanh %" SCNx64, &u) != 1) break;
+      u = d2u(xtanh(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "asinh ")) {
+      uint64_t u;
+      if (sscanf(buf, "asinh %" SCNx64, &u) != 1) break;
+      u = d2u(xasinh(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "acosh ")) {
+      uint64_t u;
+      if (sscanf(buf, "acosh %" SCNx64, &u) != 1) break;
+      u = d2u(xacosh(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "atanh ")) {
+      uint64_t u;
+      if (sscanf(buf, "atanh %" SCNx64, &u) != 1) break;
+      u = d2u(xatanh(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "fma ")) {
+      uint64_t u, v, w;
+      if (sscanf(buf, "fma %" SCNx64 " %" SCNx64 " %" SCNx64, &u, &v, &w) != 3) break;
+      u = d2u(xfma(u2d(u), u2d(v), u2d(w)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "sqrt ")) {
+      uint64_t u;
+      if (sscanf(buf, "sqrt %" SCNx64, &u) != 1) break;
+      u = d2u(xsqrt(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "cbrt ")) {
+      uint64_t u;
+      if (sscanf(buf, "cbrt %" SCNx64, &u) != 1) break;
+      u = d2u(xcbrt(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "cbrt_u1 ")) {
+      uint64_t u;
+      if (sscanf(buf, "cbrt_u1 %" SCNx64, &u) != 1) break;
+      u = d2u(xcbrt_u1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "exp2 ")) {
+      uint64_t u;
+      if (sscanf(buf, "exp2 %" SCNx64, &u) != 1) break;
+      u = d2u(xexp2(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "exp10 ")) {
+      uint64_t u;
+      if (sscanf(buf, "exp10 %" SCNx64, &u) != 1) break;
+      u = d2u(xexp10(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "expm1 ")) {
+      uint64_t u;
+      if (sscanf(buf, "expm1 %" SCNx64, &u) != 1) break;
+      u = d2u(xexpm1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "log10 ")) {
+      uint64_t u;
+      if (sscanf(buf, "log10 %" SCNx64, &u) != 1) break;
+      u = d2u(xlog10(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "log1p ")) {
+      uint64_t u;
+      if (sscanf(buf, "log1p %" SCNx64, &u) != 1) break;
+      u = d2u(xlog1p(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "ldexp ")) {
+      uint64_t u, v;
+      if (sscanf(buf, "ldexp %" SCNx64 " %" SCNx64, &u, &v) != 2) break;
+      u = d2u(xldexp(u2d(u), (int)u2d(v)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "sinf ")) {
+      uint32_t u;
+      if (sscanf(buf, "sinf %" SCNx32, &u) != 1) break;
+      u = f2u(xsinf(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "cosf ")) {
+      uint32_t u;
+      if (sscanf(buf, "cosf %" SCNx32, &u) != 1) break;
+      u = f2u(xcosf(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "sincosf ")) {
+      uint32_t u;
+      if (sscanf(buf, "sincosf %" SCNx32, &u) != 1) break;
+      float2 x = xsincosf(u2f(u));
+      printf("%" PRIx32 " %" PRIx32 "\n", f2u(x.x), f2u(x.y));
+    } else if (startsWith(buf, "tanf ")) {
+      uint32_t u;
+      if (sscanf(buf, "tanf %" SCNx32, &u) != 1) break;
+      u = f2u(xtanf(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "asinf ")) {
+      uint32_t u;
+      if (sscanf(buf, "asinf %" SCNx32, &u) != 1) break;
+      u = f2u(xasinf(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "acosf ")) {
+      uint32_t u;
+      if (sscanf(buf, "acosf %" SCNx32, &u) != 1) break;
+      u = f2u(xacosf(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "atanf ")) {
+      uint32_t u;
+      if (sscanf(buf, "atanf %" SCNx32, &u) != 1) break;
+      u = f2u(xatanf(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "atan2f ")) {
+      uint32_t u, v;
+      if (sscanf(buf, "atan2f %" SCNx32 " %" SCNx32, &u, &v) != 2) break;
+      u = f2u(xatan2f(u2f(u), u2f(v)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "logf ")) {
+      uint32_t u;
+      if (sscanf(buf, "logf %" SCNx32, &u) != 1) break;
+      u = f2u(xlogf(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "expf ")) {
+      uint32_t u;
+      if (sscanf(buf, "expf %" SCNx32, &u) != 1) break;
+      u = f2u(xexpf(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "cbrtf ")) {
+      uint32_t u;
+      if (sscanf(buf, "cbrtf %" SCNx32, &u) != 1) break;
+      u = f2u(xcbrtf(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "sqrtf ")) {
+      uint32_t u;
+      if (sscanf(buf, "sqrtf %" SCNx32, &u) != 1) break;
+      u = f2u(sqrt(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "ldexpf ")) {
+      uint32_t u, v;
+      if (sscanf(buf, "ldexpf %" SCNx32 " %" SCNx32, &u, &v) != 2) break;
+      u = f2u(xldexpf(u2f(u), (int)u2f(v)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "powf ")) {
+      uint32_t u, v;
+      if (sscanf(buf, "powf %" SCNx32 " %" SCNx32, &u, &v) != 2) break;
+      u = f2u(xpowf(u2f(u), u2f(v)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "sinhf ")) {
+      uint32_t u;
+      if (sscanf(buf, "sinhf %" SCNx32, &u) != 1) break;
+      u = f2u(xsinhf(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "coshf ")) {
+      uint32_t u;
+      if (sscanf(buf, "coshf %" SCNx32, &u) != 1) break;
+      u = f2u(xcoshf(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "tanhf ")) {
+      uint32_t u;
+      if (sscanf(buf, "tanhf %" SCNx32, &u) != 1) break;
+      u = f2u(xtanhf(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "asinhf ")) {
+      uint32_t u;
+      if (sscanf(buf, "asinhf %" SCNx32, &u) != 1) break;
+      u = f2u(xasinhf(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "acoshf ")) {
+      uint32_t u;
+      if (sscanf(buf, "acoshf %" SCNx32, &u) != 1) break;
+      u = f2u(xacoshf(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "atanhf ")) {
+      uint32_t u;
+      if (sscanf(buf, "atanhf %" SCNx32, &u) != 1) break;
+      u = f2u(xatanhf(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "exp2f ")) {
+      uint32_t u;
+      if (sscanf(buf, "exp2f %" SCNx32, &u) != 1) break;
+      u = f2u(xexp2f(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "exp10f ")) {
+      uint32_t u;
+      if (sscanf(buf, "exp10f %" SCNx32, &u) != 1) break;
+      u = f2u(xexp10f(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "expm1f ")) {
+      uint32_t u;
+      if (sscanf(buf, "expm1f %" SCNx32, &u) != 1) break;
+      u = f2u(xexpm1f(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "log10f ")) {
+      uint32_t u;
+      if (sscanf(buf, "log10f %" SCNx32, &u) != 1) break;
+      u = f2u(xlog10f(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "log1pf ")) {
+      uint32_t u;
+      if (sscanf(buf, "log1pf %" SCNx32, &u) != 1) break;
+      u = f2u(xlog1pf(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "sinf_u1 ")) {
+      uint32_t u;
+      if (sscanf(buf, "sinf_u1 %" SCNx32, &u) != 1) break;
+      u = f2u(xsinf_u1(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "cosf_u1 ")) {
+      uint32_t u;
+      if (sscanf(buf, "cosf_u1 %" SCNx32, &u) != 1) break;
+      u = f2u(xcosf_u1(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "sincosf_u1 ")) {
+      uint32_t u;
+      if (sscanf(buf, "sincosf_u1 %" SCNx32, &u) != 1) break;
+      float2 x = xsincosf_u1(u2f(u));
+      printf("%" PRIx32 " %" PRIx32 "\n", f2u(x.x), f2u(x.y));
+    } else if (startsWith(buf, "tanf_u1 ")) {
+      uint32_t u;
+      if (sscanf(buf, "tanf_u1 %" SCNx32, &u) != 1) break;
+      u = f2u(xtanf_u1(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "asinf_u1 ")) {
+      uint32_t u;
+      if (sscanf(buf, "asinf_u1 %" SCNx32, &u) != 1) break;
+      u = f2u(xasinf_u1(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "acosf_u1 ")) {
+      uint32_t u;
+      if (sscanf(buf, "acosf_u1 %" SCNx32, &u) != 1) break;
+      u = f2u(xacosf_u1(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "atanf_u1 ")) {
+      uint32_t u;
+      if (sscanf(buf, "atanf_u1 %" SCNx32, &u) != 1) break;
+      u = f2u(xatanf_u1(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "atan2f_u1 ")) {
+      uint32_t u, v;
+      if (sscanf(buf, "atan2f_u1 %" SCNx32 " %" SCNx32, &u, &v) != 2) break;
+      u = f2u(xatan2f_u1(u2f(u), u2f(v)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "logf_u1 ")) {
+      uint32_t u;
+      if (sscanf(buf, "logf_u1 %" SCNx32, &u) != 1) break;
+      u = f2u(xlogf_u1(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else if (startsWith(buf, "cbrtf_u1 ")) {
+      uint32_t u;
+      if (sscanf(buf, "cbrtf_u1 %" SCNx32, &u) != 1) break;
+      u = f2u(xcbrtf_u1(u2f(u)));
+      printf("%" PRIx32 "\n", u);
+    } else {
+      break;
+    }
+
+    fflush(stdout);
+  }
+
+  return 0;
+}
diff --git a/purec/nonnumber.h b/purec/nonnumber.h
new file mode 100644
index 00000000..5d856fa9
--- /dev/null
+++ b/purec/nonnumber.h
@@ -0,0 +1,19 @@
+#if defined (__GNUC__) || defined (__INTEL_COMPILER) || defined (__clang__)
+#ifdef INFINITY
+#undef INFINITY
+#endif
+
+#ifdef NAN
+#undef NAN
+#endif
+// Use the compiler builtins; NANf and INFINITYf are the float-typed variants.
+#define NAN __builtin_nan("")
+#define NANf __builtin_nanf("")
+#define INFINITY __builtin_inf()
+#define INFINITYf __builtin_inff()
+#else
+// Other compilers: take NAN and INFINITY from the standard header (glibc's <bits/*.h> must not be included directly).
+#include <math.h>
+#define NANf ((float)NAN)
+#define INFINITYf ((float)INFINITY)
+#endif
diff --git a/purec/sleef.h b/purec/sleef.h
new file mode 100644
index 00000000..97142720
--- /dev/null
+++ b/purec/sleef.h
@@ -0,0 +1,100 @@
+#ifndef SLEEF_H
+#define SLEEF_H
+// Pairs of values; they are the return type of the xsincos* functions declared below.
+typedef struct { double x, y; } double2;
+typedef struct { float x, y; } float2;
+// Double-precision functions.
+double xsin(double d);
+double xcos(double d);
+double2 xsincos(double d);
+double xtan(double d);
+double xasin(double s);
+double xacos(double s);
+double xatan(double s);
+double xatan2(double y, double x);
+double xlog(double d);
+double xexp(double d);
+double xldexp(double x, int q);
+int xilogb(double d);
+
+double xpow(double x, double y);
+double xsinh(double x);
+double xcosh(double x);
+double xtanh(double x);
+double xasinh(double x);
+double xacosh(double x);
+double xatanh(double x);
+
+double xfma(double x, double y, double z);
+double xsqrt(double d);
+double xcbrt(double d);
+
+double xexp2(double a);
+double xexp10(double a);
+double xexpm1(double a);
+double xlog10(double a);
+double xlog1p(double a);
+// Double-precision _u1 variants (the README describes sin_u1 etc. as having at most 1 ulp of error).
+double xsin_u1(double d);
+double xcos_u1(double d);
+double2 xsincos_u1(double d);
+double xtan_u1(double d);
+double xasin_u1(double s);
+double xacos_u1(double s);
+double xatan_u1(double s);
+double xatan2_u1(double y, double x);
+double xlog_u1(double d);
+double xexp_u1(double d);
+double xpow_u1(double x, double y);
+double xsinh_u1(double x);
+double xcosh_u1(double x);
+double xtanh_u1(double x);
+double xasinh_u1(double x);
+double xacosh_u1(double x);
+double xatanh_u1(double x);
+double xexp2_u1(double a);
+double xexp10_u1(double a);
+double xexpm1_u1(double a);
+double xlog10_u1(double a);
+double xlog1p_u1(double a);
+double xcbrt_u1(double d);
+// Single-precision functions.
+float xsinf(float d);
+float xcosf(float d);
+float2 xsincosf(float d);
+float xtanf(float d);
+float xasinf(float s);
+float xacosf(float s);
+float xatanf(float s);
+float xatan2f(float y, float x);
+float xlogf(float d);
+float xexpf(float d);
+float xcbrtf(float d);
+float xldexpf(float x, int q);
+int xilogbf(float d);
+
+float xpowf(float x, float y);
+float xsinhf(float x);
+float xcoshf(float x);
+float xtanhf(float x);
+float xasinhf(float x);
+float xacoshf(float x);
+float xatanhf(float x);
+float xexp2f(float a);
+float xexp10f(float a);
+float xexpm1f(float a);
+float xlog10f(float a);
+float xlog1pf(float a);
+// Single-precision _u1 variants.
+float xsinf_u1(float d);
+float xcosf_u1(float d);
+float2 xsincosf_u1(float d);
+float xtanf_u1(float d);
+float xasinf_u1(float s);
+float xacosf_u1(float s);
+float xatanf_u1(float s);
+float xatan2f_u1(float y, float x);
+float xlogf_u1(float d);
+float xcbrtf_u1(float d);
+
+#endif
diff --git a/purec/sleefdp.c b/purec/sleefdp.c
new file mode 100644
index 00000000..f91c1977
--- /dev/null
+++ b/purec/sleefdp.c
@@ -0,0 +1,1247 @@
+#include <stdio.h>
+
+#include <assert.h>
+#include <stdint.h>
+#include <math.h>
+
+#include "nonnumber.h"
+
+#define PI4_A 0.78539816290140151978 // PI4_A + PI4_B + PI4_C + PI4_D together approximate pi/4;
+#define PI4_B 4.9604678871439933374e-10 // the trigonometric functions below subtract q * (a multiple of each part)
+#define PI4_C 1.1258708853173288931e-18 // in turn, largest part first, to reduce their argument.
+#define PI4_D 1.7607799325916000908e-27
+
+#define M_4_PI 1.273239544735162542821171882678754627704620361328125 // approx. 4/pi
+
+#define L2U .69314718055966295651160180568695068359375 // L2U + L2L together approximate ln(2): upper part
+#define L2L .28235290563031577122588448175013436025525412068e-12 // lower part
+#define R_LN2 1.442695040888963407359924681001892137426645954152985934135449406931 // approx. 1/ln(2)
+
+static inline int64_t doubleToRawLongBits(double d) { // bit pattern of d as a signed 64-bit integer (no numeric conversion)
+  union {
+    double f;
+    int64_t i;
+  } tmp;
+  tmp.f = d; // write as double ...
+  return tmp.i; // ... read back the same bytes as an integer
+}
+
+static inline double longBitsToDouble(int64_t i) { // double whose bit pattern is i (inverse of doubleToRawLongBits)
+  union {
+    double f;
+    int64_t i;
+  } tmp;
+  tmp.i = i; // write as integer ...
+  return tmp.f; // ... read back the same bytes as a double
+}
+
+static inline double xfabs(double x) { // absolute value, obtained by clearing the sign bit of x
+  return longBitsToDouble(0x7fffffffffffffffLL & doubleToRawLongBits(x)); // mask keeps bits 0..62
+}
+
+static inline double mulsign(double x, double y) { // x with its sign bit flipped when y's sign bit is set
+  return longBitsToDouble(doubleToRawLongBits(x) ^ (doubleToRawLongBits(y) & INT64_MIN)); // INT64_MIN is the sign-bit mask; 1LL << 63 would be undefined behaviour in C
+}
+
+static inline double sign(double d) { return mulsign(1, d); } // +1.0 or -1.0 according to the sign bit of d
+static inline double mla(double x, double y, double z) { return x * y + z; } // multiply-add: x*y + z
+static inline double xrint(double x) { double half = x < 0 ? -0.5 : 0.5; return (int)(x + half); } // round half away from zero via truncation to int
+
+static inline int xisnan(double x) { return !(x == x); } // 1 for NaN (the only value not equal to itself), else 0
+static inline int xisinf(double x) { return !(x != INFINITY && x != -INFINITY); } // 1 for +inf or -inf, else 0
+static inline int xisminf(double x) { return -INFINITY == x ? 1 : 0; } // 1 only for negative infinity
+static inline int xispinf(double x) { return INFINITY == x ? 1 : 0; } // 1 only for positive infinity
+
+static inline double pow2i(int q) { // 2^q built directly from the exponent field; no range check on q + 0x3ff
+  return longBitsToDouble(((int64_t)(q + 0x3ff)) << 52); // 0x3ff is the exponent bias, 52 the mantissa width
+}
+
+static inline double ldexpk(double x, int q) { // x * 2^q, applied as four multiplications by 2^m followed by one by 2^r, where q = 4*m + r
+  double u;
+  int m;
+  m = q >> 31; // sign of q (NOTE(review): relies on arithmetic right shift of a negative int)
+  m = (((m + q) >> 9) - m) << 7; // m = a multiple of 128 close to q/4
+  q = q - (m << 2); // q now holds the remainder r = q - 4*m
+  m += 0x3ff; // add the exponent bias ...
+  m = m < 0     ? 0     : m; // ... and clamp to the valid exponent-field range [0, 0x7ff]
+  m = m > 0x7ff ? 0x7ff : m;
+  u = longBitsToDouble(((int64_t)m) << 52); // u = 2^m (or 0 / inf at the clamped ends)
+  x = x * u * u * u * u;
+  u = longBitsToDouble(((int64_t)(q + 0x3ff)) << 52); // u = 2^r
+  return x * u;
+}
+
+double xldexp(double x, int q) { return ldexpk(x, q); } // public entry point: forwards to ldexpk
+
+static inline int ilogbp1(double d) { // unbiased exponent of d plus one, read from the exponent field
+  int m = d < 4.9090934652977266E-91; // threshold is approx. 2^-300: small inputs are rescaled first
+  d = m ? 2.037035976334486E90 * d : d; // factor is approx. 2^300
+  int q = (doubleToRawLongBits(d) >> 52) & 0x7ff; // raw 11-bit exponent field
+  q = m ? q - (300 + 0x03fe) : q - 0x03fe; // remove the bias (0x3fe gives exponent + 1) and undo the 300 rescale
+  return q;
+}
+
+int xilogb(double d) { // exponent of |d| as an int, with fixed results for zero and infinities
+  int e = ilogbp1(xfabs(d)) - 1;
+  e = d == 0 ? -2147483648 : e; // zero -> most negative int
+  e = d == INFINITY || d == -INFINITY ? 2147483647 : e; // +/-inf -> largest int
+  return e;
+}
+
+//
+
+typedef struct { // pair of doubles; the dd* helpers below treat x as the high part and y as the low part of one value
+  double x, y;
+} double2;
+
+#ifndef NDEBUG
+// Debug-only helper: 1 when x is infinite or NaN, 0 for any finite value.
+static int checkfp(double x) {
+  return xisinf(x) || xisnan(x);
+}
+#endif
+
+static inline double upper(double d) { // d with its 27 least-significant mantissa bits cleared; the dd* products use it to split operands into high and low halves
+  return longBitsToDouble(doubleToRawLongBits(d) & 0xfffffffff8000000LL);
+}
+
+// Build a double2 pair from its high part h and low part l.
+static inline double2 dd(double h, double l) {
+  double2 pair = { h, l };
+  return pair;
+}
+
+static inline double2 ddnormalize_d2_d2(double2 t) { // re-split t so that s.x is the rounded sum t.x + t.y and s.y the remainder
+  double2 s;
+
+  s.x = t.x + t.y;
+  s.y = t.x - s.x + t.y; // what the rounded sum above lost (presumably assumes |t.x| >= |t.y|; not checked here)
+
+  return s;
+}
+
+// Multiply both parts of the pair d by the scalar s. No error term is
+// computed; the callers visible in this file pass 0.5 or +/-1, for which
+// the two products are exact barring underflow.
+static inline double2 ddscale_d2_d2_d(double2 d, double s) {
+  d.x *= s;
+  d.y *= s;
+  return d;
+}
+
+// Negate a double2 value by negating its high and low parts
+// independently.
+//
+static inline double2 ddneg_d2_d2(double2 d) {
+  d.x = -d.x;
+  d.y = -d.y;
+  return d;
+}
+
+static inline double2 ddadd_d2_d_d(double x, double y) { // x + y as a pair: rounded sum in r.x, its rounding error in r.y
+  // Precondition: |x| >= |y| (only reported, not enforced, and only in debug builds)
+
+  double2 r;
+
+#ifndef NDEBUG
+  if (!(checkfp(x) || checkfp(y) || xfabs(x) >= xfabs(y))) fprintf(stderr, "[ddadd_d2_d_d : %g, %g]", x, y);
+#endif
+
+  r.x = x + y;
+  r.y = x - r.x + y; // error term; evaluation order matters
+
+  return r;
+}
+
+static inline double2 ddadd2_d2_d_d(double x, double y) { // x + y as a pair; unlike ddadd_d2_d_d no magnitude ordering is checked
+  double2 r;
+
+  r.x = x + y;
+  double v = r.x - x; // the part of y that actually made it into r.x
+  r.y = (x - (r.x - v)) + (y - v); // error contributed by x plus error contributed by y
+
+  return r;
+}
+
+static inline double2 ddadd_d2_d2_d(double2 x, double y) { // pair x plus scalar y, result as a pair
+  // Precondition: |x.x| >= |y| (only reported, not enforced, and only in debug builds)
+
+  double2 r;
+
+#ifndef NDEBUG
+  if (!(checkfp(x.x) || checkfp(y) || xfabs(x.x) >= xfabs(y))) fprintf(stderr, "[ddadd_d2_d2_d : %g %g]", x.x, y);
+#endif
+
+  r.x = x.x + y;
+  r.y = x.x - r.x + y + x.y; // rounding error of the high sum, plus the low part of x
+
+  return r;
+}
+
+static inline double2 ddadd2_d2_d2_d(double2 x, double y) { // pair x plus scalar y, result as a pair
+  // NOTE(review): the original remark here read "|x| >= |y|", but this variant makes no ordering check and uses the same symmetric error term as ddadd2_d2_d_d
+
+  double2 r;
+
+  r.x  = x.x + y;
+  double v = r.x - x.x; // the part of y that actually made it into r.x
+  r.y = (x.x - (r.x - v)) + (y - v);
+  r.y += x.y; // fold in the low part of x
+
+  return r;
+}
+
+static inline double2 ddadd_d2_d_d2(double x, double2 y) { // scalar x plus pair y, result as a pair
+  // Precondition: |x| >= |y.x| (only reported, not enforced, and only in debug builds)
+
+  double2 r;
+
+#ifndef NDEBUG
+  if (!(checkfp(x) || checkfp(y.x) || xfabs(x) >= xfabs(y.x))) fprintf(stderr, "[ddadd_d2_d_d2 : %g %g]", x, y.x);
+#endif
+
+  r.x = x + y.x;
+  r.y = x - r.x + y.x + y.y; // rounding error of the high sum, plus the low part of y
+
+  return r;
+}
+
+static inline double2 ddadd2_d2_d_d2(double x, double2 y) { // scalar x plus pair y, result as a pair; no magnitude ordering is checked
+  double2 r;
+
+  r.x  = x + y.x;
+  double v = r.x - x; // the part of y.x that actually made it into r.x
+  r.y = (x - (r.x - v)) + (y.x - v) + y.y; // error of the high sum, plus the low part of y
+
+  return r;
+}
+
+static inline double2 ddadd_d2_d2_d2(double2 x, double2 y) { // pair x plus pair y, result as a pair
+  // Precondition: |x.x| >= |y.x| (only reported, not enforced, and only in debug builds)
+
+  double2 r;
+
+#ifndef NDEBUG
+  if (!(checkfp(x.x) || checkfp(y.x) || xfabs(x.x) >= xfabs(y.x))) fprintf(stderr, "[ddadd_d2_d2_d2 : %g %g]", x.x, y.x);
+#endif
+
+  r.x = x.x + y.x;
+  r.y = x.x - r.x + y.x + x.y + y.y; // rounding error of the high sum, plus both low parts
+
+  return r;
+}
+
+static inline double2 ddadd2_d2_d2_d2(double2 x, double2 y) { // pair x plus pair y, result as a pair; no magnitude ordering is checked
+  double2 r;
+
+  r.x  = x.x + y.x;
+  double v = r.x - x.x; // the part of y.x that actually made it into r.x
+  r.y = (x.x - (r.x - v)) + (y.x - v);
+  r.y += x.y + y.y; // fold in both low parts
+
+  return r;
+}
+
+static inline double2 ddsub_d2_d2_d2(double2 x, double2 y) { // pair x minus pair y, result as a pair
+  // Precondition: |x.x| >= |y.x| (only reported, not enforced, and only in debug builds)
+
+  double2 r;
+
+#ifndef NDEBUG
+  if (!(checkfp(x.x) || checkfp(y.x) || xfabs(x.x) >= xfabs(y.x))) fprintf(stderr, "[ddsub_d2_d2_d2 : %g %g]", x.x, y.x);
+#endif
+
+  r.x = x.x - y.x;
+  r.y = x.x - r.x - y.x + x.y - y.y; // rounding error of the high difference, plus the difference of the low parts
+
+  return r;
+}
+
+static inline double2 dddiv_d2_d2_d2(double2 n, double2 d) { // quotient n / d of two pairs, result as a pair
+  double t = 1.0 / d.x; // reciprocal of the divisor's high part
+  double dh  = upper(d.x), dl  = d.x - dh; // split d.x, t and n.x into high and low halves (see upper)
+  double th  = upper(t  ), tl  = t   - th;
+  double nhh = upper(n.x), nhl = n.x - nhh;
+
+  double2 q;
+
+  q.x = n.x * t; // leading quotient
+
+  double u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl + // (n.x * t, expanded over the halves) - q.x
+    q.x * (1 - dh * th - dh * tl - dl * th - dl * tl); // q.x * (1 - d.x * t, expanded over the halves)
+
+  q.y = t * (n.y - q.x * d.y) + u; // correction from the low parts of n and d, plus u
+
+  return q;
+}
+
+static inline double2 ddmul_d2_d_d(double x, double y) { // product x * y as a pair: rounded product in r.x, correction in r.y
+  double xh = upper(x), xl = x - xh; // split both operands into high and low halves
+  double yh = upper(y), yl = y - yh;
+  double2 r;
+
+  r.x = x * y;
+  r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl; // partial products summed largest first, minus r.x
+
+  return r;
+}
+
+static inline double2 ddmul_d2_d2_d(double2 x, double y) { // pair x times scalar y, result as a pair
+  double xh = upper(x.x), xl = x.x - xh; // split the high part of x and y into halves
+  double yh = upper(y  ), yl = y   - yh;
+  double2 r;
+
+  r.x = x.x * y;
+  r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y; // correction of x.x * y, plus the low part of x times y
+
+  return r;
+}
+
+static inline double2 ddmul_d2_d2_d2(double2 x, double2 y) { // pair x times pair y, result as a pair (the x.y * y.y term is omitted)
+  double xh = upper(x.x), xl = x.x - xh; // split both high parts into halves
+  double yh = upper(y.x), yl = y.x - yh;
+  double2 r;
+
+  r.x = x.x * y.x;
+  r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x; // correction of x.x * y.x, plus the two cross terms
+
+  return r;
+}
+
+static inline double2 ddsqu_d2_d2(double2 x) { // square of the pair x, result as a pair (the x.y * x.y term is omitted)
+  double xh = upper(x.x), xl = x.x - xh; // split the high part into halves
+  double2 r;
+
+  r.x = x.x * x.x;
+  r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y); // correction of x.x^2, plus 2 * x.x * x.y
+
+  return r;
+}
+
+static inline double2 ddrec_d2_d(double d) { // reciprocal 1 / d as a pair
+  double t = 1.0 / d;
+  double dh = upper(d), dl = d - dh; // split d and t into high and low halves
+  double th = upper(t), tl = t - th;
+  double2 q;
+
+  q.x = t;
+  q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl); // t * (1 - d * t), with d * t expanded over the halves
+
+  return q;
+}
+
+static inline double2 ddrec_d2_d2(double2 d) { // reciprocal of the pair d, result as a pair
+  double t = 1.0 / d.x; // reciprocal of the high part
+  double dh = upper(d.x), dl = d.x - dh; // split d.x and t into high and low halves
+  double th = upper(t  ), tl = t   - th;
+  double2 q;
+
+  q.x = t;
+  q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl - d.y * t); // as in ddrec_d2_d, with the low part d.y also subtracted
+
+  return q;
+}
+
+static inline double2 ddsqrt_d2_d2(double2 d) { // square root of the pair d, result as a pair
+  double t = sqrt(d.x + d.y); // double-precision estimate
+  return ddscale_d2_d2_d(ddmul_d2_d2_d2(ddadd2_d2_d2_d2(d, ddmul_d2_d_d(t, t)), ddrec_d2_d(t)), 0.5); // (d + t*t) * (1/t) * 0.5, evaluated in pair arithmetic
+}
+
+//
+
+static inline double atan2k(double y, double x) { // arctangent kernel; the callers in this file pass a non-negative y
+  double s, t, u;
+  int q = 0; // counts the multiples of pi/2 added back at the end
+
+  if (x < 0) { x = -x; q = -2; } // mirror x to be non-negative
+  if (y > x) { t = x; x = y; y = -t; q += 1; } // swap so that the ratio y / x has magnitude at most 1
+
+  s = y / x;
+  t = s * s;
+
+  u = -1.88796008463073496563746e-05; // polynomial in t = s^2, evaluated by Horner's rule
+  u = u * t + (0.000209850076645816976906797);
+  u = u * t + (-0.00110611831486672482563471);
+  u = u * t + (0.00370026744188713119232403);
+  u = u * t + (-0.00889896195887655491740809);
+  u = u * t + (0.016599329773529201970117);
+  u = u * t + (-0.0254517624932312641616861);
+  u = u * t + (0.0337852580001353069993897);
+  u = u * t + (-0.0407629191276836500001934);
+  u = u * t + (0.0466667150077840625632675);
+  u = u * t + (-0.0523674852303482457616113);
+  u = u * t + (0.0587666392926673580854313);
+  u = u * t + (-0.0666573579361080525984562);
+  u = u * t + (0.0769219538311769618355029);
+  u = u * t + (-0.090908995008245008229153);
+  u = u * t + (0.111111105648261418443745);
+  u = u * t + (-0.14285714266771329383765);
+  u = u * t + (0.199999999996591265594148);
+  u = u * t + (-0.333333333333311110369124);
+
+  t = u * t * s + s; // s + s^3 * poly
+  t = q * (M_PI/2) + t; // add back the multiples of pi/2 removed above
+
+  return t;
+}
+
+double xatan2(double y, double x) { // atan2(y, x): kernel on |y|, then explicit handling of zeros, infinities and NaN
+  double r = atan2k(xfabs(y), x);
+
+  r = mulsign(r, x);
+  if (xisinf(x) || x == 0) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI  /2)) : 0); // x infinite: 0 or pi; x zero: pi/2
+  if (xisinf(y)          ) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI*1/4)) : 0); // y infinite: pi/2, or pi/4 / 3pi/4 when x is infinite too
+  if (             y == 0) r = (sign(x) == -1 ? M_PI : 0); // y zero: pi when x has its sign bit set, otherwise 0
+
+  return xisnan(x) || xisnan(y) ? NAN : mulsign(r, y); // NaN in, NaN out; otherwise the result takes y's sign
+}
+
+double xasin(double d) { // arcsine as atan2k(|d|, sqrt((1+d)*(1-d))), with the sign of d restored
+  return mulsign(atan2k(xfabs(d), sqrt((1+d)*(1-d))), d);
+}
+
+double xacos(double d) { // arccosine as atan2k(sqrt((1+d)*(1-d)), |d|), negated and shifted by pi for negative d
+  return mulsign(atan2k(sqrt((1+d)*(1-d)), xfabs(d)), d) + (d < 0 ? M_PI : 0);
+}
+
+double xatan(double s) { // arctangent: reduce to [0, 1], evaluate a polynomial, then undo the reduction
+  double t, u;
+  int q = 0; // bit 1: input was negative; bit 0: reciprocal was taken
+
+  if (s < 0) { s = -s; q = 2; }
+  if (s > 1) { s = 1.0 / s; q |= 1; }
+
+  t = s * s;
+
+  u = -1.88796008463073496563746e-05; // same coefficients as atan2k; Horner's rule in t = s^2
+  u = u * t + (0.000209850076645816976906797);
+  u = u * t + (-0.00110611831486672482563471);
+  u = u * t + (0.00370026744188713119232403);
+  u = u * t + (-0.00889896195887655491740809);
+  u = u * t + (0.016599329773529201970117);
+  u = u * t + (-0.0254517624932312641616861);
+  u = u * t + (0.0337852580001353069993897);
+  u = u * t + (-0.0407629191276836500001934);
+  u = u * t + (0.0466667150077840625632675);
+  u = u * t + (-0.0523674852303482457616113);
+  u = u * t + (0.0587666392926673580854313);
+  u = u * t + (-0.0666573579361080525984562);
+  u = u * t + (0.0769219538311769618355029);
+  u = u * t + (-0.090908995008245008229153);
+  u = u * t + (0.111111105648261418443745);
+  u = u * t + (-0.14285714266771329383765);
+  u = u * t + (0.199999999996591265594148);
+  u = u * t + (-0.333333333333311110369124);
+
+  t = s + s * (t * u); // s + s^3 * poly
+
+  if ((q & 1) != 0) t = 1.570796326794896557998982 - t; // reciprocal was taken: result is pi/2 - t
+  if ((q & 2) != 0) t = -t; // input was negative
+
+  return t;
+}
+
+static double2 atan2k_u1(double2 y, double2 x) { // pair-arithmetic counterpart of atan2k; returns the angle as a pair
+  double u;
+  double2 s, t;
+  int q = 0; // counts the multiples of pi/2 added back at the end
+
+  if (x.x < 0) { x.x = -x.x; x.y = -x.y; q = -2; } // mirror x to be non-negative
+  if (y.x > x.x) { t = x; x = y; y.x = -t.x; y.y = -t.y; q += 1; } // swap so that the ratio has magnitude at most 1
+
+  s = dddiv_d2_d2_d2(y, x);
+  t = ddsqu_d2_d2(s);
+  t = ddnormalize_d2_d2(t);
+
+  u = 1.06298484191448746607415e-05; // polynomial in t.x, evaluated by Horner's rule in plain double
+  u = mla(u, t.x, -0.000125620649967286867384336);
+  u = mla(u, t.x, 0.00070557664296393412389774);
+  u = mla(u, t.x, -0.00251865614498713360352999);
+  u = mla(u, t.x, 0.00646262899036991172313504);
+  u = mla(u, t.x, -0.0128281333663399031014274);
+  u = mla(u, t.x, 0.0208024799924145797902497);
+  u = mla(u, t.x, -0.0289002344784740315686289);
+  u = mla(u, t.x, 0.0359785005035104590853656);
+  u = mla(u, t.x, -0.041848579703592507506027);
+  u = mla(u, t.x, 0.0470843011653283988193763);
+  u = mla(u, t.x, -0.0524914210588448421068719);
+  u = mla(u, t.x, 0.0587946590969581003860434);
+  u = mla(u, t.x, -0.0666620884778795497194182);
+  u = mla(u, t.x, 0.0769225330296203768654095);
+  u = mla(u, t.x, -0.0909090442773387574781907);
+  u = mla(u, t.x, 0.111111108376896236538123);
+  u = mla(u, t.x, -0.142857142756268568062339);
+  u = mla(u, t.x, 0.199999999997977351284817);
+  u = mla(u, t.x, -0.333333333333317605173818);
+
+  t = ddmul_d2_d2_d(t, u);
+  t = ddmul_d2_d2_d2(s, ddadd_d2_d_d2(1, t)); // s * (1 + s^2 * poly)
+  t = ddadd2_d2_d2_d2(ddmul_d2_d2_d(dd(1.570796326794896557998982, 6.12323399573676603586882e-17), q), t); // add q * pi/2, with pi/2 given as a high/low pair
+
+  return t;
+}
+
+double xatan2_u1(double y, double x) { // atan2(y, x) via the pair-arithmetic kernel; special cases handled as in xatan2
+  double2 d = atan2k_u1(dd(xfabs(y), 0), dd(x, 0));
+  double r = d.x + d.y; // collapse the pair to one double
+
+  r = mulsign(r, x);
+  if (xisinf(x) || x == 0) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI  /2)) : 0); // x infinite: 0 or pi; x zero: pi/2
+  if (xisinf(y)          ) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI*1/4)) : 0); // y infinite: pi/2, or pi/4 / 3pi/4 when x is infinite too
+  if (             y == 0) r = (sign(x) == -1 ? M_PI : 0); // y zero: pi when x has its sign bit set, otherwise 0
+
+  return xisnan(x) || xisnan(y) ? NAN : mulsign(r, y); // NaN in, NaN out; otherwise the result takes y's sign
+}
+
+double xasin_u1(double d) { // arcsine via atan2k_u1(|d|, sqrt((1+d)*(1-d))) with the square root taken in pair arithmetic
+  double2 d2 = atan2k_u1(dd(xfabs(d), 0), ddsqrt_d2_d2(ddmul_d2_d2_d2(ddadd_d2_d_d(1, d), ddadd_d2_d_d(1,-d))));
+  double r = d2.x + d2.y;
+  if (xfabs(d) == 1) r = 1.570796326794896557998982; // |d| == 1: return pi/2 directly
+  return mulsign(r, d); // restore the sign of the input
+}
+
+double xacos_u1(double d) { // arccosine via atan2k_u1(sqrt((1+d)*(1-d)), |d|) in pair arithmetic
+  double2 d2 = atan2k_u1(ddsqrt_d2_d2(ddmul_d2_d2_d2(ddadd_d2_d_d(1, d), ddadd_d2_d_d(1,-d))), dd(xfabs(d), 0));
+  d2 = ddscale_d2_d2_d(d2, mulsign(1, d)); // negate the angle for negative d
+  if (xfabs(d) == 1) d2 = dd(0, 0); // |d| == 1: the kernel result is replaced by 0
+  if (d < 0) d2 = ddadd_d2_d2_d2(dd(3.141592653589793116, 1.2246467991473532072e-16), d2); // negative d: add pi, given as a high/low pair
+  return d2.x + d2.y;
+}
+
+double xatan_u1(double d) { // arctangent as atan2k_u1(|d|, 1), with the sign of d restored
+  double2 d2 = atan2k_u1(dd(xfabs(d), 0), dd(1, 0));
+  double r = d2.x + d2.y;
+  if (xisinf(d)) r = 1.570796326794896557998982; // infinite input: return pi/2 directly
+  return mulsign(r, d);
+}
+
+double xsin(double d) { // sine: subtract the nearest multiple of pi, then evaluate an odd polynomial
+  int q;
+  double u, s;
+
+  q = (int)xrint(d * M_1_PI); // nearest integer to d / pi
+
+  d = mla(q, -PI4_A*4, d); // d -= q * pi, with pi supplied in four parts, largest first
+  d = mla(q, -PI4_B*4, d);
+  d = mla(q, -PI4_C*4, d);
+  d = mla(q, -PI4_D*4, d);
+
+  s = d * d;
+
+  if ((q & 1) != 0) d = -d; // odd q: flip the sign
+
+  u = -7.97255955009037868891952e-18; // polynomial in s = d^2, evaluated by Horner's rule
+  u = mla(u, s, 2.81009972710863200091251e-15);
+  u = mla(u, s, -7.64712219118158833288484e-13);
+  u = mla(u, s, 1.60590430605664501629054e-10);
+  u = mla(u, s, -2.50521083763502045810755e-08);
+  u = mla(u, s, 2.75573192239198747630416e-06);
+  u = mla(u, s, -0.000198412698412696162806809);
+  u = mla(u, s, 0.00833333333333332974823815);
+  u = mla(u, s, -0.166666666666666657414808);
+
+  u = mla(s, u * d, d); // d + d^3 * poly
+
+  return u;
+}
+
+double xsin_u1(double d) { // sine with the reduced argument and the final product carried in pair arithmetic
+  int q;
+  double u;
+  double2 s, t, x;
+
+  q = (int)xrint(d * M_1_PI); // nearest integer to d / pi
+
+  s = ddadd2_d2_d_d(d, q * (-PI4_A*4)); // s = d - q * pi as a pair, with pi supplied in four parts
+  s = ddadd2_d2_d2_d(s, q * (-PI4_B*4));
+  s = ddadd2_d2_d2_d(s, q * (-PI4_C*4));
+  s = ddadd2_d2_d2_d(s, q * (-PI4_D*4));
+
+  t = s; // keep the reduced argument
+  s = ddsqu_d2_d2(s); // s = t^2
+
+  u = 2.72052416138529567917983e-15; // polynomial in s.x, evaluated by Horner's rule in plain double
+  u = mla(u, s.x, -7.6429259411395447190023e-13);
+  u = mla(u, s.x, 1.60589370117277896211623e-10);
+  u = mla(u, s.x, -2.5052106814843123359368e-08);
+  u = mla(u, s.x, 2.75573192104428224777379e-06);
+  u = mla(u, s.x, -0.000198412698412046454654947);
+  u = mla(u, s.x, 0.00833333333333318056201922);
+
+  x = ddadd_d2_d_d2(1, ddmul_d2_d2_d2(ddadd_d2_d_d(-0.166666666666666657414808, u * s.x), s)); // 1 + s * (-1/6 + s.x * poly)
+
+  x = ddmul_d2_d2_d2(t, x); // multiply by the reduced argument
+  u = x.x + x.y; // collapse the pair to one double
+
+  if ((q & 1) != 0) u = -u; // odd q: flip the sign
+
+  return u;
+}
+
+double xcos(double d) { // cosine: subtract an odd multiple of pi/2, then reuse the sine polynomial
+  int q;
+  double u, s;
+
+  q = 1 + 2*(int)xrint(d * M_1_PI - 0.5); // odd integer q
+
+  d = mla(q, -PI4_A*2, d); // d -= q * pi/2, with pi/2 supplied in four parts, largest first
+  d = mla(q, -PI4_B*2, d);
+  d = mla(q, -PI4_C*2, d);
+  d = mla(q, -PI4_D*2, d);
+
+  s = d * d;
+
+  if ((q & 2) == 0) d = -d; // sign is selected by bit 1 of q
+
+  u = -7.97255955009037868891952e-18; // same coefficients as xsin; Horner's rule in s = d^2
+  u = mla(u, s, 2.81009972710863200091251e-15);
+  u = mla(u, s, -7.64712219118158833288484e-13);
+  u = mla(u, s, 1.60590430605664501629054e-10);
+  u = mla(u, s, -2.50521083763502045810755e-08);
+  u = mla(u, s, 2.75573192239198747630416e-06);
+  u = mla(u, s, -0.000198412698412696162806809);
+  u = mla(u, s, 0.00833333333333332974823815);
+  u = mla(u, s, -0.166666666666666657414808);
+
+  u = mla(s, u * d, d); // d + d^3 * poly
+
+  return u;
+}
+
+double xcos_u1(double d) { // cosine of d with double-double internals (README: at most 1 ulp)
+  double2 red, sq, acc;
+  double poly, k;
+
+  d = fabs(d); // work on |d|
+
+  // k = 2*xrint(d/pi - 1/2) + 1, kept as a double
+  k = mla(2, xrint(d * M_1_PI - 0.5), 1);
+
+  // red = d minus k times the PI4_x*2 terms, accumulated as a double-double
+  red = ddadd2_d2_d_d(d, k * (-PI4_A*2));
+  red = ddadd2_d2_d2_d(red, k * (-PI4_B*2));
+  red = ddadd2_d2_d2_d(red, k * (-PI4_C*2));
+  red = ddadd2_d2_d2_d(red, k * (-PI4_D*2));
+
+  sq = ddsqu_d2_d2(red);
+
+  poly = 2.72052416138529567917983e-15;
+  poly = mla(poly, sq.x, -7.6429259411395447190023e-13);
+  poly = mla(poly, sq.x, 1.60589370117277896211623e-10);
+  poly = mla(poly, sq.x, -2.5052106814843123359368e-08);
+  poly = mla(poly, sq.x, 2.75573192104428224777379e-06);
+  poly = mla(poly, sq.x, -0.000198412698412046454654947);
+  poly = mla(poly, sq.x, 0.00833333333333318056201922);
+
+  // 1 + sq * (-1/6 + poly * sq.x), kept in double-double
+  acc = ddadd_d2_d_d2(1, ddmul_d2_d2_d2(ddadd_d2_d_d(-0.166666666666666657414808, poly * sq.x), sq));
+  acc = ddmul_d2_d2_d2(red, acc);
+
+  poly = acc.x + acc.y;
+
+  if (!(((int)k) & 2)) poly = -poly; // negate when bit 1 of k is clear
+  return poly;
+}
+
+double2 xsincos(double d) { // returns sin(d) in .x and cos(d) in .y
+  double2 out;
+  double poly, sq, red;
+  const int k = (int)xrint(d * (2 * M_1_PI)); // xrint of d/(pi/2)
+
+  // red = d minus k times the PI4_x*2 terms (PI4_A..PI4_D are defined outside this view)
+  red = d;
+  red = mla(-k, PI4_A*2, red);
+  red = mla(-k, PI4_B*2, red);
+  red = mla(-k, PI4_C*2, red);
+  red = mla(-k, PI4_D*2, red);
+
+  sq = red * red;
+
+  // odd series in red, stored in .x
+  poly = 1.58938307283228937328511e-10;
+  poly = mla(poly, sq, -2.50506943502539773349318e-08);
+  poly = mla(poly, sq, 2.75573131776846360512547e-06);
+  poly = mla(poly, sq, -0.000198412698278911770864914);
+  poly = mla(poly, sq, 0.0083333333333191845961746);
+  poly = mla(poly, sq, -0.166666666666666130709393);
+  poly = poly * sq * red;
+
+  out.x = red + poly;
+
+  // even series in red, stored in .y
+  poly = -1.13615350239097429531523e-11;
+  poly = mla(poly, sq, 2.08757471207040055479366e-09);
+  poly = mla(poly, sq, -2.75573144028847567498567e-07);
+  poly = mla(poly, sq, 2.48015872890001867311915e-05);
+  poly = mla(poly, sq, -0.00138888888888714019282329);
+  poly = mla(poly, sq, 0.0416666666666665519592062);
+  poly = mla(poly, sq, -0.5);
+
+  out.y = poly * sq + 1;
+
+  // rotate by k quarter turns: odd k swaps the pair, then each sign is fixed
+  if (k & 1) { const double held = out.y; out.y = out.x; out.x = held; }
+  if (k & 2) out.x = -out.x;
+  if ((k + 1) & 2) out.y = -out.y;
+
+  // infinite input yields NaN for both results
+  if (xisinf(d)) out.x = out.y = NAN;
+
+  return out;
+}
+
+double2 xsincos_u1(double d) { // sin(d) in .x, cos(d) in .y; double-double internals
+  double2 out, red, sq, acc;
+  double poly;
+  const int k = (int)xrint(d * (2 * M_1_PI)); // xrint of d/(pi/2)
+
+  // red = d minus k times the PI4_x*2 terms, accumulated as a double-double
+  red = ddadd2_d2_d_d(d, k * (-PI4_A*2));
+  red = ddadd2_d2_d2_d(red, k * (-PI4_B*2));
+  red = ddadd2_d2_d2_d(red, k * (-PI4_C*2));
+  red = ddadd2_d2_d2_d(red, k * (-PI4_D*2));
+
+  sq = ddsqu_d2_d2(red);
+  sq.x = sq.x + sq.y; // only the collapsed square is used below
+
+  // odd series for the .x result
+  poly = 1.58938307283228937328511e-10;
+  poly = mla(poly, sq.x, -2.50506943502539773349318e-08);
+  poly = mla(poly, sq.x, 2.75573131776846360512547e-06);
+  poly = mla(poly, sq.x, -0.000198412698278911770864914);
+  poly = mla(poly, sq.x, 0.0083333333333191845961746);
+  poly = mla(poly, sq.x, -0.166666666666666130709393);
+
+  poly *= sq.x * red.x;
+
+  acc = ddadd_d2_d2_d(red, poly);
+  out.x = acc.x + acc.y;
+
+  // even series for the .y result
+  poly = -1.13615350239097429531523e-11;
+  poly = mla(poly, sq.x, 2.08757471207040055479366e-09);
+  poly = mla(poly, sq.x, -2.75573144028847567498567e-07);
+  poly = mla(poly, sq.x, 2.48015872890001867311915e-05);
+  poly = mla(poly, sq.x, -0.00138888888888714019282329);
+  poly = mla(poly, sq.x, 0.0416666666666665519592062);
+  poly = mla(poly, sq.x, -0.5);
+
+  acc = ddadd_d2_d_d2(1, ddmul_d2_d_d(sq.x, poly));
+  out.y = acc.x + acc.y;
+
+  // rotate by k quarter turns: odd k swaps the pair, then each sign is fixed
+  if (k & 1) { poly = out.y; out.y = out.x; out.x = poly; }
+  if (k & 2) out.x = -out.x;
+  if ((k + 1) & 2) out.y = -out.y;
+
+  if (xisinf(d)) out.x = out.y = NAN; // infinite input yields NaN for both
+  return out;
+}
+
+double xtan(double d) { // tangent of d from a polynomial on the reduced argument
+  double poly, sq, red;
+  const int k = (int)xrint(d * (2 * M_1_PI)); // xrint of d/(pi/2)
+  const int flip = (k & 1) != 0; // odd k: negate the argument, then take the reciprocal
+
+  // red = d minus k times the PI4_x*2 terms (PI4_A..PI4_D are defined outside this view)
+  red = mla(k, -PI4_A*2, d);
+  red = mla(k, -PI4_B*2, red);
+  red = mla(k, -PI4_C*2, red);
+  red = mla(k, -PI4_D*2, red);
+
+  sq = red * red;
+
+  if (flip) red = -red;
+
+  poly = 1.01419718511083373224408e-05;
+  poly = mla(poly, sq, -2.59519791585924697698614e-05);
+  poly = mla(poly, sq, 5.23388081915899855325186e-05);
+  poly = mla(poly, sq, -3.05033014433946488225616e-05);
+  poly = mla(poly, sq, 7.14707504084242744267497e-05);
+  poly = mla(poly, sq, 8.09674518280159187045078e-05);
+  poly = mla(poly, sq, 0.000244884931879331847054404);
+  poly = mla(poly, sq, 0.000588505168743587154904506);
+  poly = mla(poly, sq, 0.00145612788922812427978848);
+  poly = mla(poly, sq, 0.00359208743836906619142924);
+  poly = mla(poly, sq, 0.00886323944362401618113356);
+  poly = mla(poly, sq, 0.0218694882853846389592078);
+  poly = mla(poly, sq, 0.0539682539781298417636002);
+  poly = mla(poly, sq, 0.133333333333125941821962);
+  poly = mla(poly, sq, 0.333333333333334980164153);
+
+  poly = mla(sq, poly * red, red);
+
+  if (flip) poly = 1.0 / poly;
+
+  if (xisinf(d)) poly = NAN; // infinite input yields NaN
+
+  return poly;
+}
+
+double xtan_u1(double d) { // tangent of d with double-double internals (README: at most 1 ulp)
+  double2 red, sq, acc;
+  double poly;
+  const int k = (int)xrint(d * M_2_PI); // xrint of d/(pi/2)
+  const int flip = (k & 1) != 0; // odd k: negate the argument, then take the reciprocal
+
+  // red = d minus k times the PI4_x*2 terms, accumulated as a double-double
+  red = ddadd2_d2_d_d(d, k * (-PI4_A*2));
+  red = ddadd2_d2_d2_d(red, k * (-PI4_B*2));
+  red = ddadd2_d2_d2_d(red, k * (-PI4_C*2));
+  red = ddadd2_d2_d2_d(red, k * (-PI4_D*2));
+
+  if (flip) red = ddneg_d2_d2(red);
+
+  sq = ddsqu_d2_d2(red);
+
+  // high-order coefficients use only the leading word sq.x
+  poly = 1.01419718511083373224408e-05;
+  poly = mla(poly, sq.x, -2.59519791585924697698614e-05);
+  poly = mla(poly, sq.x, 5.23388081915899855325186e-05);
+  poly = mla(poly, sq.x, -3.05033014433946488225616e-05);
+  poly = mla(poly, sq.x, 7.14707504084242744267497e-05);
+  poly = mla(poly, sq.x, 8.09674518280159187045078e-05);
+  poly = mla(poly, sq.x, 0.000244884931879331847054404);
+  poly = mla(poly, sq.x, 0.000588505168743587154904506);
+  poly = mla(poly, sq.x, 0.00145612788922812427978848);
+  poly = mla(poly, sq.x, 0.00359208743836906619142924);
+  poly = mla(poly, sq.x, 0.00886323944362401618113356);
+  poly = mla(poly, sq.x, 0.0218694882853846389592078);
+  poly = mla(poly, sq.x, 0.0539682539781298417636002);
+  poly = mla(poly, sq.x, 0.133333333333125941821962);
+
+  acc = ddadd_d2_d_d2(1, ddmul_d2_d2_d2(ddadd_d2_d_d(0.333333333333334980164153, poly * sq.x), sq));
+  acc = ddmul_d2_d2_d2(red, acc);
+
+  if (flip) acc = ddrec_d2_d2(acc);
+
+  poly = acc.x + acc.y;
+
+  return poly;
+}
+
+double xlog(double d) { // natural logarithm of d
+  double ratio, ratio2, poly, mant, result;
+  const int ex = ilogbp1(d * 0.7071); // exponent of d*0.7071 via ilogbp1 (defined outside this view)
+
+  mant = ldexpk(d, -ex);
+
+  // the series below is in ratio = (mant-1)/(mant+1)
+  ratio = (mant-1) / (mant+1);
+  ratio2 = ratio * ratio;
+
+  poly = 0.148197055177935105296783;
+  poly = mla(poly, ratio2, 0.153108178020442575739679);
+  poly = mla(poly, ratio2, 0.181837339521549679055568);
+  poly = mla(poly, ratio2, 0.22222194152736701733275);
+  poly = mla(poly, ratio2, 0.285714288030134544449368);
+  poly = mla(poly, ratio2, 0.399999999989941956712869);
+  poly = mla(poly, ratio2, 0.666666666666685503450651);
+  poly = mla(poly, ratio2, 2);
+
+  result = ratio * poly + 0.693147180559945286226764 * ex; // series + ex * ln(2)
+
+  // special inputs, applied in this order (later lines win)
+  result = xisinf(d) ? INFINITY : result;
+  result = d < 0 ? NAN : result;
+  result = d == 0 ? -INFINITY : result;
+  return result;
+}
+
+double xexp(double d) { // e raised to d
+  const int k = (int)xrint(d * R_LN2); // power of two split off from the result
+  double red, poly;
+
+  // red = d - k*L2U - k*L2L (R_LN2, L2U and L2L are defined outside this view)
+  red = mla(k, -L2U, d);
+  red = mla(k, -L2L, red);
+
+  poly = 2.08860621107283687536341e-09;
+  poly = mla(poly, red, 2.51112930892876518610661e-08);
+  poly = mla(poly, red, 2.75573911234900471893338e-07);
+  poly = mla(poly, red, 2.75572362911928827629423e-06);
+  poly = mla(poly, red, 2.4801587159235472998791e-05);
+  poly = mla(poly, red, 0.000198412698960509205564975);
+  poly = mla(poly, red, 0.00138888888889774492207962);
+  poly = mla(poly, red, 0.00833333333331652721664984);
+  poly = mla(poly, red, 0.0416666666666665047591422);
+  poly = mla(poly, red, 0.166666666666666851703837);
+  poly = mla(poly, red, 0.5);
+
+  poly = red * red * poly + red + 1;
+  poly = ldexpk(poly, k); // presumably scales by 2^k; ldexpk is defined outside this view
+
+  poly = xisminf(d) ? 0 : poly; // -infinity maps to 0
+  return poly;
+}
+
+static inline double2 logk(double d) { // natural log of d, returned as a double-double pair
+  double2 x, x2;
+  double m, t;
+  int e;
+  // e, m: exponent/mantissa split of d via ilogbp1/ldexpk (both defined outside this view)
+  e = ilogbp1(d * 0.7071);
+  m = ldexpk(d, -e);
+  // x = (m - 1) / (m + 1), carried in double-double
+  x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m));
+  x2 = ddsqu_d2_d2(x);
+  // polynomial in x^2, evaluated with the leading word x2.x only
+  t = 0.134601987501262130076155;
+  t = mla(t, x2.x, 0.132248509032032670243288);
+  t = mla(t, x2.x, 0.153883458318096079652524);
+  t = mla(t, x2.x, 0.181817427573705403298686);
+  t = mla(t, x2.x, 0.222222231326187414840781);
+  t = mla(t, x2.x, 0.285714285651261412873718);
+  t = mla(t, x2.x, 0.400000000000222439910458);
+  t = mla(t, x2.x, 0.666666666666666371239645);
+  // result = e * ln(2) (two-word constant) + 2*x + x^3 * t
+  return ddadd2_d2_d2_d2(ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e),
+			 ddadd2_d2_d2_d2(ddscale_d2_d2_d(x, 2), ddmul_d2_d2_d(ddmul_d2_d2_d2(x2, x), t)));
+}
+
+double xlog_u1(double d) { // natural logarithm of d (README: at most 1 ulp)
+  const double2 acc = logk(d);
+  double result = acc.x + acc.y;
+
+  // special inputs, applied in this order (later lines win)
+  result = xisinf(d) ? INFINITY : result;
+  result = d < 0 ? NAN : result;
+  result = d == 0 ? -INFINITY : result;
+  return result;
+}
+
+static inline double expk(double2 d) { // e raised to the double-double d, returned as one double
+  int q = (int)xrint((d.x + d.y) * R_LN2);
+  double2 s, t;
+  double u;
+  // s = d - q*L2U - q*L2L (R_LN2, L2U and L2L are defined outside this view)
+  s = ddadd2_d2_d2_d(d, q * -L2U);
+  s = ddadd2_d2_d2_d(s, q * -L2L);
+  // NOTE(review): ddnormalize_d2_d2 is defined outside this view; by name it renormalizes the pair
+  s = ddnormalize_d2_d2(s);
+  // polynomial evaluated with the leading word s.x only
+  u = 2.51069683420950419527139e-08;
+  u = mla(u, s.x, 2.76286166770270649116855e-07);
+  u = mla(u, s.x, 2.75572496725023574143864e-06);
+  u = mla(u, s.x, 2.48014973989819794114153e-05);
+  u = mla(u, s.x, 0.000198412698809069797676111);
+  u = mla(u, s.x, 0.0013888888939977128960529);
+  u = mla(u, s.x, 0.00833333333332371417601081);
+  u = mla(u, s.x, 0.0416666666665409524128449);
+  u = mla(u, s.x, 0.166666666666666740681535);
+  u = mla(u, s.x, 0.500000000000000999200722);
+  // t = s + s^2 * u, then 1 + t
+  t = ddadd_d2_d2_d2(s, ddmul_d2_d2_d(ddsqu_d2_d2(s), u));
+
+  t = ddadd_d2_d_d2(1, t);
+  return ldexpk(t.x + t.y, q); // collapse the pair, then apply the exponent q through ldexpk
+}
+
+double xpow(double x, double y) { // x raised to y via expk(y * logk(|x|)), then special-case fixes
+  int ybig = xfabs(y) >= 9007199254740992.0; // |y| >= 2^53: y is an even integer (or infinite)
+  int yisint = ybig || (xfabs(y) < 9007199254740992.0 && (double)(long long)y == y); // cast only when in range (and not NaN); (int)y overflowed for |y| >= 2^31
+  int yisodd = !ybig && yisint && (1 & (long long)y) != 0;
+
+  double result = expk(ddmul_d2_d2_d(logk(xfabs(x)), y));
+  result = xisnan(result) ? INFINITY : result;
+  result *=  (x >= 0 ? 1 : (!yisint ? NAN : (yisodd ? -1 : 1))); // negative base: NaN unless y is an integer, sign from parity
+  // special cases below are applied in order; later lines override earlier ones
+  double efx = mulsign(xfabs(x) - 1, y);
+  if (xisinf(y)) result = efx < 0 ? 0.0 : (efx == 0 ? 1.0 : INFINITY);
+  if (xisinf(x) || x == 0) result = (yisodd ? sign(x) : 1) * ((x == 0 ? -y : y) < 0 ? 0 : INFINITY);
+  if (xisnan(x) || xisnan(y)) result = NAN;
+  if (y == 0 || x == 1) result = 1;
+
+  return result;
+}
+
+static inline double2 expk2(double2 d) { // e raised to the double-double d, returned as a double-double
+  int q = (int)xrint((d.x + d.y) * R_LN2);
+  double2 s, t;
+  double u;
+  // s = d - q*L2U - q*L2L (R_LN2, L2U and L2L are defined outside this view)
+  s = ddadd2_d2_d2_d(d, q * -L2U);
+  s = ddadd2_d2_d2_d(s, q * -L2L);
+  // polynomial evaluated with the leading word s.x only
+  u = 2.51069683420950419527139e-08;
+  u = mla(u, s.x, 2.76286166770270649116855e-07);
+  u = mla(u, s.x, 2.75572496725023574143864e-06);
+  u = mla(u, s.x, 2.48014973989819794114153e-05);
+  u = mla(u, s.x, 0.000198412698809069797676111);
+  u = mla(u, s.x, 0.0013888888939977128960529);
+  u = mla(u, s.x, 0.00833333333332371417601081);
+  u = mla(u, s.x, 0.0416666666665409524128449);
+  u = mla(u, s.x, 0.166666666666666740681535);
+  u = mla(u, s.x, 0.500000000000000999200722);
+  // t = s + s^2 * u, then 1 + t
+  t = ddadd_d2_d2_d2(s, ddmul_d2_d2_d(ddsqu_d2_d2(s), u));
+
+  t = ddadd_d2_d_d2(1, t);
+  return ddscale_d2_d2_d(t, pow2i(q)); // both words scaled by pow2i(q) (defined outside this view)
+}
+
+double xsinh(double x) { // hyperbolic sine: (e^|x| - 1/e^|x|) / 2, with the sign of x
+  const double mag = xfabs(x);
+  const double2 ex = expk2(dd(mag, 0));
+  const double2 diff = ddsub_d2_d2_d2(ex, ddrec_d2_d2(ex));
+  double y = (diff.x + diff.y) * 0.5;
+
+  if (mag > 710) y = INFINITY; // large |x| is reported as infinity
+  if (xisnan(y)) y = INFINITY;
+  y = mulsign(y, x);
+  if (xisnan(x)) y = NAN; // NaN input wins over the fixes above
+
+  return y;
+}
+
+double xcosh(double x) { // hyperbolic cosine: (e^|x| + 1/e^|x|) / 2
+  const double mag = xfabs(x);
+  const double2 ex = expk2(dd(mag, 0));
+  const double2 sum = ddadd_d2_d2_d2(ex, ddrec_d2_d2(ex));
+  double y = (sum.x + sum.y) * 0.5;
+
+  if (mag > 710) y = INFINITY; // large |x| is reported as infinity
+  if (xisnan(y)) y = INFINITY;
+  if (xisnan(x)) y = NAN; // NaN input wins over the fixes above
+
+  return y;
+}
+
+double xtanh(double x) { // hyperbolic tangent: (e - 1/e) / (e + 1/e) with e = e^|x|, sign of x
+  const double mag = xfabs(x);
+  const double2 ex = expk2(dd(mag, 0));
+  const double2 inv = ddrec_d2_d2(ex);
+  const double2 quot = dddiv_d2_d2_d2(ddsub_d2_d2_d2(ex, inv), ddadd_d2_d2_d2(ex, inv));
+  double y = quot.x + quot.y;
+
+  if (mag > 18.714973875) y = 1.0; // past this threshold the magnitude is pinned to 1
+  if (xisnan(y)) y = 1.0;
+  y = mulsign(y, x);
+  if (xisnan(x)) y = NAN; // NaN input wins over the fixes above
+
+  return y;
+}
+
+static inline double2 logk2(double2 d) { // natural log of the double-double d, returned as a double-double
+  double2 x, x2, m;
+  double t;
+  int e;
+  // e from the leading word; m = d scaled by pow2i(-e) (ilogbp1/pow2i are defined outside this view)
+  e = ilogbp1(d.x * 0.7071);
+  m = ddscale_d2_d2_d(d, pow2i(-e));
+  // x = (m - 1) / (m + 1), carried in double-double
+  x = dddiv_d2_d2_d2(ddadd2_d2_d2_d(m, -1), ddadd2_d2_d2_d(m, 1));
+  x2 = ddsqu_d2_d2(x);
+  // polynomial in x^2, evaluated with the leading word x2.x only
+  t = 0.134601987501262130076155;
+  t = mla(t, x2.x, 0.132248509032032670243288);
+  t = mla(t, x2.x, 0.153883458318096079652524);
+  t = mla(t, x2.x, 0.181817427573705403298686);
+  t = mla(t, x2.x, 0.222222231326187414840781);
+  t = mla(t, x2.x, 0.285714285651261412873718);
+  t = mla(t, x2.x, 0.400000000000222439910458);
+  t = mla(t, x2.x, 0.666666666666666371239645);
+  // result = e * ln(2) (two-word constant) + 2*x + x^3 * t
+  return ddadd2_d2_d2_d2(ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e),
+			 ddadd2_d2_d2_d2(ddscale_d2_d2_d(x, 2), ddmul_d2_d2_d(ddmul_d2_d2_d2(x2, x), t)));
+}
+
+double xasinh(double x) { // inverse hyperbolic sine: log(sqrt(x^2 + 1) + |x|), with the sign of x
+  const double mag = xfabs(x);
+  const double2 root = ddsqrt_d2_d2(ddadd2_d2_d2_d(ddmul_d2_d_d(mag, mag),  1));
+  const double2 lg = logk2(ddadd_d2_d2_d(root, mag));
+  double y = lg.x + lg.y;
+  if (xisinf(x) || xisnan(y)) y = INFINITY;
+  y = mulsign(y, x);
+  if (xisnan(x)) y = NAN; // NaN input wins over the fixes above
+
+  return y;
+}
+
+double xacosh(double x) { // inverse hyperbolic cosine: log(sqrt(x^2 - 1) + x)
+  const double2 root = ddsqrt_d2_d2(ddadd2_d2_d2_d(ddmul_d2_d_d(x, x), -1));
+  const double2 lg = logk2(ddadd2_d2_d2_d(root, x));
+  double y = lg.x + lg.y;
+
+  if (xisinf(x) || xisnan(y)) y = INFINITY;
+  if (x == 1.0) y = 0.0;
+  if (x < 1.0) y = NAN; // below the domain
+  if (xisnan(x)) y = NAN;
+  return y;
+}
+
+double xatanh(double x) { // inverse hyperbolic tangent: log((1 + |x|) / (1 - |x|)) / 2, sign of x
+  const double mag = xfabs(x);
+  const double2 lg = logk2(dddiv_d2_d2_d2(ddadd2_d2_d_d(1, mag), ddadd2_d2_d_d(1, -mag)));
+  double y = mag > 1.0 ? NAN : (mag == 1.0 ? INFINITY : (lg.x + lg.y) * 0.5);
+
+  if (xisinf(x) || xisnan(y)) y = NAN;
+  y = mulsign(y, x);
+  if (xisnan(x)) y = NAN;
+
+  return y;
+}
+
+//
+
+double xfma(double x, double y, double z) { // software stand-in for fma: x*y + z with the product's rounding error carried along
+  union {
+    double f;
+    long long int i;
+  } tmp;
+  // split x into xh (low 27 bits of the pattern cleared after adding 2^26) and the remainder xl
+  tmp.f = x;
+  tmp.i = (tmp.i + 0x4000000) & 0xfffffffff8000000LL;
+  double xh = tmp.f, xl = x - xh;
+  // same split for y
+  tmp.f = y;
+  tmp.i = (tmp.i + 0x4000000) & 0xfffffffff8000000LL;
+  double yh = tmp.f, yl = y - yh;
+  // h: rounded product; l: its error, rebuilt from the four partial products
+  double h = x * y;
+  double l = xh * yh - h + xl * yh + xh * yl + xl * yl;
+
+  double h2, l2, v;
+  // h2: rounded h + z; l2: error of that sum plus the product error l
+  h2 = h + z;
+  v = h2 - h;
+  l2 = (h - (h2 - v)) + (z - v) + l;
+
+  return h2 + l2;
+}
+
+double xsqrt(double d) { // max error : 0.5 ulp
+  double q = 1;
+
+  if (d < 8.636168555094445E-78) { // d below 2^-256: scale it up by 2^256 ...
+    d *= 1.157920892373162E77;
+    q = 2.9387358770557188E-39; // ... and scale the root back by 2^-128 at the end
+  }
+
+  // http://en.wikipedia.org/wiki/Fast_inverse_square_root
+  double x = longBitsToDouble(0x5fe6ec85e7de30da - (doubleToRawLongBits(d + 1e-320) >> 1)); // bit-level first guess for 1/sqrt(d)
+  // three Newton-Raphson steps refining x toward 1/sqrt(d)
+  x = x * (1.5 - 0.5 * d * x * x);
+  x = x * (1.5 - 0.5 * d * x * x);
+  x = x * (1.5 - 0.5 * d * x * x);
+
+  // You can change xfma to fma if fma is correctly implemented
+  x = xfma(d * x, d * x, -d) * (x * -0.5) + d * x; // d*x corrected by the residual (d*x)^2 - d
+
+  return d == INFINITY ? INFINITY : x * q;
+}
+
+double xcbrt(double d) { // max error : 2 ulps
+  double x, y, q = 1.0;
+  int e, r;
+  // strip the exponent e from d; ilogbp1/ldexpk are defined outside this view
+  e = ilogbp1(d);
+  d = ldexpk(d, -e);
+  r = (e + 6144) % 3; // 6144 = 3 * 2048 keeps the operand non-negative for the exponents handled here
+  q = (r == 1) ? 1.2599210498948731647672106 : q; // 2^(1/3)
+  q = (r == 2) ? 1.5874010519681994747517056 : q; // 2^(2/3)
+  q = ldexpk(q, (e + 6144) / 3 - 2048); // fold in the exponent e/3 (rounded down)
+
+  q = mulsign(q, d);
+  d = xfabs(d);
+  // polynomial first guess in d
+  x = -0.640245898480692909870982;
+  x = x * d + 2.96155103020039511818595;
+  x = x * d + -5.73353060922947843636166;
+  x = x * d + 6.03990368989458747961407;
+  x = x * d + -3.85841935510444988821632;
+  x = x * d + 2.2307275302496609725722;
+  // Newton step x -= (d*x^4 - x)/3, which refines x toward d^(-1/3)
+  y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0);
+  y = d * x * x; // d * x^2, close to d^(1/3) when x is close to d^(-1/3)
+  y = (y - (2.0 / 3.0) * y * (y * x - 1)) * q; // one more correction, then the scale factor q
+
+  return y;
+}
+
+double xcbrt_u1(double d) { // cube root of d with double-double refinement (README: at most 1 ulp)
+  double x, y, z;
+  double2 q2 = dd(1, 0), u, v;
+  int e, r;
+  // strip the exponent e from d; ilogbp1/ldexpk are defined outside this view
+  e = ilogbp1(d);
+  d = ldexpk(d, -e);
+  r = (e + 6144) % 3; // 6144 = 3 * 2048 keeps the operand non-negative for the exponents handled here
+  q2 = (r == 1) ? dd(1.2599210498948731907, -2.5899333753005069177e-17) : q2; // 2^(1/3) as two words
+  q2 = (r == 2) ? dd(1.5874010519681995834, -1.0869008194197822986e-16) : q2; // 2^(2/3) as two words
+
+  q2.x = mulsign(q2.x, d); q2.y = mulsign(q2.y, d);
+  d = xfabs(d);
+  // polynomial first guess in d
+  x = -0.640245898480692909870982;
+  x = x * d + 2.96155103020039511818595;
+  x = x * d + -5.73353060922947843636166;
+  x = x * d + 6.03990368989458747961407;
+  x = x * d + -3.85841935510444988821632;
+  x = x * d + 2.2307275302496609725722;
+  // Newton step x -= (d*x^4 - x)/3, which refines x toward d^(-1/3)
+  y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0);
+
+  z = x;
+  // u = d*x^4 - x in double-double: the residual of the next Newton step
+  u = ddmul_d2_d_d(x, x);
+  u = ddmul_d2_d2_d2(u, u);
+  u = ddmul_d2_d2_d(u, d);
+  u = ddadd2_d2_d2_d(u, -x);
+  y = u.x + u.y;
+  // v = (z^2 - (2/3)*residual*z) * d * q2, then the exponent is applied with ldexp
+  y = -2.0 / 3.0 * y * z;
+  v = ddadd2_d2_d2_d(ddmul_d2_d_d(z, z), y);
+  v = ddmul_d2_d2_d(v, d);
+  v = ddmul_d2_d2_d2(v, q2);
+  z = ldexp(v.x + v.y, (e + 6144) / 3 - 2048);
+  // infinite and zero inputs keep their sign, which was copied into q2.x above
+  if (xisinf(d)) { z = mulsign(INFINITY, q2.x); }
+  if (d == 0) { z = mulsign(0, q2.x); }
+
+  return z;
+}
+
+double xexp2(double a) { // 2 raised to a, computed as exp(a * ln 2)
+  const double2 scaled = ddmul_d2_d2_d(dd(0.69314718055994528623, 2.3190468138462995584e-17), a); // ln 2 as two words, times a
+  double u = expk(scaled);
+  u = a > 1023 ? INFINITY : u; // above this exponent the result is reported as infinity
+  return xisminf(a) ? 0 : u; // -infinity maps to 0
+}
+
+double xexp10(double a) { // 10 raised to a, computed as exp(a * ln 10)
+  const double2 scaled = ddmul_d2_d2_d(dd(2.3025850929940459011, -2.1707562233822493508e-16), a); // ln 10 as two words, times a
+  double u = expk(scaled);
+  u = a > 308 ? INFINITY : u; // above this exponent the result is reported as infinity
+  return xisminf(a) ? 0 : u; // -infinity maps to 0
+}
+
+double xexpm1(double a) { // e^a - 1, with the subtraction done in double-double
+  const double2 diff = ddadd2_d2_d2_d(expk2(dd(a, 0)), -1.0);
+  double x = diff.x + diff.y;
+  x = a > 700 ? INFINITY : x; // large a is reported as infinity
+  x = a < -0.36043653389117156089696070315825181539851971360337e+2 ? -1 : x; // very negative a is pinned to -1
+  return x;
+}
+
+double xlog10(double a) { // base-10 logarithm: logk(a) times 1/ln(10), both in double-double
+  const double2 prod = ddmul_d2_d2_d2(logk(a), dd(0.43429448190325176116, 6.6494347733425473126e-17));
+  double x = prod.x + prod.y;
+
+  // special inputs, applied in this order (later lines win)
+  x = xisinf(a) ? INFINITY : x;
+  x = a < 0 ? NAN : x;
+  x = a == 0 ? -INFINITY : x;
+  return x;
+}
+
+double xlog1p(double a) { // log(1 + a), with the sum 1 + a formed in double-double
+  const double2 lg = logk2(ddadd2_d2_d_d(a, 1));
+  double x = lg.x + lg.y;
+
+  // special inputs, applied in this order (later lines win)
+  x = xisinf(a) ? INFINITY : x;
+  x = a < -1 ? NAN : x;
+  x = a == -1 ? -INFINITY : x;
+  return x;
+}
diff --git a/purec/sleefsp.c b/purec/sleefsp.c
new file mode 100644
index 00000000..3dc0a1a2
--- /dev/null
+++ b/purec/sleefsp.c
@@ -0,0 +1,1093 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdint.h>
+#include <math.h>
+
+#include "nonnumber.h"
+
+#define PI4_Af 0.78515625f // PI4_Af..PI4_Df: four-part split whose sum approximates pi/4
+#define PI4_Bf 0.00024187564849853515625f
+#define PI4_Cf 3.7747668102383613586e-08f
+#define PI4_Df 1.2816720341285448015e-12f
+
+#define L2Uf 0.693145751953125f // L2Uf + L2Lf: two-part split whose sum approximates ln(2)
+#define L2Lf 1.428606765330187045e-06f
+
+#define R_LN2f 1.442695040888963407359924681001892137426645954152985934135449406931f // 1 / ln(2)
+#define M_PIf ((float)M_PI) // pi rounded to float
+
+static inline int32_t floatToRawIntBits(float d) { // bit pattern of d reinterpreted as a 32-bit integer
+  union {
+    int32_t bits;
+    float value;
+  } pun;
+  pun.value = d;
+  return pun.bits;
+}
+
+static inline float intBitsToFloat(int32_t i) { // float whose bit pattern is the 32-bit integer i
+  union {
+    int32_t bits;
+    float value;
+  } pun;
+  pun.bits = i;
+  return pun.value;
+}
+
+static inline float xfabsf(float x) { // |x|, obtained by clearing the sign bit of the bit pattern
+  return intBitsToFloat(floatToRawIntBits(x) & 0x7fffffffL);
+}
+
+static inline float mulsignf(float x, float y) { // x with its sign flipped when the sign bit of y is set
+  return intBitsToFloat(floatToRawIntBits(x) ^ (floatToRawIntBits(y) & INT32_MIN)); // INT32_MIN is the sign-bit mask; 1 << 31 on int is undefined
+}
+
+static inline float signf(float d) { return mulsignf(1.0f, d); } // 1 carrying the sign bit of d
+static inline float mlaf(float x, float y, float z) { return x * y + z; } // multiply-add: x*y + z
+static inline float xrintf(float x) { return (float)(int)(x < 0 ? x - 0.5f : x + 0.5f); } // round half away from zero through an int
+
+static inline int xisnanf(float x) { return !(x == x); } // only NaN compares unequal to itself
+static inline int xisinff(float x) { return xfabsf(x) == INFINITYf; } // 1 for either infinity, else 0
+static inline int xisminff(float x) { return -INFINITYf == x; } // 1 only for negative infinity
+static inline int xispinff(float x) { return INFINITYf == x; } // 1 only for positive infinity
+
+static inline int ilogbp1f(float d) { // unbiased exponent of d plus one, read from the exponent bits
+  const int tiny = d < 5.421010862427522E-20f; // below 2^-64: pre-scale before reading the bits
+  if (tiny) d = 1.8446744073709552E19f * d; // multiply by 2^64
+  const int biased = (floatToRawIntBits(d) >> 23) & 0xff;
+  const int bias = tiny ? (64 + 0x7e) : 0x7e; // also undo the 2^64 pre-scale when it was applied
+  return biased - bias;
+}
+
+static inline float pow2if(int q) { // 2^q built directly from a biased exponent field
+  return intBitsToFloat(((int32_t)(0x7f + q)) << 23);
+}
+
+static inline float ldexpkf(float x, int q) { // x * 2^q, applied as five smaller power-of-two factors
+  float u;
+  int m;
+  m = q >> 31; // 0 for q >= 0, -1 for q < 0 (assuming an arithmetic right shift)
+  m = (((m + q) >> 6) - m) << 4; // 16 * (q / 64), quotient rounded toward zero
+  q = q - (m << 2); // exponent left over after removing 4*m
+  m += 127; // add the float exponent bias
+  m = m <   0 ?   0 : m; // clamp to the 8-bit exponent field
+  m = m > 255 ? 255 : m;
+  u = intBitsToFloat(((int32_t)m) << 23); // float whose exponent field is m
+  x = x * u * u * u * u; // apply that factor four times
+  u = intBitsToFloat(((int32_t)(q + 0x7f)) << 23); // 2^q for the leftover exponent
+  return x * u;
+}
+
+float xldexpf(float x, int q) { return ldexpkf(x, q); } // exported wrapper around ldexpkf
+
+//
+
+typedef struct { // pair of floats; the routines below collapse it as x + y
+  float x, y; // x: leading word, y: trailing correction word
+} float2;
+
+#ifndef NDEBUG
+static int checkfp(float x) { // debug builds only: 1 when x is infinite or NaN, else 0
+  // the debug checks in the dfadd/dfsub helpers use this to skip non-finite operands
+  return (xisinff(x) || xisnanf(x)) ? 1 : 0;
+}
+#endif
+
+static inline float upperf(float d) { // d with the low 12 bits of its bit pattern cleared
+  return intBitsToFloat(0xfffff000 & floatToRawIntBits(d));
+}
+
+static inline float2 df(float h, float l) { // pack a high word h and a low word l into a float2
+  float2 pair;
+  pair.x = h;
+  pair.y = l;
+  return pair;
+}
+
+static inline float2 dfnormalize_f2_f2(float2 t) { // re-split t so that .x holds the rounded sum of both words
+  float2 out;
+  const float hi = t.x + t.y; // new leading word
+
+  out.x = hi;
+  out.y = t.x - hi + t.y; // what the rounded sum left behind
+  return out;
+}
+
+static inline float2 dfscale_f2_f2_f(float2 d, float s) { // multiply both words of d by s
+  float2 out = d;
+
+  out.x *= s; // each word is scaled on its own; no error term is computed
+  out.y *= s;
+
+  return out;
+}
+
+static inline float2 dfneg_f2_f2(float2 d) { // negate both words of d
+  float2 out = d;
+
+  out.x = -out.x;
+  out.y = -out.y;
+
+  return out;
+}
+
+static inline float2 dfadd_f2_f_f(float x, float y) { // x + y as a float2 (sum plus its rounding error)
+  // expects |x| >= |y|; debug builds only report a violation on stderr, nothing is enforced
+
+  float2 r;
+
+#ifndef NDEBUG
+  if (!(checkfp(x) || checkfp(y) || xfabsf(x) >= xfabsf(y))) fprintf(stderr, "[dfadd_f2_f_f : %g, %g]", x, y);
+#endif
+
+  r.x = x + y; // rounded sum
+  r.y = x - r.x + y; // error term; this short form relies on the ordering above
+
+  return r;
+}
+
+static inline float2 dfadd2_f2_f_f(float x, float y) { // x + y as a float2; unlike dfadd_f2_f_f there is no ordering check
+  float2 r;
+
+  r.x = x + y; // rounded sum
+  float v = r.x - x; // the part of y that was absorbed into r.x
+  r.y = (x - (r.x - v)) + (y - v); // error term built symmetrically from both operands
+
+  return r;
+}
+
+static inline float2 dfadd_f2_f2_f(float2 x, float y) { // float2 x plus float y, as a float2
+  // expects |x.x| >= |y|; debug builds only report a violation on stderr, nothing is enforced
+
+  float2 r;
+
+#ifndef NDEBUG
+  if (!(checkfp(x.x) || checkfp(y) || xfabsf(x.x) >= xfabsf(y))) fprintf(stderr, "[dfadd_f2_f2_f : %g %g]", x.x, y);
+#endif
+
+  r.x = x.x + y; // rounded sum of the leading word and y
+  r.y = x.x - r.x + y + x.y; // error of that sum plus the trailing word of x
+
+  return r;
+}
+
+static inline float2 dfadd2_f2_f2_f(float2 x, float y) { // float2 x plus float y, as a float2
+  // NOTE(review): the old note here read "|x| >= |y|", but this add2 form has no ordering check - confirm whether it is required
+
+  float2 r;
+
+  r.x  = x.x + y; // rounded sum of the leading word and y
+  float v = r.x - x.x; // the part of y that was absorbed into r.x
+  r.y = (x.x - (r.x - v)) + (y - v); // error term built symmetrically from both operands
+  r.y += x.y; // then the trailing word of x
+
+  return r;
+}
+
+static inline float2 dfadd_f2_f_f2(float x, float2 y) { // float x plus float2 y, as a float2
+  // expects |x| >= |y.x|; debug builds only report a violation on stderr, nothing is enforced
+
+  float2 r;
+
+#ifndef NDEBUG
+  if (!(checkfp(x) || checkfp(y.x) || xfabsf(x) >= xfabsf(y.x))) fprintf(stderr, "[dfadd_f2_f_f2 : %g %g]", x, y.x);
+#endif
+
+  r.x = x + y.x; // rounded sum of x and the leading word of y
+  r.y = x - r.x + y.x + y.y; // error of that sum plus the trailing word of y
+
+  return r;
+}
+
+static inline float2 dfadd_f2_f2_f2(float2 x, float2 y) { // sum of two float2 values, as a float2
+  // expects |x.x| >= |y.x|; debug builds only report a violation on stderr, nothing is enforced
+
+  float2 r;
+
+#ifndef NDEBUG
+  if (!(checkfp(x.x) || checkfp(y.x) || xfabsf(x.x) >= xfabsf(y.x))) fprintf(stderr, "[dfadd_f2_f2_f2 : %g %g]", x.x, y.x);
+#endif
+
+  r.x = x.x + y.x; // rounded sum of the leading words
+  r.y = x.x - r.x + y.x + x.y + y.y; // error of that sum plus both trailing words
+
+  return r;
+}
+
+static inline float2 dfadd2_f2_f2_f2(float2 x, float2 y) { // sum of two float2 values; no ordering check, unlike dfadd_f2_f2_f2
+  float2 r;
+
+  r.x  = x.x + y.x; // rounded sum of the leading words
+  float v = r.x - x.x; // the part of y.x that was absorbed into r.x
+  r.y = (x.x - (r.x - v)) + (y.x - v); // error term built symmetrically from both operands
+  r.y += x.y + y.y; // then both trailing words
+
+  return r;
+}
+
+static inline float2 dfsub_f2_f2_f2(float2 x, float2 y) { // x - y for two float2 values, as a float2
+  // expects |x.x| >= |y.x|; debug builds only report a violation on stderr, nothing is enforced
+
+  float2 r;
+
+#ifndef NDEBUG
+  if (!(checkfp(x.x) || checkfp(y.x) || xfabsf(x.x) >= xfabsf(y.x))) fprintf(stderr, "[dfsub_f2_f2_f2 : %g %g]", x.x, y.x);
+#endif
+
+  r.x = x.x - y.x; // rounded difference of the leading words
+  r.y = x.x - r.x - y.x + x.y - y.y; // error of that difference plus the trailing words
+
+  return r;
+}
+
+static inline float2 dfdiv_f2_f2_f2(float2 n, float2 d) { // quotient n / d of two float2 values, as a float2
+  float t = 1.0f / d.x; // single-precision reciprocal of the divisor's leading word
+  float dh  = upperf(d.x), dl  = d.x - dh; // each value is split into upperf() bits plus the remainder
+  float th  = upperf(t  ), tl  = t   - th;
+  float nhh = upperf(n.x), nhl = n.x - nhh;
+
+  float2 q;
+
+  q.x = n.x * t; // leading word of the quotient
+  // u = (n.x*t - q.x) rebuilt from the split partial products, plus q.x * (1 - d.x*t)
+  float u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl +
+    q.x * (1 - dh * th - dh * tl - dl * th - dl * tl);
+
+  q.y = t * (n.y - q.x * d.y) + u; // fold in the trailing words of n and d
+
+  return q;
+}
+
+static inline float2 dfmul_f2_f_f(float x, float y) { // product x * y of two floats, as a float2
+  float xh = upperf(x), xl = x - xh; // split each factor into upperf() bits plus the remainder
+  float yh = upperf(y), yl = y - yh;
+  float2 r;
+
+  r.x = x * y; // rounded product
+  r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl; // its error, rebuilt from the four partial products
+
+  return r;
+}
+
+static inline float2 dfmul_f2_f2_f(float2 x, float y) { // product of float2 x and float y, as a float2
+  float xh = upperf(x.x), xl = x.x - xh; // split each leading factor into upperf() bits plus the remainder
+  float yh = upperf(y  ), yl = y   - yh;
+  float2 r;
+
+  r.x = x.x * y; // rounded product of the leading word and y
+  r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y; // its error plus the trailing word's contribution
+
+  return r;
+}
+
+static inline float2 dfmul_f2_f2_f2(float2 x, float2 y) { // product of two float2 values, as a float2
+  float xh = upperf(x.x), xl = x.x - xh; // split each leading word into upperf() bits plus the remainder
+  float yh = upperf(y.x), yl = y.x - yh;
+  float2 r;
+
+  r.x = x.x * y.x; // rounded product of the leading words
+  r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x; // its error plus the cross terms; x.y*y.y is not included
+
+  return r;
+}
+
+static inline float2 dfsqu_f2_f2(float2 x) { // square of a float2, as a float2
+  float xh = upperf(x.x), xl = x.x - xh; // split the leading word into upperf() bits plus the remainder
+  float2 r;
+
+  r.x = x.x * x.x; // rounded square of the leading word
+  r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y); // its error plus the cross term 2*x.x*x.y
+
+  return r;
+}
+
+static inline float2 dfrec_f2_f(float d) { // reciprocal 1 / d of a float, as a float2
+  float t = 1.0f / d; // single-precision reciprocal
+  float dh = upperf(d), dl = d - dh; // split d and t into upperf() bits plus the remainder
+  float th = upperf(t), tl = t - th;
+  float2 q;
+
+  q.x = t;
+  q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl); // t * (1 - d*t), with d*t expanded from the split parts
+
+  return q;
+}
+
+static inline float2 dfrec_f2_f2(float2 d) { // reciprocal 1 / d of a float2, as a float2
+  float t = 1.0f / d.x; // single-precision reciprocal of the leading word
+  float dh = upperf(d.x), dl = d.x - dh; // split d.x and t into upperf() bits plus the remainder
+  float th = upperf(t  ), tl = t   - th;
+  float2 q;
+
+  q.x = t;
+  q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl - d.y * t); // t * (1 - (d.x + d.y)*t), expanded from the split parts
+
+  return q;
+}
+
+static inline float2 dfsqrt_f2_f2(float2 d) { // square root of a float2: one refinement of a sqrtf first guess
+  const float root = sqrtf(d.x + d.y); // single-precision first guess
+  return dfscale_f2_f2_f(dfmul_f2_f2_f2(dfadd2_f2_f2_f2(d, dfmul_f2_f_f(root, root)), dfrec_f2_f(root)), 0.5f); // (d + root^2) * (1/root) * 0.5
+}
+
+//
+
+float xsinf(float d) { // single-precision sine of d
+  float poly, sq;
+  const int k = (int)xrintf(d * (float)M_1_PI); // d/pi, rounded half away from zero
+
+  // remove k*pi in four steps (PI4_Af..PI4_Df together approximate pi/4)
+  d = mlaf(k, -PI4_Af*4, d);
+  d = mlaf(k, -PI4_Bf*4, d);
+  d = mlaf(k, -PI4_Cf*4, d);
+  d = mlaf(k, -PI4_Df*4, d);
+
+  sq = d * d;
+
+  d = (k & 1) ? -d : d; // odd k flips the sign
+
+  poly = 2.6083159809786593541503e-06f;
+  poly = mlaf(poly, sq, -0.0001981069071916863322258f);
+  poly = mlaf(poly, sq, 0.00833307858556509017944336f);
+  poly = mlaf(poly, sq, -0.166666597127914428710938f);
+
+  poly = mlaf(sq, poly * d, d); // d + d*sq*poly
+
+  // NOTE(review): this tests the reduced d, not the caller's original argument
+  if (xisinff(d)) poly = NANf;
+
+  return poly;
+}
+
+float xsinf_u1(float d) { // sine of d with float2 internals (README: higher-accuracy variant)
+  float2 red, sq, acc;
+  float poly;
+  const int k = (int)xrintf(d * (float)M_1_PI); // multiple of pi to remove; parity decides the sign
+
+  // red = d - k*pi, accumulated as a float2
+  red = dfadd2_f2_f_f(d, k * (-PI4_Af*4));
+  red = dfadd2_f2_f2_f(red, k * (-PI4_Bf*4));
+  red = dfadd2_f2_f2_f(red, k * (-PI4_Cf*4));
+  red = dfadd2_f2_f2_f(red, k * (-PI4_Df*4));
+
+  sq = dfsqu_f2_f2(red);
+
+  // high-order coefficients use only the leading word sq.x
+  poly = 2.6083159809786593541503e-06f;
+  poly = mlaf(poly, sq.x, -0.0001981069071916863322258f);
+  poly = mlaf(poly, sq.x, 0.00833307858556509017944336f);
+
+  // 1 + sq * (c + poly * sq.x), kept as a float2
+  acc = dfadd_f2_f_f2(1, dfmul_f2_f2_f2(dfadd_f2_f_f(-0.166666597127914428710938f, poly * sq.x), sq));
+
+  acc = dfmul_f2_f2_f2(red, acc);
+  poly = acc.x + acc.y;
+
+  if (k & 1) poly = -poly; // odd k negates the result
+
+  return poly;
+}
+
+float xcosf(float d) { // single-precision cosine of d
+  float poly, sq;
+  const int k = 1 + 2*(int)xrintf(d * (float)M_1_PI - 0.5f); // always odd
+
+  // remove k*(pi/2) in four steps (PI4_Af..PI4_Df together approximate pi/4)
+  d = mlaf(k, -PI4_Af*2, d);
+  d = mlaf(k, -PI4_Bf*2, d);
+  d = mlaf(k, -PI4_Cf*2, d);
+  d = mlaf(k, -PI4_Df*2, d);
+
+  sq = d * d;
+
+  d = (k & 2) ? d : -d; // negate when bit 1 of k is clear
+
+  poly = 2.6083159809786593541503e-06f;
+  poly = mlaf(poly, sq, -0.0001981069071916863322258f);
+  poly = mlaf(poly, sq, 0.00833307858556509017944336f);
+  poly = mlaf(poly, sq, -0.166666597127914428710938f);
+
+  poly = mlaf(sq, poly * d, d); // d + d*sq*poly
+
+  // NOTE(review): this tests the reduced d, not the caller's original argument
+  if (xisinff(d)) poly = NANf;
+
+  return poly;
+}
+
+float xcosf_u1(float d) { // cosine of d with float2 internals (README: higher-accuracy variant)
+  float2 red, sq, acc;
+  float poly, k;
+
+  d = fabsf(d); // work on |d|
+
+  k = 1 + 2*(int)xrintf(d * (float)M_1_PI - 0.5f); // odd integer, stored as a float
+
+  // red = d - k*(pi/2), accumulated as a float2
+  red = dfadd2_f2_f_f(d, k * (-PI4_Af*2));
+  red = dfadd2_f2_f2_f(red, k * (-PI4_Bf*2));
+  red = dfadd2_f2_f2_f(red, k * (-PI4_Cf*2));
+  red = dfadd2_f2_f2_f(red, k * (-PI4_Df*2));
+
+  sq = dfsqu_f2_f2(red);
+
+  // high-order coefficients use only the leading word sq.x
+  poly = 2.6083159809786593541503e-06f;
+  poly = mlaf(poly, sq.x, -0.0001981069071916863322258f);
+  poly = mlaf(poly, sq.x, 0.00833307858556509017944336f);
+
+  acc = dfadd_f2_f_f2(1, dfmul_f2_f2_f2(dfadd_f2_f_f(-0.166666597127914428710938f, poly * sq.x), sq));
+
+  acc = dfmul_f2_f2_f2(red, acc);
+  poly = acc.x + acc.y;
+
+  if (!(((int)k) & 2)) poly = -poly; // negate when bit 1 of k is clear
+  return poly;
+}
+
+float2 xsincosf(float d) { // returns sin(d) in .x and cos(d) in .y
+  float2 out;
+  float poly, sq, red;
+  const int k = (int)xrintf(d * ((float)(2 * M_1_PI))); // d/(pi/2), rounded half away from zero
+
+  // red = d - k*(pi/2), removed in four steps
+  red = d;
+  red = mlaf(k, -PI4_Af*2, red);
+  red = mlaf(k, -PI4_Bf*2, red);
+  red = mlaf(k, -PI4_Cf*2, red);
+  red = mlaf(k, -PI4_Df*2, red);
+
+  sq = red * red;
+
+  // odd series in red for the .x result
+  poly = -0.000195169282960705459117889f;
+  poly = mlaf(poly, sq, 0.00833215750753879547119141f);
+  poly = mlaf(poly, sq, -0.166666537523269653320312f);
+  poly = poly * sq * red;
+
+  out.x = red + poly;
+
+  // even series in red for the .y result
+  poly = -2.71811842367242206819355e-07f;
+  poly = mlaf(poly, sq, 2.47990446951007470488548e-05f);
+  poly = mlaf(poly, sq, -0.00138888787478208541870117f);
+  poly = mlaf(poly, sq, 0.0416666641831398010253906f);
+  poly = mlaf(poly, sq, -0.5f);
+
+  out.y = poly * sq + 1;
+
+  // rotate by k quarter turns: odd k swaps the pair, then each sign is fixed
+  if (k & 1) { const float held = out.y; out.y = out.x; out.x = held; }
+  if (k & 2) out.x = -out.x;
+  if ((k + 1) & 2) out.y = -out.y;
+
+  // infinite input yields NaN for both results
+  if (xisinff(d)) out.x = out.y = NANf;
+
+  return out;
+}
+
+float2 xsincosf_u1(float d) { // sin(d) in .x, cos(d) in .y; float2 internals
+  int q;
+  float u;
+  float2 r, s, t, x;
+
+  q = (int)xrintf(d * (float)(2 * M_1_PI)); // d/(pi/2), rounded half away from zero
+  // s = d - q*(pi/2), accumulated as a float2
+  s = dfadd2_f2_f_f(d, q * (-PI4_Af*2));
+  s = dfadd2_f2_f2_f(s, q * (-PI4_Bf*2));
+  s = dfadd2_f2_f2_f(s, q * (-PI4_Cf*2));
+  s = dfadd2_f2_f2_f(s, q * (-PI4_Df*2));
+
+  t = s; // keep the reduced argument
+  s = dfsqu_f2_f2(s);
+  s.x = s.x + s.y; // only the collapsed square is used below
+  // odd series for the .x result
+  u = -0.000195169282960705459117889f;
+  u = mlaf(u, s.x, 0.00833215750753879547119141f);
+  u = mlaf(u, s.x, -0.166666537523269653320312f);
+
+  u *= s.x * t.x;
+
+  x = dfadd_f2_f2_f(t, u);
+  r.x = x.x + x.y;
+  // even series for the .y result
+  u = -2.71811842367242206819355e-07f;
+  u = mlaf(u, s.x, 2.47990446951007470488548e-05f);
+  u = mlaf(u, s.x, -0.00138888787478208541870117f);
+  u = mlaf(u, s.x, 0.0416666641831398010253906f);
+  u = mlaf(u, s.x, -0.5f);
+
+  x = dfadd_f2_f_f2(1, dfmul_f2_f_f(s.x, u));
+  r.y = x.x + x.y;
+  // rotate by q quarter turns: odd q swaps the pair, then each sign is fixed
+  if ((q & 1) != 0) { u = r.y; r.y = r.x; r.x = u; }
+  if ((q & 2) != 0) { r.x = -r.x; }
+  if (((q+1) & 2) != 0) { r.y = -r.y; }
+
+  if (xisinff(d)) { r.x = r.y = NANf; } // NANf, the float constant the other routines in this file use
+
+  return r;
+}
+
+float xtanf(float d) { // single-precision tangent of d
+  float poly, sq, red;
+  const int k = (int)xrintf(d * (float)(2 * M_1_PI)); // d/(pi/2), rounded half away from zero
+  const int flip = (k & 1) != 0; // odd k: negate the argument, then take the reciprocal
+
+  // red = d - k*(pi/2), removed in four steps
+  red = d;
+  red = mlaf(k, -PI4_Af*2, red);
+  red = mlaf(k, -PI4_Bf*2, red);
+  red = mlaf(k, -PI4_Cf*2, red);
+  red = mlaf(k, -PI4_Df*2, red);
+
+  sq = red * red;
+
+  if (flip) red = -red;
+
+  // polynomial in sq
+  poly = 0.00927245803177356719970703f;
+  poly = mlaf(poly, sq, 0.00331984995864331722259521f);
+  poly = mlaf(poly, sq, 0.0242998078465461730957031f);
+  poly = mlaf(poly, sq, 0.0534495301544666290283203f);
+  poly = mlaf(poly, sq, 0.133383005857467651367188f);
+  poly = mlaf(poly, sq, 0.333331853151321411132812f);
+
+  poly = mlaf(sq, poly * red, red); // red + red*sq*poly
+
+  if (flip) poly = 1.0f / poly;
+
+  if (xisinff(d)) poly = NANf; // infinite input yields NaN
+
+  return poly;
+}
+
+float xtanf_u1(float d) { // tangent of d with float2 internals (README: higher-accuracy variant)
+  float2 red, sq, acc;
+  float poly;
+  const int k = (int)xrintf(d * (float)(2 * M_1_PI)); // d/(pi/2), rounded half away from zero
+  const int flip = (k & 1) != 0; // odd k: negate the argument, then take the reciprocal
+
+  // red = d - k*(pi/2), accumulated as a float2
+  red = dfadd2_f2_f_f(d, k * (-PI4_Af*2));
+  red = dfadd2_f2_f2_f(red, k * (-PI4_Bf*2));
+  red = dfadd2_f2_f2_f(red, k * (-PI4_Cf*2));
+  red = dfadd2_f2_f2_f(red, k * (-PI4_Df*2));
+
+  if (flip) red = dfneg_f2_f2(red);
+
+  sq = dfsqu_f2_f2(red);
+  sq = dfnormalize_f2_f2(sq);
+
+  poly = 0.00446636462584137916564941f;
+  poly = mlaf(poly, sq.x, -8.3920182078145444393158e-05f);
+  poly = mlaf(poly, sq.x, 0.0109639242291450500488281f);
+  poly = mlaf(poly, sq.x, 0.0212360303848981857299805f);
+  poly = mlaf(poly, sq.x, 0.0540687143802642822265625f);
+
+  // the lower-order terms are evaluated in float2, innermost first
+  acc = dfadd_f2_f_f(0.133325666189193725585938f, poly * sq.x);
+  acc = dfadd_f2_f_f2(1, dfmul_f2_f2_f2(dfadd_f2_f_f2(0.33333361148834228515625f, dfmul_f2_f2_f2(sq, acc)), sq));
+  acc = dfmul_f2_f2_f2(red, acc);
+
+  if (flip) acc = dfrec_f2_f2(acc);
+
+  poly = acc.x + acc.y;
+
+  return poly;
+}
+
+float xatanf(float s) { // single-precision arctangent of s
+  float sq, poly, angle;
+  int neg = 0, inv = 0;
+
+  if (s < 0) { neg = 1; s = -s; } // work on |s|
+  if (s > 1) { inv = 1; s = 1.0f / s; } // above 1: use the reciprocal, corrected below
+
+  sq = s * s;
+
+  poly = 0.00282363896258175373077393f;
+  poly = mlaf(poly, sq, -0.0159569028764963150024414f);
+  poly = mlaf(poly, sq, 0.0425049886107444763183594f);
+  poly = mlaf(poly, sq, -0.0748900920152664184570312f);
+  poly = mlaf(poly, sq, 0.106347933411598205566406f);
+  poly = mlaf(poly, sq, -0.142027363181114196777344f);
+  poly = mlaf(poly, sq, 0.199926957488059997558594f);
+  poly = mlaf(poly, sq, -0.333331018686294555664062f);
+
+  angle = s + s * (sq * poly);
+
+  if (inv) angle = 1.570796326794896557998982f - angle; // atan(1/s) = pi/2 - atan(s) for s > 0
+  if (neg) angle = -angle;
+
+  return angle;
+}
+
+static inline float atan2kf(float y, float x) { // atan2 core; the callers in this file pass a non-negative y
+  float ratio, sq, poly, angle;
+  int quad = 0; // number of pi/2 steps added back at the end
+
+  if (x < 0) { quad = -2; x = -x; }
+  if (y > x) { const float held = x; x = y; y = -held; quad += 1; } // swap so the quotient stays within [-1, 1] for y >= 0
+
+  ratio = y / x;
+  sq = ratio * ratio;
+
+  poly = 0.00282363896258175373077393f;
+  poly = mlaf(poly, sq, -0.0159569028764963150024414f);
+  poly = mlaf(poly, sq, 0.0425049886107444763183594f);
+  poly = mlaf(poly, sq, -0.0748900920152664184570312f);
+  poly = mlaf(poly, sq, 0.106347933411598205566406f);
+  poly = mlaf(poly, sq, -0.142027363181114196777344f);
+  poly = mlaf(poly, sq, 0.199926957488059997558594f);
+  poly = mlaf(poly, sq, -0.333331018686294555664062f);
+
+  angle = poly * sq * ratio + ratio;
+  angle = quad * (float)(M_PI/2) + angle;
+
+  return angle;
+}
+
+float xatan2f(float y, float x) {
+  const int xinf = xisinff(x), yinf = xisinff(y);
+  float ang = mulsignf(atan2kf(xfabsf(y), x), x);
+  // Special operands; a later assignment overrides an earlier one.
+  if (xinf || x == 0) ang = M_PIf/2 - (xinf ? (signf(x) * (float)(M_PI  /2)) : 0);
+  if (yinf) ang = M_PIf/2 - (xinf ? (signf(x) * (float)(M_PI*1/4)) : 0);
+  if (y == 0) ang = (signf(x) == -1 ? M_PIf : 0);
+  if (xisnanf(x) || xisnanf(y)) return NANf;
+  return mulsignf(ang, y);
+}
+
+float xasinf(float d) { // arcsine: atan2 kernel on (|d|, sqrt((1+d)(1-d))), then mulsignf(., d)
+  return mulsignf(atan2kf(fabsf(d), sqrtf((1.0f+d)*(1.0f-d))), d);
+}
+
+float xacosf(float d) { // arccosine: atan2 kernel on (sqrt((1+d)(1-d)), |d|), mulsignf(., d), plus pi when d < 0
+  return mulsignf(atan2kf(sqrtf((1.0f+d)*(1.0f-d)), fabsf(d)), d) + (d < 0 ? (float)M_PI : 0.0f);
+}
+
+static float2 atan2kf_u1(float2 y, float2 x) { // double-float counterpart of atan2kf; used by the *_u1 inverse trig functions
+  float u;
+  float2 s, t;
+  int q = 0;
+  // Same folding as atan2kf, applied to both parts of each operand; q counts multiples of pi/2.
+  if (x.x < 0) { x.x = -x.x; x.y = -x.y; q = -2; }
+  if (y.x > x.x) { t = x; x = y; y.x = -t.x; y.y = -t.y; q += 1; }
+  // s = y / x and t = s^2, both kept as double-float values.
+  s = dfdiv_f2_f2_f2(y, x);
+  t = dfsqu_f2_f2(s);
+  t = dfnormalize_f2_f2(t);
+
+  u = -0.00176397908944636583328247f;
+  u = mlaf(u, t.x, 0.0107900900766253471374512f);
+  u = mlaf(u, t.x, -0.0309564601629972457885742f);
+  u = mlaf(u, t.x, 0.0577365085482597351074219f);
+  u = mlaf(u, t.x, -0.0838950723409652709960938f);
+  u = mlaf(u, t.x, 0.109463557600975036621094f);
+  u = mlaf(u, t.x, -0.142626821994781494140625f);
+  u = mlaf(u, t.x, 0.199983194470405578613281f);
+  // Disabled variant that folded the last coefficient into the float polynomial:
+  //u = mlaf(u, t.x, -0.333332866430282592773438f);
+  //t = dfmul_f2_f2_f(t, u);
+  // Active variant: the last coefficient is added in double-float; result is s * (1 + t * (c + u * t.x)) + q * pi/2.
+  t = dfmul_f2_f2_f2(t, dfadd_f2_f_f(-0.333332866430282592773438f, u * t.x));
+  t = dfmul_f2_f2_f2(s, dfadd_f2_f_f2(1, t));
+  t = dfadd2_f2_f2_f2(dfmul_f2_f2_f(df(1.5707963705062866211f, -4.3711388286737928865e-08f), q), t);
+
+  return t;
+}
+
+float xatan2f_u1(float y, float x) {
+  const int xinf = xisinff(x), yinf = xisinff(y);
+  const float2 k = atan2kf_u1(df(xfabsf(y), 0), df(x, 0));
+  float ang = mulsignf(k.x + k.y, x);
+  // Special operands; a later assignment overrides an earlier one.
+  if (xinf || x == 0) ang = (float)M_PI/2 - (xinf ? (signf(x) * (float)(M_PI  /2)) : 0.0f);
+  if (yinf) ang = (float)M_PI/2 - (xinf ? (signf(x) * (float)(M_PI*1/4)) : 0.0f);
+  if (y == 0) ang = (signf(x) == -1 ? (float)M_PI : 0.0f);
+  if (xisnanf(x) || xisnanf(y)) return NANf;
+  return mulsignf(ang, y);
+}
+
+float xasinf_u1(float d) {
+  const float2 root = dfsqrt_f2_f2(dfmul_f2_f2_f2(dfadd_f2_f_f(1, d), dfadd_f2_f_f(1,-d)));
+  const float2 ang = atan2kf_u1(df(xfabsf(d), 0), root);
+  // |d| == 1 is pinned to pi/2 instead of using the kernel result.
+  const float mag = (xfabsf(d) == 1) ? 1.570796326794896557998982f : ang.x + ang.y;
+  return mulsignf(mag, d);
+}
+
+float xacosf_u1(float d) { // arccosine built on the double-float kernel atan2kf_u1
+  float2 d2 = atan2kf_u1(dfsqrt_f2_f2(dfmul_f2_f2_f2(dfadd_f2_f_f(1, d), dfadd_f2_f_f(1,-d))), df(xfabsf(d), 0)); // kernel on (sqrt((1+d)(1-d)), |d|)
+  d2 = dfscale_f2_f2_f(d2, mulsignf(1.0f, d)); // scale both parts by mulsignf(1.0f, d)
+  if (xfabsf(d) == 1) d2 = df(0.0f, 0.0f); // |d| == 1: start from exactly zero
+  if (d < 0) d2 = dfadd_f2_f2_f2(df(3.1415927410125732422f,-8.7422776573475857731e-08f), d2); // negative d: add pi, given as a high/low float pair
+  return d2.x + d2.y;
+}
+
+float xatanf_u1(float d) {
+  const float2 ang = atan2kf_u1(df(xfabsf(d), 0.0f), df(1.0f, 0.0f));
+  // An infinite argument is pinned to pi/2 instead of using the kernel result.
+  const float mag = xisinff(d) ? 1.570796326794896557998982f : ang.x + ang.y;
+  return mulsignf(mag, d);
+}
+
+float xlogf(float d) {
+  // e is taken from d * 0.7071f; m = ldexpkf(d, -e) is the rescaled operand (helpers defined elsewhere).
+  const int e = ilogbp1f(d * 0.7071f);
+  const float m = ldexpkf(d, -e);
+  // Series variable r = (m - 1) / (m + 1) and its square.
+  const float r = (m-1.0f) / (m+1.0f);
+  const float r2 = r * r;
+  float p, y;
+
+  p = 0.2371599674224853515625f;
+  p = mlaf(p, r2, 0.285279005765914916992188f);
+  p = mlaf(p, r2, 0.400005519390106201171875f);
+  p = mlaf(p, r2, 0.666666567325592041015625f);
+  p = mlaf(p, r2, 2.0f);
+
+  y = r * p + 0.693147180559945286226764f * e;
+
+  // Special operands override the computed value; the last matching line wins.
+  if (xisinff(d)) y = INFINITYf;
+  if (d < 0) y = NANf;
+  if (d == 0) y = -INFINITYf;
+
+  return y;
+}
+
+float xexpf(float d) { // single-precision exponential
+  int q = (int)xrintf(d * R_LN2f);
+  float s, u;
+  // Range reduction: remove q * L2Uf, then q * L2Lf, from d (assuming mlaf(a, b, c) is a * b + c — confirm).
+  s = mlaf(q, -L2Uf, d);
+  s = mlaf(q, -L2Lf, s);
+  // Polynomial in the reduced argument s.
+  u = 0.00136324646882712841033936f;
+  u = mlaf(u, s, 0.00836596917361021041870117f);
+  u = mlaf(u, s, 0.0416710823774337768554688f);
+  u = mlaf(u, s, 0.166665524244308471679688f);
+  u = mlaf(u, s, 0.499999850988388061523438f);
+
+  u = s * s * u + s + 1.0f;
+  u = ldexpkf(u, q); // reapply the exponent q removed above — presumably a scale by 2^q; confirm in ldexpkf
+
+  if (xisminff(d)) u = 0; // override when xisminff(d) holds (negative infinity, going by the name — confirm)
+
+  return u;
+}
+
+//#define L2Af 0.693145751953125
+//#define L2Bf 1.4285906217992305756e-06
+//#define L2Cf 1.619850954759360917e-11
+
+static inline float expkf(float2 d) { // exponential of a double-float operand, returned as one float; used by xpowf, xexp2f, xexp10f
+  int q = (int)xrintf((d.x + d.y) * R_LN2f);
+  float2 s, t;
+  float u;
+  // Range reduction in double-float: add q * -L2Uf, then q * -L2Lf, to d.
+  s = dfadd2_f2_f2_f(d, q * -L2Uf);
+  s = dfadd2_f2_f2_f(s, q * -L2Lf);
+
+  //s = dfadd2_f2_f2_f(d, q * -L2Af);
+  //s = dfadd2_f2_f2_f(s, q * -L2Bf);
+  //s = dfadd2_f2_f2_f(s, q * -L2Cf);
+
+  s = dfnormalize_f2_f2(s);
+  // Polynomial on the leading part of s only; same coefficients as xexpf.
+  u = 0.00136324646882712841033936f;
+  u = mlaf(u, s.x, 0.00836596917361021041870117f);
+  u = mlaf(u, s.x, 0.0416710823774337768554688f);
+  u = mlaf(u, s.x, 0.166665524244308471679688f);
+  u = mlaf(u, s.x, 0.499999850988388061523438f);
+  // t = 1 + s + s^2 * u, assembled in double-float.
+  t = dfadd_f2_f2_f2(s, dfmul_f2_f2_f(dfsqu_f2_f2(s), u));
+
+  t = dfadd_f2_f_f2(1, t);
+  return ldexpkf(t.x + t.y, q);
+}
+
+static inline float2 logkf(float d) { // logarithm kernel returning a double-float; used by xlogf_u1, xpowf, xlog10f
+  float2 x, x2;
+  float m, t;
+  int e;
+  // Same e / m split as xlogf.
+  e = ilogbp1f(d * 0.7071f);
+  m = ldexpkf(d, -e);
+  // x = (m - 1) / (m + 1) in double-float, and its square.
+  x = dfdiv_f2_f2_f2(dfadd2_f2_f_f(-1, m), dfadd2_f2_f_f(1, m));
+  x2 = dfsqu_f2_f2(x);
+
+  t = 0.2371599674224853515625f;
+  t = mlaf(t, x2.x, 0.285279005765914916992188f);
+  t = mlaf(t, x2.x, 0.400005519390106201171875f);
+  t = mlaf(t, x2.x, 0.666666567325592041015625f);
+  // Result: e * ln 2 (high/low float pair) + 2 * x + x^3 * t.
+  return dfadd2_f2_f2_f2(dfmul_f2_f2_f(df(0.69314718246459960938f, -1.904654323148236017e-09f), e),
+			 dfadd2_f2_f2_f2(dfscale_f2_f2_f(x, 2), dfmul_f2_f2_f(dfmul_f2_f2_f2(x2, x), t)));
+}
+
+float xlogf_u1(float d) {
+  const float2 lg = logkf(d);
+
+  // Priority of the special operands: zero, then negative, then infinite.
+  if (d == 0) return -INFINITYf;
+  if (d < 0) return NANf;
+  if (xisinff(d)) return INFINITYf;
+
+  return lg.x + lg.y;
+}
+
+static inline float2 expk2f(float2 d) { // like expkf but returns a double-float; used by xsinhf, xcoshf, xtanhf, xexpm1f
+  int q = (int)xrintf((d.x + d.y) * R_LN2f);
+  float2 s, t;
+  float u;
+  // Range reduction in double-float; unlike expkf, s is not renormalized afterwards.
+  s = dfadd2_f2_f2_f(d, q * -L2Uf);
+  s = dfadd2_f2_f2_f(s, q * -L2Lf);
+
+  u = 0.00136324646882712841033936f;
+  u = mlaf(u, s.x, 0.00836596917361021041870117f);
+  u = mlaf(u, s.x, 0.0416710823774337768554688f);
+  u = mlaf(u, s.x, 0.166665524244308471679688f);
+  u = mlaf(u, s.x, 0.499999850988388061523438f);
+  // t = 1 + s + s^2 * u, assembled in double-float.
+  t = dfadd_f2_f2_f2(s, dfmul_f2_f2_f(dfsqu_f2_f2(s), u));
+
+  t = dfadd_f2_f_f2(1, t);
+  return dfscale_f2_f2_f(t, pow2if(q)); // both parts scaled by pow2if(q) — presumably 2^q; confirm in pow2if
+}
+
+float xpowf(float x, float y) { // x raised to y via exp(y * log|x|), followed by sign and special-case fix-ups
+  // Every float with |y| >= 2^24 is an even integer; testing that first keeps (int)y in range (and skips NaN).
+  int yisint = !(xfabsf(y) < 16777216.0f) || (int)y == y;
+  int yisodd = xfabsf(y) < 16777216.0f && (int)y == y && (1 & (int)y) != 0;
+  float result = expkf(dfmul_f2_f2_f(logkf(xfabsf(x)), y));
+  // A NaN from the kernels is treated as overflow; a negative base follows the parity of y.
+  result = xisnanf(result) ? INFINITYf : result;
+  result *=  (x >= 0 ? 1 : (!yisint ? NANf : (yisodd ? -1 : 1)));
+  // Special operands; a later line overrides an earlier one.
+  float efx = mulsignf(xfabsf(x) - 1, y);
+  if (xisinff(y)) result = efx < 0 ? 0.0f : (efx == 0 ? 1.0f : INFINITYf);
+  if (xisinff(x) || x == 0) result = (yisodd ? signf(x) : 1) * ((x == 0 ? -y : y) < 0 ? 0 : INFINITYf);
+  if (xisnanf(x) || xisnanf(y)) result = NANf;
+  if (y == 0 || x == 1) result = 1;
+
+  return result;
+}
+
+float xsinhf(float x) { // sinh from e = expk2f(|x|): (e - 1/e) / 2, sign reapplied at the end
+  float y = xfabsf(x);
+  float2 d = expk2f(df(y, 0));
+  d = dfsub_f2_f2_f2(d, dfrec_f2_f2(d));
+  y = (d.x + d.y) * 0.5f;
+  // Saturate to infinity for large |x| or a NaN intermediate, using the same float constant in both places.
+  y = xfabsf(x) > 89 ? INFINITYf : y;
+  y = xisnanf(y) ? INFINITYf : y;
+  y = mulsignf(y, x);
+  y = xisnanf(x) ? NANf : y;
+
+  return y;
+}
+
+float xcoshf(float x) { // cosh from e = expk2f(|x|): (e + 1/e) / 2
+  float y = xfabsf(x);
+  float2 d = expk2f(df(y, 0));
+  d = dfadd_f2_f2_f2(d, dfrec_f2_f2(d));
+  y = (d.x + d.y) * 0.5f;
+  // Saturate to infinity for large |x| or a NaN intermediate, using the same float constant in both places.
+  y = xfabsf(x) > 89 ? INFINITYf : y;
+  y = xisnanf(y) ? INFINITYf : y;
+  y = xisnanf(x) ? NANf : y;
+
+  return y;
+}
+
+float xtanhf(float x) { // tanh from e = expk2f(|x|): (e - 1/e) / (e + 1/e), sign reapplied at the end
+  float y = xfabsf(x);
+  float2 d = expk2f(df(y, 0));
+  float2 e = dfrec_f2_f2(d);
+  d = dfdiv_f2_f2_f2(dfsub_f2_f2_f2(d, e), dfadd_f2_f2_f2(d, e));
+  y = d.x + d.y;
+  // Large |x| and a NaN intermediate are both pinned to 1 before the sign is applied.
+  y = xfabsf(x) > 8.664339742f ? 1.0f : y;
+  y = xisnanf(y) ? 1.0f : y;
+  y = mulsignf(y, x);
+  y = xisnanf(x) ? NANf : y;
+
+  return y;
+}
+
+static inline float2 logk2f(float2 d) { // logkf for a double-float operand; used by xasinhf, xacoshf, xatanhf, xlog1pf
+  float2 x, x2, m;
+  float t;
+  int e;
+  // e is taken from the leading part only; both parts of d are then scaled by pow2if(-e).
+  e = ilogbp1f(d.x * 0.7071f);
+  m = dfscale_f2_f2_f(d, pow2if(-e));
+  // x = (m - 1) / (m + 1) in double-float, and its square.
+  x = dfdiv_f2_f2_f2(dfadd2_f2_f2_f(m, -1), dfadd2_f2_f2_f(m, 1));
+  x2 = dfsqu_f2_f2(x);
+
+  t = 0.2371599674224853515625f;
+  t = mlaf(t, x2.x, 0.285279005765914916992188f);
+  t = mlaf(t, x2.x, 0.400005519390106201171875f);
+  t = mlaf(t, x2.x, 0.666666567325592041015625f);
+  // Result: e * ln 2 (high/low float pair) + 2 * x + x^3 * t.
+  return dfadd2_f2_f2_f2(dfmul_f2_f2_f(df(0.69314718246459960938f, -1.904654323148236017e-09f), e),
+			 dfadd2_f2_f2_f2(dfscale_f2_f2_f(x, 2), dfmul_f2_f2_f(dfmul_f2_f2_f2(x2, x), t)));
+}
+
+float xasinhf(float x) { // asinh via logk2f(sqrt(y*y + 1) + y) with y = |x|, sign reapplied at the end
+  float y = xfabsf(x);
+  float2 d = logk2f(dfadd2_f2_f2_f(dfsqrt_f2_f2(dfadd2_f2_f2_f(dfmul_f2_f_f(y, y),  1)), y));
+  y = d.x + d.y;
+  // Infinite input or a NaN intermediate yields infinity; a NaN input yields NaN.
+  y = xisinff(x) || xisnanf(y) ? INFINITYf : y;
+  y = mulsignf(y, x);
+  y = xisnanf(x) ? NANf : y;
+
+  return y;
+}
+
+float xacoshf(float x) { // acosh via logk2f(sqrt(x*x - 1) + x)
+  float2 d = logk2f(dfadd2_f2_f2_f(dfsqrt_f2_f2(dfadd2_f2_f2_f(dfmul_f2_f_f(x, x), -1)), x));
+  float y = d.x + d.y;
+  // Overrides, later ones winning: infinity or NaN intermediate -> inf, x == 1 -> 0, x < 1 -> NaN, NaN input -> NaN.
+  y = xisinff(x) || xisnanf(y) ? INFINITYf : y;
+  y = x == 1.0f ? 0.0f : y;
+  y = x < 1.0f ? NANf : y;
+  y = xisnanf(x) ? NANf : y;
+
+  return y;
+}
+
+float xatanhf(float x) { // atanh via 0.5 * logk2f((1 + y) / (1 - y)) with y = |x|, sign reapplied at the end
+  float y = xfabsf(x);
+  float2 d = logk2f(dfdiv_f2_f2_f2(dfadd2_f2_f_f(1, y), dfadd2_f2_f_f(1, -y)));
+  y = y > 1.0f ? NANf : (y == 1.0f ? INFINITYf : (d.x + d.y) * 0.5f);
+  // Float literals above keep the comparisons in single precision; their outcomes are unchanged.
+  y = xisinff(x) || xisnanf(y) ? NANf : y;
+  y = mulsignf(y, x);
+  y = xisnanf(x) ? NANf : y;
+
+  return y;
+}
+
+float xexp2f(float a) {
+  // 2^a is evaluated as exp(a * ln 2); ln 2 is supplied as a high/low float pair.
+  const float2 ln2 = df(0.69314718246459960938f, -1.904654323148236017e-09f);
+  const float r = expkf(dfmul_f2_f2_f(ln2, a));
+  return xisminff(a) ? 0 : (xispinff(a) ? INFINITYf : r);
+}
+
+float xexp10f(float a) {
+  // 10^a is evaluated as exp(a * ln 10); ln 10 is supplied as a high/low float pair.
+  const float2 ln10 = df(2.3025851249694824219f, -3.1975436520781386207e-08f);
+  const float r = expkf(dfmul_f2_f2_f(ln10, a));
+  return xisminff(a) ? 0 : (xispinff(a) ? INFINITYf : r);
+}
+
+float xexpm1f(float a) {
+  const float2 em1 = dfadd2_f2_f2_f(expk2f(df(a, 0)), -1.0f);
+  // Outside these two bounds the result is pinned to -1 or +infinity.
+  if (a < -0.15942385152878742116596338793538061065739925620174e+2f) return -1;
+  if (a > 88.0f) return INFINITYf;
+  return em1.x + em1.y;
+}
+
+float xlog10f(float a) { // log10 as logkf(a) times a high/low float pair summing to about 0.4342944819 (1 / ln 10)
+  float2 d = dfmul_f2_f2_f2(logkf(a), df(0.43429449200630187988f, -1.0103050118726031315e-08f));
+  float x = d.x + d.y;
+  // Special operands; the last matching line wins (so negative infinity ends up NaN).
+  if (xisinff(a)) x = INFINITYf;
+  if (a < 0) x = NANf;
+  if (a == 0) x = -INFINITYf;
+
+  return x;
+}
+
+float xlog1pf(float a) {
+  const float2 lg = logk2f(dfadd2_f2_f_f(a, 1));
+
+  // Priority of the special operands: a == -1, then a < -1, then infinite a.
+  if (a == -1) return -INFINITYf;
+  if (a < -1) return NANf;
+  if (xisinff(a)) return INFINITYf;
+
+  return lg.x + lg.y;
+}
+
+float xsqrtf(float f) { return sqrtf(f); } // thin wrapper: forwards to sqrtf unchanged
+
+float xcbrtf(float d) { // single-precision cube root
+  float x, y, q = 1.0f;
+  int e, r;
+  // Split off the exponent e; r = e mod 3 picks the factor 1, 2^(1/3) or 2^(2/3) (6144 = 3 * 2048 keeps the operand non-negative).
+  e = ilogbp1f(d);
+  d = ldexpkf(d, -e);
+  r = (e + 6144) % 3;
+  q = (r == 1) ? 1.2599210498948731647672106f : q;
+  q = (r == 2) ? 1.5874010519681994747517056f : q;
+  q = ldexpkf(q, (e + 6144) / 3 - 2048);
+  // q takes the sign of d (via mulsignf); the polynomial below then works on |d|.
+  q = mulsignf(q, d);
+  d = xfabsf(d);
+
+  x = -0.601564466953277587890625f;
+  x = mlaf(x, d, 2.8208892345428466796875f);
+  x = mlaf(x, d, -5.532182216644287109375f);
+  x = mlaf(x, d, 5.898262500762939453125f);
+  x = mlaf(x, d, -3.8095417022705078125f);
+  x = mlaf(x, d, 2.2241256237030029296875f);
+  // NOTE(review): x looks like an estimate of d^(-1/3), making y = d * x * x an estimate of d^(1/3) refined once — confirm.
+  y = d * x * x;
+  y = (y - (2.0f / 3.0f) * y * (y * x - 1.0f)) * q;
+
+  return y;
+}
+
+float xcbrtf_u1(float d) { // cube root using double-float arithmetic for the final refinement
+  float x, y, z;
+  float2 q2 = df(1, 0), u, v;
+  int e, r;
+  // Same exponent split as xcbrtf; the factor for r = e mod 3 is held as a high/low pair in q2.
+  e = ilogbp1f(d);
+  d = ldexpkf(d, -e);
+  r = (e + 6144) % 3;
+  q2 = (r == 1) ? df(1.2599210739135742188, -2.4018701694217270415e-08) : q2;
+  q2 = (r == 2) ? df(1.5874010324478149414,  1.9520385308169352356e-08) : q2;
+  // Both parts of q2 take the sign of d; from here on d is |d|.
+  q2.x = mulsignf(q2.x, d); q2.y = mulsignf(q2.y, d);
+  d = xfabsf(d);
+
+  x = -0.601564466953277587890625f;
+  x = mlaf(x, d, 2.8208892345428466796875f);
+  x = mlaf(x, d, -5.532182216644287109375f);
+  x = mlaf(x, d, 5.898262500762939453125f);
+  x = mlaf(x, d, -3.8095417022705078125f);
+  x = mlaf(x, d, 2.2241256237030029296875f);
+  // One correction step on x; (1.0 / 3.0f) is a double, so this update is evaluated in double before narrowing.
+  y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0f);
+
+  z = x;
+  // u = d * x^4 - x, formed in double-float and then collapsed to y.
+  u = dfmul_f2_f_f(x, x);
+  u = dfmul_f2_f2_f2(u, u);
+  u = dfmul_f2_f2_f(u, d);
+  u = dfadd2_f2_f2_f(u, -x);
+  y = u.x + u.y;
+  // v = (z^2 + y) * d * q2 with y = -2/3 * y * z (double literals again); the exponent is reapplied with ldexpf.
+  y = -2.0 / 3.0 * y * z;
+  v = dfadd2_f2_f2_f(dfmul_f2_f_f(z, z), y);
+  v = dfmul_f2_f2_f(v, d);
+  v = dfmul_f2_f2_f2(v, q2);
+  z = ldexpf(v.x + v.y, (e + 6144) / 3 - 2048);
+  // These checks read the rescaled |d|, not the caller's value; the sign comes from q2.x.
+  if (xisinff(d)) { z = mulsignf(INFINITYf, q2.x); }
+  if (d == 0) { z = mulsignf(0, q2.x); }
+
+  return z;
+}
diff --git a/simd/Makefile b/simd/Makefile
new file mode 100644
index 00000000..d6bf9934
--- /dev/null
+++ b/simd/Makefile
@@ -0,0 +1,56 @@
+CC=gcc
+OPT=-O -Wall -Wno-unused -Wno-attributes
+SDE=/opt/sde-bdw-external-5.38.0-2013-01-03-lin/sde
+.PHONY : all testsse2 testavx testavx2 testfma4 clean
+all : testsse2 testavx
+
+iutsse2 : sleefsimddp.c sleefsimdsp.c helpersse2.h iut.c
+	$(CC) $(OPT) -DENABLE_SSE2 -msse2 iut.c sleefsimddp.c sleefsimdsp.c -o iutsse2 -lm
+
+iutavx : sleefsimddp.c sleefsimdsp.c helperavx.h iut.c
+	$(CC) $(OPT) -DENABLE_AVX -mavx iut.c sleefsimddp.c sleefsimdsp.c -o iutavx -lm
+
+iutavx2 : sleefsimddp.c sleefsimdsp.c helperavx2.h iut.c
+	$(CC) $(OPT) -DENABLE_AVX2 -mavx2 -mfma iut.c sleefsimddp.c sleefsimdsp.c -o iutavx2 -lm
+
+iutfma4 : sleefsimddp.c sleefsimdsp.c helperfma4.h iut.c
+	$(CC) $(OPT) -DENABLE_FMA4 -mavx -mfma4 iut.c sleefsimddp.c sleefsimdsp.c -o iutfma4 -lm
+# Tester binaries live in ../tester; && stops the recipe if cd fails, $(MAKE) keeps the parent make's settings.
+../tester/tester :
+	cd ../tester && $(MAKE) tester
+
+../tester/testeru1 :
+	cd ../tester && $(MAKE) testeru1
+
+../tester/testersp :
+	cd ../tester && $(MAKE) testersp
+
+../tester/testerspu1 :
+	cd ../tester && $(MAKE) testerspu1
+# Each test target runs the four testers against the matching iut binary.
+testsse2 : iutsse2 ../tester/tester ../tester/testeru1 ../tester/testersp ../tester/testerspu1
+	../tester/tester ./iutsse2
+	../tester/testeru1 ./iutsse2
+	../tester/testersp ./iutsse2
+	../tester/testerspu1 ./iutsse2
+
+testavx : iutavx ../tester/tester ../tester/testeru1 ../tester/testersp ../tester/testerspu1
+	../tester/tester ./iutavx
+	../tester/testeru1 ./iutavx
+	../tester/testersp ./iutavx
+	../tester/testerspu1 ./iutavx
+# The AVX2 binary is launched through the tool named by $(SDE).
+testavx2 : iutavx2 ../tester/tester ../tester/testeru1 ../tester/testersp ../tester/testerspu1
+	../tester/tester $(SDE) -- ./iutavx2
+	../tester/testeru1 $(SDE) -- ./iutavx2
+	../tester/testersp $(SDE) -- ./iutavx2
+	../tester/testerspu1 $(SDE) -- ./iutavx2
+
+testfma4 : iutfma4 ../tester/tester ../tester/testeru1 ../tester/testersp ../tester/testerspu1
+	../tester/tester ./iutfma4
+	../tester/testeru1 ./iutfma4
+	../tester/testersp ./iutfma4
+	../tester/testerspu1 ./iutfma4
+
+clean :
+	rm -f *~ *.o *.s iutsse2 iutavx iutavx2 iutfma4 iutneon
diff --git a/simd/Makefile.arm b/simd/Makefile.arm
new file mode 100644
index 00000000..e8955822
--- /dev/null
+++ b/simd/Makefile.arm
@@ -0,0 +1,26 @@
+CC=arm-linux-gnueabi-gcc
+OPT=-O -Wall -Wno-unused -Wno-attributes -mfloat-abi=softfp -mfpu=neon -static
+.PHONY : all testneon clean
+all : testneon
+
+iutneon : sleefsimdsp.c helperneon.h iut.c
+	$(CC) $(OPT) -DENABLE_NEON iut.c sleefsimdsp.c -o iutneon -lm
+# Tester binaries live in ../tester; && stops the recipe if cd fails, $(MAKE) keeps the parent make's settings.
+../tester/tester :
+	cd ../tester && $(MAKE) tester
+
+../tester/testeru1 :
+	cd ../tester && $(MAKE) testeru1
+
+../tester/testersp :
+	cd ../tester && $(MAKE) testersp
+
+../tester/testerspu1 :
+	cd ../tester && $(MAKE) testerspu1
+
+testneon : iutneon ../tester/testersp ../tester/testerspu1
+	../tester/testersp --flushtozero ./iutneon
+	../tester/testerspu1 --flushtozero ./iutneon
+
+clean :
+	rm -f *~ *.o *.s iutsse2 iutavx iutavx2 iutfma4 iutneon
diff --git a/simd/Makefile.icc b/simd/Makefile.icc
new file mode 100644
index 00000000..7a73238d
--- /dev/null
+++ b/simd/Makefile.icc
@@ -0,0 +1,44 @@
+CC=/opt/intel/bin/icc
+SDE=/opt/sde-bdw-external-5.38.0-2013-01-03-lin/sde
+.PHONY : testsse2 testavx testavx2 clean
+iutsse2 : sleefsimdsp.c sleefsimddp.c helpersse2.h iut.c
+	$(CC) -DENABLE_SSE2 -fp-model precise -Wall -Wno-unused -O -msse2 iut.c sleefsimdsp.c sleefsimddp.c -o iutsse2 -lm
+
+iutavx : sleefsimdsp.c sleefsimddp.c helperavx.h iut.c
+	$(CC) -DENABLE_AVX -fp-model precise -Wall -Wno-unused -O -mavx iut.c sleefsimdsp.c sleefsimddp.c -o iutavx -lm
+
+iutavx2 : sleefsimdsp.c sleefsimddp.c helperavx2.h iut.c
+	$(CC) -DENABLE_AVX2 -fp-model precise -Wall -Wno-unused -O -march=core-avx2 iut.c sleefsimdsp.c sleefsimddp.c -o iutavx2 -lm
+# Tester binaries live in ../tester; && stops the recipe if cd fails, $(MAKE) keeps the parent make's settings.
+../tester/tester :
+	cd ../tester && $(MAKE) tester
+
+../tester/testeru1 :
+	cd ../tester && $(MAKE) testeru1
+
+../tester/testersp :
+	cd ../tester && $(MAKE) testersp
+
+../tester/testerspu1 :
+	cd ../tester && $(MAKE) testerspu1
+
+testsse2 : iutsse2 ../tester/tester ../tester/testersp ../tester/testeru1 ../tester/testerspu1
+	../tester/tester ./iutsse2
+	../tester/testeru1 ./iutsse2
+	../tester/testersp ./iutsse2
+	../tester/testerspu1 ./iutsse2
+
+testavx : iutavx ../tester/tester ../tester/testersp ../tester/testeru1 ../tester/testerspu1
+	../tester/tester ./iutavx
+	../tester/testeru1 ./iutavx
+	../tester/testersp ./iutavx
+	../tester/testerspu1 ./iutavx
+
+testavx2 : iutavx2 ../tester/tester ../tester/testersp ../tester/testeru1 ../tester/testerspu1
+	../tester/tester $(SDE) -- ./iutavx2
+	../tester/testeru1 $(SDE) -- ./iutavx2
+	../tester/testersp $(SDE) -- ./iutavx2
+	../tester/testerspu1 $(SDE) -- ./iutavx2
+
+clean :
+	rm -f *~ *.o *.s iutsse2 iutavx iutavx2 iutneon
diff --git a/simd/dd.h b/simd/dd.h
new file mode 100644
index 00000000..696c49cb
--- /dev/null
+++ b/simd/dd.h
@@ -0,0 +1,342 @@
+typedef struct { // pair of vdouble values used as one extended-precision number
+  vdouble x, y; // x holds the high part and y the low part (see the h, l arguments of vcast_vd2_vd_vd)
+} vdouble2;
+
+static INLINE vdouble2 vcast_vd2_vd_vd(vdouble h, vdouble l) { // build a vdouble2 from a high part h and a low part l
+  vdouble2 ret = {h, l};
+  return ret;
+}
+
+static INLINE vdouble2 vcast_vd2_d_d(double h, double l) { // build a vdouble2 from scalars, each passed through vcast_vd_d
+  vdouble2 ret = {vcast_vd_d(h), vcast_vd_d(l)};
+  return ret;
+}
+
+static INLINE vdouble2 vsel_vd2_vm_vd2_vd2(vmask m, vdouble2 x, vdouble2 y) { // vsel_vd_vm_vd_vd applied to both parts with the same mask
+  vdouble2 r;
+  r.x = vsel_vd_vm_vd_vd(m, x.x, y.x);
+  r.y = vsel_vd_vm_vd_vd(m, x.y, y.y);
+  return r;
+}
+
+static INLINE vdouble vadd_vd_3vd(vdouble v0, vdouble v1, vdouble v2) { // (v0 + v1) + v2
+  return vadd_vd_vd_vd(vadd_vd_vd_vd(v0, v1), v2);
+}
+
+static INLINE vdouble vadd_vd_4vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3) { // left-to-right sum of four terms
+  return vadd_vd_3vd(vadd_vd_vd_vd(v0, v1), v2, v3);
+}
+
+static INLINE vdouble vadd_vd_5vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4) { // left-to-right sum of five terms
+  return vadd_vd_4vd(vadd_vd_vd_vd(v0, v1), v2, v3, v4);
+}
+
+static INLINE vdouble vadd_vd_6vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4, vdouble v5) { // left-to-right sum of six terms
+  return vadd_vd_5vd(vadd_vd_vd_vd(v0, v1), v2, v3, v4, v5);
+}
+
+static INLINE vdouble vadd_vd_7vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4, vdouble v5, vdouble v6) { // left-to-right sum of seven terms
+  return vadd_vd_6vd(vadd_vd_vd_vd(v0, v1), v2, v3, v4, v5, v6);
+}
+
+static INLINE vdouble vsub_vd_3vd(vdouble v0, vdouble v1, vdouble v2) { // (v0 - v1) - v2
+  return vsub_vd_vd_vd(vsub_vd_vd_vd(v0, v1), v2);
+}
+
+static INLINE vdouble vsub_vd_4vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3) { // v0 minus v1..v3, left to right
+  return vsub_vd_3vd(vsub_vd_vd_vd(v0, v1), v2, v3);
+}
+
+static INLINE vdouble vsub_vd_5vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4) { // v0 minus v1..v4, left to right
+  return vsub_vd_4vd(vsub_vd_vd_vd(v0, v1), v2, v3, v4);
+}
+
+static INLINE vdouble vsub_vd_6vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4, vdouble v5) { // v0 minus v1..v5, left to right
+  return vsub_vd_5vd(vsub_vd_vd_vd(v0, v1), v2, v3, v4, v5);
+}
+
+//
+
+static INLINE vdouble2 ddneg_vd2_vd2(vdouble2 x) { // negate both parts
+  return vcast_vd2_vd_vd(vneg_vd_vd(x.x), vneg_vd_vd(x.y));
+}
+
+static INLINE vdouble2 ddnormalize_vd2_vd2(vdouble2 t) {
+  // hi = t.x + t.y; lo = (t.x - hi) + t.y.
+  vdouble hi = vadd_vd_vd_vd(t.x, t.y);
+  vdouble lo = vadd_vd_vd_vd(vsub_vd_vd_vd(t.x, hi), t.y);
+
+  vdouble2 s = {hi, lo};
+  return s;
+}
+
+static INLINE vdouble2 ddscale_vd2_vd2_vd(vdouble2 d, vdouble s) { // multiply each part by s independently
+  vdouble2 r = {vmul_vd_vd_vd(d.x, s), vmul_vd_vd_vd(d.y, s)};
+  return r;
+}
+
+static INLINE vdouble2 ddadd_vd2_vd_vd(vdouble x, vdouble y) {
+  // hi = x + y; lo = (x - hi) + y.
+  vdouble hi = vadd_vd_vd_vd(x, y);
+  vdouble lo = vadd_vd_vd_vd(vsub_vd_vd_vd(x, hi), y);
+
+  vdouble2 r = {hi, lo};
+  return r;
+}
+
+static INLINE vdouble2 ddadd2_vd2_vd_vd(vdouble x, vdouble y) {
+  // hi = x + y; lo = (x - (hi - v)) + (y - v) with v = hi - x.
+  vdouble hi = vadd_vd_vd_vd(x, y);
+  vdouble v = vsub_vd_vd_vd(hi, x);
+  vdouble lo = vadd_vd_vd_vd(vsub_vd_vd_vd(x, vsub_vd_vd_vd(hi, v)), vsub_vd_vd_vd(y, v));
+
+  vdouble2 r = {hi, lo};
+  return r;
+}
+
+static INLINE vdouble2 ddadd_vd2_vd2_vd(vdouble2 x, vdouble y) {
+  // hi = x.x + y; lo = ((x.x - hi) + y) + x.y.
+  vdouble hi = vadd_vd_vd_vd(x.x, y);
+  vdouble lo = vadd_vd_3vd(vsub_vd_vd_vd(x.x, hi), y, x.y);
+
+  vdouble2 r = {hi, lo};
+  return r;
+}
+
+static INLINE vdouble2 ddadd2_vd2_vd2_vd(vdouble2 x, vdouble y) {
+  // hi = x.x + y; lo = ((x.x - (hi - v)) + (y - v)) + x.y with v = hi - x.x.
+  vdouble hi = vadd_vd_vd_vd(x.x, y);
+  vdouble v = vsub_vd_vd_vd(hi, x.x);
+  vdouble lo = vadd_vd_vd_vd(vsub_vd_vd_vd(x.x, vsub_vd_vd_vd(hi, v)), vsub_vd_vd_vd(y, v));
+
+  lo = vadd_vd_vd_vd(lo, x.y);
+  vdouble2 r = {hi, lo};
+  return r;
+}
+
+static INLINE vdouble2 ddadd_vd2_vd_vd2(vdouble x, vdouble2 y) {
+  // hi = x + y.x; lo = ((x - hi) + y.x) + y.y.
+  vdouble hi = vadd_vd_vd_vd(x, y.x);
+  vdouble lo = vadd_vd_3vd(vsub_vd_vd_vd(x, hi), y.x, y.y);
+
+  vdouble2 r = {hi, lo};
+  return r;
+}
+
+static INLINE vdouble2 ddadd_vd2_vd2_vd2(vdouble2 x, vdouble2 y) {
+  // Requires |x| >= |y|.
+  // hi = x.x + y.x; lo = (((x.x - hi) + y.x) + x.y) + y.y.
+  vdouble hi = vadd_vd_vd_vd(x.x, y.x);
+  vdouble lo = vadd_vd_4vd(vsub_vd_vd_vd(x.x, hi), y.x, x.y, y.y);
+
+  vdouble2 r = {hi, lo};
+
+  return r;
+}
+
+static INLINE vdouble2 ddadd2_vd2_vd2_vd2(vdouble2 x, vdouble2 y) {
+  // hi = x.x + y.x; lo = ((x.x - (hi - v)) + (y.x - v)) + (x.y + y.y) with v = hi - x.x.
+  vdouble hi = vadd_vd_vd_vd(x.x, y.x);
+  vdouble v = vsub_vd_vd_vd(hi, x.x);
+  vdouble lo = vadd_vd_vd_vd(vsub_vd_vd_vd(x.x, vsub_vd_vd_vd(hi, v)), vsub_vd_vd_vd(y.x, v));
+
+  lo = vadd_vd_vd_vd(lo, vadd_vd_vd_vd(x.y, y.y));
+  vdouble2 r = {hi, lo};
+  return r;
+}
+
+static INLINE vdouble2 ddsub_vd2_vd_vd(vdouble x, vdouble y) { // x - y as a pair; INLINE macro as on the sibling helpers
+  // |x| >= |y|
+
+  vdouble2 r;
+  // hi = x - y; lo = (x - hi) - y.
+  r.x = vsub_vd_vd_vd(x, y);
+  r.y = vsub_vd_vd_vd(vsub_vd_vd_vd(x, r.x), y);
+
+  return r;
+}
+
+static INLINE vdouble2 ddsub_vd2_vd2_vd2(vdouble2 x, vdouble2 y) {
+  // Requires |x| >= |y|.
+  // hi = x.x - y.x; lo = (((x.x - hi) - y.x) + x.y) - y.y, built one step at a time.
+  vdouble hi = vsub_vd_vd_vd(x.x, y.x);
+  vdouble lo = vsub_vd_vd_vd(x.x, hi);
+
+  lo = vsub_vd_vd_vd(lo, y.x);
+  lo = vadd_vd_vd_vd(lo, x.y);
+  lo = vsub_vd_vd_vd(lo, y.y);
+
+  vdouble2 r = {hi, lo};
+  return r;
+}
+
+#if 0 // disabled: single-expression form of ddsub_vd2_vd2_vd2; the active definition is the step-by-step one above
+static inline vdouble2 ddsub_vd2_vd2_vd2(vdouble2 x, vdouble2 y) {
+  // |x| >= |y|
+
+  vdouble2 r;
+
+  r.x = vsub_vd_vd_vd(x.x, y.x);
+  r.y = vsub_vd_vd_vd(vadd_vd_vd_vd(vsub_vd_vd_vd(vsub_vd_vd_vd(x.x, r.x), y.x), x.y), y.y);
+
+  return r;
+}
+#endif
+
+#ifdef ENABLE_FMA_DP
+static INLINE vdouble2 dddiv_vd2_vd2_vd2(vdouble2 n, vdouble2 d) { // n / d on pairs; variant compiled when ENABLE_FMA_DP is defined
+  vdouble2 q;
+  vdouble t = vrec_vd_vd(d.x), u; // t: vrec_vd_vd of the high part of d
+
+  q.x = vmul_vd_vd_vd(n.x, t);
+  u = vfmapn_vd_vd_vd_vd(t, n.x, q.x); // presumably t * n.x - q.x, the rounding error of q.x — confirm vfmapn semantics
+  q.y = vfmanp_vd_vd_vd_vd(d.y, t, vfmanp_vd_vd_vd_vd(d.x, t, vcast_vd_d(1))); // presumably 1 - d.x * t - d.y * t — confirm vfmanp semantics
+  q.y = vfma_vd_vd_vd_vd(q.x, q.y, vfma_vd_vd_vd_vd(n.y, t, u));
+
+  return q;
+}
+
+static INLINE vdouble2 ddmul_vd2_vd_vd(vdouble x, vdouble y) { // x * y as a pair; variant compiled when ENABLE_FMA_DP is defined
+  vdouble2 r;
+
+  r.x = vmul_vd_vd_vd(x, y);
+  r.y = vfmapn_vd_vd_vd_vd(x, y, r.x); // presumably x * y - r.x in one fused step — confirm vfmapn semantics
+
+  return r;
+}
+
+static INLINE vdouble2 ddsqu_vd2_vd2(vdouble2 x) { // square of a pair; variant compiled when ENABLE_FMA_DP is defined
+  vdouble2 r;
+
+  r.x = vmul_vd_vd_vd(x.x, x.x);
+  r.y = vfma_vd_vd_vd_vd(vadd_vd_vd_vd(x.x, x.x), x.y, vfmapn_vd_vd_vd_vd(x.x, x.x, r.x)); // cross term (x.x + x.x) * x.y plus the vfmapn term of x.x * x.x
+
+  return r;
+}
+
+static INLINE vdouble2 ddmul_vd2_vd2_vd2(vdouble2 x, vdouble2 y) { // product of two pairs; variant compiled when ENABLE_FMA_DP is defined
+  vdouble2 r;
+
+  r.x = vmul_vd_vd_vd(x.x, y.x);
+  r.y = vfma_vd_vd_vd_vd(x.x, y.y, vfma_vd_vd_vd_vd(x.y, y.x, vfmapn_vd_vd_vd_vd(x.x, y.x, r.x))); // cross terms x.x*y.y and x.y*y.x plus the vfmapn term; x.y*y.y is not included
+
+  return r;
+}
+
+static INLINE vdouble2 ddmul_vd2_vd2_vd(vdouble2 x, vdouble y) { // pair times single vdouble; variant compiled when ENABLE_FMA_DP is defined
+  vdouble2 r;
+
+  r.x = vmul_vd_vd_vd(x.x, y);
+  r.y = vfma_vd_vd_vd_vd(x.y, y, vfmapn_vd_vd_vd_vd(x.x, y, r.x)); // x.y * y plus the vfmapn term of x.x * y
+
+  return r;
+}
+
+static INLINE vdouble2 ddrec_vd2_vd(vdouble d) { // 1 / d as a pair (ENABLE_FMA_DP variant); INLINE macro as on the non-FMA twin
+  vdouble2 q;
+
+  q.x = vrec_vd_vd(d);
+  q.y = vmul_vd_vd_vd(q.x, vfmanp_vd_vd_vd_vd(d, q.x, vcast_vd_d(1))); // q.x times the residual, presumably 1 - d * q.x — confirm vfmanp semantics
+
+  return q;
+}
+
+static INLINE vdouble2 ddrec_vd2_vd2(vdouble2 d) { // 1 / d for a pair; variant compiled when ENABLE_FMA_DP is defined
+  vdouble2 q;
+
+  q.x = vrec_vd_vd(d.x);
+  q.y = vmul_vd_vd_vd(q.x, vfmanp_vd_vd_vd_vd(d.y, q.x, vfmanp_vd_vd_vd_vd(d.x, q.x, vcast_vd_d(1)))); // q.x times the residual, presumably 1 - d.x * q.x - d.y * q.x — confirm
+
+  return q;
+}
+#else
+static INLINE vdouble2 dddiv_vd2_vd2_vd2(vdouble2 n, vdouble2 d) { // n / d on pairs; variant compiled when ENABLE_FMA_DP is not defined
+  vdouble t = vrec_vd_vd(d.x);
+  vdouble dh  = vupper_vd_vd(d.x), dl  = vsub_vd_vd_vd(d.x,  dh); // each operand is split as vupper part + remainder
+  vdouble th  = vupper_vd_vd(t  ), tl  = vsub_vd_vd_vd(t  ,  th);
+  vdouble nhh = vupper_vd_vd(n.x), nhl = vsub_vd_vd_vd(n.x, nhh);
+
+  vdouble2 q;
+
+  q.x = vmul_vd_vd_vd(n.x, t);
+  // u = (n.x * t - q.x) from the split halves, plus q.x * (1 - d.x * t) from the split halves.
+  vdouble u = vadd_vd_5vd(vsub_vd_vd_vd(vmul_vd_vd_vd(nhh, th), q.x), vmul_vd_vd_vd(nhh, tl), vmul_vd_vd_vd(nhl, th), vmul_vd_vd_vd(nhl, tl),
+		    vmul_vd_vd_vd(q.x, vsub_vd_5vd(vcast_vd_d(1), vmul_vd_vd_vd(dh, th), vmul_vd_vd_vd(dh, tl), vmul_vd_vd_vd(dl, th), vmul_vd_vd_vd(dl, tl))));
+
+  q.y = vmla_vd_vd_vd_vd(t, vsub_vd_vd_vd(n.y, vmul_vd_vd_vd(q.x, d.y)), u); // presumably t * (n.y - q.x * d.y) + u — confirm vmla semantics
+
+  return q;
+}
+
+static INLINE vdouble2 ddmul_vd2_vd_vd(vdouble x, vdouble y) { // x * y as a pair; variant compiled when ENABLE_FMA_DP is not defined
+  vdouble xh = vupper_vd_vd(x), xl = vsub_vd_vd_vd(x, xh); // split each operand as vupper part + remainder
+  vdouble yh = vupper_vd_vd(y), yl = vsub_vd_vd_vd(y, yh);
+  vdouble2 r;
+
+  r.x = vmul_vd_vd_vd(x, y);
+  r.y = vadd_vd_5vd(vmul_vd_vd_vd(xh, yh), vneg_vd_vd(r.x), vmul_vd_vd_vd(xl, yh), vmul_vd_vd_vd(xh, yl), vmul_vd_vd_vd(xl, yl)); // sum of the four partial products minus r.x
+
+  return r;
+}
+
+static INLINE vdouble2 ddmul_vd2_vd2_vd(vdouble2 x, vdouble y) { // pair times single vdouble; variant compiled when ENABLE_FMA_DP is not defined
+  vdouble xh = vupper_vd_vd(x.x), xl = vsub_vd_vd_vd(x.x, xh); // split x.x and y as vupper part + remainder
+  vdouble yh = vupper_vd_vd(y  ), yl = vsub_vd_vd_vd(y  , yh);
+  vdouble2 r;
+
+  r.x = vmul_vd_vd_vd(x.x, y);
+  r.y = vadd_vd_6vd(vmul_vd_vd_vd(xh, yh), vneg_vd_vd(r.x), vmul_vd_vd_vd(xl, yh), vmul_vd_vd_vd(xh, yl), vmul_vd_vd_vd(xl, yl), vmul_vd_vd_vd(x.y, y)); // partial products minus r.x, plus x.y * y
+
+  return r;
+}
+
+static INLINE vdouble2 ddmul_vd2_vd2_vd2(vdouble2 x, vdouble2 y) { // product of two pairs; variant compiled when ENABLE_FMA_DP is not defined
+  vdouble xh = vupper_vd_vd(x.x), xl = vsub_vd_vd_vd(x.x, xh); // split the high parts as vupper part + remainder
+  vdouble yh = vupper_vd_vd(y.x), yl = vsub_vd_vd_vd(y.x, yh);
+  vdouble2 r;
+
+  r.x = vmul_vd_vd_vd(x.x, y.x);
+  r.y = vadd_vd_7vd(vmul_vd_vd_vd(xh, yh), vneg_vd_vd(r.x), vmul_vd_vd_vd(xl, yh), vmul_vd_vd_vd(xh, yl), vmul_vd_vd_vd(xl, yl), vmul_vd_vd_vd(x.x, y.y), vmul_vd_vd_vd(x.y, y.x)); // partial products minus r.x, plus both cross terms; x.y * y.y is not included
+
+  return r;
+}
+
+static INLINE vdouble2 ddsqu_vd2_vd2(vdouble2 x) { // square of a pair; variant compiled when ENABLE_FMA_DP is not defined
+  vdouble xh = vupper_vd_vd(x.x), xl = vsub_vd_vd_vd(x.x, xh); // split x.x as vupper part + remainder
+  vdouble2 r;
+
+  r.x = vmul_vd_vd_vd(x.x, x.x);
+  r.y = vadd_vd_5vd(vmul_vd_vd_vd(xh, xh), vneg_vd_vd(r.x), vmul_vd_vd_vd(vadd_vd_vd_vd(xh, xh), xl), vmul_vd_vd_vd(xl, xl), vmul_vd_vd_vd(x.x, vadd_vd_vd_vd(x.y, x.y))); // xh^2 - r.x + 2*xh*xl + xl^2 + x.x * (2 * x.y)
+
+  return r;
+}
+
+static INLINE vdouble2 ddrec_vd2_vd(vdouble d) { // 1 / d as a pair; variant compiled when ENABLE_FMA_DP is not defined
+  vdouble t = vrec_vd_vd(d);
+  vdouble dh = vupper_vd_vd(d), dl = vsub_vd_vd_vd(d, dh); // split d and t as vupper part + remainder
+  vdouble th = vupper_vd_vd(t), tl = vsub_vd_vd_vd(t, th);
+  vdouble2 q;
+
+  q.x = t;
+  q.y = vmul_vd_vd_vd(t, vsub_vd_5vd(vcast_vd_d(1), vmul_vd_vd_vd(dh, th), vmul_vd_vd_vd(dh, tl), vmul_vd_vd_vd(dl, th), vmul_vd_vd_vd(dl, tl))); // t * (1 - d * t), with d * t taken from the split halves
+
+  return q;
+}
+
+static INLINE vdouble2 ddrec_vd2_vd2(vdouble2 d) { // 1 / d for a pair; variant compiled when ENABLE_FMA_DP is not defined
+  vdouble t = vrec_vd_vd(d.x);
+  vdouble dh = vupper_vd_vd(d.x), dl = vsub_vd_vd_vd(d.x, dh); // split d.x and t as vupper part + remainder
+  vdouble th = vupper_vd_vd(t  ), tl = vsub_vd_vd_vd(t  , th);
+  vdouble2 q;
+
+  q.x = t;
+  q.y = vmul_vd_vd_vd(t, vsub_vd_6vd(vcast_vd_d(1), vmul_vd_vd_vd(dh, th), vmul_vd_vd_vd(dh, tl), vmul_vd_vd_vd(dl, th), vmul_vd_vd_vd(dl, tl), vmul_vd_vd_vd(d.y, t))); // t * (1 - d.x * t - d.y * t), with d.x * t from the split halves
+
+  return q;
+}
+#endif
+
+static INLINE vdouble2 ddsqrt_vd2_vd2(vdouble2 d) { // square root of a pair, refined from a plain vsqrt estimate
+  vdouble t = vsqrt_vd_vd(vadd_vd_vd_vd(d.x, d.y)); // t: vsqrt of the collapsed value d.x + d.y
+  return ddscale_vd2_vd2_vd(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd2(d, ddmul_vd2_vd_vd(t, t)), ddrec_vd2_vd(t)), vcast_vd_d(0.5)); // (d + t*t) * (1/t) * 0.5
+}
diff --git a/simd/df.h b/simd/df.h
new file mode 100644
index 00000000..19779f11
--- /dev/null
+++ b/simd/df.h
@@ -0,0 +1,396 @@
+typedef struct { // pair of vfloat values used by the df* helpers below
+  vfloat x, y; // x is the leading part; y holds the correction term (see dfnormalize_vf2_vf2)
+} vfloat2;
+
+static INLINE vfloat2 vcast_vf2_vf_vf(vfloat h, vfloat l) { // packs h into .x and l into .y
+  vfloat2 pair; pair.x = h; pair.y = l;
+  return pair;
+}
+
+// Converts the scalars h and l with vcast_vf_f and packs them as the .x and .y of a vfloat2.
+static INLINE vfloat2 vcast_vf2_f_f(float h, float l) {
+  return vcast_vf2_vf_vf(vcast_vf_f(h), vcast_vf_f(l));
+}
+
+// Selects between the pairs x and y component-wise: .x and .y each go through vsel_vf_vm_vf_vf with mask m.
+static INLINE vfloat2 vsel_vf2_vm_vf2_vf2(vmask m, vfloat2 x, vfloat2 y) {
+  vfloat hi = vsel_vf_vm_vf_vf(m, x.x, y.x);
+  vfloat lo = vsel_vf_vm_vf_vf(m, x.y, y.y);
+  return vcast_vf2_vf_vf(hi, lo);
+}
+
+static INLINE vfloat2 vabs_vf2_vf2(vfloat2 x) { // absolute value of a pair, decided by the sign of the leading part
+  vmask sign = vand_vm_vm_vm((vmask)vcast_vf_f(-0.0), (vmask)x.x); // sign bit of x.x, zero elsewhere
+  return vcast_vf2_vf_vf((vfloat)vxor_vm_vm_vm(sign, (vmask)x.x), (vfloat)vxor_vm_vm_vm(sign, (vmask)x.y)); // XOR it into both parts
+}
+
+static INLINE vfloat vadd_vf_3vf(vfloat v0, vfloat v1, vfloat v2) { // (v0 + v1) + v2
+  return vadd_vf_vf_vf(vadd_vf_vf_vf(v0, v1), v2);
+}
+
+static INLINE vfloat vadd_vf_4vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3) { // ((v0 + v1) + v2) + v3
+  return vadd_vf_vf_vf(vadd_vf_3vf(v0, v1, v2), v3);
+}
+
+static INLINE vfloat vadd_vf_5vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3, vfloat v4) { // left-to-right sum of five terms
+  return vadd_vf_vf_vf(vadd_vf_4vf(v0, v1, v2, v3), v4);
+}
+
+static INLINE vfloat vadd_vf_6vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3, vfloat v4, vfloat v5) { // left-to-right sum of six terms
+  return vadd_vf_vf_vf(vadd_vf_5vf(v0, v1, v2, v3, v4), v5);
+}
+
+static INLINE vfloat vadd_vf_7vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3, vfloat v4, vfloat v5, vfloat v6) { // left-to-right sum of seven terms
+  return vadd_vf_vf_vf(vadd_vf_6vf(v0, v1, v2, v3, v4, v5), v6);
+}
+
+static INLINE vfloat vsub_vf_3vf(vfloat v0, vfloat v1, vfloat v2) { // (v0 - v1) - v2
+  return vsub_vf_vf_vf(vsub_vf_vf_vf(v0, v1), v2);
+}
+
+static INLINE vfloat vsub_vf_4vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3) { // ((v0 - v1) - v2) - v3
+  return vsub_vf_vf_vf(vsub_vf_3vf(v0, v1, v2), v3);
+}
+
+static INLINE vfloat vsub_vf_5vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3, vfloat v4) { // v0 minus v1..v4, left to right
+  return vsub_vf_vf_vf(vsub_vf_4vf(v0, v1, v2, v3), v4);
+}
+
+// Negates a vfloat2: both components are passed through vneg_vf_vf.
+static INLINE vfloat2 dfneg_vf2_vf2(vfloat2 x) {
+  vfloat2 negated = {vneg_vf_vf(x.x), vneg_vf_vf(x.y)};
+  return negated;
+}
+
+// Renormalizes a pair: the new leading part is t.x + t.y, the new trailing part is (t.x - sum) + t.y.
+static INLINE vfloat2 dfnormalize_vf2_vf2(vfloat2 t) {
+  vfloat sum = vadd_vf_vf_vf(t.x, t.y);
+  // Residual of the addition above (presumably relies on |t.x| >= |t.y| -- confirm against callers).
+  vfloat err = vadd_vf_vf_vf(vsub_vf_vf_vf(t.x, sum), t.y);
+
+  return vcast_vf2_vf_vf(sum, err);
+}
+
+// Multiplies both components of the pair d by s; no correction term is computed here.
+static INLINE vfloat2 dfscale_vf2_vf2_vf(vfloat2 d, vfloat s) {
+  return vcast_vf2_vf_vf(vmul_vf_vf_vf(d.x, s), vmul_vf_vf_vf(d.y, s));
+}
+
+// Adds two vfloat values into a pair: .x = x + y, .y = (x - .x) + y.
+// NOTE(review): dfadd_vf2_vf2_vf2 is annotated "|x| >= |y|"; this variant presumably needs the same -- confirm.
+static INLINE vfloat2 dfadd_vf2_vf_vf(vfloat x, vfloat y) {
+  vfloat sum = vadd_vf_vf_vf(x, y);
+  vfloat err = vadd_vf_vf_vf(vsub_vf_vf_vf(x, sum), y);
+
+  return vcast_vf2_vf_vf(sum, err);
+}
+
+// Adds two vfloat values into a pair: .x = x + y, and with v = .x - x the trailing part is
+// (x - (.x - v)) + (y - v), so the error of both operands is recovered separately.
+static INLINE vfloat2 dfadd2_vf2_vf_vf(vfloat x, vfloat y) {
+  vfloat sum = vadd_vf_vf_vf(x, y);
+  vfloat v = vsub_vf_vf_vf(sum, x);
+  vfloat xErr = vsub_vf_vf_vf(x, vsub_vf_vf_vf(sum, v));
+  vfloat yErr = vsub_vf_vf_vf(y, v);
+  return vcast_vf2_vf_vf(sum, vadd_vf_vf_vf(xErr, yErr));
+}
+
+// Adds the vfloat y to the pair x: .x = x.x + y, .y = ((x.x - .x) + y) + x.y.
+// NOTE(review): dfadd_vf2_vf2_vf2 is annotated "|x| >= |y|"; this variant presumably needs the same -- confirm.
+static INLINE vfloat2 dfadd_vf2_vf2_vf(vfloat2 x, vfloat y) {
+  vfloat sum = vadd_vf_vf_vf(x.x, y);
+  vfloat err = vadd_vf_vf_vf(vsub_vf_vf_vf(x.x, sum), y);
+
+  return vcast_vf2_vf_vf(sum, vadd_vf_vf_vf(err, x.y));
+}
+
+static INLINE vfloat2 dfadd2_vf2_vf2_vf(vfloat2 x, vfloat y) {
+  vfloat2 r;
+
+  r.x = vadd_vf_vf_vf(x.x, y);
+  vfloat v = vsub_vf_vf_vf(r.x, x.x);
+  r.y = vadd_vf_vf_vf(vsub_vf_vf_vf(x.x, vsub_vf_vf_vf(r.x, v)), vsub_vf_vf_vf(y, v));
+  r.y = vadd_vf_vf_vf(r.y, x.y);
+
+  return r;
+}
+
+// Adds the pair y to the vfloat x: .x = x + y.x, .y = ((x - .x) + y.x) + y.y.
+// NOTE(review): dfadd_vf2_vf2_vf2 is annotated "|x| >= |y|"; this variant presumably needs the same -- confirm.
+static INLINE vfloat2 dfadd_vf2_vf_vf2(vfloat x, vfloat2 y) {
+  vfloat sum = vadd_vf_vf_vf(x, y.x);
+  vfloat err = vadd_vf_vf_vf(vsub_vf_vf_vf(x, sum), y.x);
+
+  return vcast_vf2_vf_vf(sum, vadd_vf_vf_vf(err, y.y));
+}
+
+// Adds two pairs; the annotation "|x| >= |y|" below is the caller's precondition.
+// .x = x.x + y.x, .y = (((x.x - .x) + y.x) + x.y) + y.y, summed strictly left to right.
+static INLINE vfloat2 dfadd_vf2_vf2_vf2(vfloat2 x, vfloat2 y) {
+  // |x| >= |y|
+  vfloat sum = vadd_vf_vf_vf(x.x, y.x);
+  vfloat err = vadd_vf_vf_vf(vsub_vf_vf_vf(x.x, sum), y.x);
+  err = vadd_vf_vf_vf(err, x.y);
+  err = vadd_vf_vf_vf(err, y.y);
+  return vcast_vf2_vf_vf(sum, err);
+}
+
+static INLINE vfloat2 dfadd2_vf2_vf2_vf2(vfloat2 x, vfloat2 y) {
+  vfloat2 r;
+
+  r.x  = vadd_vf_vf_vf(x.x, y.x);
+  vfloat v = vsub_vf_vf_vf(r.x, x.x);
+  r.y = vadd_vf_vf_vf(vsub_vf_vf_vf(x.x, vsub_vf_vf_vf(r.x, v)), vsub_vf_vf_vf(y.x, v));
+  r.y = vadd_vf_vf_vf(r.y, vadd_vf_vf_vf(x.y, y.y));
+
+  return r;
+}
+
+// Subtracts y from x and returns the result as a pair: .x = x - y, .y = (x - .x) - y.
+// Declared with the project's INLINE macro (not plain "inline") like the other helpers in this header.
+static INLINE vfloat2 dfsub_vf2_vf_vf(vfloat x, vfloat y) {
+  // |x| >= |y|
+  vfloat2 r;
+
+  r.x = vsub_vf_vf_vf(x, y);
+  r.y = vsub_vf_vf_vf(vsub_vf_vf_vf(x, r.x), y);
+  return r;
+}
+
+// Subtracts the pair y from the pair x; the annotation "|x| >= |y|" below is the caller's precondition.
+// .x = x.x - y.x, .y = (((x.x - .x) - y.x) + x.y) - y.y, evaluated strictly left to right.
+static INLINE vfloat2 dfsub_vf2_vf2_vf2(vfloat2 x, vfloat2 y) {
+  // |x| >= |y|
+  vfloat diff = vsub_vf_vf_vf(x.x, y.x);
+
+  vfloat err = vsub_vf_vf_vf(x.x, diff);
+  err = vsub_vf_vf_vf(err, y.x);
+  err = vadd_vf_vf_vf(err, x.y);
+  err = vsub_vf_vf_vf(err, y.y);
+
+  return vcast_vf2_vf_vf(diff, err);
+}
+
+#ifdef ENABLE_FMA_SP
+static INLINE vfloat2 dfdiv_vf2_vf2_vf2(vfloat2 n, vfloat2 d) { // quotient n / d of two pairs (ENABLE_FMA_SP build)
+  vfloat2 q;
+  vfloat t = vrec_vf_vf(d.x), u; // t is the plain vfloat reciprocal of d.x
+  // Leading part of the quotient first, then the correction terms.
+  q.x = vmul_vf_vf_vf(n.x, t);
+  u = vfmapn_vf_vf_vf_vf(t, n.x, q.x); // presumably t*n.x - q.x, the residual of the product above -- confirm vfmapn
+  q.y = vfmanp_vf_vf_vf_vf(d.y, t, vfmanp_vf_vf_vf_vf(d.x, t, vcast_vf_f(1))); // presumably 1 - d.x*t - d.y*t -- confirm vfmanp
+  q.y = vfma_vf_vf_vf_vf(q.x, q.y, vfma_vf_vf_vf_vf(n.y, t, u)); // presumably q.x*q.y + (n.y*t + u) -- confirm vfma
+  // The pair is returned as computed, without a renormalization step.
+  return q;
+}
+
+// ENABLE_FMA_SP build: product of two vfloat values as a pair.
+// .x is the rounded product; .y = vfmapn(x, y, .x), presumably the residual x*y - .x (confirm vfmapn).
+static INLINE vfloat2 dfmul_vf2_vf_vf(vfloat x, vfloat y) {
+  vfloat prod = vmul_vf_vf_vf(x, y);
+  vfloat resid = vfmapn_vf_vf_vf_vf(x, y, prod);
+
+  return vcast_vf2_vf_vf(prod, resid);
+}
+
+// ENABLE_FMA_SP build: square of a pair. .x is the rounded x.x*x.x;
+// .y = vfma(x.x + x.x, x.y, vfmapn(x.x, x.x, .x)), i.e. the doubled cross term on top of the vfmapn result.
+static INLINE vfloat2 dfsqu_vf2_vf2(vfloat2 x) {
+  vfloat sq = vmul_vf_vf_vf(x.x, x.x);
+  vfloat resid = vfmapn_vf_vf_vf_vf(x.x, x.x, sq);
+  vfloat twice = vadd_vf_vf_vf(x.x, x.x);
+  return vcast_vf2_vf_vf(sq, vfma_vf_vf_vf_vf(twice, x.y, resid));
+}
+
+// ENABLE_FMA_SP build: product of two pairs. .x is the rounded x.x*y.x; .y starts from
+// vfmapn(x.x, y.x, .x) and accumulates the cross terms x.y*y.x and then x.x*y.y through vfma.
+static INLINE vfloat2 dfmul_vf2_vf2_vf2(vfloat2 x, vfloat2 y) {
+  vfloat prod = vmul_vf_vf_vf(x.x, y.x);
+  vfloat tail = vfmapn_vf_vf_vf_vf(x.x, y.x, prod);
+  tail = vfma_vf_vf_vf_vf(x.y, y.x, tail);
+  return vcast_vf2_vf_vf(prod, vfma_vf_vf_vf_vf(x.x, y.y, tail));
+}
+
+// ENABLE_FMA_SP build: product of the pair x and the vfloat y. .x is the rounded x.x*y;
+// .y = vfma(x.y, y, vfmapn(x.x, y, .x)), adding the x.y*y term to the vfmapn result.
+static INLINE vfloat2 dfmul_vf2_vf2_vf(vfloat2 x, vfloat y) {
+  vfloat prod = vmul_vf_vf_vf(x.x, y);
+  vfloat resid = vfmapn_vf_vf_vf_vf(x.x, y, prod);
+
+  return vcast_vf2_vf_vf(prod, vfma_vf_vf_vf_vf(x.y, y, resid));
+}
+
+// ENABLE_FMA_SP build: reciprocal of d as a pair. .x = vrec_vf_vf(d); .y = .x * vfmanp(d, .x, 1)
+// (vfmanp presumably yields 1 - d*.x -- confirm). Uses the INLINE macro like the non-FMA variant of this function.
+static INLINE vfloat2 dfrec_vf2_vf(vfloat d) {
+  vfloat2 q;
+  q.x = vrec_vf_vf(d);
+  q.y = vmul_vf_vf_vf(q.x, vfmanp_vf_vf_vf_vf(d, q.x, vcast_vf_f(1)));
+  return q;
+}
+
+// ENABLE_FMA_SP build: reciprocal of the pair d. .x = vrec_vf_vf(d.x);
+// .y = .x * vfmanp(d.y, .x, vfmanp(d.x, .x, 1)) (presumably .x * (1 - d.x*.x - d.y*.x) -- confirm vfmanp).
+static INLINE vfloat2 dfrec_vf2_vf2(vfloat2 d) {
+  vfloat inv = vrec_vf_vf(d.x);
+  vfloat resid = vfmanp_vf_vf_vf_vf(d.x, inv, vcast_vf_f(1));
+  resid = vfmanp_vf_vf_vf_vf(d.y, inv, resid);
+  return vcast_vf2_vf_vf(inv, vmul_vf_vf_vf(inv, resid));
+}
+#else
+static INLINE vfloat2 dfdiv_vf2_vf2_vf2(vfloat2 n, vfloat2 d) { // quotient n / d of two pairs (build without ENABLE_FMA_SP)
+  vfloat t = vrec_vf_vf(d.x); // t is the plain vfloat reciprocal of d.x
+  vfloat dh  = vupper_vf_vf(d.x), dl  = vsub_vf_vf_vf(d.x,  dh); // split d.x, t and n.x into upper bits + remainder
+  vfloat th  = vupper_vf_vf(t  ), tl  = vsub_vf_vf_vf(t  ,  th);
+  vfloat nhh = vupper_vf_vf(n.x), nhl = vsub_vf_vf_vf(n.x, nhh);
+  // q.x is the leading quotient n.x * t; q.y collects the correction terms computed below.
+  vfloat2 q;
+
+  q.x = vmul_vf_vf_vf(n.x, t);
+  // Earlier single-expression form of u, kept for reference; the vmla chains below accumulate the same terms:
+  //vfloat u = vadd_vf_5vf(vsub_vf_vf_vf(vmul_vf_vf_vf(nhh, th), q.x), vmul_vf_vf_vf(nhh, tl), vmul_vf_vf_vf(nhl, th), vmul_vf_vf_vf(nhl, tl),
+  //vmul_vf_vf_vf(q.x, vsub_vf_5vf(vcast_vf_f(1), vmul_vf_vf_vf(dh, th), vmul_vf_vf_vf(dh, tl), vmul_vf_vf_vf(dl, th), vmul_vf_vf_vf(dl, tl))));
+
+  vfloat u, w;
+  w = vcast_vf_f(-1);
+  w = vmla_vf_vf_vf_vf(dh, th, w);
+  w = vmla_vf_vf_vf_vf(dh, tl, w);
+  w = vmla_vf_vf_vf_vf(dl, th, w);
+  w = vmla_vf_vf_vf_vf(dl, tl, w);
+  w = vneg_vf_vf(w); // w = -(dh*th + dh*tl + dl*th + dl*tl - 1), accumulated in that order
+  // u = nhh*th - q.x + nhh*tl + nhl*th + nhl*tl + q.x*w, accumulated left to right.
+  u = vmla_vf_vf_vf_vf(nhh, th, vneg_vf_vf(q.x));
+  u = vmla_vf_vf_vf_vf(nhh, tl, u);
+  u = vmla_vf_vf_vf_vf(nhl, th, u);
+  u = vmla_vf_vf_vf_vf(nhl, tl, u);
+  u = vmla_vf_vf_vf_vf(q.x, w , u);
+  // Trailing part: t * (n.y - q.x * d.y) + u.
+  q.y = vmla_vf_vf_vf_vf(t, vsub_vf_vf_vf(n.y, vmul_vf_vf_vf(q.x, d.y)), u);
+
+  return q;
+}
+
+// Build without ENABLE_FMA_SP: product of two vfloat values as a pair.
+// Each operand is split with vupper_vf_vf into upper bits and a remainder; the trailing part is
+//   xh*yh - prod + xl*yh + xh*yl + xl*yl
+// accumulated in exactly that order with vmla_vf_vf_vf_vf.
+static INLINE vfloat2 dfmul_vf2_vf_vf(vfloat x, vfloat y) {
+  vfloat xHi = vupper_vf_vf(x), xLo = vsub_vf_vf_vf(x, xHi);
+  vfloat yHi = vupper_vf_vf(y), yLo = vsub_vf_vf_vf(y, yHi);
+
+  vfloat prod = vmul_vf_vf_vf(x, y);
+
+  vfloat acc = vmla_vf_vf_vf_vf(xHi, yHi, vneg_vf_vf(prod));
+  acc = vmla_vf_vf_vf_vf(xLo, yHi, acc);
+  acc = vmla_vf_vf_vf_vf(xHi, yLo, acc);
+  acc = vmla_vf_vf_vf_vf(xLo, yLo, acc);
+
+  return vcast_vf2_vf_vf(prod, acc);
+}
+
+// Build without ENABLE_FMA_SP: product of the pair x and the vfloat y.
+// x.x and y are split with vupper_vf_vf into upper bits and a remainder; the trailing part is
+//   xh*yh - prod + xl*yh + xh*yl + xl*yl + x.y*y
+// accumulated in exactly that order with vmla_vf_vf_vf_vf.
+static INLINE vfloat2 dfmul_vf2_vf2_vf(vfloat2 x, vfloat y) {
+  vfloat xHi = vupper_vf_vf(x.x), xLo = vsub_vf_vf_vf(x.x, xHi);
+  vfloat yHi = vupper_vf_vf(y), yLo = vsub_vf_vf_vf(y, yHi);
+
+  vfloat prod = vmul_vf_vf_vf(x.x, y);
+
+  vfloat acc = vmla_vf_vf_vf_vf(xHi, yHi, vneg_vf_vf(prod));
+  acc = vmla_vf_vf_vf_vf(xLo, yHi, acc);
+  acc = vmla_vf_vf_vf_vf(xHi, yLo, acc);
+  acc = vmla_vf_vf_vf_vf(xLo, yLo, acc);
+  acc = vmla_vf_vf_vf_vf(x.y, y, acc);
+
+  return vcast_vf2_vf_vf(prod, acc);
+}
+
+// Build without ENABLE_FMA_SP: product of two pairs.
+// x.x and y.x are split with vupper_vf_vf into upper bits and a remainder; the trailing part is
+//   xh*yh - prod + xl*yh + xh*yl + xl*yl + x.x*y.y + x.y*y.x
+// accumulated in exactly that order with vmla_vf_vf_vf_vf.
+static INLINE vfloat2 dfmul_vf2_vf2_vf2(vfloat2 x, vfloat2 y) {
+  vfloat xHi = vupper_vf_vf(x.x), xLo = vsub_vf_vf_vf(x.x, xHi);
+  vfloat yHi = vupper_vf_vf(y.x), yLo = vsub_vf_vf_vf(y.x, yHi);
+
+  vfloat prod = vmul_vf_vf_vf(x.x, y.x);
+
+  vfloat acc = vmla_vf_vf_vf_vf(xHi, yHi, vneg_vf_vf(prod));
+  acc = vmla_vf_vf_vf_vf(xLo, yHi, acc);
+  acc = vmla_vf_vf_vf_vf(xHi, yLo, acc);
+  acc = vmla_vf_vf_vf_vf(xLo, yLo, acc);
+  acc = vmla_vf_vf_vf_vf(x.x, y.y, acc);
+  acc = vmla_vf_vf_vf_vf(x.y, y.x, acc);
+
+  return vcast_vf2_vf_vf(prod, acc);
+}
+
+// Build without ENABLE_FMA_SP: square of a pair.
+// x.x is split with vupper_vf_vf into upper bits and a remainder; the trailing part is
+//   xh*xh - sq + (xh+xh)*xl + xl*xl + x.x*(x.y+x.y)
+// accumulated in exactly that order with vmla_vf_vf_vf_vf.
+static INLINE vfloat2 dfsqu_vf2_vf2(vfloat2 x) {
+  vfloat xHi = vupper_vf_vf(x.x), xLo = vsub_vf_vf_vf(x.x, xHi);
+
+  vfloat sq = vmul_vf_vf_vf(x.x, x.x);
+
+  vfloat acc = vmla_vf_vf_vf_vf(xHi, xHi, vneg_vf_vf(sq));
+  acc = vmla_vf_vf_vf_vf(vadd_vf_vf_vf(xHi, xHi), xLo, acc);
+  acc = vmla_vf_vf_vf_vf(xLo, xLo, acc);
+  acc = vmla_vf_vf_vf_vf(x.x, vadd_vf_vf_vf(x.y, x.y), acc);
+
+  return vcast_vf2_vf_vf(sq, acc);
+}
+
+// Build without ENABLE_FMA_SP: reciprocal of d as a pair.
+// d and its plain reciprocal are split with vupper_vf_vf into upper bits and a remainder; the residual
+//   -1 + dh*th + dh*tl + dl*th + dl*tl
+// is accumulated in exactly that order with vmla_vf_vf_vf_vf.
+static INLINE vfloat2 dfrec_vf2_vf(vfloat d) {
+  vfloat inv = vrec_vf_vf(d);
+  vfloat dHi = vupper_vf_vf(d), dLo = vsub_vf_vf_vf(d, dHi);
+  vfloat tHi = vupper_vf_vf(inv), tLo = vsub_vf_vf_vf(inv, tHi);
+
+  vfloat resid = vcast_vf_f(-1);
+  resid = vmla_vf_vf_vf_vf(dHi, tHi, resid);
+  resid = vmla_vf_vf_vf_vf(dHi, tLo, resid);
+  resid = vmla_vf_vf_vf_vf(dLo, tHi, resid);
+  resid = vmla_vf_vf_vf_vf(dLo, tLo, resid);
+
+  // Leading part is the plain reciprocal; trailing part is (-inv) * resid.
+  return vcast_vf2_vf_vf(inv, vmul_vf_vf_vf(vneg_vf_vf(inv), resid));
+}
+
+// Build without ENABLE_FMA_SP: reciprocal of the pair d.
+// d.x and its plain reciprocal are split with vupper_vf_vf into upper bits and a remainder; the residual
+//   -1 + dh*th + dh*tl + dl*th + dl*tl + d.y*inv
+// is accumulated in exactly that order with vmla_vf_vf_vf_vf.
+static INLINE vfloat2 dfrec_vf2_vf2(vfloat2 d) {
+  vfloat inv = vrec_vf_vf(d.x);
+  vfloat dHi = vupper_vf_vf(d.x), dLo = vsub_vf_vf_vf(d.x, dHi);
+  vfloat tHi = vupper_vf_vf(inv), tLo = vsub_vf_vf_vf(inv, tHi);
+
+  vfloat resid = vcast_vf_f(-1);
+  resid = vmla_vf_vf_vf_vf(dHi, tHi, resid);
+  resid = vmla_vf_vf_vf_vf(dHi, tLo, resid);
+  resid = vmla_vf_vf_vf_vf(dLo, tHi, resid);
+  resid = vmla_vf_vf_vf_vf(dLo, tLo, resid);
+  resid = vmla_vf_vf_vf_vf(d.y, inv, resid);
+
+  // Leading part is the plain reciprocal; trailing part is (-inv) * resid.
+  return vcast_vf2_vf_vf(inv, vmul_vf_vf_vf(vneg_vf_vf(inv), resid));
+}
+#endif
+
+static INLINE vfloat2 dfsqrt_vf2_vf2(vfloat2 d) { // square root of the pair d, returned as a pair
+#ifdef ENABLE_RECSQRT_SP
+  vfloat x = vrecsqrt_vf_vf(vadd_vf_vf_vf(d.x, d.y)); // presumably approximates 1/sqrt(d.x + d.y) -- confirm vrecsqrt_vf_vf
+  vfloat2 r = dfmul_vf2_vf2_vf(d, x); // r = d * x
+  return dfscale_vf2_vf2_vf(dfmul_vf2_vf2_vf2(r, dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf(r, x), vcast_vf_f(-3.0))), vcast_vf_f(-0.5)); // r * (r*x - 3) * -0.5
+#else
+  vfloat t = vsqrt_vf_vf(vadd_vf_vf_vf(d.x, d.y)); // t = sqrt(d.x + d.y) in plain vfloat precision
+  return dfscale_vf2_vf2_vf(dfmul_vf2_vf2_vf2(dfadd2_vf2_vf2_vf2(d, dfmul_vf2_vf_vf(t, t)), dfrec_vf2_vf(t)), vcast_vf_f(0.5)); // (d + t*t) * (1/t) * 0.5
+#endif
+}
diff --git a/simd/helperavx.h b/simd/helperavx.h
new file mode 100644
index 00000000..426d7182
--- /dev/null
+++ b/simd/helperavx.h
@@ -0,0 +1,283 @@
+#ifndef __AVX__
+#error Please specify -mavx.
+#endif
+
+#include <immintrin.h>
+#include <stdint.h>
+
+typedef __m256d vdouble; // four doubles
+typedef __m128i vint; // four 32-bit integers, used alongside the four vdouble lanes
+typedef __m256i vmask; // 256 raw bits: comparison results and operands of the bitwise helpers
+
+typedef __m256 vfloat; // eight floats
+typedef struct { vint x, y; } vint2; // eight 32-bit integers as two 128-bit halves (x = low half, y = high half, see vcast_vi2_vm)
+
+//
+
+static INLINE vint vrint_vi_vd(vdouble vd) { return _mm256_cvtpd_epi32(vd); } // double -> int32, rounding per the current rounding mode
+static INLINE vint vtruncate_vi_vd(vdouble vd) { return _mm256_cvttpd_epi32(vd); } // double -> int32, truncating toward zero
+static INLINE vdouble vcast_vd_vi(vint vi) { return _mm256_cvtepi32_pd(vi); } // int32 -> double
+static INLINE vdouble vcast_vd_d(double d) { return _mm256_set1_pd(d); } // d in all four lanes
+static INLINE vint vcast_vi_i(int i) { return _mm_set1_epi32(i); } // i in all four lanes
+
+static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return (__m256i)vd; } // vector-type cast: same 256 bits, no value conversion
+static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return (__m256d)vm;  } // inverse of the above
+
+static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return (__m256i)vf; } // same for the eight-float type
+static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return (__m256)vm; } // inverse of the above
+
+//
+
+static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm_add_epi32(x, y); } // lane-wise 32-bit x + y
+static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm_sub_epi32(x, y); } // lane-wise 32-bit x - y
+static INLINE vint vneg_vi_vi(vint e) { return vsub_vi_vi_vi(vcast_vi_i(0), e); } // 0 - e
+
+static INLINE vint vand_vi_vi_vi(vint x, vint y) { return _mm_and_si128(x, y); } // x & y
+static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm_andnot_si128(x, y); } // (~x) & y -- note the complement is on x
+static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm_or_si128(x, y); } // x | y
+static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm_xor_si128(x, y); } // x ^ y
+
+static INLINE vint vsll_vi_vi_i(vint x, int c) { return _mm_slli_epi32(x, c); } // logical left shift of each lane by c
+static INLINE vint vsrl_vi_vi_i(vint x, int c) { return _mm_srli_epi32(x, c); } // logical right shift (zero fill)
+static INLINE vint vsra_vi_vi_i(vint x, int c) { return _mm_srai_epi32(x, c); } // arithmetic right shift (sign fill)
+
+//
+
+static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return (vmask)_mm256_and_pd((__m256d)x, (__m256d)y); } // x & y, done through the _pd bitwise intrinsic
+static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return (vmask)_mm256_andnot_pd((__m256d)x, (__m256d)y); } // (~x) & y -- note the complement is on x
+static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return (vmask)_mm256_or_pd((__m256d)x, (__m256d)y); } // x | y
+static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return (vmask)_mm256_xor_pd((__m256d)x, (__m256d)y); } // x ^ y
+
+static INLINE vmask veq_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_EQ_OQ); } // x == y; ordered predicate, so false when either lane is NaN
+static INLINE vmask vneq_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_NEQ_UQ); } // x != y; unordered predicate, so true when either lane is NaN
+static INLINE vmask vlt_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_LT_OQ); } // x < y (ordered)
+static INLINE vmask vle_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_LE_OQ); } // x <= y (ordered)
+static INLINE vmask vgt_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_GT_OQ); } // x > y (ordered)
+static INLINE vmask vge_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_GE_OQ); } // x >= y (ordered)
+
+static INLINE vmask veq_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_EQ_OQ); } // single-precision counterparts, same predicates
+static INLINE vmask vneq_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_NEQ_UQ); } // true when either lane is NaN
+static INLINE vmask vlt_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_LT_OQ); } // x < y (ordered)
+static INLINE vmask vle_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_LE_OQ); } // x <= y (ordered)
+static INLINE vmask vgt_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_GT_OQ); } // x > y (ordered)
+static INLINE vmask vge_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_GE_OQ); } // x >= y (ordered)
+
+//
+
+static INLINE vfloat vcast_vf_f(float f) { return _mm256_set1_ps(f); } // f in all eight lanes
+
+static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return _mm256_add_ps(x, y); } // x + y
+static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm256_sub_ps(x, y); } // x - y
+static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm256_mul_ps(x, y); } // x * y
+static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm256_div_ps(x, y); } // x / y
+static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); } // 1 / x by a full division
+static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm256_sqrt_ps(x); } // sqrt(x)
+static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm256_max_ps(x, y); } // lane-wise maximum
+static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm256_min_ps(x, y); } // lane-wise minimum
+
+static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } // x*y + z as a separate multiply and add
+static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); } // z - x*y
+static INLINE vfloat vabs_vf_vf(vfloat f) { return (vfloat)vandnot_vm_vm_vm((vmask)vcast_vf_f(-0.0f), (vmask)f); } // clear the sign bit
+static INLINE vfloat vneg_vf_vf(vfloat d) { return (vfloat)vxor_vm_vm_vm((vmask)vcast_vf_f(-0.0f), (vmask)d); } // flip the sign bit
+
+//
+
+static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm256_add_pd(x, y); } // x + y
+static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return _mm256_sub_pd(x, y); } // x - y
+static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm256_mul_pd(x, y); } // x * y
+static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return _mm256_div_pd(x, y); } // x / y
+static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm256_div_pd(_mm256_set1_pd(1), x); } // 1 / x by a full division
+static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm256_sqrt_pd(x); } // sqrt(x)
+static INLINE vdouble vabs_vd_vd(vdouble d) { return _mm256_andnot_pd(_mm256_set1_pd(-0.0), d); } // clear the sign bit
+static INLINE vdouble vneg_vd_vd(vdouble d) { return _mm256_xor_pd(_mm256_set1_pd(-0.0), d); } // flip the sign bit
+static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } // x*y + z as a separate multiply and add
+static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } // x*y - z
+
+static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm256_max_pd(x, y); } // lane-wise maximum
+static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm256_min_pd(x, y); } // lane-wise minimum
+
+
+//
+
+static INLINE vmask veq_vm_vi_vi(vint x, vint y) { // int32 equality, widened to one 64-bit mask per lane
+  __m256d r = _mm256_cvtepi32_pd(_mm_and_si128(_mm_cmpeq_epi32(x, y), _mm_set1_epi32(1))); // 1.0 where equal, 0.0 elsewhere
+  return veq_vm_vd_vd(r, _mm256_set1_pd(1));
+}
+
+static INLINE vdouble vsel_vd_vm_vd_vd(vmask mask, vdouble x, vdouble y) { // (mask & x) | (~mask & y)
+  return (__m256d)vor_vm_vm_vm(vand_vm_vm_vm(mask, (__m256i)x), vandnot_vm_vm_vm(mask, (__m256i)y));
+}
+
+static INLINE vfloat vsel_vf_vm_vf_vf(vmask mask, vfloat x, vfloat y) { // (mask & x) | (~mask & y)
+  return (vfloat)vor_vm_vm_vm(vand_vm_vm_vm(mask, (vmask)x), vandnot_vm_vm_vm(mask, (vmask)y));
+}
+
+static INLINE vint vsel_vi_vd_vd_vi_vi(vdouble d0, vdouble d1, vint x, vint y) { // x where d0 < d1, y elsewhere
+  __m128i mask = _mm256_cvtpd_epi32(_mm256_and_pd(_mm256_cmp_pd(d0, d1, _CMP_LT_OQ), _mm256_set1_pd(1.0)));
+  mask = _mm_cmpeq_epi32(mask, _mm_set1_epi32(1));
+  return vor_vi_vi_vi(vand_vi_vi_vi(mask, x), vandnot_vi_vi_vi(mask, y));
+}
+
+//
+
+static INLINE vint2 vcast_vi2_vm(vmask vm) { // splits a 256-bit mask into its two 128-bit halves
+  vint2 r;
+  r.x = _mm256_castsi256_si128(vm); // low 128 bits
+  r.y = _mm256_extractf128_si256(vm, 1); // high 128 bits
+  return r;
+}
+
+static INLINE vmask vcast_vm_vi2(vint2 vi) { // inverse of vcast_vi2_vm: x becomes the low half, y the high half
+  vmask m = _mm256_castsi128_si256(vi.x);
+  m = _mm256_insertf128_si256(m, vi.y, 1);
+  return m;
+}
+
+static INLINE vint2 vrint_vi2_vf(vfloat vf) { return vcast_vi2_vm((vmask)_mm256_cvtps_epi32(vf)); } // float -> int32, rounding per the current rounding mode
+static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcast_vi2_vm((vmask)_mm256_cvttps_epi32(vf)); } // float -> int32, truncating toward zero
+static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm256_cvtepi32_ps((vmask)vcast_vm_vi2(vi)); } // int32 -> float
+static INLINE vint2 vcast_vi2_i(int i) { vint2 r; r.x = r.y = vcast_vi_i(i); return r; } // i in all eight lanes
+
+static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = vadd_vi_vi_vi(x.x, y.x); r.y = vadd_vi_vi_vi(x.y, y.y); return r; } // x + y, applied to each 128-bit half
+static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = vsub_vi_vi_vi(x.x, y.x); r.y = vsub_vi_vi_vi(x.y, y.y); return r; } // x - y
+static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vsub_vi2_vi2_vi2(vcast_vi2_i(0), e); } // 0 - e
+
+static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = vand_vi_vi_vi(x.x, y.x); r.y = vand_vi_vi_vi(x.y, y.y); return r; } // x & y
+static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = vandnot_vi_vi_vi(x.x, y.x); r.y = vandnot_vi_vi_vi(x.y, y.y); return r; } // (~x) & y
+static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = vor_vi_vi_vi(x.x, y.x); r.y = vor_vi_vi_vi(x.y, y.y); return r; } // x | y
+static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = vxor_vi_vi_vi(x.x, y.x); r.y = vxor_vi_vi_vi(x.y, y.y); return r; } // x ^ y
+
+static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { vint2 r; r.x = vsll_vi_vi_i(x.x, c); r.y = vsll_vi_vi_i(x.y, c); return r; } // logical left shift by c
+static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { vint2 r; r.x = vsrl_vi_vi_i(x.x, c); r.y = vsrl_vi_vi_i(x.y, c); return r; } // logical right shift by c
+static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { vint2 r; r.x = vsra_vi_vi_i(x.x, c); r.y = vsra_vi_vi_i(x.y, c); return r; } // arithmetic right shift by c
+
+static INLINE vmask veq_vm_vi2_vi2(vint2 x, vint2 y) { // x == y per 32-bit lane, returned as a 256-bit mask
+  vint2 r;
+  r.x = _mm_cmpeq_epi32(x.x, y.x);
+  r.y = _mm_cmpeq_epi32(x.y, y.y);
+  return vcast_vm_vi2(r);
+}
+
+static INLINE vmask vgt_vm_vi2_vi2(vint2 x, vint2 y) { // signed x > y per 32-bit lane, returned as a 256-bit mask
+  vint2 r;
+  r.x = _mm_cmpgt_epi32(x.x, y.x);
+  r.y = _mm_cmpgt_epi32(x.y, y.y);
+  return vcast_vm_vi2(r);
+}
+
+static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { // same comparison as vgt_vm_vi2_vi2, result left in vint2 form
+  vint2 r;
+  r.x = _mm_cmpgt_epi32(x.x, y.x);
+  r.y = _mm_cmpgt_epi32(x.y, y.y);
+  return r;
+}
+
+static INLINE vint2 vsel_vi2_vm_vi2_vi2(vmask m, vint2 x, vint2 y) { // (m & x) | (~m & y), applied to each 128-bit half
+  vint2 r, m2 = vcast_vi2_vm(m);
+  r.x = vor_vi_vi_vi(vand_vi_vi_vi(m2.x, x.x), vandnot_vi_vi_vi(m2.x, y.x));
+  r.y = vor_vi_vi_vi(vand_vi_vi_vi(m2.y, x.y), vandnot_vi_vi_vi(m2.y, y.y));
+  return r;
+}
+
+//
+
+static INLINE double vcast_d_vd(vdouble v) { // returns lane 0 of v
+  double lanes[4];
+  _mm256_storeu_pd(lanes, v);
+  return lanes[0];
+}
+
+static INLINE float vcast_f_vf(vfloat v) { // returns lane 0 of v
+  float lanes[8];
+  _mm256_storeu_ps(lanes, v);
+  return lanes[0];
+}
+
+static INLINE vmask vsignbit_vm_vd(vdouble d) { // keeps only the sign bit of each lane
+  return (vmask)_mm256_and_pd(d, _mm256_set1_pd(-0.0));
+}
+
+static INLINE vdouble vsign_vd_vd(vdouble d) { // 1.0 carrying the sign bit of d
+  return _mm256_or_pd(_mm256_set1_pd(1.0), (vdouble)vsignbit_vm_vd(d));
+}
+
+static INLINE vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) { // x with its sign flipped where y's sign bit is set
+  return (__m256d)vxor_vm_vm_vm((__m256i)x, vsignbit_vm_vd(y));
+}
+
+static INLINE vmask visinf_vm_vd(vdouble d) { // all-ones where |d| equals INFINITY
+  return (vmask)_mm256_cmp_pd(vabs_vd_vd(d), _mm256_set1_pd(INFINITY), _CMP_EQ_OQ);
+}
+
+static INLINE vmask vispinf_vm_vd(vdouble d) { // all-ones where d equals +INFINITY
+  return (vmask)_mm256_cmp_pd(d, _mm256_set1_pd(INFINITY), _CMP_EQ_OQ);
+}
+
+static INLINE vmask visminf_vm_vd(vdouble d) { // all-ones where d equals -INFINITY
+  return (vmask)_mm256_cmp_pd(d, _mm256_set1_pd(-INFINITY), _CMP_EQ_OQ);
+}
+
+static INLINE vmask visnan_vm_vd(vdouble d) { // all-ones where d is NaN (d compares unequal to itself)
+  return (vmask)_mm256_cmp_pd(d, d, _CMP_NEQ_UQ);
+}
+
+static INLINE vdouble visinf(vdouble d) { // vsign_vd_vd(d) where d is infinite, all-zero bits elsewhere
+  return _mm256_and_pd((vdouble)visinf_vm_vd(d), vsign_vd_vd(d));
+}
+
+static INLINE vdouble visinf2(vdouble d, vdouble m) { // (sign bit of d | bits of m) where d is infinite, all-zero bits elsewhere
+  return _mm256_and_pd((vdouble)visinf_vm_vd(d), _mm256_or_pd((vdouble)vsignbit_vm_vd(d), m));
+}
+
+static INLINE vdouble vpow2i_vd_vi(vint q) { // builds a double per lane whose exponent field is q + 0x3ff (2^q while that stays in the normal range)
+  vint r;
+  vdouble y;
+  q = _mm_add_epi32(_mm_set_epi32(0x3ff, 0x3ff, 0x3ff, 0x3ff), q); // add the exponent bias
+  q = _mm_slli_epi32(q, 20); // bit 20 of the high 32-bit word is bit 52 of the double
+  r = (__m128i)_mm_shuffle_ps((__m128)q, (__m128)q, _MM_SHUFFLE(1,0,0,0)); // elements 0 and 1 of q land in the high words of doubles 0 and 1
+  y = _mm256_castpd128_pd256((__m128d)r);
+  r = (__m128i)_mm_shuffle_ps((__m128)q, (__m128)q, _MM_SHUFFLE(3,2,2,2)); // elements 2 and 3 of q land in the high words of doubles 2 and 3
+  y = _mm256_insertf128_pd(y, (__m128d)r, 1);
+  y = _mm256_and_pd(y, (__m256d)_mm256_set_epi32(0xfff00000, 0, 0xfff00000, 0, 0xfff00000, 0, 0xfff00000, 0)); // keep the top 12 bits of each high word, zero everything else
+  return y;
+}
+
+static INLINE vdouble vldexp_vd_vd_vi(vdouble x, vint q) { // scales x by a power of two, applied as x * y^4 * vpow2i(remaining q)
+  vint m = _mm_srai_epi32(q, 31); // -1 where q is negative, 0 elsewhere
+  m = _mm_slli_epi32(_mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(m, q), 9), m), 7); // coarse part of the exponent, handled through y below
+  q = _mm_sub_epi32(q, _mm_slli_epi32(m, 2)); // remove 4*m from q; y is multiplied in four times to supply it
+  m = _mm_add_epi32(_mm_set_epi32(0x3ff, 0x3ff, 0x3ff, 0x3ff), m); // add the exponent bias
+  m = _mm_andnot_si128(_mm_cmplt_epi32(m, _mm_set_epi32(0, 0, 0, 0)), m); // negative biased exponents become 0
+  vint n = _mm_cmpgt_epi32(m, _mm_set_epi32(0x7ff, 0x7ff, 0x7ff, 0x7ff)); // lanes whose biased exponent exceeds 0x7ff
+  m = _mm_or_si128(_mm_andnot_si128(n, m), _mm_and_si128(n, _mm_set_epi32(0x7ff, 0x7ff, 0x7ff, 0x7ff))); // ...are replaced by 0x7ff
+  m = _mm_slli_epi32(m, 20); // move into the exponent position of the high 32-bit word
+  vint r = (__m128i)_mm_shuffle_ps((__m128)m, (__m128)m, _MM_SHUFFLE(1,0,0,0)); // elements 0 and 1 -> high words of doubles 0 and 1
+  vdouble y = _mm256_castpd128_pd256((__m128d)r);
+  r = (__m128i)_mm_shuffle_ps((__m128)m, (__m128)m, _MM_SHUFFLE(3,2,2,2)); // elements 2 and 3 -> high words of doubles 2 and 3
+  y = _mm256_insertf128_pd(y, (__m128d)r, 1);
+  y = _mm256_and_pd(y, (__m256d)_mm256_set_epi32(0xfff00000, 0, 0xfff00000, 0, 0xfff00000, 0, 0xfff00000, 0)); // keep the top 12 bits of each high word only
+  return vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(x, y), y), y), y), vpow2i_vd_vi(q)); // ((((x*y)*y)*y)*y) * vpow2i(q)
+}
+
+static INLINE vint vilogbp1_vi_vd(vdouble d) { // per lane: the top bits of d shifted down to the exponent field, minus c (tiny inputs are pre-scaled)
+  vint q, r, c;
+  vmask m = vlt_vm_vd_vd(d, vcast_vd_d(4.9090934652977266E-91)); // lanes below this threshold get scaled up first
+  d = vsel_vd_vm_vd_vd(m, vmul_vd_vd_vd(vcast_vd_d(2.037035976334486E90), d), d);
+  c = _mm256_cvtpd_epi32(vsel_vd_vm_vd_vd(m, vcast_vd_d(300+0x3fe), vcast_vd_d(0x3fe))); // the extra 300 presumably undoes the scaling (2.037e90 ~ 2^300) -- confirm
+  q = (__m128i)_mm256_castpd256_pd128(d); // doubles 0 and 1 as four 32-bit words
+  q = (__m128i)_mm_shuffle_ps((__m128)q, _mm_set_ps(0, 0, 0, 0), _MM_SHUFFLE(0,0,3,1)); // their high words -> elements 0 and 1, zeros above
+  r = (__m128i)_mm256_extractf128_pd(d, 1); // doubles 2 and 3 as four 32-bit words
+  r = (__m128i)_mm_shuffle_ps(_mm_set_ps(0, 0, 0, 0), (__m128)r, _MM_SHUFFLE(3,1,0,0)); // their high words -> elements 2 and 3, zeros below
+  q = _mm_or_si128(q, r);
+  q = _mm_srli_epi32(q, 20); // shift the exponent field down; the sign bit is not masked off here
+  q = _mm_sub_epi32(q, c);
+  return q;
+}
+
+static INLINE vdouble vupper_vd_vd(vdouble d) { // clears the low 27 bits of each double; used by the dd* helpers to split operands
+  return (__m256d)_mm256_and_pd(d, (vdouble)_mm256_set_epi32(0xffffffff, 0xf8000000, 0xffffffff, 0xf8000000, 0xffffffff, 0xf8000000, 0xffffffff, 0xf8000000));
+}
+
+static INLINE vfloat vupper_vf_vf(vfloat d) { // clears the low 12 bits of each float; used by the df* helpers to split operands
+  return (vfloat)vand_vm_vm_vm((vmask)d, _mm256_set_epi32(0xfffff000, 0xfffff000, 0xfffff000, 0xfffff000,0xfffff000, 0xfffff000, 0xfffff000, 0xfffff000));
+}
diff --git a/simd/helperavx2.h b/simd/helperavx2.h
new file mode 100644
index 00000000..fa527a49
--- /dev/null
+++ b/simd/helperavx2.h
@@ -0,0 +1,254 @@
+#ifndef __AVX2__
+#error Please specify -mavx2.
+#endif
+
+#include <immintrin.h>
+#include <stdint.h>
+
+typedef __m256d vdouble; // four double-precision lanes
+typedef __m128i vint; // four 32-bit integer lanes, one per vdouble lane
+typedef __m256i vmask; // 256 bits used as a mask or raw bit pattern
+
+typedef __m256 vfloat; // eight single-precision lanes
+typedef __m256i vint2; // eight 32-bit integer lanes, one per vfloat lane
+
+#define ENABLE_FMA_DP // NOTE(review): presumably selects the fused multiply-add double-precision paths in the including code - confirm
+#define ENABLE_FMA_SP // NOTE(review): presumably the single-precision counterpart - confirm
+
+// Value conversions between double lanes and 32-bit integer lanes, plus scalar broadcasts.
+// vrint converts using the current rounding mode; vtruncate always rounds toward zero.
+static INLINE vint vrint_vi_vd(vdouble vd) { return _mm256_cvtpd_epi32(vd); }
+static INLINE vint vtruncate_vi_vd(vdouble vd) { return _mm256_cvttpd_epi32(vd); }
+static INLINE vdouble vcast_vd_vi(vint vi) { return _mm256_cvtepi32_pd(vi); }
+static INLINE vdouble vcast_vd_d(double d) { return _mm256_set_pd(d, d, d, d); }
+static INLINE vint vcast_vi_i(int i) { return _mm_set_epi32(i, i, i, i); }
+// Bit-pattern reinterpretations between the mask type and the floating-point vectors (no value conversion).
+static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return (__m256i)vd; }
+static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return (__m256d)vm;  }
+
+static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return (__m256i)vf; }
+static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return (__m256)vm; }
+
+// Element-wise operations on the four 32-bit integer lanes of a vint.
+// vandnot computes (~x) & y; vsll/vsrl are logical shifts and vsra is an arithmetic right shift.
+static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm_add_epi32(x, y); }
+static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm_sub_epi32(x, y); }
+static INLINE vint vneg_vi_vi(vint e) { return vsub_vi_vi_vi(vcast_vi_i(0), e); }
+
+static INLINE vint vand_vi_vi_vi(vint x, vint y) { return _mm_and_si128(x, y); }
+static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm_andnot_si128(x, y); }
+static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm_or_si128(x, y); }
+static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm_xor_si128(x, y); }
+
+static INLINE vint vsll_vi_vi_i(vint x, int c) { return _mm_slli_epi32 (x, c); }
+static INLINE vint vsrl_vi_vi_i(vint x, int c) { return _mm_srli_epi32 (x, c); }
+static INLINE vint vsra_vi_vi_i(vint x, int c) { return _mm_srai_epi32 (x, c); }
+
+// Bitwise operations on whole 256-bit masks (vandnot is (~x) & y), followed by lane-wise comparisons.
+// A comparison yields all-ones in lanes where it holds and all-zeros elsewhere.
+static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return (vmask)_mm256_and_pd((__m256d)x, (__m256d)y); }
+static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return (vmask)_mm256_andnot_pd((__m256d)x, (__m256d)y); }
+static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return (vmask)_mm256_or_pd((__m256d)x, (__m256d)y); }
+static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return (vmask)_mm256_xor_pd((__m256d)x, (__m256d)y); }
+// Double comparisons: the ordered (_OQ) predicates are false when either operand is NaN; vneq (_CMP_NEQ_UQ) is true in that case.
+static INLINE vmask veq_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_EQ_OQ); }
+static INLINE vmask vneq_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_NEQ_UQ); }
+static INLINE vmask vlt_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_LT_OQ); }
+static INLINE vmask vle_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_LE_OQ); }
+static INLINE vmask vgt_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_GT_OQ); }
+static INLINE vmask vge_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_GE_OQ); }
+// Float comparisons: same predicates applied to the eight single-precision lanes.
+static INLINE vmask veq_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_EQ_OQ); }
+static INLINE vmask vneq_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_NEQ_UQ); }
+static INLINE vmask vlt_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_LT_OQ); }
+static INLINE vmask vle_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_LE_OQ); }
+static INLINE vmask vgt_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_GT_OQ); }
+static INLINE vmask vge_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_GE_OQ); }
+
+// Single-precision arithmetic on the eight lanes of a vfloat.
+// vcast_vf_f broadcasts one scalar to every lane; vrec is 1.0f / x computed with a real division.
+static INLINE vfloat vcast_vf_f(float f) { return _mm256_set_ps(f, f, f, f, f, f, f, f); }
+
+static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return _mm256_add_ps(x, y); }
+static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm256_sub_ps(x, y); }
+static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm256_mul_ps(x, y); }
+static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm256_div_ps(x, y); }
+static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); }
+static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm256_sqrt_ps(x); }
+static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm256_max_ps(x, y); }
+static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm256_min_ps(x, y); }
+// Multiply-add family mapped to FMA3: vmla = x*y + z, vmlapn = x*y - z, vmlanp = -(x*y) + z. vabs/vneg clear/flip the sign bit.
+static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmadd_ps(x, y, z); }
+static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmsub_ps(x, y, z); }
+static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fnmadd_ps(x, y, z); }
+static INLINE vfloat vabs_vf_vf(vfloat f) { return (vfloat)vandnot_vm_vm_vm((vmask)vcast_vf_f(-0.0f), (vmask)f); }
+static INLINE vfloat vneg_vf_vf(vfloat d) { return (vfloat)vxor_vm_vm_vm((vmask)vcast_vf_f(-0.0f), (vmask)d); }
+// Explicit fused forms; the suffix gives the signs of the product and of z: pp = x*y+z, pn = x*y-z, np = -(x*y)+z, nn = -(x*y)-z.
+static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmadd_ps(x, y, z); }
+static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmadd_ps(x, y, z); }
+static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmsub_ps(x, y, z); }
+static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fnmadd_ps(x, y, z); }
+static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fnmsub_ps(x, y, z); }
+
+// Double-precision arithmetic on the four lanes of a vdouble.
+// vabs/vneg clear/flip the sign bit; vmla = x*y + z, vmlapn = x*y - z, vmlanp = -(x*y) + z (FMA3 instructions).
+static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm256_add_pd(x, y); }
+static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return _mm256_sub_pd(x, y); }
+static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm256_mul_pd(x, y); }
+static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return _mm256_div_pd(x, y); }
+static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm256_div_pd(_mm256_set_pd(1, 1, 1, 1), x); }
+static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm256_sqrt_pd(x); }
+static INLINE vdouble vabs_vd_vd(vdouble d) { return (__m256d)_mm256_andnot_pd(_mm256_set_pd(-0.0,-0.0,-0.0,-0.0), d); }
+static INLINE vdouble vneg_vd_vd(vdouble d) { return (__m256d)_mm256_xor_pd(_mm256_set_pd(-0.0,-0.0,-0.0,-0.0), d); }
+static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmadd_pd(x, y, z); }
+static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmsub_pd(x, y, z); }
+static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fnmadd_pd(x, y, z); }
+
+static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm256_max_pd(x, y); }
+static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm256_min_pd(x, y); }
+// Explicit fused forms; the suffix gives the signs of the product and of z: pp = x*y+z, pn = x*y-z, np = -(x*y)+z, nn = -(x*y)-z.
+static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmadd_pd(x, y, z); }
+static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmadd_pd(x, y, z); }
+static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmsub_pd(x, y, z); }
+static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fnmadd_pd(x, y, z); }
+static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fnmsub_pd(x, y, z); }
+
+// Lane-wise equality of two vint values, returned as a 256-bit mask with one 64-bit lane per element.
+// Sign-extending each 32-bit compare result to 64 bits keeps every lane all-ones or all-zeros.
+static INLINE vmask veq_vm_vi_vi(vint x, vint y) {
+  return _mm256_cvtepi32_epi64(_mm_cmpeq_epi32(x, y));
+}
+
+static INLINE vdouble vsel_vd_vm_vd_vd(vmask mask, vdouble x, vdouble y) { // bitwise select: bits of x where mask is set, bits of y elsewhere
+  return (__m256d)vor_vm_vm_vm(vandnot_vm_vm_vm(mask, (__m256i)y), vand_vm_vm_vm(mask, (__m256i)x));
+}
+
+static INLINE vfloat vsel_vf_vm_vf_vf(vmask mask, vfloat x, vfloat y) { // bitwise select: bits of x where mask is set, bits of y elsewhere
+  return (vfloat)vor_vm_vm_vm(vandnot_vm_vm_vm(mask, (vmask)y), vand_vm_vm_vm(mask, (vmask)x));
+}
+
+static INLINE vint vsel_vi_vd_vd_vi_vi(vdouble d0, vdouble d1, vint x, vint y) { // per lane: x where d0 is less than d1, otherwise y (also y when unordered)
+  vdouble one_if_less = _mm256_and_pd(_mm256_cmp_pd(d0, d1, _CMP_LT_OQ), _mm256_set1_pd(1.0)); // 1.0 where the comparison holds, else 0.0
+  vint pick = _mm_cmpeq_epi32(_mm256_cvtpd_epi32(one_if_less), _mm_set1_epi32(1)); // narrowed to an all-ones / all-zeros 32-bit mask
+  return vor_vi_vi_vi(vandnot_vi_vi_vi(pick, y), vand_vi_vi_vi(pick, x));
+}
+
+// Operations on vint2 (eight 32-bit integer lanes). With AVX2 vint2 and vmask are the same
+// 256-bit integer type, so the two casts below are identity functions.
+static INLINE vint2 vcast_vi2_vm(vmask vm) { return vm; }
+static INLINE vmask vcast_vm_vi2(vint2 vi) { return vi; }
+// Float/int conversions: vrint uses the current rounding mode, vtruncate rounds toward zero.
+static INLINE vint2 vrint_vi2_vf(vfloat vf) { return vcast_vi2_vm((vmask)_mm256_cvtps_epi32(vf)); }
+static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcast_vi2_vm((vmask)_mm256_cvttps_epi32(vf)); }
+static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm256_cvtepi32_ps((vmask)vcast_vm_vi2(vi)); }
+static INLINE vint2 vcast_vi2_i(int i) { return _mm256_set1_epi32(i); }
+
+static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_add_epi32(x, y); }
+static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_sub_epi32(x, y); }
+static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vsub_vi2_vi2_vi2(vcast_vi2_i(0), e); }
+
+static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_and_si256(x, y); }
+static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_andnot_si256(x, y); }
+static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_or_si256(x, y); }
+static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_xor_si256(x, y); }
+// Shifts: vsll/vsrl are logical, vsra is arithmetic.
+static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { return _mm256_slli_epi32(x, c); }
+static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { return _mm256_srli_epi32(x, c); }
+static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { return _mm256_srai_epi32(x, c); }
+// Signed 32-bit comparisons and a bitwise select (bits of x where m is set, bits of y elsewhere).
+static INLINE vmask veq_vm_vi2_vi2(vint2 x, vint2 y) { return _mm256_cmpeq_epi32(x, y); }
+static INLINE vmask vgt_vm_vi2_vi2(vint2 x, vint2 y) { return _mm256_cmpgt_epi32(x, y); }
+static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_cmpgt_epi32(x, y); }
+static INLINE vint2 vsel_vi2_vm_vi2_vi2(vmask m, vint2 x, vint2 y) { return vor_vm_vm_vm(vand_vm_vm_vm(m, x), vandnot_vm_vm_vm(m, y)); }
+
+//
+
+static INLINE double vcast_d_vd(vdouble v) { // returns lane 0 (the lowest element) as a scalar
+  double lanes[4];
+  _mm256_storeu_pd(&lanes[0], v); // unaligned store of all four lanes
+  return lanes[0];
+}
+
+static INLINE float vcast_f_vf(vfloat v) { // returns lane 0 (the lowest element) as a scalar
+  float lanes[8];
+  _mm256_storeu_ps(&lanes[0], v); // unaligned store of all eight lanes
+  return lanes[0];
+}
+
+static INLINE vmask vsignbit_vm_vd(vdouble d) { // keeps only the sign bit of each double lane
+  return (vmask)_mm256_and_pd(d, _mm256_set1_pd(-0.0));
+}
+
+static INLINE vdouble vsign_vd_vd(vdouble d) { // 1.0 carrying the sign bit of d in each lane
+  return _mm256_or_pd(_mm256_set1_pd(1.0), (vdouble)vsignbit_vm_vd(d));
+}
+
+static INLINE vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) { // flips the sign of x in lanes where the sign bit of y is set
+  return (__m256d)vxor_vm_vm_vm(vsignbit_vm_vd(y), (__m256i)x);
+}
+
+static INLINE vmask visinf_vm_vd(vdouble d) { // mask of lanes holding +infinity or -infinity
+  return (vmask)_mm256_cmp_pd(vabs_vd_vd(d), _mm256_set1_pd(INFINITY), _CMP_EQ_OQ);
+}
+
+static INLINE vmask vispinf_vm_vd(vdouble d) { // mask of lanes holding +infinity
+  return (vmask)_mm256_cmp_pd(d, _mm256_set1_pd(INFINITY), _CMP_EQ_OQ);
+}
+
+static INLINE vmask visminf_vm_vd(vdouble d) { // mask of lanes holding -infinity
+  return (vmask)_mm256_cmp_pd(d, _mm256_set1_pd(-INFINITY), _CMP_EQ_OQ);
+}
+
+static INLINE vmask visnan_vm_vd(vdouble d) { // mask of lanes holding NaN (a value is unordered with itself only when it is NaN)
+  return (vmask)_mm256_cmp_pd(d, d, _CMP_UNORD_Q);
+}
+
+static INLINE vdouble visinf(vdouble d) { // +1.0 or -1.0 (sign of d) in infinite lanes, +0.0 in all other lanes
+  return _mm256_and_pd(vsign_vd_vd(d), (vdouble)visinf_vm_vd(d));
+}
+
+static INLINE vdouble visinf2(vdouble d, vdouble m) { // in infinite lanes: bits of m OR-ed with the sign bit of d; all other lanes are zero
+  return _mm256_and_pd(_mm256_or_pd(m, (vdouble)vsignbit_vm_vd(d)), (vdouble)visinf_vm_vd(d));
+}
+
+static INLINE vdouble vpow2i_vd_vi(vint q) { // per lane: a double whose exponent field is q + 0x3ff, i.e. 2^q while that sum fits the field
+  vint biased = _mm_add_epi32(_mm_set1_epi32(0x3ff), q);
+  vint2 bits = _mm256_slli_epi64(_mm256_cvtepi32_epi64(biased), 52); // widen to 64 bits, then move into bits 52..63
+  return (vdouble)_mm256_and_si256(bits, _mm256_set_epi32(0xfff00000, 0, 0xfff00000, 0, 0xfff00000, 0, 0xfff00000, 0));
+}
+
+static INLINE vdouble vldexp_vd_vd_vi(vdouble x, vint q) { // Scales each lane of x by a power of two taken from q, applied as four equal coarse factors plus a remainder factor.
+  vint m = _mm_srai_epi32(q, 31); // all ones where q is negative, else zero
+  m = _mm_slli_epi32(_mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(m, q), 9), m), 7); // coarse step: a multiple of 128 derived from q / 512
+  q = _mm_sub_epi32(q, _mm_slli_epi32(m, 2)); // remainder after taking 4 * m out of q
+  m = _mm_add_epi32(_mm_set_epi32(0x3ff, 0x3ff, 0x3ff, 0x3ff), m); // add the exponent bias
+  m = _mm_andnot_si128(_mm_cmplt_epi32(m, _mm_set_epi32(0, 0, 0, 0)), m); // negative biased exponents become 0
+  vint n = _mm_cmpgt_epi32(m, _mm_set_epi32(0x7ff, 0x7ff, 0x7ff, 0x7ff)); // lanes above the largest exponent field value
+  m = _mm_or_si128(_mm_andnot_si128(n, m), _mm_and_si128(n, _mm_set_epi32(0x7ff, 0x7ff, 0x7ff, 0x7ff))); // those lanes are clamped to 0x7ff
+  vint2 r = _mm256_slli_epi64(_mm256_cvtepi32_epi64(m), 52); // widen to 64 bits and move into the exponent field
+  vdouble y = (vdouble)_mm256_and_si256(r, _mm256_set_epi32(0xfff00000, 0, 0xfff00000, 0, 0xfff00000, 0, 0xfff00000, 0)); // y: double whose exponent field is m
+  return vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(x, y), y), y), y), vpow2i_vd_vi(q)); // x * y * y * y * y * vpow2i(q), multiplied one factor at a time
+}
+
+static INLINE vint vilogbp1_vi_vd(vdouble d) { // Per lane: (sign+exponent field of d) minus the bias 0x3fe; the sign bit is not masked off.
+  vint q, r, c;
+  vmask m = vlt_vm_vd_vd(d, vcast_vd_d(4.9090934652977266E-91)); // lanes below about 2^-300
+  d = vsel_vd_vm_vd_vd(m, vmul_vd_vd_vd(vcast_vd_d(2.037035976334486E90), d), d); // scale those lanes by about 2^300
+  c = _mm256_cvtpd_epi32(vsel_vd_vm_vd_vd(m, vcast_vd_d(300+0x3fe), vcast_vd_d(0x3fe))); // bias per lane, 300 larger where scaled
+  q = (__m128i)_mm256_castpd256_pd128(d); // doubles 0 and 1
+  q = (__m128i)_mm_shuffle_ps((__m128)q, _mm_set_ps(0, 0, 0, 0), _MM_SHUFFLE(0,0,3,1)); // their high 32-bit words go to elements 0,1; elements 2,3 are zero
+  r = (__m128i)_mm256_extractf128_pd(d, 1); // doubles 2 and 3
+  r = (__m128i)_mm_shuffle_ps(_mm_set_ps(0, 0, 0, 0), (__m128)r, _MM_SHUFFLE(3,1,0,0)); // their high words go to elements 2,3; elements 0,1 are zero
+  q = _mm_or_si128(q, r); // one high word per input lane
+  q = _mm_srli_epi32(q, 20); // discard the 20 mantissa bits held in the high word
+  q = _mm_sub_epi32(q, c);
+  return q;
+}
+
+static INLINE vdouble vupper_vd_vd(vdouble d) { // Keeps the upper part of each double: zeroes the low 27 bits of every 64-bit lane.
+  return (__m256d)_mm256_and_pd(d, (vdouble)_mm256_set_epi32(0xffffffff, 0xf8000000, 0xffffffff, 0xf8000000, 0xffffffff, 0xf8000000, 0xffffffff, 0xf8000000));
+}
+
+static INLINE vfloat vupper_vf_vf(vfloat d) { // Keeps the upper part of each float: zeroes the low 12 bits of every 32-bit lane.
+  return (vfloat)vand_vm_vm_vm((vmask)d, _mm256_set1_epi32(0xfffff000));
+}
diff --git a/simd/helperfma4.h b/simd/helperfma4.h
new file mode 100644
index 00000000..4e4f6099
--- /dev/null
+++ b/simd/helperfma4.h
@@ -0,0 +1,298 @@
+#ifndef __FMA4__
+#error Please specify -mfma4.
+#endif
+
+#include <x86intrin.h>
+#include <stdint.h>
+
+typedef __m256d vdouble; // four double-precision lanes
+typedef __m128i vint; // four 32-bit integer lanes, one per vdouble lane
+typedef __m256i vmask; // 256 bits used as a mask or raw bit pattern
+
+typedef __m256 vfloat; // eight single-precision lanes
+typedef struct { vint x, y; } vint2; // eight 32-bit integer lanes as two 128-bit halves: x = lower half, y = upper half
+
+#define ENABLE_FMA_DP // NOTE(review): presumably selects the fused multiply-add double-precision paths in the including code - confirm
+#define ENABLE_FMA_SP // NOTE(review): presumably the single-precision counterpart - confirm
+
+// Value conversions between double lanes and 32-bit integer lanes, plus scalar broadcasts.
+// vrint converts using the current rounding mode; vtruncate always rounds toward zero.
+static INLINE vint vrint_vi_vd(vdouble vd) { return _mm256_cvtpd_epi32(vd); }
+static INLINE vint vtruncate_vi_vd(vdouble vd) { return _mm256_cvttpd_epi32(vd); }
+static INLINE vdouble vcast_vd_vi(vint vi) { return _mm256_cvtepi32_pd(vi); }
+static INLINE vdouble vcast_vd_d(double d) { return _mm256_set_pd(d, d, d, d); }
+static INLINE vint vcast_vi_i(int i) { return _mm_set_epi32(i, i, i, i); }
+// Bit-pattern reinterpretations between the mask type and the floating-point vectors (no value conversion).
+static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return (__m256i)vd; }
+static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return (__m256d)vm;  }
+
+static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return (__m256i)vf; }
+static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return (__m256)vm; }
+
+// Element-wise operations on the four 32-bit integer lanes of a vint.
+// vandnot computes (~x) & y; vsll/vsrl are logical shifts and vsra is an arithmetic right shift.
+static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm_add_epi32(x, y); }
+static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm_sub_epi32(x, y); }
+static INLINE vint vneg_vi_vi(vint e) { return vsub_vi_vi_vi(vcast_vi_i(0), e); }
+
+static INLINE vint vand_vi_vi_vi(vint x, vint y) { return _mm_and_si128(x, y); }
+static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm_andnot_si128(x, y); }
+static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm_or_si128(x, y); }
+static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm_xor_si128(x, y); }
+
+static INLINE vint vsll_vi_vi_i(vint x, int c) { return _mm_slli_epi32(x, c); }
+static INLINE vint vsrl_vi_vi_i(vint x, int c) { return _mm_srli_epi32(x, c); }
+static INLINE vint vsra_vi_vi_i(vint x, int c) { return _mm_srai_epi32(x, c); }
+
+// Bitwise operations on whole 256-bit masks (vandnot is (~x) & y), followed by lane-wise comparisons.
+// A comparison yields all-ones in lanes where it holds and all-zeros elsewhere.
+static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return (vmask)_mm256_and_pd((__m256d)x, (__m256d)y); }
+static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return (vmask)_mm256_andnot_pd((__m256d)x, (__m256d)y); }
+static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return (vmask)_mm256_or_pd((__m256d)x, (__m256d)y); }
+static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return (vmask)_mm256_xor_pd((__m256d)x, (__m256d)y); }
+// Double comparisons: the ordered (_OQ) predicates are false when either operand is NaN; vneq (_CMP_NEQ_UQ) is true in that case.
+static INLINE vmask veq_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_EQ_OQ); }
+static INLINE vmask vneq_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_NEQ_UQ); }
+static INLINE vmask vlt_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_LT_OQ); }
+static INLINE vmask vle_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_LE_OQ); }
+static INLINE vmask vgt_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_GT_OQ); }
+static INLINE vmask vge_vm_vd_vd(vdouble x, vdouble y) { return (__m256i)_mm256_cmp_pd(x, y, _CMP_GE_OQ); }
+// Float comparisons: same predicates applied to the eight single-precision lanes.
+static INLINE vmask veq_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_EQ_OQ); }
+static INLINE vmask vneq_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_NEQ_UQ); }
+static INLINE vmask vlt_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_LT_OQ); }
+static INLINE vmask vle_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_LE_OQ); }
+static INLINE vmask vgt_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_GT_OQ); }
+static INLINE vmask vge_vm_vf_vf(vfloat x, vfloat y) { return (__m256i)_mm256_cmp_ps(x, y, _CMP_GE_OQ); }
+
+// Single-precision arithmetic on the eight lanes of a vfloat.
+// vcast_vf_f broadcasts one scalar to every lane; vrec is 1.0f / x computed with a real division.
+static INLINE vfloat vcast_vf_f(float f) { return _mm256_set_ps(f, f, f, f, f, f, f, f); }
+
+static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return _mm256_add_ps(x, y); }
+static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm256_sub_ps(x, y); }
+static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm256_mul_ps(x, y); }
+static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm256_div_ps(x, y); }
+static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); }
+static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm256_sqrt_ps(x); }
+static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm256_max_ps(x, y); }
+static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm256_min_ps(x, y); }
+// Multiply-add family mapped to FMA4: vmla = x*y + z, vmlapn = x*y - z, vmlanp = -(x*y) + z. vabs/vneg clear/flip the sign bit.
+static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_macc_ps(x, y, z); }
+static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_msub_ps(x, y, z); }
+static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_nmacc_ps(x, y, z); }
+static INLINE vfloat vabs_vf_vf(vfloat f) { return (vfloat)vandnot_vm_vm_vm((vmask)vcast_vf_f(-0.0f), (vmask)f); }
+static INLINE vfloat vneg_vf_vf(vfloat d) { return (vfloat)vxor_vm_vm_vm((vmask)vcast_vf_f(-0.0f), (vmask)d); }
+// Explicit fused forms; the suffix gives the signs of the product and of z: pp = x*y+z, pn = x*y-z, np = -(x*y)+z, nn = -(x*y)-z.
+static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_macc_ps(x, y, z); }
+static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_macc_ps(x, y, z); }
+static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_msub_ps(x, y, z); }
+static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_nmacc_ps(x, y, z); }
+static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_nmsub_ps(x, y, z); }
+
+// Double-precision arithmetic on the four lanes of a vdouble.
+// vabs/vneg clear/flip the sign bit; vmla = x*y + z and vmlapn = x*y - z (FMA4 instructions).
+static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm256_add_pd(x, y); }
+static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return _mm256_sub_pd(x, y); }
+static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm256_mul_pd(x, y); }
+static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return _mm256_div_pd(x, y); }
+static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm256_div_pd(_mm256_set_pd(1, 1, 1, 1), x); }
+static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm256_sqrt_pd(x); }
+static INLINE vdouble vabs_vd_vd(vdouble d) { return (__m256d)_mm256_andnot_pd(_mm256_set_pd(-0.0,-0.0,-0.0,-0.0), d); }
+static INLINE vdouble vneg_vd_vd(vdouble d) { return (__m256d)_mm256_xor_pd(_mm256_set_pd(-0.0,-0.0,-0.0,-0.0), d); }
+static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_macc_pd(x, y, z); }
+static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_msub_pd(x, y, z); }
+
+static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm256_max_pd(x, y); }
+static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm256_min_pd(x, y); }
+// Explicit fused forms; the suffix gives the signs of the product and of z: pp = x*y+z, pn = x*y-z, np = -(x*y)+z, nn = -(x*y)-z.
+static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_macc_pd(x, y, z); }
+static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_macc_pd(x, y, z); }
+static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_msub_pd(x, y, z); }
+static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_nmacc_pd(x, y, z); }
+static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_nmsub_pd(x, y, z); }
+
+//
+
+static INLINE vmask veq_vm_vi_vi(vint x, vint y) { // 32-bit equality, returned as a mask over the four double lanes
+  vint one_if_eq = _mm_and_si128(_mm_cmpeq_epi32(x, y), _mm_set1_epi32(1)); // 1 where equal, else 0
+  return veq_vm_vd_vd(_mm256_cvtepi32_pd(one_if_eq), _mm256_set1_pd(1)); // compare as doubles so each lane's mask is 64 bits wide
+}
+
+static INLINE vdouble vsel_vd_vm_vd_vd(vmask mask, vdouble x, vdouble y) { // bitwise select: bits of x where mask is set, bits of y elsewhere
+  return (__m256d)vor_vm_vm_vm(vandnot_vm_vm_vm(mask, (__m256i)y), vand_vm_vm_vm(mask, (__m256i)x));
+}
+
+static INLINE vfloat vsel_vf_vm_vf_vf(vmask mask, vfloat x, vfloat y) { // bitwise select: bits of x where mask is set, bits of y elsewhere
+  return (vfloat)vor_vm_vm_vm(vandnot_vm_vm_vm(mask, (vmask)y), vand_vm_vm_vm(mask, (vmask)x));
+}
+
+static INLINE vint vsel_vi_vd_vd_vi_vi(vdouble d0, vdouble d1, vint x, vint y) { // per lane: x where d0 is less than d1, otherwise y (also y when unordered)
+  vdouble one_if_less = _mm256_and_pd(_mm256_cmp_pd(d0, d1, _CMP_LT_OQ), _mm256_set1_pd(1.0)); // 1.0 where the comparison holds, else 0.0
+  vint pick = _mm_cmpeq_epi32(_mm256_cvtpd_epi32(one_if_less), _mm_set1_epi32(1)); // narrowed to an all-ones / all-zeros 32-bit mask
+  return vor_vi_vi_vi(vandnot_vi_vi_vi(pick, y), vand_vi_vi_vi(pick, x));
+}
+
+//
+
+static INLINE vint2 vcast_vi2_vm(vmask vm) { // splits a 256-bit value into its two 128-bit halves
+  vint2 halves;
+  halves.y = _mm256_extractf128_si256(vm, 1); // upper 128 bits
+  halves.x = _mm256_castsi256_si128(vm); // lower 128 bits
+  return halves;
+}
+
+static INLINE vmask vcast_vm_vi2(vint2 vi) { // joins the two 128-bit halves into one 256-bit value
+  vmask low_only = _mm256_castsi128_si256(vi.x); // x occupies the lower half
+  vmask joined = _mm256_insertf128_si256(low_only, vi.y, 1); // y is written into the upper half
+  return joined;
+}
+
+static INLINE vint2 vrint_vi2_vf(vfloat vf) { return vcast_vi2_vm((vmask)_mm256_cvtps_epi32(vf)); } // float to int, current rounding mode
+static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcast_vi2_vm((vmask)_mm256_cvttps_epi32(vf)); } // float to int, toward zero
+static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm256_cvtepi32_ps((vmask)vcast_vm_vi2(vi)); }
+static INLINE vint2 vcast_vi2_i(int i) { vint2 r; r.x = r.y = vcast_vi_i(i); return r; }
+// Integer arithmetic on vint2: each operation is applied to the x and y halves separately with the 128-bit vint helpers.
+static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = vadd_vi_vi_vi(x.x, y.x); r.y = vadd_vi_vi_vi(x.y, y.y); return r; }
+static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = vsub_vi_vi_vi(x.x, y.x); r.y = vsub_vi_vi_vi(x.y, y.y); return r; }
+static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vsub_vi2_vi2_vi2(vcast_vi2_i(0), e); }
+// Bitwise operations, again half by half; vandnot is (~x) & y.
+static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = vand_vi_vi_vi(x.x, y.x); r.y = vand_vi_vi_vi(x.y, y.y); return r; }
+static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = vandnot_vi_vi_vi(x.x, y.x); r.y = vandnot_vi_vi_vi(x.y, y.y); return r; }
+static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = vor_vi_vi_vi(x.x, y.x); r.y = vor_vi_vi_vi(x.y, y.y); return r; }
+static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = vxor_vi_vi_vi(x.x, y.x); r.y = vxor_vi_vi_vi(x.y, y.y); return r; }
+// Shifts by c bits on both halves: vsll/vsrl are logical, vsra is arithmetic.
+static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { vint2 r; r.x = vsll_vi_vi_i(x.x, c); r.y = vsll_vi_vi_i(x.y, c); return r; }
+static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { vint2 r; r.x = vsrl_vi_vi_i(x.x, c); r.y = vsrl_vi_vi_i(x.y, c); return r; }
+static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { vint2 r; r.x = vsra_vi_vi_i(x.x, c); r.y = vsra_vi_vi_i(x.y, c); return r; }
+
+static INLINE vmask veq_vm_vi2_vi2(vint2 x, vint2 y) { // lane-wise 32-bit equality over both halves, returned as a 256-bit mask
+  vint2 eq;
+  eq.y = _mm_cmpeq_epi32(x.y, y.y);
+  eq.x = _mm_cmpeq_epi32(x.x, y.x);
+  return vcast_vm_vi2(eq);
+}
+
+static INLINE vmask vgt_vm_vi2_vi2(vint2 x, vint2 y) { // lane-wise signed 32-bit greater-than over both halves, returned as a 256-bit mask
+  vint2 gt;
+  gt.y = _mm_cmpgt_epi32(x.y, y.y);
+  gt.x = _mm_cmpgt_epi32(x.x, y.x);
+  return vcast_vm_vi2(gt);
+}
+
+static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { // same comparison as vgt_vm_vi2_vi2, but the result stays in vint2 form
+  vint2 gt;
+  gt.y = _mm_cmpgt_epi32(x.y, y.y);
+  gt.x = _mm_cmpgt_epi32(x.x, y.x);
+  return gt;
+}
+
+static INLINE vint2 vsel_vi2_vm_vi2_vi2(vmask m, vint2 x, vint2 y) { // bitwise select per half: bits of x where m is set, bits of y elsewhere
+  vint2 sel = vcast_vi2_vm(m), out;
+  out.x = vor_vi_vi_vi(vandnot_vi_vi_vi(sel.x, y.x), vand_vi_vi_vi(sel.x, x.x));
+  out.y = vor_vi_vi_vi(vandnot_vi_vi_vi(sel.y, y.y), vand_vi_vi_vi(sel.y, x.y));
+  return out;
+}
+
+//
+
+static INLINE double vcast_d_vd(vdouble v) { // returns lane 0 (the lowest element) as a scalar
+  double lanes[4];
+  _mm256_storeu_pd(&lanes[0], v); // unaligned store of all four lanes
+  return lanes[0];
+}
+
+static INLINE float vcast_f_vf(vfloat v) { // returns lane 0 (the lowest element) as a scalar
+  float lanes[8];
+  _mm256_storeu_ps(&lanes[0], v); // unaligned store of all eight lanes
+  return lanes[0];
+}
+
+static INLINE vmask vsignbit_vm_vd(vdouble d) { // keeps only the sign bit of each double lane
+  return (vmask)_mm256_and_pd(d, _mm256_set1_pd(-0.0));
+}
+
+static INLINE vdouble vsign_vd_vd(vdouble d) { // 1.0 carrying the sign bit of d in each lane
+  return _mm256_or_pd(_mm256_set1_pd(1.0), (vdouble)vsignbit_vm_vd(d));
+}
+
+static INLINE vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) { // flips the sign of x in lanes where the sign bit of y is set
+  return (__m256d)vxor_vm_vm_vm(vsignbit_vm_vd(y), (__m256i)x);
+}
+
+static INLINE vmask visinf_vm_vd(vdouble d) { // mask of lanes holding +infinity or -infinity
+  return (vmask)_mm256_cmp_pd(vabs_vd_vd(d), _mm256_set1_pd(INFINITY), _CMP_EQ_OQ);
+}
+
+static INLINE vmask vispinf_vm_vd(vdouble d) { // mask of lanes holding +infinity
+  return (vmask)_mm256_cmp_pd(d, _mm256_set1_pd(INFINITY), _CMP_EQ_OQ);
+}
+
+static INLINE vmask visminf_vm_vd(vdouble d) { // mask of lanes holding -infinity
+  return (vmask)_mm256_cmp_pd(d, _mm256_set1_pd(-INFINITY), _CMP_EQ_OQ);
+}
+
+static INLINE vmask visnan_vm_vd(vdouble d) { // mask of lanes holding NaN (a value is unordered with itself only when it is NaN)
+  return (vmask)_mm256_cmp_pd(d, d, _CMP_UNORD_Q);
+}
+
+static INLINE vdouble visinf(vdouble d) { // +1.0 or -1.0 (sign of d) in infinite lanes, +0.0 in all other lanes
+  return _mm256_and_pd(vsign_vd_vd(d), (vdouble)visinf_vm_vd(d));
+}
+
+static INLINE vdouble visinf2(vdouble d, vdouble m) { // in infinite lanes: bits of m OR-ed with the sign bit of d; all other lanes are zero
+  return _mm256_and_pd(_mm256_or_pd(m, (vdouble)vsignbit_vm_vd(d)), (vdouble)visinf_vm_vd(d));
+}
+
+static INLINE vdouble vpow2i_vd_vi(vint q) { // Per lane: a double whose exponent field is q + 0x3ff, i.e. 2^q while that sum fits the field.
+  vint r;
+  vdouble y;
+  q = _mm_add_epi32(_mm_set_epi32(0x3ff, 0x3ff, 0x3ff, 0x3ff), q); // add the exponent bias
+  q = _mm_slli_epi32(q, 20); // move it to bits 20..31, where the exponent sits inside a double's high 32-bit word
+  r = (__m128i)_mm_shuffle_ps((__m128)q, (__m128)q, _MM_SHUFFLE(1,0,0,0)); // elements q0,q0,q0,q1: high words of doubles 0 and 1 hold q0 and q1
+  y = _mm256_castpd128_pd256((__m128d)r);
+  r = (__m128i)_mm_shuffle_ps((__m128)q, (__m128)q, _MM_SHUFFLE(3,2,2,2)); // elements q2,q2,q2,q3: the same layout for doubles 2 and 3
+  y = _mm256_insertf128_pd(y, (__m128d)r, 1); // upper half of the result
+  y = _mm256_and_pd(y, (__m256d)_mm256_set_epi32(0xfff00000, 0, 0xfff00000, 0, 0xfff00000, 0, 0xfff00000, 0)); // clear the duplicated low words and the mantissa bits
+  return y;
+}
+
+static INLINE vdouble vldexp_vd_vd_vi(vdouble x, vint q) { // Scales each lane of x by a power of two taken from q, applied as four equal coarse factors plus a remainder factor.
+  vint m = _mm_srai_epi32(q, 31); // all ones where q is negative, else zero
+  m = _mm_slli_epi32(_mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(m, q), 9), m), 7); // coarse step: a multiple of 128 derived from q / 512
+  q = _mm_sub_epi32(q, _mm_slli_epi32(m, 2)); // remainder after taking 4 * m out of q
+  m = _mm_add_epi32(_mm_set_epi32(0x3ff, 0x3ff, 0x3ff, 0x3ff), m); // add the exponent bias
+  m = _mm_andnot_si128(_mm_cmplt_epi32(m, _mm_set_epi32(0, 0, 0, 0)), m); // negative biased exponents become 0
+  vint n = _mm_cmpgt_epi32(m, _mm_set_epi32(0x7ff, 0x7ff, 0x7ff, 0x7ff)); // lanes above the largest exponent field value
+  m = _mm_or_si128(_mm_andnot_si128(n, m), _mm_and_si128(n, _mm_set_epi32(0x7ff, 0x7ff, 0x7ff, 0x7ff))); // those lanes are clamped to 0x7ff
+  m = _mm_slli_epi32(m, 20); // exponent position inside a double's high 32-bit word
+  vint r = (__m128i)_mm_shuffle_ps((__m128)m, (__m128)m, _MM_SHUFFLE(1,0,0,0)); // elements m0,m0,m0,m1: high words of doubles 0 and 1
+  vdouble y = _mm256_castpd128_pd256((__m128d)r);
+  r = (__m128i)_mm_shuffle_ps((__m128)m, (__m128)m, _MM_SHUFFLE(3,2,2,2)); // elements m2,m2,m2,m3: high words of doubles 2 and 3
+  y = _mm256_insertf128_pd(y, (__m128d)r, 1);
+  y = _mm256_and_pd(y, (__m256d)_mm256_set_epi32(0xfff00000, 0, 0xfff00000, 0, 0xfff00000, 0, 0xfff00000, 0)); // y: double whose exponent field is m
+  return vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(x, y), y), y), y), vpow2i_vd_vi(q)); // x * y * y * y * y * vpow2i(q), multiplied one factor at a time
+}
+
+static INLINE vint vilogbp1_vi_vd(vdouble d) { // Per lane: (sign+exponent field of d) minus the bias 0x3fe; the sign bit is not masked off.
+  vint q, r, c;
+  vmask m = vlt_vm_vd_vd(d, vcast_vd_d(4.9090934652977266E-91)); // lanes below about 2^-300
+  d = vsel_vd_vm_vd_vd(m, vmul_vd_vd_vd(vcast_vd_d(2.037035976334486E90), d), d); // scale those lanes by about 2^300
+  c = _mm256_cvtpd_epi32(vsel_vd_vm_vd_vd(m, vcast_vd_d(300+0x3fe), vcast_vd_d(0x3fe))); // bias per lane, 300 larger where scaled
+  q = (__m128i)_mm256_castpd256_pd128(d); // doubles 0 and 1
+  q = (__m128i)_mm_shuffle_ps((__m128)q, _mm_set_ps(0, 0, 0, 0), _MM_SHUFFLE(0,0,3,1)); // their high 32-bit words go to elements 0,1; elements 2,3 are zero
+  r = (__m128i)_mm256_extractf128_pd(d, 1); // doubles 2 and 3
+  r = (__m128i)_mm_shuffle_ps(_mm_set_ps(0, 0, 0, 0), (__m128)r, _MM_SHUFFLE(3,1,0,0)); // their high words go to elements 2,3; elements 0,1 are zero
+  q = _mm_or_si128(q, r); // one high word per input lane
+  q = _mm_srli_epi32(q, 20); // discard the 20 mantissa bits held in the high word
+  q = _mm_sub_epi32(q, c);
+  return q;
+}
+
+static INLINE vdouble vupper_vd_vd(vdouble d) { // Keeps the upper part of each double: zeroes the low 27 bits of every 64-bit lane.
+  return (__m256d)_mm256_and_pd(d, (vdouble)_mm256_set_epi32(0xffffffff, 0xf8000000, 0xffffffff, 0xf8000000, 0xffffffff, 0xf8000000, 0xffffffff, 0xf8000000));
+}
+
+static INLINE vfloat vupper_vf_vf(vfloat d) { // Keeps the upper part of each float: zeroes the low 12 bits of every 32-bit lane.
+  return (vfloat)vand_vm_vm_vm((vmask)d, _mm256_set1_epi32(0xfffff000));
+}
diff --git a/simd/helperneon.h b/simd/helperneon.h
new file mode 100644
index 00000000..d1248db6
--- /dev/null
+++ b/simd/helperneon.h
@@ -0,0 +1,145 @@
+#ifndef __ARM_NEON__
+#error Please specify -mfpu=neon.
+#endif
+
+#include <arm_neon.h>
+#include <stdint.h>
+
+typedef int32x4_t vint;
+typedef uint32x4_t vmask;
+
+typedef float32x4_t vfloat;
+typedef int32x4_t vint2;
+
+//
+
+static INLINE vint vcast_vi_i(int i) { return vdupq_n_s32(i); }
+
+static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return (vmask)vf; }
+static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return (vfloat)vm; }
+
+//
+
+static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return vaddq_s32(x, y); }
+static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return vsubq_s32(x, y); }
+static INLINE vint vneg_vi_vi(vint e) { return vnegq_s32(e); }
+
+static INLINE vint vand_vi_vi_vi(vint x, vint y) { return vandq_s32(x, y); }
+static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return vbicq_s32(y, x); } // vbicq(a, b) is a & ~b, so this yields ~x & y
+static INLINE vint vor_vi_vi_vi(vint x, vint y) { return vorrq_s32(x, y); }
+static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return veorq_s32(x, y); }
+
+static INLINE vint vsll_vi_vi_i(vint x, int c) { return (int32x4_t) vshlq_n_u32((uint32x4_t)x, c); }
+static INLINE vint vsrl_vi_vi_i(vint x, int c) { return (int32x4_t) vshrq_n_u32((uint32x4_t)x, c); }
+static INLINE vint vsra_vi_vi_i(vint x, int c) { return vshrq_n_s32(x, c); }
+
+//
+
+static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return vandq_u32(x, y); }
+static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return vbicq_u32(y, x); } // vbicq(a, b) is a & ~b, so this yields ~x & y
+static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return vorrq_u32(x, y); }
+static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return veorq_u32(x, y); }
+
+static INLINE vmask veq_vm_vf_vf(vfloat x, vfloat y) { return vceqq_f32(x, y); }
+static INLINE vmask vneq_vm_vf_vf(vfloat x, vfloat y) { return vmvnq_u32(vceqq_f32(x, y)); }
+static INLINE vmask vlt_vm_vf_vf(vfloat x, vfloat y) { return vcltq_f32(x, y); }
+static INLINE vmask vle_vm_vf_vf(vfloat x, vfloat y) { return vcleq_f32(x, y); }
+static INLINE vmask vgt_vm_vf_vf(vfloat x, vfloat y) { return vcgtq_f32(x, y); }
+static INLINE vmask vge_vm_vf_vf(vfloat x, vfloat y) { return vcgeq_f32(x, y); }
+
+//
+
+static INLINE vfloat vcast_vf_f(float f) { return vdupq_n_f32(f); }
+
+static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return vaddq_f32(x, y); }
+static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return vsubq_f32(x, y); }
+static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return vmulq_f32(x, y); }
+static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmlaq_f32(z, x, y); } // x*y + z (accumulator is the first vmlaq operand)
+static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmlsq_f32(z, x, y); } // z - x*y
+
+static INLINE vfloat vabs_vf_vf(vfloat f) { return vabsq_f32(f); }
+static INLINE vfloat vneg_vf_vf(vfloat f) { return vnegq_f32(f); }
+static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return vmaxq_f32(x, y); }
+static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return vminq_f32(x, y); }
+
+static INLINE vfloat vsel_vf_vm_vf_vf(vmask mask, vfloat x, vfloat y) { // bitwise select: bits of x where mask is 1, bits of y where it is 0
+  return (vfloat)vbslq_u32(mask, (vmask)x, (vmask)y);
+}
+
+static INLINE vfloat vrec_vf_vf(vfloat d) { // approximate 1/d: hardware estimate followed by two refinement steps
+  float32x4_t x = vrecpeq_f32(d); // initial reciprocal estimate
+  x = vmulq_f32(x, vrecpsq_f32(d, x)); // Newton-Raphson step (vrecpsq_f32 yields 2 - d*x)
+  return vmlsq_f32(vaddq_f32(x, x), vmulq_f32(x, x), d); // 2x - x*x*d
+}
+
+static INLINE vfloat vdiv_vf_vf_vf(vfloat n, vfloat d) { // approximate n/d via a refined reciprocal of d
+  float32x4_t x = vrecpeq_f32(d); // initial 1/d estimate
+  x = vmulq_f32(x, vrecpsq_f32(d, x)); // Newton-Raphson step (vrecpsq_f32 yields 2 - d*x)
+  float32x4_t t = vmulq_f32(n, x); // first quotient approximation n * (1/d)
+  return vmlsq_f32(vaddq_f32(t, t), vmulq_f32(t, x), d); // 2t - t*x*d
+}
+
+static INLINE vfloat vsqrt_vf_vf(vfloat d) { // approximate sqrt(d) built from the reciprocal-square-root estimate
+  float32x4_t x = vrsqrteq_f32(d); // initial 1/sqrt(d) estimate
+  x = vmulq_f32(x, vrsqrtsq_f32(d, vmulq_f32(x, x))); // Newton-Raphson step (vrsqrtsq_f32 yields (3 - d*x*x)/2)
+  float32x4_t u = vmulq_f32(x, d); // u = d * (1/sqrt(d)), a first sqrt(d) approximation
+  u = vmlaq_f32(u, vmlsq_f32(d, u, u), vmulq_f32(x, vdupq_n_f32(0.5))); // u + (d - u*u) * (x/2)
+  return (float32x4_t)vbicq_u32((uint32x4_t)u, vceqq_f32(d, vdupq_n_f32(0.0f))); // clear all bits (+0.0) in lanes where d == 0
+}
+
+static INLINE vfloat vrecsqrt_vf_vf(vfloat d) { // approximate 1/sqrt(d): estimate followed by two refinement steps
+  float32x4_t x = vrsqrteq_f32(d); // initial 1/sqrt(d) estimate
+  x = vmulq_f32(x, vrsqrtsq_f32(d, vmulq_f32(x, x))); // Newton-Raphson step (vrsqrtsq_f32 yields (3 - d*x*x)/2)
+  return vmlaq_f32(x, vmlsq_f32(vdupq_n_f32(1), x, vmulq_f32(x, d)), vmulq_f32(x, vdupq_n_f32(0.5))); // x + (1 - x*x*d) * (x/2)
+}
+
+#define ENABLE_RECSQRT_SP
+
+//
+
+static INLINE vmask veq_vm_vi_vi(vint x, vint y) { return vceqq_s32(x, y); }
+
+//
+
+static INLINE vint2 vcast_vi2_vm(vmask vm) { return (vint2)vm; }
+static INLINE vmask vcast_vm_vi2(vint2 vi) { return (vmask)vi; }
+
+static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcvtq_s32_f32(vf); }
+
+static INLINE vint2 vrint_vi2_vf(vfloat d) { // rounds by adding 0.5 carrying d's sign bit, then converting with vcvtq_s32_f32
+  //return vcvtq_s32_f32(vrndqn_f32(d));
+  return vcvtq_s32_f32(vaddq_f32(d, (float32x4_t)vorrq_u32(vandq_u32((uint32x4_t)d, (uint32x4_t)vdupq_n_f32(-0.0f)), (uint32x4_t)vdupq_n_f32(0.5f))));
+}
+
+static INLINE vfloat vcast_vf_vi2(vint2 vi) { return vcvtq_f32_s32(vi); }
+static INLINE vint2 vcast_vi2_i(int i) { return vdupq_n_s32(i); }
+
+static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return vadd_vi_vi_vi(x, y); }
+static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return vsub_vi_vi_vi(x, y); }
+static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vneg_vi_vi(e); } // per-lane negation; returns vint2 like the other *_vi2 helpers
+
+static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return vand_vi_vi_vi(x, y); }
+static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return vandnot_vi_vi_vi(x, y); }
+static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return vor_vi_vi_vi(x, y); }
+static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return vxor_vi_vi_vi(x, y); }
+
+static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { return vsll_vi_vi_i(x, c); }
+static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { return vsrl_vi_vi_i(x, c); }
+static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { return vsra_vi_vi_i(x, c); }
+
+static INLINE vmask veq_vm_vi2_vi2(vint2 x, vint2 y) { return vceqq_s32(x, y); }
+static INLINE vmask vgt_vm_vi2_vi2(vint2 x, vint2 y) { return vcgtq_s32(x, y); } // strict x > y per lane; equal lanes must yield 0
+static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return (vint2)vcgtq_s32(x, y); } // strict x > y per lane, mask returned as vint2
+static INLINE vint2 vsel_vi2_vm_vi2_vi2(vmask m, vint2 x, vint2 y) { return (vint2)vbslq_u32(m, (vmask)x, (vmask)y); }
+
+//
+
+static INLINE float vcast_f_vf(vfloat v) {
+  float lanes[4]; // scratch copy of all four lanes
+  vst1q_f32(lanes, v);
+  return *lanes; // lane 0
+}
+
+static INLINE vfloat vupper_vf_vf(vfloat d) { // clears the low 12 bits of every 32-bit lane
+  return (vfloat)vandq_s32((vint)d, vdupq_n_s32(0xfffff000));
+}
diff --git a/simd/helpersse2.h b/simd/helpersse2.h
new file mode 100644
index 00000000..0801b496
--- /dev/null
+++ b/simd/helpersse2.h
@@ -0,0 +1,235 @@
+#ifndef __SSE2__
+#error Please specify -msse2.
+#endif
+
+#include <immintrin.h>
+#include <stdint.h>
+
+typedef __m128d vdouble;
+typedef __m128i vint;
+typedef __m128i vmask;
+
+typedef __m128 vfloat;
+typedef __m128i vint2;
+
+//
+
+static INLINE vint vrint_vi_vd(vdouble vd) { return _mm_cvtpd_epi32(vd); }
+static INLINE vint vtruncate_vi_vd(vdouble vd) { return _mm_cvttpd_epi32(vd); }
+static INLINE vdouble vcast_vd_vi(vint vi) { return _mm_cvtepi32_pd(vi); }
+static INLINE vdouble vcast_vd_d(double d) { return _mm_set_pd(d, d); }
+static INLINE vint vcast_vi_i(int i) { return _mm_set_epi32(0, 0, i, i); }
+
+static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return (__m128i)vd; }
+static INLINE vdouble vreinterpret_vd_vm(vint vm) { return (__m128d)vm; }
+
+static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return (__m128i)vf; }
+static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return (__m128)vm; }
+
+//
+
+static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm_add_epi32(x, y); }
+static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm_sub_epi32(x, y); }
+static INLINE vint vneg_vi_vi(vint e) { return vsub_vi_vi_vi(vcast_vi_i(0), e); }
+
+static INLINE vint vand_vi_vi_vi(vint x, vint y) { return _mm_and_si128(x, y); }
+static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm_andnot_si128(x, y); }
+static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm_or_si128(x, y); }
+static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm_xor_si128(x, y); }
+
+static INLINE vint vsll_vi_vi_i(vint x, int c) { return _mm_slli_epi32(x, c); }
+static INLINE vint vsrl_vi_vi_i(vint x, int c) { return _mm_srli_epi32(x, c); }
+static INLINE vint vsra_vi_vi_i(vint x, int c) { return _mm_srai_epi32(x, c); }
+
+//
+
+static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return _mm_and_si128(x, y); }
+static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return _mm_andnot_si128(x, y); }
+static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return _mm_or_si128(x, y); }
+static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return _mm_xor_si128(x, y); }
+
+static INLINE vmask veq_vm_vd_vd(vdouble x, vdouble y) { return (__m128i)_mm_cmpeq_pd(x, y); }
+static INLINE vmask vneq_vm_vd_vd(vdouble x, vdouble y) { return (__m128i)_mm_cmpneq_pd(x, y); }
+static INLINE vmask vlt_vm_vd_vd(vdouble x, vdouble y) { return (__m128i)_mm_cmplt_pd(x, y); }
+static INLINE vmask vle_vm_vd_vd(vdouble x, vdouble y) { return (__m128i)_mm_cmple_pd(x, y); }
+static INLINE vmask vgt_vm_vd_vd(vdouble x, vdouble y) { return (__m128i)_mm_cmpgt_pd(x, y); }
+static INLINE vmask vge_vm_vd_vd(vdouble x, vdouble y) { return (__m128i)_mm_cmpge_pd(x, y); }
+
+static INLINE vmask veq_vm_vf_vf(vfloat x, vfloat y) { return (__m128i)_mm_cmpeq_ps(x, y); }
+static INLINE vmask vneq_vm_vf_vf(vfloat x, vfloat y) { return (__m128i)_mm_cmpneq_ps(x, y); }
+static INLINE vmask vlt_vm_vf_vf(vfloat x, vfloat y) { return (__m128i)_mm_cmplt_ps(x, y); }
+static INLINE vmask vle_vm_vf_vf(vfloat x, vfloat y) { return (__m128i)_mm_cmple_ps(x, y); }
+static INLINE vmask vgt_vm_vf_vf(vfloat x, vfloat y) { return (__m128i)_mm_cmpgt_ps(x, y); }
+static INLINE vmask vge_vm_vf_vf(vfloat x, vfloat y) { return (__m128i)_mm_cmpge_ps(x, y); }
+
+//
+
+static INLINE vfloat vcast_vf_f(float f) { return _mm_set_ps(f, f, f, f); }
+
+static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return _mm_add_ps(x, y); }
+static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm_sub_ps(x, y); }
+static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm_mul_ps(x, y); }
+static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm_div_ps(x, y); }
+static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); }
+static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm_sqrt_ps(x); }
+static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm_max_ps(x, y); }
+static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm_min_ps(x, y); }
+
+static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); }
+static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); }
+static INLINE vfloat vabs_vf_vf(vfloat f) { return (vfloat)vandnot_vm_vm_vm((vmask)vcast_vf_f(-0.0f), (vmask)f); }
+static INLINE vfloat vneg_vf_vf(vfloat d) { return (vfloat)vxor_vm_vm_vm((vmask)vcast_vf_f(-0.0f), (vmask)d); }
+
+//
+
+static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm_add_pd(x, y); }
+static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return _mm_sub_pd(x, y); }
+static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm_mul_pd(x, y); }
+static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return _mm_div_pd(x, y); }
+static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm_div_pd(_mm_set_pd(1, 1), x); }
+static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm_sqrt_pd(x); }
+static INLINE vdouble vabs_vd_vd(vdouble d) { return (__m128d)_mm_andnot_pd(_mm_set_pd(-0.0,-0.0), d); }
+static INLINE vdouble vneg_vd_vd(vdouble d) { return (__m128d)_mm_xor_pd(_mm_set_pd(-0.0,-0.0), d); }
+static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
+static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
+
+static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm_max_pd(x, y); }
+static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm_min_pd(x, y); }
+
+//
+
+static INLINE vmask veq_vm_vi_vi(vint x, vint y) { // 32-bit equality of lanes 0,1 widened into two 64-bit-lane masks
+  __m128 s = (__m128)_mm_cmpeq_epi32(x, y);
+  return (__m128i)_mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 0, 0)); // result words: s0, s0, s1, s1
+}
+
+static INLINE vdouble vsel_vd_vm_vd_vd(vmask mask, vdouble x, vdouble y) { // bitwise select: (mask & x) | (~mask & y)
+  return (__m128d)vor_vm_vm_vm(vand_vm_vm_vm(mask, (__m128i)x), vandnot_vm_vm_vm(mask, (__m128i)y));
+}
+
+static INLINE vfloat vsel_vf_vm_vf_vf(vmask mask, vfloat x, vfloat y) { // bitwise select: (mask & x) | (~mask & y)
+  return (vfloat)vor_vm_vm_vm(vand_vm_vm_vm(mask, (vmask)x), vandnot_vm_vm_vm(mask, (vmask)y));
+}
+
+static INLINE vint vsel_vi_vd_vd_vi_vi(vdouble d0, vdouble d1, vint x, vint y) { // int lanes 0,1: x where d0 < d1, otherwise y
+  vmask mask = (vmask)_mm_cmpeq_ps(_mm_cvtpd_ps((vdouble)vlt_vm_vd_vd(d0, d1)), _mm_set_ps(0, 0, 0, 0)); // an all-ones "true" lane converts to NaN, so mask is set where d0 < d1 is FALSE
+  return vor_vi_vi_vi(vandnot_vi_vi_vi(mask, x), vand_vi_vi_vi(mask, y)); // (~mask & x) | (mask & y)
+}
+
+//
+
+static INLINE vint2 vcast_vi2_vm(vmask vm) { return (vint2)vm; }
+static INLINE vmask vcast_vm_vi2(vint2 vi) { return (vmask)vi; }
+
+static INLINE vint2 vrint_vi2_vf(vfloat vf) { return _mm_cvtps_epi32(vf); }
+static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return _mm_cvttps_epi32(vf); }
+static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm_cvtepi32_ps(vcast_vm_vi2(vi)); }
+static INLINE vint2 vcast_vi2_i(int i) { return _mm_set_epi32(i, i, i, i); }
+
+static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return vadd_vi_vi_vi(x, y); }
+static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return vsub_vi_vi_vi(x, y); }
+static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vsub_vi2_vi2_vi2(vcast_vi2_i(0), e); } // 0 - e per lane; returns vint2 like the other *_vi2 helpers
+
+static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return vand_vi_vi_vi(x, y); }
+static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return vandnot_vi_vi_vi(x, y); }
+static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return vor_vi_vi_vi(x, y); }
+static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return vxor_vi_vi_vi(x, y); }
+
+static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { return vsll_vi_vi_i(x, c); }
+static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { return vsrl_vi_vi_i(x, c); }
+static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { return vsra_vi_vi_i(x, c); }
+
+static INLINE vmask veq_vm_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpeq_epi32(x, y); }
+static INLINE vmask vgt_vm_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpgt_epi32(x, y); }
+static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpgt_epi32(x, y); }
+static INLINE vint2 vsel_vi2_vm_vi2_vi2(vmask m, vint2 x, vint2 y) { return vor_vm_vm_vm(vand_vm_vm_vm(m, x), vandnot_vm_vm_vm(m, y)); }
+
+//
+
+static INLINE double vcast_d_vd(vdouble v) {
+  double lanes[2]; // scratch copy of both lanes
+  _mm_storeu_pd(lanes, v);
+  return *lanes; // lane 0
+}
+
+static INLINE float vcast_f_vf(vfloat v) {
+  float lanes[4]; // scratch copy of all four lanes
+  _mm_storeu_ps(lanes, v);
+  return *lanes; // lane 0
+}
+
+static INLINE vmask vsignbit_vm_vd(vdouble d) { // isolates bit 63 (the sign bit) of each double
+  return _mm_and_si128((__m128i)d, _mm_set_epi32(0x80000000, 0x0, 0x80000000, 0x0));
+}
+
+static INLINE vdouble vsign_vd_vd(vdouble d) { // bit pattern of 1.0 OR'ed with d's sign bit: +1.0 or -1.0 per lane
+  return (__m128d)_mm_or_si128((__m128i)_mm_set_pd(1, 1), _mm_and_si128((__m128i)d, _mm_set_epi32(0x80000000, 0x0, 0x80000000, 0x0)));
+}
+
+static INLINE vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) { // flips the sign of x in lanes where y has its sign bit set
+  return (__m128d)vxor_vi_vi_vi((__m128i)x, vsignbit_vm_vd(y));
+}
+
+static INLINE vmask visinf_vm_vd(vdouble d) { // mask of lanes where |d| compares equal to INFINITY
+  return (vmask)_mm_cmpeq_pd(vabs_vd_vd(d), _mm_set_pd(INFINITY, INFINITY));
+}
+
+static INLINE vmask vispinf_vm_vd(vdouble d) { // mask of lanes equal to +INFINITY
+  return (vmask)_mm_cmpeq_pd(d, _mm_set_pd(INFINITY, INFINITY));
+}
+
+static INLINE vmask visminf_vm_vd(vdouble d) { // mask of lanes equal to -INFINITY
+  return (vmask)_mm_cmpeq_pd(d, _mm_set_pd(-INFINITY, -INFINITY));
+}
+
+static INLINE vmask visnan_vm_vd(vdouble d) { // d != d holds only in NaN lanes
+  return (vmask)_mm_cmpneq_pd(d, d);
+}
+
+static INLINE vdouble visinf(vdouble d) { // 1.0 carrying d's sign in infinite lanes, all-zero bits (+0.0) elsewhere
+  return (__m128d)_mm_and_si128(visinf_vm_vd(d), _mm_or_si128(vsignbit_vm_vd(d), (__m128i)_mm_set_pd(1, 1)));
+}
+
+static INLINE vdouble visinf2(vdouble d, vdouble m) { // bits of m OR'ed with d's sign bit in infinite lanes, all-zero bits elsewhere
+  return (__m128d)_mm_and_si128(visinf_vm_vd(d), _mm_or_si128(vsignbit_vm_vd(d), (__m128i)m));
+}
+
+//
+
+static INLINE vdouble vpow2i_vd_vi(vint q) { // writes q + 0x3ff into the exponent field of two doubles (2^q while that stays in the normal range)
+  q = _mm_add_epi32(_mm_set_epi32(0x0, 0x0, 0x3ff, 0x3ff), q); // add the exponent bias to int lanes 0,1
+  q = (__m128i)_mm_shuffle_ps((__m128)q, (__m128)q, _MM_SHUFFLE(1,3,0,3)); // words become q3, q0, q3, q1: values land in the high words (q3 presumably 0 -- TODO confirm)
+  return (__m128d)_mm_slli_epi32(q, 20); // shift into bits 20..30 of each high word
+}
+
+static INLINE vdouble vldexp_vd_vd_vi(vdouble x, vint q) { // scales x by 2^q, applied as x * y^4 * 2^(q - 4e) with y = 2^e
+  vint m = _mm_srai_epi32(q, 31); // -1 in lanes where q is negative, 0 otherwise
+  m = _mm_slli_epi32(_mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(m, q), 9), m), 7); // e = (((q + m) >> 9) - m) << 7
+  q = _mm_sub_epi32(q, _mm_slli_epi32(m, 2)); // remaining exponent: q - 4e
+  m = _mm_add_epi32(_mm_set_epi32(0x0, 0x0, 0x3ff, 0x3ff), m); // add the exponent bias to lanes 0,1
+  m = _mm_andnot_si128(_mm_cmplt_epi32(m, _mm_set_epi32(0, 0, 0, 0)), m); // clamp negative biased exponents to 0
+  vint n = _mm_cmpgt_epi32(m, _mm_set_epi32(0x0, 0x0, 0x7ff, 0x7ff)); // lanes above the largest exponent field value
+  m = _mm_or_si128(_mm_andnot_si128(n, m), _mm_and_si128(n, _mm_set_epi32(0x0, 0x0, 0x7ff, 0x7ff))); // clamp those lanes to 0x7ff
+  m = (__m128i)_mm_shuffle_ps((__m128)m, (__m128)m, _MM_SHUFFLE(1,3,0,3)); // move lanes 0,1 into the high words of the two doubles
+  vdouble y = (__m128d)_mm_slli_epi32(m, 20); // shift into the exponent field
+  return vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(x, y), y), y), y), vpow2i_vd_vi(q)); // x * y * y * y * y * vpow2i(remaining q)
+}
+
+static INLINE vint vilogbp1_vi_vd(vdouble d) { // top 12 bits of each double minus 0x3fe, packed into int lanes 0,1
+  vint m = vlt_vm_vd_vd(d, vcast_vd_d(4.9090934652977266E-91)); // lanes below ~2^-300
+  d = vsel_vd_vm_vd_vd(m, vmul_vd_vd_vd(vcast_vd_d(2.037035976334486E90), d), d); // scale those lanes by ~2^300
+  __m128i q = _mm_and_si128((__m128i)d, _mm_set_epi32(((1 << 12)-1) << 20, 0, ((1 << 12)-1) << 20, 0)); // keep bits 20..31 of each high word
+  q = _mm_srli_epi32(q, 20);
+  q = vor_vm_vm_vm(vand_vm_vm_vm   (m, _mm_sub_epi32(q, _mm_set_epi32(300 + 0x3fe, 0, 300 + 0x3fe, 0))), // scaled lanes: subtract 300 + 0x3fe
+		   vandnot_vm_vm_vm(m, _mm_sub_epi32(q, _mm_set_epi32(      0x3fe, 0,       0x3fe, 0)))); // other lanes: subtract 0x3fe
+  q = (__m128i)_mm_shuffle_ps((__m128)q, (__m128)q, _MM_SHUFFLE(0,0,3,1)); // high words of the two doubles -> int lanes 0,1
+  return q;
+}
+
+static INLINE vdouble vupper_vd_vd(vdouble d) { // clears the low 27 bits of every 64-bit lane
+  return (__m128d)_mm_and_si128((__m128i)d, _mm_set_epi32(0xffffffff, 0xf8000000, 0xffffffff, 0xf8000000));
+}
+
+static INLINE vfloat vupper_vf_vf(vfloat d) { // clears the low 12 bits of every 32-bit lane
+  return (__m128)_mm_and_si128((__m128i)d, _mm_set_epi32(0xfffff000, 0xfffff000, 0xfffff000, 0xfffff000));
+}
diff --git a/simd/iut.c b/simd/iut.c
new file mode 100644
index 00000000..d85d8c40
--- /dev/null
+++ b/simd/iut.c
@@ -0,0 +1,1730 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <time.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <inttypes.h>
+
+#include <math.h>
+#include <bits/nan.h>
+#include <bits/inf.h>
+
+#include <unistd.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#include "sleefsimd.h"
+
+static sigjmp_buf sigjmp; // recovery point for a SIGILL raised inside detectFeature()
+
+static void sighandler(int signum) { // SIGILL handler installed by detectFeature()
+  siglongjmp(sigjmp, 1); // unwinds to sigsetjmp() and restores the signal mask saved there
+}
+
+int detectFeature() { // returns 1 if the vector code below ran, 0 if it raised SIGILL
+  signal(SIGILL, sighandler);
+  // sigsetjmp(..., 1) saves the signal mask so SIGILL is not left blocked after the handler jumps out.
+  if (sigsetjmp(sigjmp, 1) == 0) {
+#ifdef ENABLE_DP
+    double s[VECTLENDP];
+    int i;
+    for(i=0;i<VECTLENDP;i++) {
+      s[i] = 1.0;
+    }
+    vdouble a = vloadu(s);
+    a = xpow(a, a);
+    vstoreu(s, a);
+#elif defined(ENABLE_SP)
+    float s[VECTLENSP];
+    int i;
+    for(i=0;i<VECTLENSP;i++) {
+      s[i] = 1.0;
+    }
+    vfloat a = vloaduf(s);
+    a = xpowf(a, a);
+    vstoreuf(s, a);
+#endif
+    signal(SIGILL, SIG_DFL);
+    return 1;
+  } else {
+    signal(SIGILL, SIG_DFL);
+    return 0;
+  }
+}
+
+int readln(int fd, char *buf, int cnt) { // reads up to cnt-1 bytes or through '\n' into buf, NUL-terminates, returns bytes stored
+  int i, rcnt = 0;
+
+  if (cnt < 1) return -1;
+
+  while(cnt >= 2) { // always keep one slot free for the terminating NUL
+    i = read(fd, buf, 1);
+    if (i != 1) return i; // EOF (0) or error (-1) is handed straight to the caller; buf is not terminated
+
+    rcnt++;
+    buf++;
+    cnt--;
+
+    if (buf[-1] == '\n') break; // the newline itself is kept and counted
+  }
+
+  // buf points just past the last stored byte, so the terminator stays inside the buffer even when it filled up
+  *buf = '\0';
+  return rcnt;
+}
+
+int startsWith(char *str, char *prefix) {
+  return !strncmp(str, prefix, strlen(prefix)); // 1 when str begins with prefix, 0 otherwise
+}
+
+double u2d(uint64_t u) {
+  // Reinterpret the 64 bits of u as a double; no numeric conversion takes place.
+  double value;
+
+  memcpy(&value, &u, sizeof value);
+
+  return value;
+}
+
+uint64_t d2u(double d) {
+  // Return the raw 64-bit pattern of d; no numeric conversion takes place.
+  uint64_t bits;
+
+  memcpy(&bits, &d, sizeof bits);
+
+  return bits;
+}
+
+float u2f(uint32_t u) {
+  // Reinterpret the 32 bits of u as a float; no numeric conversion takes place.
+  float value;
+
+  memcpy(&value, &u, sizeof value);
+
+  return value;
+}
+
+uint32_t f2u(float d) {
+  // Return the raw 32-bit pattern of d; no numeric conversion takes place.
+  uint32_t bits;
+
+  memcpy(&bits, &d, sizeof bits);
+
+  return bits;
+}
+
+typedef struct { // pair of doubles; the return type of xxsincos and xxsincos_u1
+  double x, y;
+} double2;
+
+#ifdef ENABLE_DP
+
+double xxsin(double d) {
+  // Run xsin on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xsin(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxcos(double d) {
+  // Run xcos on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xcos(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxtan(double d) {
+  // Run xtan on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xtan(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxasin(double d) {
+  // Run xasin on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xasin(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxacos(double d) {
+  // Run xacos on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xacos(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxatan(double d) {
+  // Run xatan on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xatan(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxlog(double d) {
+  // Run xlog on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xlog(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxexp(double d) {
+  // Run xexp on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xexp(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double2 xxsincos(double d) {
+  // Run xsincos with d placed in one random lane; both outputs of that lane are returned.
+  double first[VECTLENDP], second[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    first[k] = random()/(double)RAND_MAX*20000-10000;
+    second[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+
+  first[slot] = d;
+
+  vdouble2 pair = xsincos(vloadu(first));
+
+  // .x overwrites the input buffer, .y overwrites the second buffer.
+  vstoreu(first, pair.x);
+  vstoreu(second, pair.y);
+
+  double2 out;
+  out.x = first[slot];
+  out.y = second[slot];
+
+  return out;
+}
+
+double xxsinh(double d) {
+  // Run xsinh on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xsinh(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxcosh(double d) {
+  // Run xcosh on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xcosh(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxtanh(double d) {
+  // Run xtanh on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xtanh(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxasinh(double d) {
+  // Run xasinh on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xasinh(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxacosh(double d) {
+  // Run xacosh on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xacosh(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxatanh(double d) {
+  // Run xatanh on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xatanh(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxcbrt(double d) {
+  // Run xcbrt on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xcbrt(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxexp2(double d) {
+  // Run xexp2 on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xexp2(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxexp10(double d) {
+  // Run xexp10 on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xexp10(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxexpm1(double d) {
+  // Run xexpm1 on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xexpm1(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxlog10(double d) {
+  // Run xlog10 on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xlog10(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxlog1p(double d) {
+  // Run xlog1p on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xlog1p(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxpow(double x, double y) {
+  // Run xpow on random filler lanes, with (x, y) planted in one randomly chosen lane.
+  double base[VECTLENDP], expo[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    base[k] = random()/(double)RAND_MAX*20000-10000;
+    expo[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+
+  base[slot] = x;
+  expo[slot] = y;
+
+  // NOTE(review): lanes 0 and 1 are also forced to (x, y), so they never hold filler values.
+  base[0] = base[1] = x;
+  expo[0] = expo[1] = y;
+
+  vdouble a = vloadu(base);
+  vdouble b = vloadu(expo);
+
+  a = xpow(a, b);
+  vstoreu(base, a);
+
+  // Only the lane chosen above is reported.
+  return base[slot];
+}
+
+double xxatan2(double y, double x) {
+  // Run xatan2 on random filler lanes, with (y, x) planted in one randomly chosen lane.
+  double ys[VECTLENDP], xs[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    ys[k] = random()/(double)RAND_MAX*20000-10000;
+    xs[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+
+  ys[slot] = y;
+  xs[slot] = x;
+
+  vdouble a = vloadu(ys);
+  vdouble b = vloadu(xs);
+
+  a = xatan2(a, b);
+  vstoreu(ys, a);
+
+  return ys[slot];
+}
+
+double xxldexp(double x, int q) {
+  // Run xldexp on random filler lanes, with (x, q) planted in one randomly chosen lane.
+  double s[VECTLENDP];
+  // All four ints are loaded below, so zero the entries the loop does not fill when VECTLENDP < 4.
+  int t[4] = { 0, 0, 0, 0 };
+  int i;
+  for(i=0;i<VECTLENDP;i++) {
+    s[i] = random()/(double)RAND_MAX*20000-10000;
+    t[i] = (int)(random()/(double)RAND_MAX*20000-10000);
+  }
+
+  int idx = random() & (VECTLENDP-1);
+
+  s[idx] = x;
+  t[idx] = q;
+
+  vdouble a = vloadu(s);
+  vint b = _mm_loadu_si128((__m128i *)t);
+
+  a = xldexp(a, b);
+  vstoreu(s, a);
+
+  return s[idx];
+}
+
+double xxsin_u1(double d) {
+  // Run xsin_u1 on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xsin_u1(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxcos_u1(double d) {
+  // Run xcos_u1 on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xcos_u1(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxtan_u1(double d) {
+  // Run xtan_u1 on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xtan_u1(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxasin_u1(double d) {
+  // Run xasin_u1 on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xasin_u1(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxacos_u1(double d) {
+  // Run xacos_u1 on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xacos_u1(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxatan_u1(double d) {
+  // Run xatan_u1 on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xatan_u1(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double xxatan2_u1(double y, double x) {
+  // Run xatan2_u1 on random filler lanes, with (y, x) planted in one randomly chosen lane.
+  double ys[VECTLENDP], xs[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    ys[k] = random()/(double)RAND_MAX*20000-10000;
+    xs[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+
+  ys[slot] = y;
+  xs[slot] = x;
+
+  vdouble a = vloadu(ys);
+  vdouble b = vloadu(xs);
+
+  a = xatan2_u1(a, b);
+  vstoreu(ys, a);
+
+  return ys[slot];
+}
+
+double xxlog_u1(double d) {
+  // Run xlog_u1 on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xlog_u1(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+double2 xxsincos_u1(double d) {
+  // Run xsincos_u1 with d placed in one random lane; both outputs of that lane are returned.
+  double first[VECTLENDP], second[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    first[k] = random()/(double)RAND_MAX*20000-10000;
+    second[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+
+  first[slot] = d;
+
+  vdouble2 pair = xsincos_u1(vloadu(first));
+
+  // .x overwrites the input buffer, .y overwrites the second buffer.
+  vstoreu(first, pair.x);
+  vstoreu(second, pair.y);
+
+  double2 out;
+  out.x = first[slot];
+  out.y = second[slot];
+
+  return out;
+}
+
+double xxcbrt_u1(double d) {
+  // Run xcbrt_u1 on a vector of random filler values with d placed in one randomly chosen lane.
+  double lanes[VECTLENDP];
+  int k, slot;
+  for(k = 0; k < VECTLENDP; k++) {
+    lanes[k] = random()/(double)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENDP-1);
+  lanes[slot] = d;
+
+  vdouble v = xcbrt_u1(vloadu(lanes));
+  vstoreu(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+#endif
+
+//
+
+typedef struct { // pair of floats, the single-precision counterpart of double2
+  float x, y;
+} float2;
+
+#ifdef ENABLE_SP
+
+float xxsinf(float d) {
+  // Run xsinf on a vector of random filler values with d placed in one randomly chosen lane.
+  float lanes[VECTLENSP];
+  int k, slot;
+  for(k = 0; k < VECTLENSP; k++) {
+    lanes[k] = random()/(float)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENSP-1);
+  lanes[slot] = d;
+
+  vfloat v = xsinf(vloaduf(lanes));
+  vstoreuf(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+float xxcosf(float d) {
+  // Run xcosf on a vector of random filler values with d placed in one randomly chosen lane.
+  float lanes[VECTLENSP];
+  int k, slot;
+  for(k = 0; k < VECTLENSP; k++) {
+    lanes[k] = random()/(float)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENSP-1);
+  lanes[slot] = d;
+
+  vfloat v = xcosf(vloaduf(lanes));
+  vstoreuf(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+float xxtanf(float d) {
+  // Run xtanf on a vector of random filler values with d placed in one randomly chosen lane.
+  float lanes[VECTLENSP];
+  int k, slot;
+  for(k = 0; k < VECTLENSP; k++) {
+    lanes[k] = random()/(float)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENSP-1);
+  lanes[slot] = d;
+
+  vfloat v = xtanf(vloaduf(lanes));
+  vstoreuf(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+float xxasinf(float d) {
+  // Run xasinf on a vector of random filler values with d placed in one randomly chosen lane.
+  float lanes[VECTLENSP];
+  int k, slot;
+  for(k = 0; k < VECTLENSP; k++) {
+    lanes[k] = random()/(float)RAND_MAX*20000-10000;
+  }
+  slot = random() & (VECTLENSP-1);
+  lanes[slot] = d;
+
+  vfloat v = xasinf(vloaduf(lanes));
+  vstoreuf(lanes, v);
+
+  return lanes[slot]; // only the lane that carried d is reported
+}
+
+/* Test shim for xacosf: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxacosf(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xacosf(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xatanf: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxatanf(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xatanf(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xatan2f: both operand vectors hold random filler, with (y, x) planted in one random lane. */
+float xxatan2f(float y, float x) {
+  float ys[VECTLENSP], xs[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    ys[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+    xs[k] = random()/(float)RAND_MAX*20000-10000;
+  }
+  const int slot = random() & (VECTLENSP-1);
+
+  /* Plant the real operands in the same lane of both vectors. */
+  ys[slot] = y;
+  xs[slot] = x;
+
+  const vfloat vy = vloaduf(ys);
+  const vfloat vx = vloaduf(xs);
+  vstoreuf(ys, xatan2f(vy, vx));
+
+  /* Only the planted lane is reported back. */
+  return ys[slot];
+}
+
+/* Test shim for xlogf: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxlogf(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xlogf(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xexpf: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxexpf(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xexpf(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xsqrtf: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxsqrtf(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xsqrtf(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xcbrtf: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxcbrtf(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xcbrtf(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xsincosf: random filler in every lane, d planted in one random lane; returns both outputs for d. */
+float2 xxsincosf(float d) {
+  float first[VECTLENSP], second[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    first[k] = random()/(float)RAND_MAX*20000-10000;
+    second[k] = random()/(float)RAND_MAX*20000-10000; /* overwritten by the store below; the draw is kept */
+  }
+  const int slot = random() & (VECTLENSP-1);
+
+  first[slot] = d;
+
+  /* A single call produces both components; each one is spilled to its own buffer. */
+  const vfloat in = vloaduf(first);
+  const vfloat2 both = xsincosf(in);
+  vstoreuf(first, both.x);
+  vstoreuf(second, both.y);
+
+  float2 out;
+  out.x = first[slot];
+  out.y = second[slot];
+
+  return out;
+}
+
+/* Test shim for xsinhf: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxsinhf(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xsinhf(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xcoshf: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxcoshf(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xcoshf(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xtanhf: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxtanhf(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xtanhf(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xasinhf: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxasinhf(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xasinhf(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xacoshf: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxacoshf(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xacoshf(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xatanhf: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxatanhf(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xatanhf(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xexp2f: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxexp2f(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xexp2f(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xexp10f: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxexp10f(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xexp10f(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xexpm1f: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxexpm1f(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xexpm1f(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xlog10f: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxlog10f(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xlog10f(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xlog1pf: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxlog1pf(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xlog1pf(v));
+
+  return lane[slot];
+}
+
+/*
+ * Test shim for xpowf: evaluates the vector routine on two buffers of random
+ * filler (nominally in [-10000, 10000]) with (x, y) planted in one randomly
+ * chosen lane, and returns that lane of the result.
+ * Only the chosen lane carries (x, y); lanes 0 and 1 are not forced to the
+ * real operands, so this wrapper follows the same scheme as xxatan2f.
+ */
+float xxpowf(float x, float y) {
+  float s[VECTLENSP], t[VECTLENSP];
+  int i;
+  for(i=0;i<VECTLENSP;i++) {
+    s[i] = random()/(float)RAND_MAX*20000-10000;
+    t[i] = random()/(float)RAND_MAX*20000-10000;
+  }
+  int idx = random() & (VECTLENSP-1);
+
+  s[idx] = x;
+  t[idx] = y;
+
+  vfloat a = vloaduf(s);
+  vfloat b = vloaduf(t);
+  a = xpowf(a, b);
+  vstoreuf(s, a);
+
+  return s[idx];
+}
+
+/* Test shim for xldexpf: random filler in both operand buffers, (x, q) planted in one random lane. */
+float xxldexpf(float x, int q) {
+  float vals[VECTLENSP];
+  int exps[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    vals[k] = random()/(double)RAND_MAX*20000-10000;
+    exps[k] = (int)(random()/(double)RAND_MAX*20000-10000);
+  }
+
+  const int slot = random() & (VECTLENSP-1);
+
+  /* Plant the real operands in the same lane of both buffers. */
+  vals[slot] = x;
+  exps[slot] = q;
+
+  /* The integer operand travels as a vint2 loaded from the int buffer. */
+  const vfloat vx = vloaduf(vals);
+  const vint2 vq = vloadui2(exps);
+  vstoreuf(vals, xldexpf(vx, vq));
+
+  /* Only the planted lane is reported back. */
+  return vals[slot];
+}
+
+/* Test shim for xsinf_u1: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxsinf_u1(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xsinf_u1(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xcosf_u1: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxcosf_u1(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xcosf_u1(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xtanf_u1: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxtanf_u1(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xtanf_u1(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xasinf_u1: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxasinf_u1(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xasinf_u1(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xacosf_u1: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxacosf_u1(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xacosf_u1(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xatanf_u1: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxatanf_u1(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xatanf_u1(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xatan2f_u1: both operand vectors hold random filler, with (y, x) planted in one random lane. */
+float xxatan2f_u1(float y, float x) {
+  float ys[VECTLENSP], xs[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    ys[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+    xs[k] = random()/(float)RAND_MAX*20000-10000;
+  }
+  const int slot = random() & (VECTLENSP-1);
+
+  /* Plant the real operands in the same lane of both vectors. */
+  ys[slot] = y;
+  xs[slot] = x;
+
+  const vfloat vy = vloaduf(ys);
+  const vfloat vx = vloaduf(xs);
+  vstoreuf(ys, xatan2f_u1(vy, vx));
+
+  /* Only the planted lane is reported back. */
+  return ys[slot];
+}
+
+/* Test shim for xlogf_u1: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxlogf_u1(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xlogf_u1(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xcbrtf_u1: random filler in every lane, d planted in one random lane, that lane returned. */
+float xxcbrtf_u1(float d) {
+  float lane[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    lane[k] = random()/(float)RAND_MAX*20000-10000; /* filler, nominally in [-10000, 10000] */
+  }
+  const int slot = random() & (VECTLENSP-1);
+  lane[slot] = d;
+
+  const vfloat v = vloaduf(lane);
+  vstoreuf(lane, xcbrtf_u1(v));
+
+  return lane[slot];
+}
+
+/* Test shim for xsincosf_u1: random filler in every lane, d planted in one random lane; returns both outputs for d. */
+float2 xxsincosf_u1(float d) {
+  float first[VECTLENSP], second[VECTLENSP];
+  int k;
+  for (k = 0; k != VECTLENSP; ++k) {
+    first[k] = random()/(float)RAND_MAX*20000-10000;
+    second[k] = random()/(float)RAND_MAX*20000-10000; /* overwritten by the store below; the draw is kept */
+  }
+  const int slot = random() & (VECTLENSP-1);
+
+  first[slot] = d;
+
+  /* A single call produces both components; each one is spilled to its own buffer. */
+  const vfloat in = vloaduf(first);
+  const vfloat2 both = xsincosf_u1(in);
+  vstoreuf(first, both.x);
+  vstoreuf(second, both.y);
+
+  float2 out;
+  out.x = first[slot];
+  out.y = second[slot];
+
+  return out;
+}
+#endif
+
+//
+
+#define BUFSIZE 1024
+/* Request loop: each stdin line is "<function> <hex bits>..."; the reply is the hex bit pattern(s) of the result on stdout. */
+int main(int argc, char **argv) {
+  srandom(time(NULL));
+
+  if (!detectFeature()) {
+    fprintf(stderr, "\n\n***** This host does not support the necessary CPU features to execute this program *****\n\n\n");
+    exit(-1);
+  }
+
+  char buf[BUFSIZE];
+
+  //fprintf(stderr, "IUT start\n");
+
+  for(;;) {
+    if (readln(STDIN_FILENO, buf, BUFSIZE-1) < 1) break; // stop when readln reports fewer than one byte
+
+    //fprintf(stderr, "iut: got %s\n", buf);
+
+#ifdef ENABLE_DP
+    if (startsWith(buf, "sin ")) { // operands are parsed with the scanf-family SCNx64 macro, results printed with PRIx64
+      uint64_t u;
+      sscanf(buf, "sin %" SCNx64, &u);
+      u = d2u(xxsin(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "cos ")) {
+      uint64_t u;
+      sscanf(buf, "cos %" SCNx64, &u);
+      u = d2u(xxcos(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "sincos ")) {
+      uint64_t u;
+      sscanf(buf, "sincos %" SCNx64, &u);
+      double2 x = xxsincos(u2d(u));
+      printf("%" PRIx64 " %" PRIx64 "\n", d2u(x.x), d2u(x.y));
+    } else if (startsWith(buf, "tan ")) {
+      uint64_t u;
+      sscanf(buf, "tan %" SCNx64, &u);
+      u = d2u(xxtan(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "asin ")) {
+      uint64_t u;
+      sscanf(buf, "asin %" SCNx64, &u);
+      u = d2u(xxasin(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "acos ")) {
+      uint64_t u;
+      sscanf(buf, "acos %" SCNx64, &u);
+      u = d2u(xxacos(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "atan ")) {
+      uint64_t u;
+      sscanf(buf, "atan %" SCNx64, &u);
+      u = d2u(xxatan(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "log ")) {
+      uint64_t u;
+      sscanf(buf, "log %" SCNx64, &u);
+      u = d2u(xxlog(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "exp ")) {
+      uint64_t u;
+      sscanf(buf, "exp %" SCNx64, &u);
+      u = d2u(xxexp(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "atan2 ")) {
+      uint64_t u, v;
+      sscanf(buf, "atan2 %" SCNx64 " %" SCNx64, &u, &v);
+      u = d2u(xxatan2(u2d(u), u2d(v)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "pow ")) {
+      uint64_t u, v;
+      sscanf(buf, "pow %" SCNx64 " %" SCNx64, &u, &v);
+      u = d2u(xxpow(u2d(u), u2d(v)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "sinh ")) {
+      uint64_t u;
+      sscanf(buf, "sinh %" SCNx64, &u);
+      u = d2u(xxsinh(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "cosh ")) {
+      uint64_t u;
+      sscanf(buf, "cosh %" SCNx64, &u);
+      u = d2u(xxcosh(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "tanh ")) {
+      uint64_t u;
+      sscanf(buf, "tanh %" SCNx64, &u);
+      u = d2u(xxtanh(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "asinh ")) {
+      uint64_t u;
+      sscanf(buf, "asinh %" SCNx64, &u);
+      u = d2u(xxasinh(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "acosh ")) {
+      uint64_t u;
+      sscanf(buf, "acosh %" SCNx64, &u);
+      u = d2u(xxacosh(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "atanh ")) {
+      uint64_t u;
+      sscanf(buf, "atanh %" SCNx64, &u);
+      u = d2u(xxatanh(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "sqrt ")) {
+      uint64_t u;
+      sscanf(buf, "sqrt %" SCNx64, &u);
+      u = d2u(sqrt(u2d(u))); // calls sqrt() directly rather than an xx* vector wrapper
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "cbrt ")) {
+      uint64_t u;
+      sscanf(buf, "cbrt %" SCNx64, &u);
+      u = d2u(xxcbrt(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "exp2 ")) {
+      uint64_t u;
+      sscanf(buf, "exp2 %" SCNx64, &u);
+      u = d2u(xxexp2(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "exp10 ")) {
+      uint64_t u;
+      sscanf(buf, "exp10 %" SCNx64, &u);
+      u = d2u(xxexp10(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "expm1 ")) {
+      uint64_t u;
+      sscanf(buf, "expm1 %" SCNx64, &u);
+      u = d2u(xxexpm1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "log10 ")) {
+      uint64_t u;
+      sscanf(buf, "log10 %" SCNx64, &u);
+      u = d2u(xxlog10(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "ldexp ")) {
+      uint64_t u, v;
+      sscanf(buf, "ldexp %" SCNx64 " %" SCNx64, &u, &v);
+      u = d2u(xxldexp(u2d(u), (int)u2d(v))); // second operand arrives as double bits and is converted to int
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "log1p ")) {
+      uint64_t u;
+      sscanf(buf, "log1p %" SCNx64, &u);
+      u = d2u(xxlog1p(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "sin_u1 ")) {
+      uint64_t u;
+      sscanf(buf, "sin_u1 %" SCNx64, &u);
+      u = d2u(xxsin_u1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "cos_u1 ")) {
+      uint64_t u;
+      sscanf(buf, "cos_u1 %" SCNx64, &u);
+      u = d2u(xxcos_u1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "sincos_u1 ")) {
+      uint64_t u;
+      sscanf(buf, "sincos_u1 %" SCNx64, &u);
+      double2 x = xxsincos_u1(u2d(u));
+      printf("%" PRIx64 " %" PRIx64 "\n", d2u(x.x), d2u(x.y));
+    } else if (startsWith(buf, "tan_u1 ")) {
+      uint64_t u;
+      sscanf(buf, "tan_u1 %" SCNx64, &u);
+      u = d2u(xxtan_u1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "asin_u1 ")) {
+      uint64_t u;
+      sscanf(buf, "asin_u1 %" SCNx64, &u);
+      u = d2u(xxasin_u1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "acos_u1 ")) {
+      uint64_t u;
+      sscanf(buf, "acos_u1 %" SCNx64, &u);
+      u = d2u(xxacos_u1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "atan_u1 ")) {
+      uint64_t u;
+      sscanf(buf, "atan_u1 %" SCNx64, &u);
+      u = d2u(xxatan_u1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "atan2_u1 ")) {
+      uint64_t u, v;
+      sscanf(buf, "atan2_u1 %" SCNx64 " %" SCNx64, &u, &v);
+      u = d2u(xxatan2_u1(u2d(u), u2d(v)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "log_u1 ")) {
+      uint64_t u;
+      sscanf(buf, "log_u1 %" SCNx64, &u);
+      u = d2u(xxlog_u1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    } else if (startsWith(buf, "cbrt_u1 ")) {
+      uint64_t u;
+      sscanf(buf, "cbrt_u1 %" SCNx64, &u);
+      u = d2u(xxcbrt_u1(u2d(u)));
+      printf("%" PRIx64 "\n", u);
+    }
+#ifdef ENABLE_SP
+    else 
+#endif
+#endif
+
+#ifdef ENABLE_SP
+    if (startsWith(buf, "sinf ")) { // single-precision requests carry 32-bit patterns, parsed with SCNx32
+      uint32_t u;
+      sscanf(buf, "sinf %" SCNx32, &u);
+      u = f2u(xxsinf(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "cosf ")) {
+      uint32_t u;
+      sscanf(buf, "cosf %" SCNx32, &u);
+      u = f2u(xxcosf(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "tanf ")) {
+      uint32_t u;
+      sscanf(buf, "tanf %" SCNx32, &u);
+      u = f2u(xxtanf(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "asinf ")) {
+      uint32_t u;
+      sscanf(buf, "asinf %" SCNx32, &u);
+      u = f2u(xxasinf(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "acosf ")) {
+      uint32_t u;
+      sscanf(buf, "acosf %" SCNx32, &u);
+      u = f2u(xxacosf(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "atanf ")) {
+      uint32_t u;
+      sscanf(buf, "atanf %" SCNx32, &u);
+      u = f2u(xxatanf(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "logf ")) {
+      uint32_t u;
+      sscanf(buf, "logf %" SCNx32, &u);
+      u = f2u(xxlogf(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "expf ")) {
+      uint32_t u;
+      sscanf(buf, "expf %" SCNx32, &u);
+      u = f2u(xxexpf(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "atan2f ")) {
+      uint32_t u, v;
+      sscanf(buf, "atan2f %" SCNx32 " %" SCNx32, &u, &v);
+      u = f2u(xxatan2f(u2f(u), u2f(v)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "cbrtf ")) {
+      uint32_t u;
+      sscanf(buf, "cbrtf %" SCNx32, &u);
+      u = f2u(xxcbrtf(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "sincosf ")) {
+      uint32_t u;
+      sscanf(buf, "sincosf %" SCNx32, &u);
+      float2 x = xxsincosf(u2f(u));
+      printf("%x %x\n", f2u(x.x), f2u(x.y));
+    } else if (startsWith(buf, "ldexpf ")) {
+      uint32_t u, v;
+      sscanf(buf, "ldexpf %" SCNx32 " %" SCNx32, &u, &v);
+      u = f2u(xxldexpf(u2f(u), (int)u2f(v))); // second operand arrives as float bits and is converted to int
+      printf("%x\n", u);
+    } else if (startsWith(buf, "powf ")) {
+      uint32_t u, v;
+      sscanf(buf, "powf %" SCNx32 " %" SCNx32, &u, &v);
+      u = f2u(xxpowf(u2f(u), u2f(v)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "sinhf ")) {
+      uint32_t u;
+      sscanf(buf, "sinhf %" SCNx32, &u);
+      u = f2u(xxsinhf(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "coshf ")) {
+      uint32_t u;
+      sscanf(buf, "coshf %" SCNx32, &u);
+      u = f2u(xxcoshf(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "tanhf ")) {
+      uint32_t u;
+      sscanf(buf, "tanhf %" SCNx32, &u);
+      u = f2u(xxtanhf(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "asinhf ")) {
+      uint32_t u;
+      sscanf(buf, "asinhf %" SCNx32, &u);
+      u = f2u(xxasinhf(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "acoshf ")) {
+      uint32_t u;
+      sscanf(buf, "acoshf %" SCNx32, &u);
+      u = f2u(xxacoshf(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "atanhf ")) {
+      uint32_t u;
+      sscanf(buf, "atanhf %" SCNx32, &u);
+      u = f2u(xxatanhf(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "sqrtf ")) {
+      uint32_t u;
+      sscanf(buf, "sqrtf %" SCNx32, &u);
+      u = f2u(xxsqrtf(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "exp2f ")) {
+      uint32_t u;
+      sscanf(buf, "exp2f %" SCNx32, &u);
+      u = f2u(xxexp2f(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "exp10f ")) {
+      uint32_t u;
+      sscanf(buf, "exp10f %" SCNx32, &u);
+      u = f2u(xxexp10f(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "expm1f ")) {
+      uint32_t u;
+      sscanf(buf, "expm1f %" SCNx32, &u);
+      u = f2u(xxexpm1f(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "log10f ")) {
+      uint32_t u;
+      sscanf(buf, "log10f %" SCNx32, &u);
+      u = f2u(xxlog10f(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "log1pf ")) {
+      uint32_t u;
+      sscanf(buf, "log1pf %" SCNx32, &u);
+      u = f2u(xxlog1pf(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "sinf_u1 ")) {
+      uint32_t u;
+      sscanf(buf, "sinf_u1 %" SCNx32, &u);
+      u = f2u(xxsinf_u1(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "cosf_u1 ")) {
+      uint32_t u;
+      sscanf(buf, "cosf_u1 %" SCNx32, &u);
+      u = f2u(xxcosf_u1(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "tanf_u1 ")) {
+      uint32_t u;
+      sscanf(buf, "tanf_u1 %" SCNx32, &u);
+      u = f2u(xxtanf_u1(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "asinf_u1 ")) {
+      uint32_t u;
+      sscanf(buf, "asinf_u1 %" SCNx32, &u);
+      u = f2u(xxasinf_u1(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "acosf_u1 ")) {
+      uint32_t u;
+      sscanf(buf, "acosf_u1 %" SCNx32, &u);
+      u = f2u(xxacosf_u1(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "atanf_u1 ")) {
+      uint32_t u;
+      sscanf(buf, "atanf_u1 %" SCNx32, &u);
+      u = f2u(xxatanf_u1(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "logf_u1 ")) {
+      uint32_t u;
+      sscanf(buf, "logf_u1 %" SCNx32, &u);
+      u = f2u(xxlogf_u1(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "atan2f_u1 ")) {
+      uint32_t u, v;
+      sscanf(buf, "atan2f_u1 %" SCNx32 " %" SCNx32, &u, &v);
+      u = f2u(xxatan2f_u1(u2f(u), u2f(v)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "cbrtf_u1 ")) {
+      uint32_t u;
+      sscanf(buf, "cbrtf_u1 %" SCNx32, &u);
+      u = f2u(xxcbrtf_u1(u2f(u)));
+      printf("%x\n", u);
+    } else if (startsWith(buf, "sincosf_u1 ")) {
+      uint32_t u;
+      sscanf(buf, "sincosf_u1 %" SCNx32, &u);
+      float2 x = xxsincosf_u1(u2f(u));
+      printf("%x %x\n", f2u(x.x), f2u(x.y));
+    }
+#endif
+
+    else {
+      break; // unrecognized request: leave the loop and exit
+    }
+
+    fflush(stdout); // push each reply out immediately
+  }
+
+  return 0;
+}
diff --git a/simd/nonnumber.h b/simd/nonnumber.h
new file mode 100644
index 00000000..5d856fa9
--- /dev/null
+++ b/simd/nonnumber.h
@@ -0,0 +1,19 @@
+/* Supplies NAN / INFINITY and the float variants NANf / INFINITYf. */
+#if !(defined (__GNUC__) || defined (__INTEL_COMPILER) || defined (__clang__))
+
+/* Any other compiler: pull the definitions from the bits/ headers. */
+#include <bits/nan.h>
+#include <bits/inf.h>
+
+#else
+
+/* Discard earlier definitions; #undef of a name that is not defined is a no-op. */
+#undef INFINITY
+#undef NAN
+
+/* Rebuild the constants from compiler builtins. */
+#define NAN __builtin_nan("")
+#define NANf __builtin_nanf("")
+#define INFINITY __builtin_inf()
+#define INFINITYf __builtin_inff()
+#endif
diff --git a/simd/sleefsimd.h b/simd/sleefsimd.h
new file mode 100644
index 00000000..c9418689
--- /dev/null
+++ b/simd/sleefsimd.h
@@ -0,0 +1,218 @@
+#include <stdint.h>
+
+// ******** SSE2 ********
+
+#ifdef ENABLE_SSE2 /* 128-bit configuration: vector types map onto __m128d / __m128 / __m128i */
+#include <immintrin.h>
+
+#define VECTLENDP 2 /* number of doubles in one vdouble */
+#define VECTLENSP 4 /* number of floats in one vfloat */
+
+typedef __m128d vdouble;
+typedef __m128i vint;
+
+typedef __m128 vfloat;
+typedef __m128i vint2;
+
+static vdouble vloadu(double *p) { return _mm_loadu_pd(p); } /* unaligned load of VECTLENDP doubles */
+static void vstoreu(double *p, vdouble v) { _mm_storeu_pd(p, v); } /* unaligned store of VECTLENDP doubles */
+
+static vfloat vloaduf(float *p) { return _mm_loadu_ps(p); } /* unaligned load of VECTLENSP floats */
+static void vstoreuf(float *p, vfloat v) { _mm_storeu_ps(p, v); } /* unaligned store of VECTLENSP floats */
+
+static vint2 vloadui2(int32_t *p) { return (vint2)_mm_loadu_si128((__m128i *)p); } /* unaligned load of four int32 */
+static void vstoreui2(int32_t *p, vint2 v) { _mm_storeu_si128((__m128i *)p, (__m128i)v); } /* unaligned store of four int32 */
+
+#define ENABLE_DP /* this configuration provides both the double- and single-precision APIs */
+#define ENABLE_SP
+#endif
+
+
+// ******** AVX ********
+
+#if defined(ENABLE_AVX) || defined(ENABLE_FMA4) /* 256-bit floating-point configuration */
+#include <immintrin.h>
+
+#define VECTLENDP 4 /* number of doubles in one vdouble */
+#define VECTLENSP 8 /* number of floats in one vfloat */
+
+typedef __m256d vdouble;
+typedef __m128i vint;
+
+typedef __m256 vfloat;
+typedef struct { /* eight int32 lanes carried as two 128-bit halves */
+  vint x, y;
+} vint2;
+
+static vdouble vloadu(double *p) { return _mm256_loadu_pd(p); } /* unaligned load of VECTLENDP doubles */
+static void vstoreu(double *p, vdouble v) { _mm256_storeu_pd(p, v); } /* no 'return': the function is void */
+
+static vfloat vloaduf(float *p) { return _mm256_loadu_ps(p); } /* unaligned load of VECTLENSP floats */
+static void vstoreuf(float *p, vfloat v) { _mm256_storeu_ps(p, v); } /* no 'return': the function is void */
+
+static vint2 vloadui2(int32_t *p) { /* loads p[0..3] into x and p[4..7] into y */
+  vint2 r;
+  r.x = _mm_loadu_si128((__m128i *) p     );
+  r.y = _mm_loadu_si128((__m128i *)(p + 4));
+  return r;
+}
+
+static void vstoreui2(int32_t *p, vint2 v) { /* stores x to p[0..3] and y to p[4..7] */
+  _mm_storeu_si128((__m128i *) p     , v.x);
+  _mm_storeu_si128((__m128i *)(p + 4), v.y);
+}
+
+#define ENABLE_DP /* this configuration provides both the double- and single-precision APIs */
+#define ENABLE_SP
+#endif
+
+
+// ******** AVX2 ********
+
+#ifdef ENABLE_AVX2 /* 256-bit configuration with a native 256-bit integer vector for vint2 */
+#include <immintrin.h>
+
+#define VECTLENDP 4 /* number of doubles in one vdouble */
+#define VECTLENSP 8 /* number of floats in one vfloat */
+
+typedef __m256d vdouble;
+typedef __m128i vint;
+
+typedef __m256 vfloat;
+typedef __m256i vint2;
+
+static vdouble vloadu(double *p) { return _mm256_loadu_pd(p); } /* unaligned load of VECTLENDP doubles */
+static void vstoreu(double *p, vdouble v) { _mm256_storeu_pd(p, v); } /* no 'return': the function is void */
+
+static vfloat vloaduf(float *p) { return _mm256_loadu_ps(p); } /* unaligned load of VECTLENSP floats */
+static void vstoreuf(float *p, vfloat v) { _mm256_storeu_ps(p, v); } /* no 'return': the function is void */
+
+static vint2 vloadui2(int32_t *p) { return _mm256_loadu_si256((__m256i const *)p); } /* unaligned load of eight int32 */
+static void vstoreui2(int32_t *p, vint2 v) { _mm256_storeu_si256((__m256i *)p, v); } /* no 'return': the function is void */
+
+#define ENABLE_DP /* this configuration provides both the double- and single-precision APIs */
+#define ENABLE_SP
+#endif
+
+
+// ******** ARM NEON ********
+
+#ifdef ENABLE_NEON /* ARM NEON configuration: single precision only (ENABLE_DP is not defined here) */
+#include <arm_neon.h>
+
+#define VECTLENDP 2 /* defined although no vdouble type exists in this configuration */
+#define VECTLENSP 4 /* number of floats in one vfloat */
+
+//typedef __m128d vdouble;
+typedef int32x4_t vint;
+typedef uint32x4_t vmask;
+
+typedef float32x4_t vfloat;
+typedef int32x4_t vint2;
+
+//static vdouble vloadu(double *p) { return _mm_loadu_pd(p); }
+//static void vstoreu(double *p, vdouble v) { _mm_storeu_pd(p, v); }
+
+static vfloat vloaduf(float *p) { return vld1q_f32(p); } /* load VECTLENSP floats from p */
+static void vstoreuf(float *p, vfloat v) { vst1q_f32(p, v); } /* store VECTLENSP floats to p */
+
+static vint2 vloadui2(int32_t *p) { return (vint2)vld1q_s32(p); } /* load four int32 from p */
+static void vstoreui2(int32_t *p, vint2 v) { vst1q_s32(p, v); } /* store four int32 to p */
+
+#define ENABLE_SP /* only the single-precision API is enabled */
+#endif
+
+
+#ifdef ENABLE_DP /* double-precision vector API; each function takes and returns whole vectors */
+typedef struct { /* pair of double vectors, the return type of xsincos / xsincos_u1 */
+  vdouble x, y;
+} vdouble2;
+
+vdouble xldexp(vdouble x, vint q);
+vint xilogb(vdouble d);
+
+vdouble xsin(vdouble d);
+vdouble xcos(vdouble d);
+vdouble2 xsincos(vdouble d);
+vdouble xtan(vdouble d);
+vdouble xasin(vdouble s);
+vdouble xacos(vdouble s);
+vdouble xatan(vdouble s);
+vdouble xatan2(vdouble y, vdouble x);
+vdouble xlog(vdouble d);
+vdouble xexp(vdouble d);
+vdouble xpow(vdouble x, vdouble y);
+
+vdouble xsinh(vdouble d);
+vdouble xcosh(vdouble d);
+vdouble xtanh(vdouble d);
+vdouble xasinh(vdouble s);
+vdouble xacosh(vdouble s);
+vdouble xatanh(vdouble s);
+
+vdouble xcbrt(vdouble d);
+
+vdouble xexp2(vdouble a);
+vdouble xexp10(vdouble a);
+vdouble xexpm1(vdouble a);
+vdouble xlog10(vdouble a);
+vdouble xlog1p(vdouble a);
+
+vdouble xsin_u1(vdouble d); /* _u1 variants: per the README, at most 1 ulp of error */
+vdouble xcos_u1(vdouble d);
+vdouble2 xsincos_u1(vdouble d);
+vdouble xtan_u1(vdouble d);
+vdouble xasin_u1(vdouble s);
+vdouble xacos_u1(vdouble s);
+vdouble xatan_u1(vdouble s);
+vdouble xatan2_u1(vdouble y, vdouble x);
+vdouble xlog_u1(vdouble d);
+vdouble xcbrt_u1(vdouble d);
+#endif /* ENABLE_DP */
+
+//
+
+#ifdef ENABLE_SP /* single-precision vector API; each function takes and returns whole vectors */
+typedef struct { /* pair of float vectors, the return type of xsincosf / xsincosf_u1 */
+  vfloat x, y;
+} vfloat2;
+
+vfloat xldexpf(vfloat x, vint2 q);
+
+vfloat xsinf(vfloat d);
+vfloat xcosf(vfloat d);
+vfloat2 xsincosf(vfloat d);
+vfloat xtanf(vfloat d);
+vfloat xasinf(vfloat s);
+vfloat xacosf(vfloat s);
+vfloat xatanf(vfloat s);
+vfloat xatan2f(vfloat y, vfloat x);
+vfloat xlogf(vfloat d);
+vfloat xexpf(vfloat d);
+vfloat xcbrtf(vfloat s);
+vfloat xsqrtf(vfloat s);
+
+vfloat xpowf(vfloat x, vfloat y);
+vfloat xsinhf(vfloat x);
+vfloat xcoshf(vfloat x);
+vfloat xtanhf(vfloat x);
+vfloat xasinhf(vfloat x);
+vfloat xacoshf(vfloat x);
+vfloat xatanhf(vfloat x);
+vfloat xexp2f(vfloat a);
+vfloat xexp10f(vfloat a);
+vfloat xexpm1f(vfloat a);
+vfloat xlog10f(vfloat a);
+vfloat xlog1pf(vfloat a);
+
+vfloat xsinf_u1(vfloat d); /* _u1 variants: described in the README as the higher-accuracy functions */
+vfloat xcosf_u1(vfloat d);
+vfloat2 xsincosf_u1(vfloat d);
+vfloat xtanf_u1(vfloat d);
+vfloat xasinf_u1(vfloat s);
+vfloat xacosf_u1(vfloat s);
+vfloat xatanf_u1(vfloat s);
+vfloat xatan2f_u1(vfloat y, vfloat x);
+vfloat xlogf_u1(vfloat d);
+vfloat xcbrtf_u1(vfloat s);
+#endif /* ENABLE_SP */
diff --git a/simd/sleefsimddp.c b/simd/sleefsimddp.c
new file mode 100644
index 00000000..c3cd51ec
--- /dev/null
+++ b/simd/sleefsimddp.c
@@ -0,0 +1,1022 @@
+#include <assert.h>
+#include <math.h>
+
+#if defined (__GNUC__) || defined (__INTEL_COMPILER) || defined (__clang__) // compilers assumed to accept GNU attribute syntax
+#define INLINE __attribute__((always_inline)) // used below on the static helper kernels (atan2k, logk, expk, ...)
+#else
+#define INLINE inline // fallback for other compilers: plain inline hint
+#endif
+
+#include "nonnumber.h"
+
+#ifdef ENABLE_SSE2
+#include "helpersse2.h"
+#endif
+
+#ifdef ENABLE_AVX
+#include "helperavx.h"
+#endif
+
+#ifdef ENABLE_AVX2
+#include "helperavx2.h"
+#endif
+
+#ifdef ENABLE_FMA4
+#include "helperfma4.h"
+#endif
+
+//
+
+#include "dd.h"
+
+//
+
+#define PI4_A 0.78539816290140151978 // PI4_A + PI4_B + PI4_C + PI4_D add up to pi/4; PI4_A is the leading part
+#define PI4_B 4.9604678871439933374e-10 // the pieces are subtracted one after another in the range-reduction steps below
+#define PI4_C 1.1258708853173288931e-18
+#define PI4_D 1.7607799325916000908e-27
+
+#define M_4_PI 1.273239544735162542821171882678754627704620361328125 // 4/pi
+
+#define L2U .69314718055966295651160180568695068359375 // upper part of ln(2); L2U + L2L is ln(2)
+#define L2L .28235290563031577122588448175013436025525412068e-12 // lower part of ln(2)
+#define R_LN2 1.442695040888963407359924681001892137426645954152985934135449406931 // 1/ln(2)
+
+// Single-precision versions of the constants above.
+
+#define PI4_Af 0.78515625f // float split of pi/4, leading part first
+#define PI4_Bf 0.00024187564849853515625f
+#define PI4_Cf 3.7747668102383613586e-08f
+#define PI4_Df 1.2816720341285448015e-12f
+
+#define L2Uf 0.693145751953125f // upper part of ln(2) as float
+#define L2Lf 1.428606765330187045e-06f // lower part of ln(2) as float
+#define R_LN2f 1.442695040888963407359924681001892137426645954152985934135449406931f // 1/ln(2) as float
+
+//
+
+vdouble xldexp(vdouble x, vint q) { vdouble scaled = vldexp_vd_vd_vi(x, q); return scaled; } // public wrapper over the helper-header ldexp primitive
+
+vint xilogb(vdouble d) { // per-lane ilogb: (vilogbp1 of |d|) - 1, with fixed sentinels for zero and infinite inputs
+  vdouble mag = vabs_vd_vd(d), expo = vcast_vd_vi(vsub_vi_vi_vi(vilogbp1_vi_vd(mag), vcast_vi_i(1)));
+  expo = vsel_vd_vm_vd_vd(veq_vm_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-2147483648.0), expo); // zero lanes report -2147483648
+  expo = vsel_vd_vm_vd_vd(veq_vm_vd_vd(mag, vcast_vd_d(INFINITY)), vcast_vd_d(2147483647), expo); // infinite lanes report 2147483647
+  return vrint_vi_vd(expo);
+}
+
+vdouble xsin(vdouble d) { // Per-lane sine, fast variant. Comments assume vmla_vd_vd_vd_vd(a, b, c) computes a*b + c (helper not visible here).
+  vint q;
+  vdouble u, s;
+
+  q = vrint_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI))); // q = d/pi rounded to an integer
+
+  u = vcast_vd_vi(q);
+  d = vmla_vd_vd_vd_vd(u, vcast_vd_d(-PI4_A*4), d); // d -= q*pi, in four steps using the PI4_A..PI4_D pieces scaled by 4
+  d = vmla_vd_vd_vd_vd(u, vcast_vd_d(-PI4_B*4), d);
+  d = vmla_vd_vd_vd_vd(u, vcast_vd_d(-PI4_C*4), d);
+  d = vmla_vd_vd_vd_vd(u, vcast_vd_d(-PI4_D*4), d);
+
+  s = vmul_vd_vd_vd(d, d); // s = d*d, taken before the sign flip (the square is unaffected by it)
+
+  d = (vdouble)vxor_vm_vm_vm(vand_vm_vm_vm(veq_vm_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(1)), vcast_vi_i(1)), (vmask)vcast_vd_d(-0.0)), (vmask)d); // flip the sign bit of d in lanes where q is odd
+
+  u = vcast_vd_d(-7.97255955009037868891952e-18); // polynomial in s, highest-order coefficient first
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.81009972710863200091251e-15));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-7.64712219118158833288484e-13));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(1.60590430605664501629054e-10));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.50521083763502045810755e-08));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75573192239198747630416e-06));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.000198412698412696162806809));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00833333333333332974823815));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.166666666666666657414808));
+
+  u = vmla_vd_vd_vd_vd(s, vmul_vd_vd_vd(u, d), d); // result = d + s*(u*d)
+
+  return u;
+}
+
+vdouble xsin_u1(vdouble d) { // Per-lane sine, higher-accuracy variant (README: the _u1 functions have at most 1 ulp of error). Uses the vdouble2 helpers from dd.h.
+  vint q;
+  vdouble u;
+  vdouble2 s, t, x;
+
+  q = vrint_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI))); // q = d/pi rounded to an integer
+  u = vcast_vd_vi(q);
+
+  s = ddadd2_vd2_vd_vd (d, vmul_vd_vd_vd(u, vcast_vd_d(-PI4_A*4))); // s = d - q*pi, accumulated as a vdouble2 in four steps
+  s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(u, vcast_vd_d(-PI4_B*4)));
+  s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(u, vcast_vd_d(-PI4_C*4)));
+  s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(u, vcast_vd_d(-PI4_D*4)));
+
+  t = s; // keep the reduced argument; s becomes its square
+  s = ddsqu_vd2_vd2(s);
+
+  u = vcast_vd_d(2.72052416138529567917983e-15); // polynomial in s.x only, highest-order coefficient first (assumes vmla(a, b, c) = a*b + c)
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-7.6429259411395447190023e-13));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(1.60589370117277896211623e-10));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-2.5052106814843123359368e-08));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.75573192104428224777379e-06));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-0.000198412698412046454654947));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.00833333333333318056201922));
+
+  x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd2_vd2(ddadd_vd2_vd_vd(vcast_vd_d(-0.166666666666666657414808), vmul_vd_vd_vd(u, s.x)), s)); // x = 1 + (-1/6-ish constant + u*s.x) * s, in vdouble2 arithmetic
+
+  x = ddmul_vd2_vd2_vd2(t, x); // multiply by the reduced argument
+  u = vadd_vd_vd_vd(x.x, x.y); // collapse the pair into one vdouble
+
+  u = (vdouble)vxor_vm_vm_vm(vand_vm_vm_vm(veq_vm_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(1)), vcast_vi_i(1)), (vmask)vcast_vd_d(-0.0)), (vmask)u); // flip the sign bit in lanes where q is odd
+
+  return u;
+}
+
+vdouble xcos(vdouble d) { // Per-lane cosine, fast variant. Comments assume vmla_vd_vd_vd_vd(a, b, c) computes a*b + c (helper not visible here).
+  vint q;
+  vdouble u, s;
+
+  q = vrint_vi_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), vcast_vd_d(-0.5))); // d/pi - 0.5 rounded to an integer
+  q = vadd_vi_vi_vi(vadd_vi_vi_vi(q, q), vcast_vi_i(1)); // q = 2*q + 1, so q is always odd
+
+  u = vcast_vd_vi(q);
+  d = vmla_vd_vd_vd_vd(u, vcast_vd_d(-PI4_A*2), d); // d -= q*(pi/2), in four steps using the PI4_A..PI4_D pieces scaled by 2
+  d = vmla_vd_vd_vd_vd(u, vcast_vd_d(-PI4_B*2), d);
+  d = vmla_vd_vd_vd_vd(u, vcast_vd_d(-PI4_C*2), d);
+  d = vmla_vd_vd_vd_vd(u, vcast_vd_d(-PI4_D*2), d);
+
+  s = vmul_vd_vd_vd(d, d);
+
+  d = (vdouble)vxor_vm_vm_vm(vand_vm_vm_vm(veq_vm_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(0)), (vmask)vcast_vd_d(-0.0)), (vmask)d); // flip the sign bit of d in lanes where (q & 2) == 0
+
+  u = vcast_vd_d(-7.97255955009037868891952e-18); // same coefficient set as xsin, highest-order coefficient first
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.81009972710863200091251e-15));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-7.64712219118158833288484e-13));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(1.60590430605664501629054e-10));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.50521083763502045810755e-08));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75573192239198747630416e-06));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.000198412698412696162806809));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00833333333333332974823815));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.166666666666666657414808));
+
+  u = vmla_vd_vd_vd_vd(s, vmul_vd_vd_vd(u, d), d); // result = d + s*(u*d)
+
+  return u;
+}
+
+vdouble xcos_u1(vdouble d) { // Per-lane cosine, higher-accuracy variant (README: the _u1 functions have at most 1 ulp of error). Uses the vdouble2 helpers from dd.h.
+  vint q;
+  vdouble u;
+  vdouble2 s, t, x;
+
+  q = vrint_vi_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), vcast_vd_d(-0.5))); // d/pi - 0.5 rounded to an integer (assumes vmla(a, b, c) = a*b + c)
+  q = vadd_vi_vi_vi(vadd_vi_vi_vi(q, q), vcast_vi_i(1)); // q = 2*q + 1, so q is always odd
+  u = vcast_vd_vi(q);
+
+  s = ddadd2_vd2_vd_vd (d, vmul_vd_vd_vd(u, vcast_vd_d(-PI4_A*2))); // s = d - q*(pi/2), accumulated as a vdouble2 in four steps
+  s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(u, vcast_vd_d(-PI4_B*2)));
+  s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(u, vcast_vd_d(-PI4_C*2)));
+  s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(u, vcast_vd_d(-PI4_D*2)));
+
+  t = s; // keep the reduced argument; s becomes its square
+  s = ddsqu_vd2_vd2(s);
+
+  u = vcast_vd_d(2.72052416138529567917983e-15); // same coefficient set as xsin_u1, evaluated in s.x only
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-7.6429259411395447190023e-13));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(1.60589370117277896211623e-10));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-2.5052106814843123359368e-08));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.75573192104428224777379e-06));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-0.000198412698412046454654947));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.00833333333333318056201922));
+
+  x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd2_vd2(ddadd_vd2_vd_vd(vcast_vd_d(-0.166666666666666657414808), vmul_vd_vd_vd(u, s.x)), s)); // x = 1 + (constant + u*s.x) * s, in vdouble2 arithmetic
+
+  x = ddmul_vd2_vd2_vd2(t, x); // multiply by the reduced argument
+  u = vadd_vd_vd_vd(x.x, x.y); // collapse the pair into one vdouble
+
+  u = (vdouble)vxor_vm_vm_vm(vand_vm_vm_vm(veq_vm_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(0)), (vmask)vcast_vd_d(-0.0)), (vmask)u); // flip the sign bit in lanes where (q & 2) == 0
+
+  return u;
+}
+
+vdouble2 xsincos(vdouble d) { // Per-lane sine (returned in .x) and cosine (returned in .y), fast variant. Comments assume vmla(a, b, c) = a*b + c and vsel(m, a, b) = m ? a : b.
+  vint q;
+  vmask m;
+  vdouble u, s, t, rx, ry;
+  vdouble2 r;
+
+  q = vrint_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_2_PI))); // q = d*(2/pi) rounded to an integer
+
+  s = d;
+
+  u = vcast_vd_vi(q);
+  s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-PI4_A*2), s); // s = d - q*(pi/2), in four steps
+  s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-PI4_B*2), s);
+  s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-PI4_C*2), s);
+  s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-PI4_D*2), s);
+
+  t = s; // t keeps the reduced argument
+
+  s = vmul_vd_vd_vd(s, s); // s is now t*t
+
+  u = vcast_vd_d(1.58938307283228937328511e-10); // first polynomial (odd result): rx = t + t*s*P(s)
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.50506943502539773349318e-08));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75573131776846360512547e-06));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.000198412698278911770864914));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0083333333333191845961746));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.166666666666666130709393));
+  u = vmul_vd_vd_vd(vmul_vd_vd_vd(u, s), t);
+
+  rx = vadd_vd_vd_vd(t, u);
+
+  u = vcast_vd_d(-1.13615350239097429531523e-11); // second polynomial (even result): ry = 1 + s*Q(s)
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.08757471207040055479366e-09));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.75573144028847567498567e-07));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.48015872890001867311915e-05));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.00138888888888714019282329));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0416666666666665519592062));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.5));
+
+  ry = vmla_vd_vd_vd_vd(s, u, vcast_vd_d(1));
+
+  m = veq_vm_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(1)), vcast_vi_i(0)); // lanes where q is even keep (rx, ry); odd lanes swap them
+  r.x = vsel_vd_vm_vd_vd(m, rx, ry);
+  r.y = vsel_vd_vm_vd_vd(m, ry, rx);
+
+  m = veq_vm_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(2)); // negate r.x where bit 1 of q is set
+  r.x = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vm_vm(m, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(r.x)));
+
+  m = veq_vm_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(q, vcast_vi_i(1)), vcast_vi_i(2)), vcast_vi_i(2)); // negate r.y where bit 1 of q+1 is set
+  r.y = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vm_vm(m, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(r.y)));
+
+  m = visinf_vm_vd(d); // infinite inputs: OR the mask into both results (presumably all-ones lanes, i.e. a NaN bit pattern)
+  r.x = (vdouble)vor_vm_vm_vm(m, (vmask)r.x);
+  r.y = (vdouble)vor_vm_vm_vm(m, (vmask)r.y);
+
+  return r;
+}
+
+vdouble2 xsincos_u1(vdouble d) { // Per-lane sine (.x) and cosine (.y), higher-accuracy variant (README: _u1 functions have at most 1 ulp of error). Assumes vmla(a, b, c) = a*b + c and vsel(m, a, b) = m ? a : b.
+  vint q;
+  vmask m;
+  vdouble u, rx, ry;
+  vdouble2 r, s, t, x;
+
+  q = vrint_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI))); // q = d*(2/pi) rounded to an integer
+  u = vcast_vd_vi(q);
+
+  s = ddadd2_vd2_vd_vd (d, vmul_vd_vd_vd(u, vcast_vd_d(-PI4_A*2))); // s = d - q*(pi/2), accumulated as a vdouble2 in four steps
+  s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(u, vcast_vd_d(-PI4_B*2)));
+  s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(u, vcast_vd_d(-PI4_C*2)));
+  s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(u, vcast_vd_d(-PI4_D*2)));
+
+  t = s; // t keeps the reduced argument as a vdouble2
+
+  s = ddsqu_vd2_vd2(s);
+  s.x = vadd_vd_vd_vd(s.x, s.y); // fold the second component into s.x; only s.x is used from here on
+
+  u = vcast_vd_d(1.58938307283228937328511e-10); // first polynomial (odd result), same coefficients as xsincos
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-2.50506943502539773349318e-08));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.75573131776846360512547e-06));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-0.000198412698278911770864914));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.0083333333333191845961746));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-0.166666666666666130709393));
+
+  u = vmul_vd_vd_vd(u, vmul_vd_vd_vd(s.x, t.x));
+
+  x = ddadd_vd2_vd2_vd(t, u); // rx = t + correction, summed via a vdouble2 and then collapsed
+  rx = vadd_vd_vd_vd(x.x, x.y);
+
+  u = vcast_vd_d(-1.13615350239097429531523e-11); // second polynomial (even result), same coefficients as xsincos
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.08757471207040055479366e-09));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-2.75573144028847567498567e-07));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.48015872890001867311915e-05));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-0.00138888888888714019282329));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.0416666666666665519592062));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-0.5));
+
+  x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd_vd(s.x, u)); // ry = 1 + s.x*u, summed via a vdouble2 and then collapsed
+  ry = vadd_vd_vd_vd(x.x, x.y);
+
+  m = veq_vm_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(1)), vcast_vi_i(0)); // lanes where q is even keep (rx, ry); odd lanes swap them
+  r.x = vsel_vd_vm_vd_vd(m, rx, ry);
+  r.y = vsel_vd_vm_vd_vd(m, ry, rx);
+
+  m = veq_vm_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(2)); // negate r.x where bit 1 of q is set
+  r.x = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vm_vm(m, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(r.x)));
+
+  m = veq_vm_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(q, vcast_vi_i(1)), vcast_vi_i(2)), vcast_vi_i(2)); // negate r.y where bit 1 of q+1 is set
+  r.y = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vm_vm(m, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(r.y)));
+
+  m = visinf_vm_vd(d); // infinite inputs: OR the mask into both results (presumably all-ones lanes, i.e. a NaN bit pattern)
+  r.x = (vdouble)vor_vm_vm_vm(m, (vmask)r.x);
+  r.y = (vdouble)vor_vm_vm_vm(m, (vmask)r.y);
+
+  return r;
+}
+
+vdouble xtan(vdouble d) { // Per-lane tangent, fast variant. Comments assume vmla(a, b, c) = a*b + c and vsel(m, a, b) = m ? a : b.
+  vint q;
+  vdouble u, s, x;
+  vmask m;
+
+  q = vrint_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_2_PI))); // q = d*(2/pi) rounded to an integer
+
+  u = vcast_vd_vi(q);
+  x = vmla_vd_vd_vd_vd(u, vcast_vd_d(-PI4_A*2), d); // x = d - q*(pi/2), in four steps
+  x = vmla_vd_vd_vd_vd(u, vcast_vd_d(-PI4_B*2), x);
+  x = vmla_vd_vd_vd_vd(u, vcast_vd_d(-PI4_C*2), x);
+  x = vmla_vd_vd_vd_vd(u, vcast_vd_d(-PI4_D*2), x);
+
+  s = vmul_vd_vd_vd(x, x);
+
+  m = veq_vm_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(1)), vcast_vi_i(1)); // m marks lanes where q is odd
+  x = (vdouble)vxor_vm_vm_vm(vand_vm_vm_vm(m, (vmask)vcast_vd_d(-0.0)), (vmask)x); // negate x in those lanes
+
+  u = vcast_vd_d(1.01419718511083373224408e-05); // polynomial in s, highest-order coefficient first
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.59519791585924697698614e-05));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(5.23388081915899855325186e-05));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-3.05033014433946488225616e-05));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(7.14707504084242744267497e-05));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(8.09674518280159187045078e-05));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.000244884931879331847054404));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.000588505168743587154904506));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00145612788922812427978848));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00359208743836906619142924));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00886323944362401618113356));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0218694882853846389592078));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0539682539781298417636002));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.133333333333125941821962));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.333333333333334980164153));
+
+  u = vmla_vd_vd_vd_vd(s, vmul_vd_vd_vd(u, x), x); // u = x + s*(u*x)
+
+  u = vsel_vd_vm_vd_vd(m, vrec_vd_vd(u), u); // odd-q lanes take vrec_vd_vd(u) (presumably the reciprocal)
+
+  u = (vdouble)vor_vm_vm_vm(visinf_vm_vd(d), (vmask)u); // infinite inputs: OR the mask in (presumably all-ones lanes, i.e. a NaN bit pattern)
+
+  return u;
+}
+
+vdouble xtan_u1(vdouble d) { // Per-lane tangent, higher-accuracy variant (README: _u1 functions have at most 1 ulp of error). Uses the vdouble2 helpers from dd.h; assumes vmla(a, b, c) = a*b + c.
+  vint q;
+  vdouble u;
+  vdouble2 s, t, x;
+  vmask m;
+
+  q = vrint_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_2_PI))); // q = d*(2/pi) rounded to an integer
+  u = vcast_vd_vi(q);
+
+  s = ddadd2_vd2_vd_vd (d, vmul_vd_vd_vd(u, vcast_vd_d(-PI4_A*2))); // s = d - q*(pi/2), accumulated as a vdouble2 in four steps
+  s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(u, vcast_vd_d(-PI4_B*2)));
+  s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(u, vcast_vd_d(-PI4_C*2)));
+  s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(u, vcast_vd_d(-PI4_D*2)));
+
+  m = veq_vm_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(1)), vcast_vi_i(1)); // m marks lanes where q is odd
+  vmask n = vand_vm_vm_vm(m, (vmask)vcast_vd_d(-0.0)); // sign-bit mask restricted to those lanes
+  s.x = (vdouble)vxor_vm_vm_vm((vmask)s.x, n); // negate both components of s in odd-q lanes
+  s.y = (vdouble)vxor_vm_vm_vm((vmask)s.y, n);
+
+  t = s; // keep the reduced argument; s becomes its square
+  s = ddsqu_vd2_vd2(s);
+
+  u = vcast_vd_d(1.01419718511083373224408e-05); // polynomial in s.x only, same leading coefficients as xtan
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-2.59519791585924697698614e-05));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(5.23388081915899855325186e-05));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-3.05033014433946488225616e-05));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(7.14707504084242744267497e-05));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(8.09674518280159187045078e-05));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.000244884931879331847054404));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.000588505168743587154904506));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.00145612788922812427978848));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.00359208743836906619142924));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.00886323944362401618113356));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.0218694882853846389592078));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.0539682539781298417636002));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.133333333333125941821962));
+
+  x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd2_vd2(ddadd_vd2_vd_vd(vcast_vd_d(0.333333333333334980164153), vmul_vd_vd_vd(u, s.x)), s)); // x = 1 + (constant + u*s.x) * s, in vdouble2 arithmetic
+  x = ddmul_vd2_vd2_vd2(t, x); // multiply by the reduced argument
+
+  x = vsel_vd2_vm_vd2_vd2(m, ddrec_vd2_vd2(x), x); // odd-q lanes take ddrec_vd2_vd2(x) (presumably the reciprocal)
+
+  u = vadd_vd_vd_vd(x.x, x.y); // collapse the pair into one vdouble
+
+  return u;
+}
+
+static INLINE vdouble atan2k(vdouble y, vdouble x) { // Core arctangent kernel shared by xatan2, xasin and xacos. All visible callers pass a non-negative y. Assumes vmla(a, b, c) = a*b + c.
+  vdouble s, t, u;
+  vint q;
+  vmask p;
+
+  q = vsel_vi_vd_vd_vi_vi(x, vcast_vd_d(0), vcast_vi_i(-2), vcast_vi_i(0)); // presumably q = (x < 0) ? -2 : 0 -- helper not visible here
+  x = vabs_vd_vd(x);
+
+  q = vsel_vi_vd_vd_vi_vi(x, y, vadd_vi_vi_vi(q, vcast_vi_i(1)), q); // presumably q += 1 where x < y, matching the mask p on the next line
+  p = vlt_vm_vd_vd(x, y);
+  s = vsel_vd_vm_vd_vd(p, vneg_vd_vd(x), y); // numerator: -x where x < y, else y
+  t = vmax_vd_vd_vd(x, y); // denominator: the larger of x and y
+
+  s = vdiv_vd_vd_vd(s, t);
+  t = vmul_vd_vd_vd(s, s); // t = s*s
+
+  u = vcast_vd_d(-1.88796008463073496563746e-05); // polynomial in t, highest-order coefficient first
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.000209850076645816976906797));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.00110611831486672482563471));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.00370026744188713119232403));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.00889896195887655491740809));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.016599329773529201970117));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.0254517624932312641616861));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.0337852580001353069993897));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.0407629191276836500001934));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.0466667150077840625632675));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.0523674852303482457616113));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.0587666392926673580854313));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.0666573579361080525984562));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.0769219538311769618355029));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.090908995008245008229153));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.111111105648261418443745));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.14285714266771329383765));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.199999999996591265594148));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.333333333333311110369124));
+
+  t = vmla_vd_vd_vd_vd(s, vmul_vd_vd_vd(t, u), s); // t = s + s*(t*u)
+  t = vmla_vd_vd_vd_vd(vcast_vd_vi(q), vcast_vd_d(M_PI/2), t); // add q*(pi/2)
+
+  return t;
+}
+
+static INLINE vdouble2 atan2k_u1(vdouble2 y, vdouble2 x) { // vdouble2 counterpart of atan2k, used by the _u1 inverse-trig functions. Assumes vmla(a, b, c) = a*b + c.
+  vdouble u;
+  vdouble2 s, t;
+  vint q;
+  vmask p;
+
+  q = vsel_vi_vd_vd_vi_vi(x.x, vcast_vd_d(0), vcast_vi_i(-2), vcast_vi_i(0)); // presumably q = (x.x < 0) ? -2 : 0 -- helper not visible here
+  p = vlt_vm_vd_vd(x.x, vcast_vd_d(0));
+  p = vand_vm_vm_vm(p, (vmask)vcast_vd_d(-0.0)); // sign-bit mask for lanes with x.x < 0
+  x.x = (vdouble)vxor_vm_vm_vm((vmask)x.x, p); // negate both components of x in those lanes
+  x.y = (vdouble)vxor_vm_vm_vm((vmask)x.y, p);
+
+  q = vsel_vi_vd_vd_vi_vi(x.x, y.x, vadd_vi_vi_vi(q, vcast_vi_i(1)), q); // presumably q += 1 where x.x < y.x, matching the mask p on the next line
+  p = vlt_vm_vd_vd(x.x, y.x);
+  s = vsel_vd2_vm_vd2_vd2(p, ddneg_vd2_vd2(x), y); // numerator: -x where x.x < y.x, else y
+  t = vsel_vd2_vm_vd2_vd2(p, y, x); // denominator: y where x.x < y.x, else x
+
+  s = dddiv_vd2_vd2_vd2(s, t);
+  t = ddsqu_vd2_vd2(s);
+  t = ddnormalize_vd2_vd2(t);
+
+  u = vcast_vd_d(1.06298484191448746607415e-05); // polynomial in t.x only, highest-order coefficient first
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.000125620649967286867384336));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(0.00070557664296393412389774));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.00251865614498713360352999));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(0.00646262899036991172313504));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.0128281333663399031014274));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(0.0208024799924145797902497));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.0289002344784740315686289));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(0.0359785005035104590853656));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.041848579703592507506027));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(0.0470843011653283988193763));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.0524914210588448421068719));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(0.0587946590969581003860434));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.0666620884778795497194182));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(0.0769225330296203768654095));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.0909090442773387574781907));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(0.111111108376896236538123));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.142857142756268568062339));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(0.199999999997977351284817));
+  u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.333333333333317605173818));
+
+  t = ddmul_vd2_vd2_vd(t, u);
+  t = ddmul_vd2_vd2_vd2(s, ddadd_vd2_vd_vd2(vcast_vd_d(1), t)); // t = s * (1 + t*u)
+  t = ddadd2_vd2_vd2_vd2(ddmul_vd2_vd2_vd(vcast_vd2_d_d(1.570796326794896557998982, 6.12323399573676603586882e-17), vcast_vd_vi(q)), t); // add q * (pi/2), with pi/2 given as a two-part constant
+
+  return t;
+}
+
+vdouble xatan2(vdouble y, vdouble x) { // Per-lane atan2(y, x), fast variant: kernel on |y|, then ordered overrides for special inputs. Assumes vsel(m, a, b) = m ? a : b; vmulsign, vsign and visinf2 are defined elsewhere.
+  vdouble r = atan2k(vabs_vd_vd(y), x);
+
+  r = vmulsign_vd_vd_vd(r, x); // presumably multiplies r by the sign of x -- helper not visible here
+  r = vsel_vd_vm_vd_vd(vor_vm_vm_vm(visinf_vm_vd(x), veq_vm_vd_vd(x, vcast_vd_d(0))), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), visinf2(x, vmulsign_vd_vd_vd(vcast_vd_d(M_PI/2), x))), r); // override lanes where x is infinite or zero
+  r = vsel_vd_vm_vd_vd(visinf_vm_vd(y), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), visinf2(x, vmulsign_vd_vd_vd(vcast_vd_d(M_PI/4), x))), r); // override lanes where y is infinite (applied after, so it wins over the x case)
+  r = vsel_vd_vm_vd_vd(veq_vm_vd_vd(y, vcast_vd_d(0.0)), (vdouble)vand_vm_vm_vm(veq_vm_vd_vd(vsign_vd_vd(x), vcast_vd_d(-1.0)), (vmask)vcast_vd_d(M_PI)), r); // y == 0: M_PI where vsign(x) == -1, zero bits otherwise
+
+  r = (vdouble)vor_vm_vm_vm(vor_vm_vm_vm(visnan_vm_vd(x), visnan_vm_vd(y)), (vmask)vmulsign_vd_vd_vd(r, y)); // apply the sign of y, then OR the NaN mask in for lanes where either input is NaN
+  return r;
+}
+
+vdouble xatan2_u1(vdouble y, vdouble x) { // Per-lane atan2(y, x), higher-accuracy variant (README: _u1 functions have at most 1 ulp of error); same override chain as xatan2.
+  vdouble2 d = atan2k_u1(vcast_vd2_vd_vd(vabs_vd_vd(y), vcast_vd_d(0)), vcast_vd2_vd_vd(x, vcast_vd_d(0))); // both inputs widened to vdouble2 with a zero second component
+  vdouble r = vadd_vd_vd_vd(d.x, d.y); // collapse the pair into one vdouble
+
+  r = vmulsign_vd_vd_vd(r, x); // presumably multiplies r by the sign of x -- helper not visible here
+  r = vsel_vd_vm_vd_vd(vor_vm_vm_vm(visinf_vm_vd(x), veq_vm_vd_vd(x, vcast_vd_d(0))), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), visinf2(x, vmulsign_vd_vd_vd(vcast_vd_d(M_PI/2), x))), r); // override lanes where x is infinite or zero
+  r = vsel_vd_vm_vd_vd(visinf_vm_vd(y), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), visinf2(x, vmulsign_vd_vd_vd(vcast_vd_d(M_PI/4), x))), r); // override lanes where y is infinite (applied after, so it wins over the x case)
+  r = vsel_vd_vm_vd_vd(veq_vm_vd_vd(y, vcast_vd_d(0.0)), (vdouble)vand_vm_vm_vm(veq_vm_vd_vd(vsign_vd_vd(x), vcast_vd_d(-1.0)), (vmask)vcast_vd_d(M_PI)), r); // y == 0: M_PI where vsign(x) == -1, zero bits otherwise
+
+  r = (vdouble)vor_vm_vm_vm(vor_vm_vm_vm(visnan_vm_vd(x), visnan_vm_vd(y)), (vmask)vmulsign_vd_vd_vd(r, y)); // apply the sign of y, then OR the NaN mask in for lanes where either input is NaN
+  return r;
+}
+
+vdouble xasin(vdouble d) { // per-lane arcsine via atan2k(|d|, sqrt((1 + d) * (1 - d))), with the sign of d applied at the end
+  vdouble one_plus = vadd_vd_vd_vd(vcast_vd_d(1), d);
+  vdouble one_minus = vsub_vd_vd_vd(vcast_vd_d(1), d);
+  vdouble root = vsqrt_vd_vd(vmul_vd_vd_vd(one_plus, one_minus));
+  vmask root_is_nan = visnan_vm_vd(root); // lanes where the square root came out NaN
+  vdouble angle = atan2k(vabs_vd_vd(d), root);
+  angle = (vdouble)vor_vm_vm_vm(root_is_nan, (vmask)angle); // OR the mask into the result bits for those lanes
+  return vmulsign_vd_vd_vd(angle, d);
+}
+
+vdouble xasin_u1(vdouble d) { // higher-accuracy arcsine: atan2k_u1(|d|, sqrt((1 + d) * (1 - d))) in vdouble2 arithmetic, with |d| == 1 pinned to the pi/2 constant
+  vdouble2 root = ddsqrt_vd2_vd2(ddmul_vd2_vd2_vd2(ddadd_vd2_vd_vd(vcast_vd_d(1), d), ddsub_vd2_vd_vd(vcast_vd_d(1), d))), angle = atan2k_u1(vcast_vd2_vd_vd(vabs_vd_vd(d), vcast_vd_d(0)), root);
+  vdouble total = vadd_vd_vd_vd(angle.x, angle.y); // collapse the pair into one vdouble
+  vdouble fixed = vsel_vd_vm_vd_vd(veq_vm_vd_vd(vabs_vd_vd(d), vcast_vd_d(1)), vcast_vd_d(1.570796326794896557998982), total);
+  return vmulsign_vd_vd_vd(fixed, d);
+}
+
+vdouble xacos(vdouble d) { // per-lane arccosine built on atan2k, with M_PI added in lanes where d is negative
+  vdouble root = vsqrt_vd_vd(vmul_vd_vd_vd(vadd_vd_vd_vd(vcast_vd_d(1), d), vsub_vd_vd_vd(vcast_vd_d(1), d)));
+
+  // atan2k(sqrt((1 + d) * (1 - d)), |d|), then vmulsign with d
+  vdouble angle = vmulsign_vd_vd_vd(atan2k(root, vabs_vd_vd(d)), d);
+
+  // M_PI in lanes with d < 0, zero bits elsewhere
+  vdouble shift = (vdouble)vand_vm_vm_vm(vlt_vm_vd_vd(d, vcast_vd_d(0)), (vmask)vcast_vd_d(M_PI));
+
+  return vadd_vd_vd_vd(angle, shift);
+}
+
+vdouble xacos_u1(vdouble d) { // higher-accuracy arccosine built on atan2k_u1, kept as a vdouble2 until the final sum
+  vdouble2 root = ddsqrt_vd2_vd2(ddmul_vd2_vd2_vd2(ddadd_vd2_vd_vd(vcast_vd_d(1), d), ddsub_vd2_vd_vd(vcast_vd_d(1), d)));
+  vdouble2 acc = atan2k_u1(root, vcast_vd2_vd_vd(vabs_vd_vd(d), vcast_vd_d(0)));
+  acc = ddscale_vd2_vd2_vd(acc, vmulsign_vd_vd_vd(vcast_vd_d(1), d)); // scale by vmulsign(1, d)
+
+  vmask not_unit = vneq_vm_vd_vd(vabs_vd_vd(d), vcast_vd_d(1)); // clear both components where |d| == 1
+  acc.x = (vdouble)vand_vm_vm_vm(not_unit, (vmask)acc.x);
+  acc.y = (vdouble)vand_vm_vm_vm(not_unit, (vmask)acc.y);
+
+  vmask negative = vlt_vm_vd_vd(d, vcast_vd_d(0)); // lanes with d < 0 get the two-part pi constant added
+  acc = vsel_vd2_vm_vd2_vd2(negative, ddadd_vd2_vd2_vd2(vcast_vd2_d_d(3.141592653589793116, 1.2246467991473532072e-16), acc), acc);
+  return vadd_vd_vd_vd(acc.x, acc.y);
+}
+
+vdouble xatan_u1(vdouble d) { // higher-accuracy arctangent: atan2k_u1(|d|, 1), infinite inputs pinned to the pi/2 constant, sign of d applied last
+  vdouble2 mag = vcast_vd2_vd_vd(vabs_vd_vd(d), vcast_vd_d(0)), angle = atan2k_u1(mag, vcast_vd2_d_d(1, 0));
+  vdouble total = vadd_vd_vd_vd(angle.x, angle.y); // collapse the pair into one vdouble
+  vdouble fixed = vsel_vd_vm_vd_vd(visinf_vm_vd(d), vcast_vd_d(1.570796326794896557998982), total);
+  return vmulsign_vd_vd_vd(fixed, d);
+}
+
+vdouble xatan(vdouble s) { // Per-lane arctangent, fast variant. Assumes vmla(a, b, c) = a*b + c and vsel(m, a, b) = m ? a : b.
+  vdouble t, u;
+  vint q;
+
+  q = vsel_vi_vd_vd_vi_vi(s, vcast_vd_d(0), vcast_vi_i(2), vcast_vi_i(0)); // presumably q = (s < 0) ? 2 : 0 -- helper not visible here
+  s = vabs_vd_vd(s);
+
+  q = vsel_vi_vd_vd_vi_vi(vcast_vd_d(1), s, vadd_vi_vi_vi(q, vcast_vi_i(1)), q); // presumably q += 1 where 1 < s, matching the mask on the next line
+  s = vsel_vd_vm_vd_vd(vlt_vm_vd_vd(vcast_vd_d(1), s), vrec_vd_vd(s), s); // where 1 < s, replace s by vrec_vd_vd(s) (presumably 1/s)
+
+  t = vmul_vd_vd_vd(s, s); // t = s*s
+
+  u = vcast_vd_d(-1.88796008463073496563746e-05); // same coefficient set as atan2k, highest-order coefficient first
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.000209850076645816976906797));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.00110611831486672482563471));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.00370026744188713119232403));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.00889896195887655491740809));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.016599329773529201970117));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.0254517624932312641616861));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.0337852580001353069993897));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.0407629191276836500001934));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.0466667150077840625632675));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.0523674852303482457616113));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.0587666392926673580854313));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.0666573579361080525984562));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.0769219538311769618355029));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.090908995008245008229153));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.111111105648261418443745));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.14285714266771329383765));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.199999999996591265594148));
+  u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.333333333333311110369124));
+
+  t = vmla_vd_vd_vd_vd(s, vmul_vd_vd_vd(t, u), s); // t = s + s*(t*u)
+
+  t = vsel_vd_vm_vd_vd(veq_vm_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(1)), vcast_vi_i(1)), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), t), t); // where bit 0 of q is set: t = pi/2 - t
+  t = (vdouble)vxor_vm_vm_vm(vand_vm_vm_vm(veq_vm_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(2)), (vmask)vcast_vd_d(-0.0)), (vmask)t); // where bit 1 of q is set: flip the sign bit
+
+  return t;
+}
+
+vdouble xlog(vdouble d) { // Per-lane natural logarithm, fast variant. Assumes vmla(a, b, c) = a*b + c and vsel(m, a, b) = m ? a : b.
+  vdouble x, x2;
+  vdouble t, m;
+  vint e;
+
+  e = vilogbp1_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(0.7071))); // exponent taken from d*0.7071 via vilogbp1 (helper not visible here)
+  m = vldexp_vd_vd_vi(d, vneg_vi_vi(e)); // m = d scaled by the negated exponent
+
+  x = vdiv_vd_vd_vd(vadd_vd_vd_vd(vcast_vd_d(-1), m), vadd_vd_vd_vd(vcast_vd_d(1), m)); // x = (m - 1) / (m + 1)
+  x2 = vmul_vd_vd_vd(x, x);
+
+  t = vcast_vd_d(0.148197055177935105296783); // polynomial in x2, highest-order coefficient first
+  t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.153108178020442575739679));
+  t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.181837339521549679055568));
+  t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.22222194152736701733275));
+  t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.285714288030134544449368));
+  t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.399999999989941956712869));
+  t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.666666666666685503450651));
+  t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(2));
+
+  x = vmla_vd_vd_vd_vd(x, t, vmul_vd_vd_vd(vcast_vd_d(0.693147180559945286226764), vcast_vd_vi(e))); // x*t + e * 0.6931... (the constant is ln 2)
+
+  x = vsel_vd_vm_vd_vd(vispinf_vm_vd(d), vcast_vd_d(INFINITY), x); // lanes flagged by vispinf (presumably d == +inf) return INFINITY
+  x = (vdouble)vor_vm_vm_vm(vgt_vm_vd_vd(vcast_vd_d(0), d), (vmask)x); // d < 0: OR the mask in (presumably all-ones lanes, i.e. a NaN bit pattern)
+  x = vsel_vd_vm_vd_vd(veq_vm_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-INFINITY), x); // d == 0 returns -INFINITY
+
+  return x;
+}
+
+vdouble xexp(vdouble d) { // Per-lane exponential. Assumes vmla_vd_vd_vd_vd(a, b, c) computes a*b + c (helper not visible here).
+  vint q = vrint_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(R_LN2))); // q = d / ln(2) rounded to an integer
+  vdouble s, u;
+
+  s = vmla_vd_vd_vd_vd(vcast_vd_vi(q), vcast_vd_d(-L2U), d); // s = d - q*ln(2), using the two-part split L2U + L2L
+  s = vmla_vd_vd_vd_vd(vcast_vd_vi(q), vcast_vd_d(-L2L), s);
+
+  u = vcast_vd_d(2.08860621107283687536341e-09); // polynomial in s, highest-order coefficient first
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.51112930892876518610661e-08));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75573911234900471893338e-07));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75572362911928827629423e-06));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.4801587159235472998791e-05));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.000198412698960509205564975));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00138888888889774492207962));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00833333333331652721664984));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0416666666666665047591422));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.166666666666666851703837));
+  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.5));
+
+  u = vadd_vd_vd_vd(vcast_vd_d(1), vmla_vd_vd_vd_vd(vmul_vd_vd_vd(s, s), u, s)); // u = 1 + (s + s*s*u)
+
+  u = vldexp_vd_vd_vi(u, q); // rescale by the exponent q removed during reduction
+
+  u = (vdouble)vandnot_vm_vm_vm(visminf_vm_vd(d), (vmask)u); // clear the result bits in lanes flagged by visminf (presumably d == -inf), giving zero
+
+  return u;
+}
+
+static INLINE vdouble2 logk(vdouble d) { // Logarithm kernel returning a vdouble2; used by xlog_u1 and xpow. Assumes vmla(a, b, c) = a*b + c.
+  vdouble2 x, x2;
+  vdouble t, m;
+  vint e;
+
+  e = vilogbp1_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(0.7071))); // exponent taken from d*0.7071 via vilogbp1 (helper not visible here)
+  m = vldexp_vd_vd_vi(d, vneg_vi_vi(e)); // m = d scaled by the negated exponent
+
+  x = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd_vd(vcast_vd_d(-1), m), ddadd2_vd2_vd_vd(vcast_vd_d(1), m)); // x = (m - 1) / (m + 1) in vdouble2 arithmetic
+  x2 = ddsqu_vd2_vd2(x);
+
+  t = vcast_vd_d(0.134601987501262130076155); // polynomial in x2.x only, highest-order coefficient first
+  t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.132248509032032670243288));
+  t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.153883458318096079652524));
+  t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.181817427573705403298686));
+  t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.222222231326187414840781));
+  t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.285714285651261412873718));
+  t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.400000000000222439910458));
+  t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.666666666666666371239645));
+
+  return ddadd2_vd2_vd2_vd2(ddmul_vd2_vd2_vd(vcast_vd2_vd_vd(vcast_vd_d(0.693147180559945286226764), vcast_vd_d(2.319046813846299558417771e-17)), // e * ln(2), with ln(2) given as a two-part constant
+		       vcast_vd_vi(e)),
+		ddadd2_vd2_vd2_vd2(ddscale_vd2_vd2_vd(x, vcast_vd_d(2)), ddmul_vd2_vd2_vd(ddmul_vd2_vd2_vd2(x2, x), t))); // plus 2*x + (x2*x)*t
+}
+
+vdouble xlog_u1(vdouble d) { // higher-accuracy natural logarithm: sum the two parts of logk(d), then patch the special inputs
+  vdouble2 parts = logk(d);
+  vdouble res = vadd_vd_vd_vd(parts.x, parts.y);
+  vmask is_negative = vgt_vm_vd_vd(vcast_vd_d(0), d), is_zero = veq_vm_vd_vd(d, vcast_vd_d(0));
+
+  res = vsel_vd_vm_vd_vd(vispinf_vm_vd(d), vcast_vd_d(INFINITY), res); // lanes flagged by vispinf return INFINITY
+  res = (vdouble)vor_vm_vm_vm(is_negative, (vmask)res); // d < 0: OR the mask into the result bits
+  res = vsel_vd_vm_vd_vd(is_zero, vcast_vd_d(-INFINITY), res); // d == 0 returns -INFINITY
+  return res;
+}
+
+static INLINE vdouble expk(vdouble2 d) { // Exponential kernel taking a vdouble2 argument and returning a plain vdouble; used by xpow. Assumes vmla(a, b, c) = a*b + c.
+  vdouble u = vmul_vd_vd_vd(vadd_vd_vd_vd(d.x, d.y), vcast_vd_d(R_LN2)); // (d.x + d.y) / ln(2)
+  vint q = vrint_vi_vd(u); // rounded to an integer
+  vdouble2 s, t;
+
+  s = ddadd2_vd2_vd2_vd(d, vmul_vd_vd_vd(vcast_vd_vi(q), vcast_vd_d(-L2U))); // s = d - q*ln(2), using the two-part split L2U + L2L
+  s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(vcast_vd_vi(q), vcast_vd_d(-L2L)));
+
+  s = ddnormalize_vd2_vd2(s);
+
+  u = vcast_vd_d(2.51069683420950419527139e-08); // polynomial in s.x only, highest-order coefficient first
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.76286166770270649116855e-07));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.75572496725023574143864e-06));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.48014973989819794114153e-05));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.000198412698809069797676111));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.0013888888939977128960529));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.00833333333332371417601081));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.0416666666665409524128449));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.166666666666666740681535));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.500000000000000999200722));
+
+  t = ddadd_vd2_vd2_vd2(s, ddmul_vd2_vd2_vd(ddsqu_vd2_vd2(s), u)); // t = s + s*s*u
+
+  t = ddadd_vd2_vd_vd2(vcast_vd_d(1), t); // t = 1 + t
+  u = vadd_vd_vd_vd(t.x, t.y); // collapse the pair into one vdouble
+  u = vldexp_vd_vd_vi(u, q); // rescale by the exponent q removed during reduction
+
+  return u;
+}
+
+vdouble xpow(vdouble x, vdouble y) { // Per-lane x raised to y: expk(logk(|x|) * y), followed by an ordered chain of special-case overrides (later ones win). Assumes vsel(m, a, b) = m ? a : b.
+#if 1
+  vmask yisnint = vneq_vm_vd_vd(vcast_vd_vi(vrint_vi_vd(y)), y); // y differs from its integer round-trip, i.e. y is not an integer
+  vmask yisodd = vandnot_vm_vm_vm(yisnint, veq_vm_vi_vi(vand_vi_vi_vi(vrint_vi_vd(y), vcast_vi_i(1)), vcast_vi_i(1))); // low bit of rint(y) set, and not flagged by yisnint (presumably vandnot(a, b) = ~a & b)
+
+  vdouble result = expk(ddmul_vd2_vd2_vd(logk(vabs_vd_vd(x)), y)); // magnitude from |x|
+
+  result = vmul_vd_vd_vd(result, // sign factor: 1 for x > 0; otherwise -1 for odd y, 1 for even y, with the yisnint mask OR-ed in for non-integer y
+			 vsel_vd_vm_vd_vd(vgt_vm_vd_vd(x, vcast_vd_d(0)),
+					  vcast_vd_d(1),
+					  (vdouble)vor_vm_vm_vm(yisnint, (vmask)vsel_vd_vm_vd_vd(yisodd, vcast_vd_d(-1.0), vcast_vd_d(1)))));
+
+  vdouble efx = (vdouble)vxor_vm_vm_vm((vmask)vsub_vd_vd_vd(vabs_vd_vd(x), vcast_vd_d(1)), vsignbit_vm_vd(y)); // |x| - 1 XOR-ed with vsignbit_vm_vd(y) (presumably the sign bit of y)
+
+  result = vsel_vd_vm_vd_vd(visinf_vm_vd(y), // infinite y: bits cleared (zero) where efx < 0, 1.0 where efx == 0, INFINITY otherwise
+			    (vdouble)vandnot_vm_vm_vm(vlt_vm_vd_vd(efx, vcast_vd_d(0.0)),
+						      (vmask)vsel_vd_vm_vd_vd(veq_vm_vd_vd(efx, vcast_vd_d(0.0)),
+									      vcast_vd_d(1.0),
+									      vcast_vd_d(INFINITY))),
+			    result);
+
+  result = vsel_vd_vm_vd_vd(vor_vm_vm_vm(visinf_vm_vd(x), veq_vm_vd_vd(x, vcast_vd_d(0.0))), // x infinite or zero: (odd y ? vsign(x) : 1) times (INFINITY, or cleared bits when the tested exponent is negative)
+			    vmul_vd_vd_vd(vsel_vd_vm_vd_vd(yisodd, vsign_vd_vd(x), vcast_vd_d(1.0)),
+					  (vdouble)vandnot_vm_vm_vm(vlt_vm_vd_vd(vsel_vd_vm_vd_vd(veq_vm_vd_vd(x, vcast_vd_d(0.0)), vneg_vd_vd(y), y), vcast_vd_d(0.0)),
+								   (vmask)vcast_vd_d(INFINITY))),
+			    result);
+
+  result = (vdouble)vor_vm_vm_vm(vor_vm_vm_vm(visnan_vm_vd(x), visnan_vm_vd(y)), (vmask)result); // OR the NaN mask in for lanes where either input is NaN
+
+  result = vsel_vd_vm_vd_vd(vor_vm_vm_vm(veq_vm_vd_vd(y, vcast_vd_d(0)), veq_vm_vd_vd(x, vcast_vd_d(1))), vcast_vd_d(1), result); // y == 0 or x == 1 returns 1; applied last, so it also overrides the NaN case
+
+  return result;
+#else
+  return expk(ddmul_vd2_vd2_vd(logk(x), y)); // disabled alternative without any special-case handling
+#endif
+}
+
+static INLINE vdouble2 expk2(vdouble2 d) { // Exponential of a two-part (vdouble2) argument, returned as a vdouble2.
+  vdouble u = vmul_vd_vd_vd(vadd_vd_vd_vd(d.x, d.y), vcast_vd_d(R_LN2)); // (d.x + d.y) * R_LN2; R_LN2 is defined outside this view, presumably 1 / ln 2
+  vint q = vrint_vi_vd(u); // integer multiple to remove from the argument
+  vdouble2 s, t;
+
+  s = ddadd2_vd2_vd2_vd(d, vmul_vd_vd_vd(vcast_vd_vi(q), vcast_vd_d(-L2U))); // s = d - q * L2U - q * L2L; L2U/L2L presumably form a high/low split of ln 2
+  s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(vcast_vd_vi(q), vcast_vd_d(-L2L)));
+
+  u = vcast_vd_d(2.51069683420950419527139e-08); // Horner evaluation in s.x (vmla(a, b, c) is presumably a * b + c); the low-order coefficients are close to 1/k!
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.76286166770270649116855e-07));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.75572496725023574143864e-06));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.48014973989819794114153e-05));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.000198412698809069797676111));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.0013888888939977128960529));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.00833333333332371417601081));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.0416666666665409524128449));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.166666666666666740681535));
+  u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.500000000000000999200722));
+
+  t = ddadd_vd2_vd2_vd2(s, ddmul_vd2_vd2_vd(ddsqu_vd2_vd2(s), u)); // t = s + s^2 * u
+
+  t = ddadd_vd2_vd_vd2(vcast_vd_d(1), t); // t = 1 + s + s^2 * u
+
+  return ddscale_vd2_vd2_vd(t, vpow2i_vd_vi(q)); // scale both halves by vpow2i(q), presumably 2^q built directly from exponent bits
+}
+
+vdouble xsinh(vdouble x) { // Per-lane sinh: (e^|x| - e^-|x|) / 2 in double-double arithmetic, with the sign of x reapplied.
+  vdouble ax = vabs_vd_vd(x);
+  vdouble2 e = expk2(vcast_vd2_vd_vd(ax, vcast_vd_d(0)));
+  vdouble2 diff = ddsub_vd2_vd2_vd2(e, ddrec_vd2_vd2(e));
+  vdouble r = vmul_vd_vd_vd(vadd_vd_vd_vd(diff.x, diff.y), vcast_vd_d(0.5));
+  // |x| above 710, or a NaN intermediate, is reported as +infinity.
+  vmask big = vor_vm_vm_vm(vgt_vm_vd_vd(vabs_vd_vd(x), vcast_vd_d(710)), visnan_vm_vd(r));
+  r = vsel_vd_vm_vd_vd(big, vcast_vd_d(INFINITY), r);
+  r = vmulsign_vd_vd_vd(r, x);
+  // OR the NaN-input mask into the bits (presumably all-ones, so those lanes read as NaN).
+  return (vdouble)vor_vm_vm_vm(visnan_vm_vd(x), (vmask)r);
+}
+
+vdouble xcosh(vdouble x) { // Per-lane cosh: (e^|x| + e^-|x|) / 2 in double-double arithmetic.
+  vdouble ax = vabs_vd_vd(x);
+  vdouble2 e = expk2(vcast_vd2_vd_vd(ax, vcast_vd_d(0)));
+  vdouble2 sum = ddadd_vd2_vd2_vd2(e, ddrec_vd2_vd2(e));
+  vdouble r = vmul_vd_vd_vd(vadd_vd_vd_vd(sum.x, sum.y), vcast_vd_d(0.5));
+  // |x| above 710, or a NaN intermediate, is reported as +infinity.
+  vmask big = vor_vm_vm_vm(vgt_vm_vd_vd(vabs_vd_vd(x), vcast_vd_d(710)), visnan_vm_vd(r));
+  r = vsel_vd_vm_vd_vd(big, vcast_vd_d(INFINITY), r);
+  // OR the NaN-input mask into the bits (presumably all-ones, so those lanes read as NaN).
+  return (vdouble)vor_vm_vm_vm(visnan_vm_vd(x), (vmask)r);
+}
+
+vdouble xtanh(vdouble x) { // Per-lane tanh: (e^|x| - e^-|x|) / (e^|x| + e^-|x|) in double-double, with the sign of x reapplied.
+  vdouble2 ep = expk2(vcast_vd2_vd_vd(vabs_vd_vd(x), vcast_vd_d(0)));
+  vdouble2 en = ddrec_vd2_vd2(ep);
+  vdouble2 num = ddadd2_vd2_vd2_vd2(ep, ddneg_vd2_vd2(en));
+  vdouble2 den = ddadd2_vd2_vd2_vd2(ep, en);
+  vdouble2 quo = dddiv_vd2_vd2_vd2(num, den);
+  vdouble r = vadd_vd_vd_vd(quo.x, quo.y);
+  // Beyond |x| = 18.714973875, or on a NaN intermediate, the magnitude is taken as exactly 1.
+  vmask sat = vor_vm_vm_vm(vgt_vm_vd_vd(vabs_vd_vd(x), vcast_vd_d(18.714973875)), visnan_vm_vd(r));
+  r = vsel_vd_vm_vd_vd(sat, vcast_vd_d(1.0), r);
+  r = vmulsign_vd_vd_vd(r, x);
+  return (vdouble)vor_vm_vm_vm(visnan_vm_vd(x), (vmask)r);
+}
+
+static INLINE vdouble2 logk2(vdouble2 d) { // Natural logarithm of a two-part (vdouble2) argument, returned as a vdouble2.
+  vdouble2 x, x2, m;
+  vdouble t;
+  vint e;
+
+  e = vilogbp1_vi_vd(vmul_vd_vd_vd(d.x, vcast_vd_d(0.7071))); // exponent taken from d.x * 0.7071; presumably keeps the scaled value m near [0.707, 1.414) - confirm against vilogbp1_vi_vd
+  m = ddscale_vd2_vd2_vd(d, vpow2i_vd_vi(vneg_vi_vi(e))); // m = d scaled by vpow2i(-e)
+
+  x = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(m, vcast_vd_d(-1)), ddadd2_vd2_vd2_vd(m, vcast_vd_d(1))); // x = (m - 1) / (m + 1)
+  x2 = ddsqu_vd2_vd2(x);
+
+  t = vcast_vd_d(0.134601987501262130076155); // Horner evaluation in x2.x (vmla(a, b, c) is presumably a * b + c)
+  t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.132248509032032670243288));
+  t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.153883458318096079652524));
+  t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.181817427573705403298686));
+  t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.222222231326187414840781));
+  t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.285714285651261412873718));
+  t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.400000000000222439910458));
+  t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.666666666666666371239645));
+
+  return ddadd2_vd2_vd2_vd2(ddmul_vd2_vd2_vd(vcast_vd2_vd_vd(vcast_vd_d(0.693147180559945286226764), vcast_vd_d(2.319046813846299558417771e-17)), // e * ln 2, with ln 2 given as a two-part constant
+		       vcast_vd_vi(e)),
+		ddadd2_vd2_vd2_vd2(ddscale_vd2_vd2_vd(x, vcast_vd_d(2)), ddmul_vd2_vd2_vd(ddmul_vd2_vd2_vd2(x2, x), t))); // plus 2 * x + x^3 * t
+}
+
+vdouble xasinh(vdouble x) { // Per-lane asinh: sign(x) * log(|x| + sqrt(x^2 + 1)), evaluated in double-double arithmetic.
+  vdouble ax = vabs_vd_vd(x);
+  vdouble2 sq1 = ddadd2_vd2_vd2_vd(ddmul_vd2_vd_vd(ax, ax),  vcast_vd_d(1));
+  vdouble2 arg = ddadd2_vd2_vd2_vd(ddsqrt_vd2_vd2(sq1), ax);
+  vdouble2 lg = logk2(arg);
+  vdouble r = vadd_vd_vd_vd(lg.x, lg.y);
+  // An infinite input, or a NaN intermediate, is reported as infinity before the sign is applied.
+  r = vsel_vd_vm_vd_vd(vor_vm_vm_vm(visinf_vm_vd(x), visnan_vm_vd(r)), vcast_vd_d(INFINITY), r);
+  r = vmulsign_vd_vd_vd(r, x);
+  return (vdouble)vor_vm_vm_vm(visnan_vm_vd(x), (vmask)r);
+}
+
+vdouble xacosh(vdouble x) { // Per-lane acosh: log(x + sqrt(x^2 - 1)), evaluated in double-double arithmetic.
+  vdouble2 sqm1 = ddadd2_vd2_vd2_vd(ddmul_vd2_vd_vd(x, x), vcast_vd_d(-1));
+  vdouble2 lg = logk2(ddadd2_vd2_vd2_vd(ddsqrt_vd2_vd2(sqm1), x));
+  vdouble r = vadd_vd_vd_vd(lg.x, lg.y);
+  // An infinite input, or a NaN intermediate, is reported as +infinity.
+  r = vsel_vd_vm_vd_vd(vor_vm_vm_vm(visinf_vm_vd(x), visnan_vm_vd(r)), vcast_vd_d(INFINITY), r);
+  // x == 1 clears every bit of the lane, giving +0.
+  r = (vdouble)vandnot_vm_vm_vm(veq_vm_vd_vd(x, vcast_vd_d(1.0)), (vmask)r);
+  // x < 1 or NaN x: the mask bits are ORed in (presumably all-ones, which reads as NaN).
+  vmask bad = vor_vm_vm_vm(visnan_vm_vd(x), vlt_vm_vd_vd(x, vcast_vd_d(1.0)));
+  return (vdouble)vor_vm_vm_vm(bad, (vmask)r);
+}
+
+vdouble xatanh(vdouble x) { // Per-lane atanh: sign(x) * 0.5 * log((1 + |x|) / (1 - |x|)); |x| == 1 gives infinity, |x| > 1 gets the mask ORed in.
+  vdouble ax = vabs_vd_vd(x);
+  vdouble2 num = ddadd2_vd2_vd_vd(vcast_vd_d(1), ax);
+  vdouble2 den = ddadd2_vd2_vd_vd(vcast_vd_d(1), vneg_vd_vd(ax));
+  vdouble2 lg = logk2(dddiv_vd2_vd2_vd2(num, den));
+  vdouble half = vmul_vd_vd_vd(vadd_vd_vd_vd(lg.x, lg.y), vcast_vd_d(0.5));
+  vdouble r = vsel_vd_vm_vd_vd(veq_vm_vd_vd(ax, vcast_vd_d(1.0)), vcast_vd_d(INFINITY), half);
+  r = (vdouble)vor_vm_vm_vm(vgt_vm_vd_vd(ax, vcast_vd_d(1.0)), (vmask)r);
+  r = (vdouble)vor_vm_vm_vm(vor_vm_vm_vm(visinf_vm_vd(x), visnan_vm_vd(r)), (vmask)r);
+  r = vmulsign_vd_vd_vd(r, x);
+  return (vdouble)vor_vm_vm_vm(visnan_vm_vd(x), (vmask)r);
+}
+
+vdouble xcbrt(vdouble d) { // Per-lane cube root: split off the binary exponent, approximate on the scaled value, refine, then rescale.
+  vdouble x, y, q = vcast_vd_d(1.0);
+  vint e, qu, re;
+  vdouble t;
+
+  e = vilogbp1_vi_vd(vabs_vd_vd(d)); // exponent of |d| as reported by vilogbp1_vi_vd
+  d = vldexp_vd_vd_vi(d, vneg_vi_vi(e)); // scale d by 2^-e
+
+  t = vadd_vd_vd_vd(vcast_vd_vi(e), vcast_vd_d(6144)); // bias the exponent by 6144 = 3 * 2048 (undone via "qu - 2048" below); presumably keeps t non-negative for the truncating division
+  qu = vtruncate_vi_vd(vmul_vd_vd_vd(t, vcast_vd_d(1.0/3.0))); // quotient of t by 3
+  re = vtruncate_vi_vd(vsub_vd_vd_vd(t, vmul_vd_vd_vd(vcast_vd_vi(qu), vcast_vd_d(3)))); // remainder t - 3 * qu
+
+  q = vsel_vd_vm_vd_vd(veq_vm_vi_vi(re, vcast_vi_i(1)), vcast_vd_d(1.2599210498948731647672106), q); // remainder 1: factor 2^(1/3)
+  q = vsel_vd_vm_vd_vd(veq_vm_vi_vi(re, vcast_vi_i(2)), vcast_vd_d(1.5874010519681994747517056), q); // remainder 2: factor 2^(2/3)
+  q = vldexp_vd_vd_vi(q, vsub_vi_vi_vi(qu, vcast_vi_i(2048))); // q *= 2^(qu - 2048)
+
+  q = vmulsign_vd_vd_vd(q, d); // the scale factor carries the sign of the input
+
+  d = vabs_vd_vd(d); // continue on the scaled magnitude
+
+  x = vcast_vd_d(-0.640245898480692909870982); // Horner evaluation in d: initial approximation (vmla(a, b, c) is presumably a * b + c)
+  x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(2.96155103020039511818595));
+  x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(-5.73353060922947843636166));
+  x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(6.03990368989458747961407));
+  x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(-3.85841935510444988821632));
+  x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(2.2307275302496609725722));
+
+  y = vmul_vd_vd_vd(x, x); y = vmul_vd_vd_vd(y, y); x = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vmlapn_vd_vd_vd_vd(d, y, x), vcast_vd_d(1.0 / 3.0))); // y = x^4, then x -= vmlapn(d, y, x) / 3; presumably vmlapn is d * y - x, making this a Newton step towards d^(-1/3) - TODO confirm
+  y = vmul_vd_vd_vd(vmul_vd_vd_vd(d, x), x); // y = d * x^2
+  y = vmul_vd_vd_vd(vsub_vd_vd_vd(y, vmul_vd_vd_vd(vmul_vd_vd_vd(vcast_vd_d(2.0 / 3.0), y), vmla_vd_vd_vd_vd(y, x, vcast_vd_d(-1.0)))), q); // (y - (2/3) * y * (y * x - 1)) * q: one more correction, then the exponent/sign factor is applied
+
+  return y;
+}
+
+vdouble xcbrt_u1(vdouble d) { // Per-lane cube root, variant that carries the correction and the scale factor in double-double.
+  vdouble x, y, z, t;
+  vdouble2 q2 = vcast_vd2_d_d(1, 0), u, v;
+  vint e, qu, re;
+
+  e = vilogbp1_vi_vd(vabs_vd_vd(d)); // exponent of |d| as reported by vilogbp1_vi_vd
+  d = vldexp_vd_vd_vi(d, vneg_vi_vi(e)); // scale d by 2^-e
+
+  t = vadd_vd_vd_vd(vcast_vd_vi(e), vcast_vd_d(6144)); // bias the exponent by 6144 = 3 * 2048 (undone via "qu - 2048" below)
+  qu = vtruncate_vi_vd(vmul_vd_vd_vd(t, vcast_vd_d(1.0/3.0))); // quotient of t by 3
+  re = vtruncate_vi_vd(vsub_vd_vd_vd(t, vmul_vd_vd_vd(vcast_vd_vi(qu), vcast_vd_d(3)))); // remainder t - 3 * qu
+
+  q2 = vsel_vd2_vm_vd2_vd2(veq_vm_vi_vi(re, vcast_vi_i(1)), vcast_vd2_d_d(1.2599210498948731907, -2.5899333753005069177e-17), q2); // remainder 1: 2^(1/3) as a two-part constant
+  q2 = vsel_vd2_vm_vd2_vd2(veq_vm_vi_vi(re, vcast_vi_i(2)), vcast_vd2_d_d(1.5874010519681995834, -1.0869008194197822986e-16), q2); // remainder 2: 2^(2/3) as a two-part constant
+
+  q2.x = vmulsign_vd_vd_vd(q2.x, d); q2.y = vmulsign_vd_vd_vd(q2.y, d); // both halves of the factor carry the sign of the input
+  d = vabs_vd_vd(d); // continue on the scaled magnitude
+
+  x = vcast_vd_d(-0.640245898480692909870982); // Horner evaluation in d: initial approximation (vmla(a, b, c) is presumably a * b + c)
+  x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(2.96155103020039511818595));
+  x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(-5.73353060922947843636166));
+  x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(6.03990368989458747961407));
+  x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(-3.85841935510444988821632));
+  x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(2.2307275302496609725722));
+
+  y = vmul_vd_vd_vd(x, x); y = vmul_vd_vd_vd(y, y); x = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vmlapn_vd_vd_vd_vd(d, y, x), vcast_vd_d(1.0 / 3.0))); // y = x^4, then x -= vmlapn(d, y, x) / 3; presumably vmlapn is d * y - x - TODO confirm
+
+  z = x; // keep the refined approximation
+
+  u = ddmul_vd2_vd_vd(x, x); // u = x^2 in double-double
+  u = ddmul_vd2_vd2_vd2(u, u); // u = x^4
+  u = ddmul_vd2_vd2_vd(u, d); // u = d * x^4
+  u = ddadd2_vd2_vd2_vd(u, vneg_vd_vd(x)); // u = d * x^4 - x
+  y = vadd_vd_vd_vd(u.x, u.y); // residual collapsed to one double
+
+  y = vmul_vd_vd_vd(vmul_vd_vd_vd(vcast_vd_d(-2.0 / 3.0), y), z); // y = (-2/3) * residual * z
+  v = ddadd2_vd2_vd2_vd(ddmul_vd2_vd_vd(z, z), y); // v = z^2 + y
+  v = ddmul_vd2_vd2_vd(v, d); // v *= d
+  v = ddmul_vd2_vd2_vd2(v, q2); // v *= signed 2^(re/3) factor
+  z = vldexp_vd_vd_vi(vadd_vd_vd_vd(v.x, v.y), vsub_vi_vi_vi(qu, vcast_vi_i(2048))); // collapse and scale by 2^(qu - 2048)
+
+  z = vsel_vd_vm_vd_vd(visinf_vm_vd(d), vmulsign_vd_vd_vd(vcast_vd_d(INFINITY), q2.x), z); // d here is the scaled magnitude; where it is infinite return infinity with the sign held in q2.x
+  z = vsel_vd_vm_vd_vd(veq_vm_vd_vd(d, vcast_vd_d(0)), (vdouble)vsignbit_vm_vd(q2.x), z); // where it is zero return only the sign bit of q2.x, i.e. a signed zero
+
+  return z;
+}
+
+vdouble xexp2(vdouble a) { // Per-lane 2^a, evaluated as expk(a * ln 2) with ln 2 supplied as a two-part (vdouble2) constant.
+  vdouble u = expk(ddmul_vd2_vd2_vd(vcast_vd2_vd_vd(vcast_vd_d(0.69314718055994528623), vcast_vd_d(2.3190468138462995584e-17)), a));
+  u = vsel_vd_vm_vd_vd(vor_vm_vm_vm(vgt_vm_vd_vd(a, vcast_vd_d(1024)), veq_vm_vd_vd(a, vcast_vd_d(1024))), vcast_vd_d(INFINITY), u); // 2^a is still finite for every a < 1024, so force infinity only for a >= 1024 (previously a > 1023 overflowed too early)
+  u = (vdouble)vandnot_vm_vm_vm(visminf_vm_vd(a), (vmask)u); // a == -infinity: clear every bit of the lane, giving +0
+  return u;
+}
+
+vdouble xexp10(vdouble a) { // Per-lane 10^a, evaluated as expk(a * ln 10) with ln 10 supplied as a two-part (vdouble2) constant.
+  vdouble u = expk(ddmul_vd2_vd2_vd(vcast_vd2_vd_vd(vcast_vd_d(2.3025850929940459011), vcast_vd_d(-2.1707562233822493508e-16)), a));
+  u = vsel_vd_vm_vd_vd(vgt_vm_vd_vd(a, vcast_vd_d(308.25471555991671)), vcast_vd_d(INFINITY), u); // log10(DBL_MAX) is about 308.2547, so 10^a stays finite up to there (previously a > 308 overflowed too early)
+  u = (vdouble)vandnot_vm_vm_vm(visminf_vm_vd(a), (vmask)u); // a == -infinity: clear every bit of the lane, giving +0
+  return u;
+}
+
+vdouble xexpm1(vdouble a) { // Per-lane e^a - 1: expk2(a) minus 1 in double-double arithmetic, then clamps for the two saturating ends.
+  vdouble2 d = ddadd2_vd2_vd2_vd(expk2(vcast_vd2_vd_vd(a, vcast_vd_d(0))), vcast_vd_d(-1.0));
+  vdouble x = vadd_vd_vd_vd(d.x, d.y);
+  x = vsel_vd_vm_vd_vd(vgt_vm_vd_vd(a, vcast_vd_d(709)), vcast_vd_d(INFINITY), x); // e^709 is about 8.2e307 and still finite (previously a > 700 discarded finite results). NOTE(review): kept at 709 rather than log(DBL_MAX) ~ 709.78 because expk2 scales by vpow2i(q) and q would reach 1024 above roughly 709.43 - confirm against vpow2i_vd_vi
+  x = vsel_vd_vm_vd_vd(vlt_vm_vd_vd(a, vcast_vd_d(-0.36043653389117156089696070315825181539851971360337e+2)), vcast_vd_d(-1), x); // below about -36.04 (where e^a is about 2^-52) the result is taken as exactly -1
+  return x;
+}
+
+vdouble xlog10(vdouble a) { // Per-lane log10: logk(a) times 1 / ln 10, the factor being held as a two-part constant.
+  vdouble2 rln10 = vcast_vd2_vd_vd(vcast_vd_d(0.43429448190325176116), vcast_vd_d(6.6494347733425473126e-17));
+  vdouble2 prod = ddmul_vd2_vd2_vd2(logk(a), rln10);
+  vdouble r = vadd_vd_vd_vd(prod.x, prod.y);
+  // +infinity maps to +infinity.
+  r = vsel_vd_vm_vd_vd(vispinf_vm_vd(a), vcast_vd_d(INFINITY), r);
+  // Negative input: mask bits ORed in (presumably all-ones, i.e. NaN); zero input: -infinity.
+  r = (vdouble)vor_vm_vm_vm(vgt_vm_vd_vd(vcast_vd_d(0), a), (vmask)r);
+  return vsel_vd_vm_vd_vd(veq_vm_vd_vd(a, vcast_vd_d(0)), vcast_vd_d(-INFINITY), r);
+}
+
+vdouble xlog1p(vdouble a) { // Per-lane log(1 + a): the sum 1 + a is formed in double-double before the logarithm kernel.
+  vdouble2 onePlusA = ddadd2_vd2_vd_vd(a, vcast_vd_d(1));
+  vdouble2 lg = logk2(onePlusA);
+  vdouble r = vadd_vd_vd_vd(lg.x, lg.y);
+  // +infinity maps to +infinity.
+  r = vsel_vd_vm_vd_vd(vispinf_vm_vd(a), vcast_vd_d(INFINITY), r);
+  // a < -1: mask bits ORed in (presumably all-ones, i.e. NaN); a == -1: -infinity.
+  r = (vdouble)vor_vm_vm_vm(vgt_vm_vd_vd(vcast_vd_d(-1.0), a), (vmask)r);
+  return vsel_vd_vm_vd_vd(veq_vm_vd_vd(a, vcast_vd_d(-1)), vcast_vd_d(-INFINITY), r);
+}
diff --git a/simd/sleefsimdsp.c b/simd/sleefsimdsp.c
new file mode 100644
index 00000000..9dc00a28
--- /dev/null
+++ b/simd/sleefsimdsp.c
@@ -0,0 +1,1005 @@
+#include <assert.h>
+#include <math.h>
+
+#if defined (__GNUC__) || defined (__INTEL_COMPILER) || defined (__clang__) // compilers that accept the GNU attribute syntax
+#define INLINE __attribute__((always_inline)) // used as "static INLINE" on the helper functions below
+#else
+#define INLINE inline // other compilers: plain inline hint
+#endif
+
+#include "nonnumber.h"
+
+#ifdef ENABLE_SSE2
+#include "helpersse2.h"
+#endif
+
+#ifdef ENABLE_AVX
+#include "helperavx.h"
+#endif
+
+#ifdef ENABLE_AVX2
+#include "helperavx2.h"
+#endif
+
+#ifdef ENABLE_FMA4
+#include "helperfma4.h"
+#endif
+
+#ifdef ENABLE_NEON
+#include "helperneon.h"
+#endif
+
+//
+
+#include "df.h"
+
+//
+
+#define PI4_Af 0.78515625f // PI4_Af + PI4_Bf + PI4_Cf + PI4_Df adds up to pi/4; the pieces are subtracted one after another during argument reduction
+#define PI4_Bf 0.00024187564849853515625f
+#define PI4_Cf 3.7747668102383613586e-08f
+#define PI4_Df 1.2816720341285448015e-12f
+
+#define L2Uf 0.693145751953125f // L2Uf + L2Lf adds up to ln 2 (upper part and small remainder)
+#define L2Lf 1.428606765330187045e-06f
+#define R_LN2f 1.442695040888963407359924681001892137426645954152985934135449406931f // 1 / ln 2
+
+//
+
+static INLINE vint2 vsel_vi2_vf_vf_vi2_vi2(vfloat f0, vfloat f1, vint2 x, vint2 y) { // Per lane: x where f0 < f1, otherwise y.
+  vint2 lt = vcast_vi2_vm(vlt_vm_vf_vf(f0, f1));
+  return vor_vi2_vi2_vi2(vandnot_vi2_vi2_vi2(lt, y), vand_vi2_vi2_vi2(lt, x));
+}
+
+static INLINE vmask vsignbit_vm_vf(vfloat f) { // Isolates the sign bit of each lane by ANDing with the bit pattern of -0.0f.
+  return vand_vm_vm_vm((vmask)vcast_vf_f(-0.0f), (vmask)f);
+}
+
+static INLINE vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) { // Flips the sign of x in the lanes where y has its sign bit set.
+  return (vfloat)vxor_vm_vm_vm(vsignbit_vm_vf(y), (vmask)x);
+}
+
+static INLINE vfloat vsign_vf_vf(vfloat f) { // Per lane: 1.0f carrying the sign bit of f (so -0.0f maps to -1.0f).
+  return (vfloat)vor_vm_vm_vm(vand_vm_vm_vm((vmask)f, (vmask)vcast_vf_f(-0.0f)), (vmask)vcast_vf_f(1.0f));
+}
+
+static INLINE vmask visinf_vm_vf(vfloat d) { return veq_vm_vf_vf(vabs_vf_vf(d), vcast_vf_f(INFINITYf)); } // mask of lanes where |d| is infinite
+static INLINE vmask vispinf_vm_vf(vfloat d) { return veq_vm_vf_vf(d, vcast_vf_f(INFINITYf)); } // mask of lanes equal to +infinity
+static INLINE vmask visminf_vm_vf(vfloat d) { return veq_vm_vf_vf(d, vcast_vf_f(-INFINITYf)); } // mask of lanes equal to -infinity
+static INLINE vmask visnan_vm_vf(vfloat d) { return vneq_vm_vf_vf(d, d); } // mask of lanes that compare unequal to themselves, i.e. NaN
+static INLINE vfloat visinf2_vf_vf_vm(vfloat d, vfloat m) { return (vfloat)vand_vm_vm_vm(visinf_vm_vf(d), vor_vm_vm_vm(vsignbit_vm_vf(d), (vmask)m)); } // where d is infinite: the bits of m with d's sign bit ORed in; all-zero bits elsewhere
+static INLINE vfloat visinff(vfloat d) { return visinf2_vf_vf_vm(d, vcast_vf_f(1.0f)); } // where d is infinite: 1.0f with the sign of d; zero elsewhere
+
+static INLINE vint2 vilogbp1_vi2_vf(vfloat d) { // Per lane: the biased exponent field of d minus 0x7e, i.e. the unbiased binary exponent plus one.
+  vmask m = vlt_vm_vf_vf(d, vcast_vf_f(5.421010862427522E-20f)); // lanes below 2^-64 (about 5.42e-20) get pre-scaled
+  d = vsel_vf_vm_vf_vf(m, vmul_vf_vf_vf(vcast_vf_f(1.8446744073709552E19f), d), d); // multiply those lanes by 2^64 (about 1.84e19); presumably lifts subnormals into the normal range
+  vint2 q = vand_vi2_vi2_vi2(vsrl_vi2_vi2_i(vcast_vi2_vm(vreinterpret_vm_vf(d)), 23), vcast_vi2_i(0xff)); // shift the bit pattern right by 23 and keep 8 bits: the exponent field
+  q = vsub_vi2_vi2_vi2(q, vsel_vi2_vm_vi2_vi2(m, vcast_vi2_i(64 + 0x7e), vcast_vi2_i(0x7e))); // remove the bias (0x7e), plus the extra 64 for pre-scaled lanes
+  return q;
+}
+
+static INLINE vfloat vpow2i_vf_vi2(vint2 q) { // Builds 2^q per lane directly from bits; no range check is made on q.
+  return (vfloat)vcast_vm_vi2(vsll_vi2_vi2_i(vadd_vi2_vi2_vi2(q, vcast_vi2_i(0x7f)), 23)); // (q + 0x7f) placed in the exponent field, mantissa bits zero
+}
+
+static INLINE vfloat vldexp_vf_vf_vi2(vfloat x, vint2 q) { // Per lane x * 2^q, applied as four equal power-of-two factors plus a remainder factor.
+  vfloat u;
+  vint2 m = vsra_vi2_vi2_i(q, 31); // arithmetic shift by 31: -1 for negative q, 0 otherwise
+  m = vsll_vi2_vi2_i(vsub_vi2_vi2_vi2(vsra_vi2_vi2_i(vadd_vi2_vi2_vi2(m, q), 6), m), 4); // m = (((q + m) >> 6) - m) << 4: the exponent carried by each of the four equal factors
+  q = vsub_vi2_vi2_vi2(q, vsll_vi2_vi2_i(m, 2)); // remainder exponent: q - 4 * m
+  m = vadd_vi2_vi2_vi2(m, vcast_vi2_i(0x7f)); // add the exponent bias
+  m = vand_vi2_vi2_vi2(vgt_vi2_vi2_vi2(m, vcast_vi2_i(0)), m); // clamp from below: non-positive values become 0
+  vint2 n = vgt_vi2_vi2_vi2(m, vcast_vi2_i(0xff));
+  m = vor_vi2_vi2_vi2(vandnot_vi2_vi2_vi2(n, m), vand_vi2_vi2_vi2(n, vcast_vi2_i(0xff))); // clamp from above: values over 0xff become 0xff
+  u = vreinterpret_vf_vm(vcast_vm_vi2(vsll_vi2_vi2_i(m, 23))); // float whose exponent field is m
+  x = vmul_vf_vf_vf(vmul_vf_vf_vf(vmul_vf_vf_vf(vmul_vf_vf_vf(x, u), u), u), u); // multiply by that factor four times
+  u = vreinterpret_vf_vm(vcast_vm_vi2(vsll_vi2_vi2_i(vadd_vi2_vi2_vi2(q, vcast_vi2_i(0x7f)), 23))); // 2^(remainder exponent)
+  return vmul_vf_vf_vf(x, u);
+}
+
+vfloat xldexpf(vfloat x, vint2 q) { return vldexp_vf_vf_vi2(x, q); } // non-static entry point that forwards to the inline helper vldexp_vf_vf_vi2
+
+vfloat xsinf(vfloat d) { // Per-lane single-precision sine: reduce by multiples of pi, then evaluate an odd polynomial on the reduced argument.
+  vint2 q;
+  vfloat u, s;
+
+  q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI))); // q = d / pi rounded to an integer
+  u = vcast_vf_vi2(q);
+
+  d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI4_Af*4), d); // d -= q * pi, removed in four pieces (4 * PI4_Af .. 4 * PI4_Df); vmla(a, b, c) is presumably a * b + c
+  d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI4_Bf*4), d);
+  d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI4_Cf*4), d);
+  d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI4_Df*4), d);
+
+  s = vmul_vf_vf_vf(d, d); // s = reduced argument squared
+
+  d = (vfloat)vxor_vm_vm_vm(vand_vm_vm_vm(veq_vm_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), (vmask)vcast_vf_f(-0.0f)), (vmask)d); // odd q: flip the sign of the reduced argument
+
+  u = vcast_vf_f(2.6083159809786593541503e-06f); // Horner evaluation in s
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.0001981069071916863322258f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833307858556509017944336f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666597127914428710938f));
+
+  u = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(u, d), d); // result = d + s * (u * d)
+
+  u = (vfloat)vor_vm_vm_vm(visinf_vm_vf(d), (vmask)u); // OR in the infinity mask; NOTE(review): d is the reduced argument at this point, not the original input
+
+  return u;
+}
+
+vfloat xcosf(vfloat d) { // Per-lane single-precision cosine: reduce by an odd multiple of pi/2, then reuse the sine polynomial.
+  vint2 q;
+  vfloat u, s;
+
+  q = vrint_vi2_vf(vsub_vf_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI)), vcast_vf_f(0.5f))); // d / pi - 0.5 rounded to an integer
+  q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vcast_vi2_i(1)); // q = 2 * q + 1, always odd
+
+  u = vcast_vf_vi2(q);
+  d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI4_Af*2), d); // d -= q * (pi / 2), removed in four pieces; vmla(a, b, c) is presumably a * b + c
+  d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI4_Bf*2), d);
+  d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI4_Cf*2), d);
+  d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI4_Df*2), d);
+
+  s = vmul_vf_vf_vf(d, d); // s = reduced argument squared
+
+  d = (vfloat)vxor_vm_vm_vm(vand_vm_vm_vm(veq_vm_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)), (vmask)vcast_vf_f(-0.0f)), (vmask)d); // flip the sign where bit 1 of q is clear
+
+  u = vcast_vf_f(2.6083159809786593541503e-06f); // Horner evaluation in s (same coefficients as xsinf)
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.0001981069071916863322258f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833307858556509017944336f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666597127914428710938f));
+
+  u = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(u, d), d); // result = d + s * (u * d)
+
+  u = (vfloat)vor_vm_vm_vm(visinf_vm_vf(d), (vmask)u); // OR in the infinity mask; NOTE(review): d is the reduced argument at this point, not the original input
+
+  return u;
+}
+
+vfloat2 xsincosf(vfloat d) { // Per-lane sine and cosine together; the returned pair has the sine in .x and the cosine in .y.
+  vint2 q;
+  vmask m;
+  vfloat u, s, t, rx, ry;
+  vfloat2 r;
+
+  q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_2_PI))); // q = d * (2 / pi) rounded to an integer
+
+  s = d;
+
+  u = vcast_vf_vi2(q);
+  s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI4_Af*2), s); // s = d - q * (pi / 2), removed in four pieces; vmla(a, b, c) is presumably a * b + c
+  s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI4_Bf*2), s);
+  s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI4_Cf*2), s);
+  s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI4_Df*2), s);
+
+  t = s; // keep the reduced argument
+
+  s = vmul_vf_vf_vf(s, s); // s = reduced argument squared
+
+  u = vcast_vf_f(-0.000195169282960705459117889f); // sine polynomial in s
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833215750753879547119141f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666537523269653320312f));
+  u = vmul_vf_vf_vf(vmul_vf_vf_vf(u, s), t);
+
+  rx = vadd_vf_vf_vf(t, u); // sine of the reduced argument: t + u * s * t
+
+  u = vcast_vf_f(-2.71811842367242206819355e-07f); // cosine polynomial in s
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(2.47990446951007470488548e-05f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.00138888787478208541870117f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0416666641831398010253906f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.5));
+
+  ry = vmla_vf_vf_vf_vf(s, u, vcast_vf_f(1)); // cosine of the reduced argument: 1 + s * u
+
+  m = veq_vm_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(0)); // even q keeps (rx, ry); odd q swaps them
+  r.x = vsel_vf_vm_vf_vf(m, rx, ry);
+  r.y = vsel_vf_vm_vf_vf(m, ry, rx);
+
+  m = veq_vm_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)); // bit 1 of q set: negate the sine
+  r.x = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vm_vm(m, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(r.x)));
+
+  m = veq_vm_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(2)), vcast_vi2_i(2)); // bit 1 of (q + 1) set: negate the cosine
+  r.y = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vm_vm(m, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(r.y)));
+
+  m = visinf_vm_vf(d); // d is still the original input here
+
+  r.x = (vfloat)vor_vm_vm_vm(m, (vmask)r.x); // infinite input: mask bits ORed into both results (presumably all-ones, i.e. NaN)
+  r.y = (vfloat)vor_vm_vm_vm(m, (vmask)r.y);
+
+  return r;
+}
+
+vfloat xtanf(vfloat d) { // Per-lane single-precision tangent: reduce by multiples of pi/2, evaluate a polynomial, take the reciprocal for odd multiples.
+  vint2 q;
+  vmask m;
+  vfloat u, s, x;
+
+  q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)(2 * M_1_PI)))); // q = d * (2 / pi) rounded to an integer
+
+  x = d;
+
+  u = vcast_vf_vi2(q);
+  x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI4_Af*2), x); // x = d - q * (pi / 2), removed in four pieces; vmla(a, b, c) is presumably a * b + c
+  x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI4_Bf*2), x);
+  x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI4_Cf*2), x);
+  x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI4_Df*2), x);
+
+  s = vmul_vf_vf_vf(x, x); // s = reduced argument squared
+
+  m = veq_vm_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)); // m: q is odd
+  x = (vfloat)vxor_vm_vm_vm(vand_vm_vm_vm(m, (vmask)vcast_vf_f(-0.0f)), (vmask)x); // odd q: negate the reduced argument
+
+  u = vcast_vf_f(0.00927245803177356719970703f); // Horner evaluation in s
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00331984995864331722259521f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0242998078465461730957031f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0534495301544666290283203f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.133383005857467651367188f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.333331853151321411132812f));
+
+  u = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(u, x), x); // u = x + s * (u * x)
+
+  u = vsel_vf_vm_vf_vf(m, vrec_vf_vf(u), u); // odd q: reciprocal of the (already negated) value, matching tan(x + pi/2) = -1 / tan(x)
+
+  u = (vfloat)vor_vm_vm_vm(visinf_vm_vf(d), (vmask)u); // infinite input: mask bits ORed in (presumably all-ones, i.e. NaN)
+
+  return u;
+}
+
+vfloat xsinf_u1(vfloat d) { // Per-lane sine with the argument reduction and the final products carried in two-float (vfloat2) arithmetic.
+  vint2 q;
+  vfloat u;
+  vfloat2 s, t, x;
+
+  q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(M_1_PI))); // q = d / pi rounded to an integer
+  u = vcast_vf_vi2(q);
+
+  s = dfadd2_vf2_vf_vf (d, vmul_vf_vf_vf(u, vcast_vf_f(-PI4_Af*4))); // s = d - q * pi, accumulated as a vfloat2 in four pieces
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI4_Bf*4)));
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI4_Cf*4)));
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI4_Df*4)));
+
+  t = s; // keep the reduced argument
+  s = dfsqu_vf2_vf2(s); // s = reduced argument squared
+
+  u = vcast_vf_f(2.6083159809786593541503e-06f); // Horner evaluation in s.x; vmla(a, b, c) is presumably a * b + c
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(-0.0001981069071916863322258f));
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.00833307858556509017944336f));
+
+  x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf(vcast_vf_f(-0.166666597127914428710938f), vmul_vf_vf_vf(u, s.x)), s)); // x = 1 + (-0.1666... + u * s.x) * s, last terms in vfloat2
+
+  x = dfmul_vf2_vf2_vf2(t, x); // multiply by the reduced argument
+  u = vadd_vf_vf_vf(x.x, x.y); // collapse to a single float
+
+  u = (vfloat)vxor_vm_vm_vm(vand_vm_vm_vm(veq_vm_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), (vmask)vcast_vf_f(-0.0)), (vmask)u); // odd q: flip the sign of the result
+
+  return u;
+}
+
+vfloat xcosf_u1(vfloat d) { // Per-lane cosine with the argument reduction and the final products carried in two-float (vfloat2) arithmetic.
+  vint2 q;
+  vfloat u;
+  vfloat2 s, t, x;
+
+  q = vrint_vi2_vf(vmla_vf_vf_vf_vf(d, vcast_vf_f(M_1_PI), vcast_vf_f(-0.5))); // d / pi - 0.5 rounded to an integer; vmla(a, b, c) is presumably a * b + c
+  q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vcast_vi2_i(1)); // q = 2 * q + 1, always odd
+  u = vcast_vf_vi2(q);
+
+  s = dfadd2_vf2_vf_vf (d, vmul_vf_vf_vf(u, vcast_vf_f(-PI4_Af*2))); // s = d - q * (pi / 2), accumulated as a vfloat2 in four pieces
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI4_Bf*2)));
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI4_Cf*2)));
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI4_Df*2)));
+
+  t = s; // keep the reduced argument
+  s = dfsqu_vf2_vf2(s); // s = reduced argument squared
+
+  u = vcast_vf_f(2.6083159809786593541503e-06f); // Horner evaluation in s.x (same coefficients as xsinf_u1)
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(-0.0001981069071916863322258f));
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.00833307858556509017944336f));
+
+  x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf(vcast_vf_f(-0.166666597127914428710938f), vmul_vf_vf_vf(u, s.x)), s)); // x = 1 + (-0.1666... + u * s.x) * s, last terms in vfloat2
+
+  x = dfmul_vf2_vf2_vf2(t, x); // multiply by the reduced argument
+  u = vadd_vf_vf_vf(x.x, x.y); // collapse to a single float
+
+  u = (vfloat)vxor_vm_vm_vm(vand_vm_vm_vm(veq_vm_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)), (vmask)vcast_vf_f(-0.0)), (vmask)u); // flip the sign where bit 1 of q is clear
+
+  return u;
+}
+
+vfloat2 xsincosf_u1(vfloat d) { // Per-lane sine (.x) and cosine (.y) with the reduction and final additions in two-float (vfloat2) arithmetic.
+  vint2 q;
+  vmask m;
+  vfloat u, rx, ry;
+  vfloat2 r, s, t, x;
+
+  q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(2 * M_1_PI))); // q = d * (2 / pi) rounded to an integer
+  u = vcast_vf_vi2(q);
+
+  s = dfadd2_vf2_vf_vf (d, vmul_vf_vf_vf(u, vcast_vf_f(-PI4_Af*2))); // s = d - q * (pi / 2), accumulated as a vfloat2 in four pieces
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI4_Bf*2)));
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI4_Cf*2)));
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI4_Df*2)));
+
+  t = s; // keep the reduced argument
+
+  s = dfsqu_vf2_vf2(s); // s = reduced argument squared
+  s.x = vadd_vf_vf_vf(s.x, s.y); // fold the square into a single float; only s.x is used below
+
+  u = vcast_vf_f(-0.000195169282960705459117889f); // sine polynomial in s.x; vmla(a, b, c) is presumably a * b + c
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.00833215750753879547119141f));
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(-0.166666537523269653320312f));
+
+  u = vmul_vf_vf_vf(u, vmul_vf_vf_vf(s.x, t.x)); // correction term u * s.x * t.x
+
+  x = dfadd_vf2_vf2_vf(t, u); // sine of the reduced argument: t + correction
+  rx = vadd_vf_vf_vf(x.x, x.y);
+
+  u = vcast_vf_f(-2.71811842367242206819355e-07f); // cosine polynomial in s.x
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(2.47990446951007470488548e-05f));
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(-0.00138888787478208541870117f));
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.0416666641831398010253906f));
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(-0.5));
+
+  x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf_vf(s.x, u)); // cosine of the reduced argument: 1 + s.x * u
+  ry = vadd_vf_vf_vf(x.x, x.y);
+
+  m = veq_vm_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(0)); // even q keeps (rx, ry); odd q swaps them
+  r.x = vsel_vf_vm_vf_vf(m, rx, ry);
+  r.y = vsel_vf_vm_vf_vf(m, ry, rx);
+
+  m = veq_vm_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)); // bit 1 of q set: negate the sine
+  r.x = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vm_vm(m, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(r.x)));
+
+  m = veq_vm_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(2)), vcast_vi2_i(2)); // bit 1 of (q + 1) set: negate the cosine
+  r.y = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vm_vm(m, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(r.y)));
+
+  m = visinf_vm_vf(d); // d is still the original input here
+  r.x = (vfloat)vor_vm_vm_vm(m, (vmask)r.x); // infinite input: mask bits ORed into both results (presumably all-ones, i.e. NaN)
+  r.y = (vfloat)vor_vm_vm_vm(m, (vmask)r.y);
+
+  return r;
+}
+
+vfloat xtanf_u1(vfloat d) { // Per-lane tangent with the reduction, polynomial tail and reciprocal in two-float (vfloat2) arithmetic.
+  vint2 q;
+  vfloat u;
+  vfloat2 s, t, x;
+  vmask m;
+
+  q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(M_2_PI))); // q = d * (2 / pi) rounded to an integer
+  u = vcast_vf_vi2(q);
+
+  s = dfadd2_vf2_vf_vf (d, vmul_vf_vf_vf(u, vcast_vf_f(-PI4_Af*2))); // s = d - q * (pi / 2), accumulated as a vfloat2 in four pieces
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI4_Bf*2)));
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI4_Cf*2)));
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI4_Df*2)));
+
+  m = veq_vm_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)); // m: q is odd
+  vmask n = vand_vm_vm_vm(m, (vmask)vcast_vf_f(-0.0)); // sign-bit pattern in the odd-q lanes
+  s.x = (vfloat)vxor_vm_vm_vm((vmask)s.x, n); // odd q: negate both halves of the reduced argument
+  s.y = (vfloat)vxor_vm_vm_vm((vmask)s.y, n);
+
+  t = s; // keep the (possibly negated) reduced argument
+  s = dfsqu_vf2_vf2(s); // s = reduced argument squared
+  s = dfnormalize_vf2_vf2(s);
+
+  u = vcast_vf_f(0.00446636462584137916564941f); // Horner evaluation in s.x; vmla(a, b, c) is presumably a * b + c
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(-8.3920182078145444393158e-05f));
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.0109639242291450500488281f));
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.0212360303848981857299805f));
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.0540687143802642822265625f));
+
+  x = dfadd_vf2_vf_vf(vcast_vf_f(0.133325666189193725585938f), vmul_vf_vf_vf(u, s.x)); // the last coefficients are applied in vfloat2: x = 0.1333... + u * s.x
+  x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf2(vcast_vf_f(0.33333361148834228515625f), dfmul_vf2_vf2_vf2(s, x)), s)); // x = 1 + (0.3333... + s * x) * s
+  x = dfmul_vf2_vf2_vf2(t, x); // multiply by the reduced argument
+
+  x = vsel_vf2_vm_vf2_vf2(m, dfrec_vf2_vf2(x), x); // odd q: reciprocal of the (already negated) value
+
+  u = vadd_vf_vf_vf(x.x, x.y); // collapse to a single float
+
+  return u;
+}
+
+vfloat xatanf(vfloat d) { // Per-lane single-precision arctangent: fold to |d| <= 1 via the reciprocal, evaluate a polynomial, then undo the folding.
+  vfloat s, t, u;
+  vint2 q;
+
+  q = vsel_vi2_vf_vf_vi2_vi2(d, vcast_vf_f(0.0f), vcast_vi2_i(2), vcast_vi2_i(0)); // bit 1 of q: d is negative
+  s = vabs_vf_vf(d);
+
+  q = vsel_vi2_vf_vf_vi2_vi2(vcast_vf_f(1.0f), s, vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), q); // bit 0 of q: |d| > 1
+  s = vsel_vf_vm_vf_vf(vlt_vm_vf_vf(vcast_vf_f(1.0f), s), vrec_vf_vf(s), s); // |d| > 1: work on 1 / |d|
+
+  t = vmul_vf_vf_vf(s, s); // t = s^2
+
+  u = vcast_vf_f(0.00282363896258175373077393f); // Horner evaluation in t; vmla(a, b, c) is presumably a * b + c
+  u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.0159569028764963150024414f));
+  u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.0425049886107444763183594f));
+  u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.0748900920152664184570312f));
+  u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.106347933411598205566406f));
+  u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.142027363181114196777344f));
+  u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.199926957488059997558594f));
+  u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.333331018686294555664062f));
+
+  t = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(t, u), s); // t = s + s * (t * u)
+
+  t = vsel_vf_vm_vf_vf(veq_vm_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vsub_vf_vf_vf(vcast_vf_f((float)(M_PI/2)), t), t); // reciprocal was used: atan(|d|) = pi/2 - atan(1 / |d|)
+
+  t = (vfloat)vxor_vm_vm_vm(vand_vm_vm_vm(veq_vm_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)), (vmask)vcast_vf_f(-0.0f)), (vmask)t); // negative d: negate the result
+
+#ifdef __ARM_NEON__
+  t = vsel_vf_vm_vf_vf(visinf_vm_vf(d), vmulsign_vf_vf_vf(vcast_vf_f((float)(M_PI/2)), d), t); // explicit atan(+-inf) = +-pi/2 (presumably because the NEON reciprocal is not exact at infinity); the constant used to be 1.5874..., which is 2^(2/3), not pi/2
+#endif
+
+  return t;
+}
+
+static INLINE vfloat atan2kf(vfloat y, vfloat x) { // atan2 kernel; the callers in this file pass a non-negative y and handle special operands themselves.
+  vfloat s, t, u;
+  vint2 q;
+  vmask p;
+
+  q = vsel_vi2_vf_vf_vi2_vi2(x, vcast_vf_f(0.0f), vcast_vi2_i(-2), vcast_vi2_i(0)); // q = -2 where x < 0, else 0
+  x = vabs_vf_vf(x);
+
+  q = vsel_vi2_vf_vf_vi2_vi2(x, y, vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), q); // q += 1 where |x| < y
+  p = vlt_vm_vf_vf(x, y);
+  s = vsel_vf_vm_vf_vf(p, vneg_vf_vf(x), y); // numerator: -|x| where |x| < y, else y
+  t = vmax_vf_vf_vf(x, y); // denominator: the larger of |x| and y, so the ratio has magnitude at most 1
+
+  s = vdiv_vf_vf_vf(s, t);
+  t = vmul_vf_vf_vf(s, s); // t = s^2
+
+  u = vcast_vf_f(0.00282363896258175373077393f); // Horner evaluation in t (same coefficients as xatanf); vmla(a, b, c) is presumably a * b + c
+  u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.0159569028764963150024414f));
+  u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.0425049886107444763183594f));
+  u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.0748900920152664184570312f));
+  u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.106347933411598205566406f));
+  u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.142027363181114196777344f));
+  u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.199926957488059997558594f));
+  u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.333331018686294555664062f));
+
+  t = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(t, u), s); // t = s + s * (t * u)
+  t = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f((float)(M_PI/2)), t); // add q * (pi / 2)
+
+  return t;
+}
+
+vfloat xatan2f(vfloat y, vfloat x) { // Per-lane atan2(y, x): kernel on (|y|, x), then fix-ups for zero/infinite operands, the sign of y and NaN.
+  vfloat r = atan2kf(vabs_vf_vf(y), x);
+
+  r = vmulsign_vf_vf_vf(r, x); // flip the kernel result where x has its sign bit set
+  r = vsel_vf_vm_vf_vf(vor_vm_vm_vm(visinf_vm_vf(x), veq_vm_vf_vf(x, vcast_vf_f(0.0f))), vsub_vf_vf_vf(vcast_vf_f((float)(M_PI/2)), visinf2_vf_vf_vm(x, vmulsign_vf_vf_vf(vcast_vf_f((float)(M_PI/2)), x))), r); // x zero or infinite: pi/2 minus (+-pi/2 with the sign of x when x is infinite, 0 otherwise) -> 0, pi or pi/2
+  r = vsel_vf_vm_vf_vf(visinf_vm_vf(y), vsub_vf_vf_vf(vcast_vf_f((float)(M_PI/2)), visinf2_vf_vf_vm(x, vmulsign_vf_vf_vf(vcast_vf_f((float)(M_PI/4)), x))), r); // y infinite: pi/2 minus (+-pi/4 when x is infinite too) -> pi/4, 3*pi/4 or pi/2
+
+  r = vsel_vf_vm_vf_vf(veq_vm_vf_vf(y, vcast_vf_f(0.0f)), (vfloat)vand_vm_vm_vm(veq_vm_vf_vf(vsign_vf_vf(x), vcast_vf_f(-1.0f)), (vmask)vcast_vf_f((float)M_PI)), r); // y == 0: pi where x carries a negative sign (including -0), otherwise +0
+
+  r = (vfloat)vor_vm_vm_vm(vor_vm_vm_vm(visnan_vm_vf(x), visnan_vm_vf(y)), (vmask)vmulsign_vf_vf_vf(r, y)); // apply the sign of y, then OR in the NaN-operand mask
+  return r;
+}
+
+vfloat xasinf(vfloat d) { // Per-lane arcsine: sign(d) * atan2(|d|, sqrt((1 + d) * (1 - d))).
+  vfloat onePlus = vadd_vf_vf_vf(vcast_vf_f(1.0f), d);
+  vfloat oneMinus = vsub_vf_vf_vf(vcast_vf_f(1.0f), d);
+  vfloat root = vsqrt_vf_vf(vmul_vf_vf_vf(onePlus, oneMinus));
+  vfloat angle = atan2kf(vabs_vf_vf(d), root);
+  // Where the square root came out as NaN, its mask is ORed into the result bits.
+  vfloat r = (vfloat)vor_vm_vm_vm(visnan_vm_vf(root), (vmask)angle);
+  return vmulsign_vf_vf_vf(r, d);
+}
+
+vfloat xacosf(vfloat d) { // Per-lane arccosine: atan2(sqrt((1 + d) * (1 - d)), |d|), reflected through pi for negative d.
+  vfloat onePlus = vadd_vf_vf_vf(vcast_vf_f(1.0f), d);
+  vfloat oneMinus = vsub_vf_vf_vf(vcast_vf_f(1.0f), d);
+  vfloat root = vsqrt_vf_vf(vmul_vf_vf_vf(onePlus, oneMinus));
+  // The kernel result takes the sign of d.
+  vfloat angle = vmulsign_vf_vf_vf(atan2kf(root, vabs_vf_vf(d)), d);
+  // Offset: pi in the lanes where d < 0, all-zero bits (+0) elsewhere.
+  vmask neg = vlt_vm_vf_vf(d, vcast_vf_f(0.0f));
+  vfloat shift = (vfloat)vand_vm_vm_vm(neg, (vmask)vcast_vf_f((float)M_PI));
+  return vadd_vf_vf_vf(angle, shift);
+}
+
+//
+
+static INLINE vfloat2 atan2kf_u1(vfloat2 y, vfloat2 x) { // atan2 kernel on two-float (vfloat2) operands, returning a vfloat2; the callers in this file pass a non-negative y.
+  vfloat u;
+  vfloat2 s, t;
+  vint2 q;
+  vmask p;
+
+  q = vsel_vi2_vf_vf_vi2_vi2(x.x, vcast_vf_f(0), vcast_vi2_i(-2), vcast_vi2_i(0)); // q = -2 where x.x < 0, else 0
+  p = vlt_vm_vf_vf(x.x, vcast_vf_f(0));
+  p = vand_vm_vm_vm(p, (vmask)vcast_vf_f(-0.0)); // sign-bit pattern in the lanes where x.x < 0
+  x.x = (vfloat)vxor_vm_vm_vm((vmask)x.x, p); // negate both halves there, i.e. take |x|
+  x.y = (vfloat)vxor_vm_vm_vm((vmask)x.y, p);
+
+  q = vsel_vi2_vf_vf_vi2_vi2(x.x, y.x, vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), q); // q += 1 where |x| < y (compared on the leading halves)
+  p = vlt_vm_vf_vf(x.x, y.x);
+  s = vsel_vf2_vm_vf2_vf2(p, dfneg_vf2_vf2(x), y); // numerator: -|x| where |x| < y, else y
+  t = vsel_vf2_vm_vf2_vf2(p, y, x); // denominator: the larger of the two
+
+  s = dfdiv_vf2_vf2_vf2(s, t);
+  t = dfsqu_vf2_vf2(s); // t = s^2
+  t = dfnormalize_vf2_vf2(t);
+
+  u = vcast_vf_f(-0.00176397908944636583328247f); // Horner evaluation in t.x; vmla(a, b, c) is presumably a * b + c
+  u = vmla_vf_vf_vf_vf(u, t.x, vcast_vf_f(0.0107900900766253471374512f));
+  u = vmla_vf_vf_vf_vf(u, t.x, vcast_vf_f(-0.0309564601629972457885742f));
+  u = vmla_vf_vf_vf_vf(u, t.x, vcast_vf_f(0.0577365085482597351074219f));
+  u = vmla_vf_vf_vf_vf(u, t.x, vcast_vf_f(-0.0838950723409652709960938f));
+  u = vmla_vf_vf_vf_vf(u, t.x, vcast_vf_f(0.109463557600975036621094f));
+  u = vmla_vf_vf_vf_vf(u, t.x, vcast_vf_f(-0.142626821994781494140625f));
+  u = vmla_vf_vf_vf_vf(u, t.x, vcast_vf_f(0.199983194470405578613281f));
+
+  //u = vmla_vf_vf_vf_vf(u, t.x, vcast_vf_f(-0.333332866430282592773438f));
+  //t = dfmul_vf2_vf2_vf(t, u);
+
+  t = dfmul_vf2_vf2_vf2(t, dfadd_vf2_vf_vf(vcast_vf_f(-0.333332866430282592773438f), vmul_vf_vf_vf(u, t.x))); // the final coefficient is applied in vfloat2 (replacing the two commented-out lines above): t = t * (-0.3333... + u * t.x)
+  t = dfmul_vf2_vf2_vf2(s, dfadd_vf2_vf_vf2(vcast_vf_f(1), t)); // t = s * (1 + t)
+  t = dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf(vcast_vf2_f_f(1.5707963705062866211f, -4.3711388286737928865e-08f), vcast_vf_vi2(q)), t); // add q * (pi / 2), with pi/2 given as a two-float constant
+
+  return t;
+}
+
+vfloat xatan2f_u1(vfloat y, vfloat x) { // Per-lane atan2(y, x) via the two-float kernel, then the same fix-ups for special operands as xatan2f.
+  vfloat2 d = atan2kf_u1(vcast_vf2_vf_vf(vabs_vf_vf(y), vcast_vf_f(0)), vcast_vf2_vf_vf(x, vcast_vf_f(0))); // kernel on (|y|, x), both widened with a zero low part
+  vfloat r = vadd_vf_vf_vf(d.x, d.y); // collapse to a single float
+
+  r = vmulsign_vf_vf_vf(r, x); // flip the kernel result where x has its sign bit set
+  r = vsel_vf_vm_vf_vf(vor_vm_vm_vm(visinf_vm_vf(x), veq_vm_vf_vf(x, vcast_vf_f(0))), vsub_vf_vf_vf(vcast_vf_f(M_PI/2), visinf2_vf_vf_vm(x, vmulsign_vf_vf_vf(vcast_vf_f(M_PI/2), x))), r); // x zero or infinite: pi/2 minus (+-pi/2 with the sign of x when x is infinite, 0 otherwise)
+  r = vsel_vf_vm_vf_vf(visinf_vm_vf(y), vsub_vf_vf_vf(vcast_vf_f(M_PI/2), visinf2_vf_vf_vm(x, vmulsign_vf_vf_vf(vcast_vf_f(M_PI/4), x))), r); // y infinite: pi/2 minus (+-pi/4 when x is infinite too)
+  r = vsel_vf_vm_vf_vf(veq_vm_vf_vf(y, vcast_vf_f(0.0)), (vfloat)vand_vm_vm_vm(veq_vm_vf_vf(vsign_vf_vf(x), vcast_vf_f(-1.0)), (vmask)vcast_vf_f(M_PI)), r); // y == 0: pi where x carries a negative sign (including -0), otherwise +0
+
+  r = (vfloat)vor_vm_vm_vm(vor_vm_vm_vm(visnan_vm_vf(x), visnan_vm_vf(y)), (vmask)vmulsign_vf_vf_vf(r, y)); // apply the sign of y, then OR in the NaN-operand mask
+  return r;
+}
+
+vfloat xasinf_u1(vfloat d) { // Per-lane arcsine via the two-float kernel: sign(d) * atan2(|d|, sqrt((1 + d) * (1 - d))); |d| == 1 returns the pi/2 constant directly.
+  vfloat2 root = dfsqrt_vf2_vf2(dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf(vcast_vf_f(1), d), dfsub_vf2_vf_vf(vcast_vf_f(1), d)));
+  vfloat2 ang = atan2kf_u1(vcast_vf2_vf_vf(vabs_vf_vf(d), vcast_vf_f(0)), root);
+  vfloat r = vsel_vf_vm_vf_vf(veq_vm_vf_vf(vabs_vf_vf(d), vcast_vf_f(1)), vcast_vf_f(1.570796326794896557998982), vadd_vf_vf_vf(ang.x, ang.y));
+  return vmulsign_vf_vf_vf(r, d);
+}
+
+vfloat xacosf_u1(vfloat d) { // acosf_u1 (README: higher accuracy single precision function)
+  vfloat2 d2 = atan2kf_u1(dfsqrt_vf2_vf2(dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf(vcast_vf_f(1), d), dfsub_vf2_vf_vf(vcast_vf_f(1), d))), vcast_vf2_vf_vf(vabs_vf_vf(d), vcast_vf_f(0)));
+  d2 = dfscale_vf2_vf2_vf(d2, vmulsign_vf_vf_vf(vcast_vf_f(1), d));
+  // Both parts of d2 are AND-ed with the "|d| != 1" mask (presumably zeroing them when |d| == 1 - confirm).
+  vmask m;
+  m = vneq_vm_vf_vf(vabs_vf_vf(d), vcast_vf_f(1));
+  d2.x = (vfloat)vand_vm_vm_vm(m, (vmask)d2.x);
+  d2.y = (vfloat)vand_vm_vm_vm(m, (vmask)d2.y);
+  m = vlt_vm_vf_vf(d, vcast_vf_f(0));
+  d2 = vsel_vf2_vm_vf2_vf2(m, dfadd_vf2_vf2_vf2(vcast_vf2_f_f(3.1415927410125732422f,-8.7422776573475857731e-08f), d2), d2);
+  // For d < 0 the pair (3.1415927..., -8.74e-08), whose sum is pi, was added above; the two parts are now summed.
+  return vadd_vf_vf_vf(d2.x, d2.y);
+}
+
+vfloat xatanf_u1(vfloat d) { // atanf_u1 (README: higher accuracy single precision function)
+  vfloat2 d2 = atan2kf_u1(vcast_vf2_vf_vf(vabs_vf_vf(d), vcast_vf_f(0)), vcast_vf2_f_f(1, 0)); // atan2kf_u1 with first argument (|d|, 0) and second argument (1, 0)
+  vfloat r = vadd_vf_vf_vf(d2.x, d2.y);
+  r = vsel_vf_vm_vf_vf(visinf_vm_vf(d), vcast_vf_f(1.570796326794896557998982), r); // where d tests as infinite the constant pi/2 is used
+  return vmulsign_vf_vf_vf(r, d); // NOTE(review): presumably transfers the sign of d onto r - confirm in the helper header
+}
+
+//
+
+vfloat xlogf(vfloat d) { // logf: single precision logarithm (README history 2.30)
+  vfloat x, x2, t, m;
+  vint2 e;
+  // e is taken from d * 0.7071f, m is d scaled by -e; note that d is then overwritten with that scaled copy x.
+  e = vilogbp1_vi2_vf(x = vmul_vf_vf_vf(d, vcast_vf_f(0.7071f)));
+  m = vldexp_vf_vf_vi2(d, vneg_vi2_vi2(e));
+  d = x;
+  // x = (m - 1) / (m + 1) and x2 = x * x; t is built from the constants below by repeated vmla steps in x2.
+  x = vdiv_vf_vf_vf(vadd_vf_vf_vf(vcast_vf_f(-1.0f), m), vadd_vf_vf_vf(vcast_vf_f(1.0f), m));
+  x2 = vmul_vf_vf_vf(x, x);
+
+  t = vcast_vf_f(0.2371599674224853515625f);
+  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(0.285279005765914916992188f));
+  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(0.400005519390106201171875f));
+  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(0.666666567325592041015625f));
+  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(2.0f));
+  // NOTE(review): assuming vmla(a, b, c) is a * b + c, the next line forms x * t + 0.693147... * e (the constant is ln 2) - confirm.
+  x = vmla_vf_vf_vf_vf(x, t, vmul_vf_vf_vf(vcast_vf_f(0.693147180559945286226764f), vcast_vf_vi2(e)));
+  // Special inputs; these tests see the reassigned d (the original input times 0.7071f).
+  x = vsel_vf_vm_vf_vf(vispinf_vm_vf(d), vcast_vf_f(INFINITYf), x);
+  x = (vfloat)vor_vm_vm_vm(vgt_vm_vf_vf(vcast_vf_f(0), d), (vmask)x);
+  x = vsel_vf_vm_vf_vf(veq_vm_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(-INFINITYf), x);
+
+  return x;
+}
+
+vfloat xexpf(vfloat d) { // expf: single precision exponential (README history 2.30)
+  vint2 q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(R_LN2f)));
+  vfloat s, u;
+  // q = rint(d * R_LN2f); s is d with q * L2Uf and q * L2Lf removed (assuming vmla(a, b, c) is a * b + c - TODO confirm).
+  s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf), d);
+  s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf), s);
+
+  u = vcast_vf_f(0.00136324646882712841033936f);
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00836596917361021041870117f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0416710823774337768554688f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.166665524244308471679688f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.499999850988388061523438f));
+
+  u = vadd_vf_vf_vf(vcast_vf_f(1.0f), vmla_vf_vf_vf_vf(vmul_vf_vf_vf(s, s), u, s));
+  // The result for the reduced argument is rescaled with q through vldexp.
+  u = vldexp_vf_vf_vi2(u, q);
+  // AND-NOT with the "d is -infinity" mask (presumably clearing the result to zero there - confirm).
+  u = (vfloat)vandnot_vm_vm_vm(visminf_vm_vf(d), (vmask)u);
+
+  return u;
+}
+
+#ifdef __ARM_NEON__ // NEON build: xsqrtf is assembled from the vrsqrteq_f32 estimate instead of calling vsqrt_vf_vf
+vfloat xsqrtf(vfloat d) { // single precision square root, NEON variant
+  vfloat e = (vfloat)vadd_vi2_vi2_vi2(vcast_vi2_i(0x20000000), vand_vi2_vi2_vi2(vcast_vi2_i(0x7f000000), vsrl_vi2_vi2_i((vint2)d, 1))); // e: built from the bits of d shifted right by one, masked with 0x7f000000
+  vfloat m = (vfloat)vadd_vi2_vi2_vi2(vcast_vi2_i(0x3f000000), vand_vi2_vi2_vi2(vcast_vi2_i(0x01ffffff), (vint2)d)); // m: low 25 bits of d added to 0x3f000000
+  float32x4_t x = vrsqrteq_f32(m); // NEON reciprocal square root estimate of m
+  x = vmulq_f32(x, vrsqrtsq_f32(m, vmulq_f32(x, x))); // one refinement step using the NEON vrsqrtsq_f32 step intrinsic
+  float32x4_t u = vmulq_f32(x, m);
+  u = vmlaq_f32(u, vmlsq_f32(m, u, u), vmulq_f32(x, vdupq_n_f32(0.5))); // u += (m - u*u) * (x * 0.5)
+  e = (vfloat)vandnot_vm_vm_vm(veq_vm_vf_vf(d, vcast_vf_f(0)), (vmask)e);
+  u = vmul_vf_vf_vf(e, u);
+  // Special inputs: infinite d, then NaN or negative d (mask OR-ed into the bits), then the sign of d.
+  u = vsel_vf_vm_vf_vf(visinf_vm_vf(d), vcast_vf_f(INFINITYf), u);
+  u = (vfloat)vor_vm_vm_vm(vor_vm_vm_vm(visnan_vm_vf(d), vlt_vm_vf_vf(d, vcast_vf_f(0))), (vmask)u);
+  u = vmulsign_vf_vf_vf(u, d);
+
+  return u;
+}
+#else
+vfloat xsqrtf(vfloat d) { return vsqrt_vf_vf(d); } // all other targets forward to vsqrt_vf_vf
+#endif
+
+vfloat xcbrtf(vfloat d) { // cbrtf: single precision cube root (README history 2.30)
+  vfloat x, y, q = vcast_vf_f(1.0), t;
+  vint2 e, qu, re;
+  // e comes from vilogbp1 of |d|; d is then rescaled by -e through vldexp.
+  e = vilogbp1_vi2_vf(vabs_vf_vf(d));
+  d = vldexp_vf_vf_vi2(d, vneg_vi2_vi2(e));
+  // t = e + 6144, qu = trunc(t / 3), re = trunc(t - 3 * qu); 6144 / 3 = 2048 is subtracted again below.
+  t = vadd_vf_vf_vf(vcast_vf_vi2(e), vcast_vf_f(6144));
+  qu = vtruncate_vi2_vf(vmul_vf_vf_vf(t, vcast_vf_f(1.0f/3.0f)));
+  re = vtruncate_vi2_vf(vsub_vf_vf_vf(t, vmul_vf_vf_vf(vcast_vf_vi2(qu), vcast_vf_f(3))));
+  // q stays 1 or becomes 1.2599... (cube root of 2) for re == 1, 1.5874... (cube root of 4) for re == 2.
+  q = vsel_vf_vm_vf_vf(veq_vm_vi2_vi2(re, vcast_vi2_i(1)), vcast_vf_f(1.2599210498948731647672106f), q);
+  q = vsel_vf_vm_vf_vf(veq_vm_vi2_vi2(re, vcast_vi2_i(2)), vcast_vf_f(1.5874010519681994747517056f), q);
+  q = vldexp_vf_vf_vi2(q, vsub_vi2_vi2_vi2(qu, vcast_vi2_i(2048)));
+
+  q = vmulsign_vf_vf_vf(q, d);
+  d = vabs_vf_vf(d);
+  // x is built from the constants below by repeated vmla steps in the rescaled |d|.
+  x = vcast_vf_f(-0.601564466953277587890625f);
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(2.8208892345428466796875f));
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(-5.532182216644287109375f));
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(5.898262500762939453125f));
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(-3.8095417022705078125f));
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(2.2241256237030029296875f));
+
+  y = vmul_vf_vf_vf(vmul_vf_vf_vf(d, x), x);
+  y = vmul_vf_vf_vf(vsub_vf_vf_vf(y, vmul_vf_vf_vf(vmul_vf_vf_vf(vcast_vf_f(2.0f / 3.0f), y), vmla_vf_vf_vf_vf(y, x, vcast_vf_f(-1.0f)))), q);
+
+  return y;
+}
+
+vfloat xcbrtf_u1(vfloat d) { // cbrtf_u1 (README: higher accuracy single precision function)
+  vfloat x, y, z, t;
+  vfloat2 q2 = vcast_vf2_f_f(1, 0), u, v;
+  vint2 e, qu, re;
+  // Same argument scaling as xcbrtf: e from vilogbp1 of |d|, d rescaled by -e.
+  e = vilogbp1_vi2_vf(vabs_vf_vf(d));
+  d = vldexp_vf_vf_vi2(d, vneg_vi2_vi2(e));
+  // t = e + 6144, qu = trunc(t / 3), re = trunc(t - 3 * qu).
+  t = vadd_vf_vf_vf(vcast_vf_vi2(e), vcast_vf_f(6144));
+  qu = vtruncate_vi2_vf(vmul_vf_vf_vf(t, vcast_vf_f(1.0/3.0)));
+  re = vtruncate_vi2_vf(vsub_vf_vf_vf(t, vmul_vf_vf_vf(vcast_vf_vi2(qu), vcast_vf_f(3))));
+  // q2 is a two-float pair: (1, 0), or a pair summing to about 1.25992 (re == 1) or 1.58740 (re == 2).
+  q2 = vsel_vf2_vm_vf2_vf2(veq_vm_vi2_vi2(re, vcast_vi2_i(1)), vcast_vf2_f_f(1.2599210739135742188f, -2.4018701694217270415e-08), q2);
+  q2 = vsel_vf2_vm_vf2_vf2(veq_vm_vi2_vi2(re, vcast_vi2_i(2)), vcast_vf2_f_f(1.5874010324478149414f,  1.9520385308169352356e-08), q2);
+
+  q2.x = vmulsign_vf_vf_vf(q2.x, d); q2.y = vmulsign_vf_vf_vf(q2.y, d);
+  d = vabs_vf_vf(d);
+  // Same starting constants as in xcbrtf.
+  x = vcast_vf_f(-0.601564466953277587890625f);
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(2.8208892345428466796875f));
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(-5.532182216644287109375f));
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(5.898262500762939453125f));
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(-3.8095417022705078125f));
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(2.2241256237030029296875f));
+  // One correction of x in plain float arithmetic (y = x^4 here), before the two-float steps below.
+  y = vmul_vf_vf_vf(x, x); y = vmul_vf_vf_vf(y, y); x = vsub_vf_vf_vf(x, vmul_vf_vf_vf(vmlanp_vf_vf_vf_vf(d, y, x), vcast_vf_f(-1.0 / 3.0)));
+
+  z = x;
+  // u = x^4 * d - x, evaluated with the df* pair helpers, then collapsed into y.
+  u = dfmul_vf2_vf_vf(x, x);
+  u = dfmul_vf2_vf2_vf2(u, u);
+  u = dfmul_vf2_vf2_vf(u, d);
+  u = dfadd2_vf2_vf2_vf(u, vneg_vf_vf(x));
+  y = vadd_vf_vf_vf(u.x, u.y);
+
+  y = vmul_vf_vf_vf(vmul_vf_vf_vf(vcast_vf_f(-2.0 / 3.0), y), z);
+  v = dfadd2_vf2_vf2_vf(dfmul_vf2_vf_vf(z, z), y);
+  v = dfmul_vf2_vf2_vf(v, d);
+  v = dfmul_vf2_vf2_vf2(v, q2);
+  z = vldexp_vf_vf_vi2(vadd_vf_vf_vf(v.x, v.y), vsub_vi2_vi2_vi2(qu, vcast_vi2_i(2048)));
+  // Special inputs are tested on the rescaled |d|; the sign is taken from q2.x.
+  z = vsel_vf_vm_vf_vf(visinf_vm_vf(d), vmulsign_vf_vf_vf(vcast_vf_f(INFINITY), q2.x), z);
+  z = vsel_vf_vm_vf_vf(veq_vm_vf_vf(d, vcast_vf_f(0)), (vfloat)vsignbit_vm_vf(q2.x), z);
+
+  return z;
+}
+
+static INLINE vfloat2 logkf(vfloat d) { // logarithm kernel returning a two-float pair; used by xlogf_u1, xpowf and xlog10f
+  vfloat2 x, x2;
+  vfloat t, m;
+  vint2 e;
+  // Same reduction as xlogf: e from d * 0.7071f, m = d rescaled by -e.
+  e = vilogbp1_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(0.7071f)));
+  m = vldexp_vf_vf_vi2(d, vneg_vi2_vi2(e));
+  // x = (m - 1) / (m + 1) and x2 = x^2, both carried as pairs.
+  x = dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf_vf(vcast_vf_f(-1), m), dfadd2_vf2_vf_vf(vcast_vf_f(1), m));
+  x2 = dfsqu_vf2_vf2(x);
+
+  t = vcast_vf_f(0.2371599674224853515625f);
+  t = vmla_vf_vf_vf_vf(t, x2.x, vcast_vf_f(0.285279005765914916992188f));
+  t = vmla_vf_vf_vf_vf(t, x2.x, vcast_vf_f(0.400005519390106201171875f));
+  t = vmla_vf_vf_vf_vf(t, x2.x, vcast_vf_f(0.666666567325592041015625f));
+  // Result: (ln 2 as a two-float pair) * e + 2 * x + x^3 * t.
+  return dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf(vcast_vf2_vf_vf(vcast_vf_f(0.69314718246459960938f), vcast_vf_f(-1.904654323148236017e-09f)),
+		       vcast_vf_vi2(e)),
+		dfadd2_vf2_vf2_vf2(dfscale_vf2_vf2_vf(x, vcast_vf_f(2)), dfmul_vf2_vf2_vf(dfmul_vf2_vf2_vf2(x2, x), t)));
+}
+
+vfloat xlogf_u1(vfloat d) { // logf_u1 (README: higher accuracy single precision function)
+  vfloat2 s = logkf(d);
+  vfloat x = vadd_vf_vf_vf(s.x, s.y);
+  // Special inputs: +infinity first, then the -INFINITY case, whose test differs on NEON.
+  x = vsel_vf_vm_vf_vf(vispinf_vm_vf(d), vcast_vf_f(INFINITY), x);
+#ifdef __ARM_NEON__
+  x = vsel_vf_vm_vf_vf(vlt_vm_vf_vf(d, vcast_vf_f(1e-37f)), vcast_vf_f(-INFINITY), x); // NEON: everything below 1e-37f is treated like zero
+#else
+  x = vsel_vf_vm_vf_vf(veq_vm_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(-INFINITY), x);
+#endif
+  x = (vfloat)vor_vm_vm_vm(vgt_vm_vf_vf(vcast_vf_f(0), d), (vmask)x); // the "0 > d" mask is OR-ed into the bits last
+
+  return x;
+}
+
+static INLINE vfloat expkf(vfloat2 d) { // exponential kernel taking a two-float pair; used by xpowf, xexp2f and xexp10f
+  vfloat u = vmul_vf_vf_vf(vadd_vf_vf_vf(d.x, d.y), vcast_vf_f(R_LN2f));
+  vint2 q = vrint_vi2_vf(u);
+  vfloat2 s, t;
+  // s = d - q * L2Uf - q * L2Lf, kept as a pair and then normalized.
+  s = dfadd2_vf2_vf2_vf(d, vmul_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf)));
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf)));
+
+  s = dfnormalize_vf2_vf2(s);
+  // Same constants as in xexpf, applied to the leading part s.x.
+  u = vcast_vf_f(0.00136324646882712841033936f);
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.00836596917361021041870117f));
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.0416710823774337768554688f));
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.166665524244308471679688f));
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.499999850988388061523438f));
+  // t = 1 + s + s^2 * u, then collapsed to one float and rescaled with q.
+  t = dfadd_vf2_vf2_vf2(s, dfmul_vf2_vf2_vf(dfsqu_vf2_vf2(s), u));
+
+  t = dfadd_vf2_vf_vf2(vcast_vf_f(1), t);
+  u = vadd_vf_vf_vf(t.x, t.y);
+  u = vldexp_vf_vf_vi2(u, q);
+
+  return u;
+}
+
+vfloat xpowf(vfloat x, vfloat y) { // powf: single precision power function (README history 2.60)
+#if 1
+  vmask yisnint = vneq_vm_vf_vf(vcast_vf_vi2(vrint_vi2_vf(y)), y);
+  vmask yisodd = vandnot_vm_vm_vm(yisnint, veq_vm_vi2_vi2(vand_vi2_vi2_vi2(vrint_vi2_vf(y), vcast_vi2_i(1)), vcast_vi2_i(1)));
+  // yisnint: y differs from its rounded value; yisodd: lowest bit of rint(y) is set, masked by yisnint via vandnot.
+  vfloat result = expkf(dfmul_vf2_vf2_vf(logkf(vabs_vf_vf(x)), y));
+  // Core value is expkf(logkf(|x|) * y); for x not greater than 0 it is multiplied by a -1 / +1 factor with yisnint OR-ed in.
+  result = vmul_vf_vf_vf(result,
+			 vsel_vf_vm_vf_vf(vgt_vm_vf_vf(x, vcast_vf_f(0)),
+					  vcast_vf_f(1),
+					  (vfloat)vor_vm_vm_vm(yisnint, (vmask)vsel_vf_vm_vf_vf(yisodd, vcast_vf_f(-1), vcast_vf_f(1)))));
+
+  vfloat efx = (vfloat)vxor_vm_vm_vm((vmask)vsub_vf_vf_vf(vabs_vf_vf(x), vcast_vf_f(1)), vsignbit_vm_vf(y));
+  // efx is |x| - 1 XOR-ed with the sign-bit mask of y; it drives the result when y is infinite.
+  result = vsel_vf_vm_vf_vf(visinf_vm_vf(y),
+			    (vfloat)vandnot_vm_vm_vm(vlt_vm_vf_vf(efx, vcast_vf_f(0.0f)),
+						     (vmask)vsel_vf_vm_vf_vf(veq_vm_vf_vf(efx, vcast_vf_f(0.0f)),
+									     vcast_vf_f(1.0f),
+									     vcast_vf_f(INFINITYf))),
+			    result);
+  // x infinite or zero.
+  result = vsel_vf_vm_vf_vf(vor_vm_vm_vm(visinf_vm_vf(x), veq_vm_vf_vf(x, vcast_vf_f(0))),
+			    vmul_vf_vf_vf(vsel_vf_vm_vf_vf(yisodd, vsign_vf_vf(x), vcast_vf_f(1)),
+					  (vfloat)vandnot_vm_vm_vm(vlt_vm_vf_vf(vsel_vf_vm_vf_vf(veq_vm_vf_vf(x, vcast_vf_f(0)), vneg_vf_vf(y), y), vcast_vf_f(0)),
+								   (vmask)vcast_vf_f(INFINITYf))),
+			    result);
+  // NaN tests on x and y are OR-ed into the result bits.
+  result = (vfloat)vor_vm_vm_vm(vor_vm_vm_vm(visnan_vm_vf(x), visnan_vm_vf(y)), (vmask)result);
+  // Applied last, so y == 0 or x == 1 gives 1 regardless of the steps above.
+  result = vsel_vf_vm_vf_vf(vor_vm_vm_vm(veq_vm_vf_vf(y, vcast_vf_f(0)), veq_vm_vf_vf(x, vcast_vf_f(1))), vcast_vf_f(1), result);
+
+  return result;
+#else
+  return expkf(dfmul_vf2_vf2_vf(logkf(x), y));
+#endif
+}
+
+static INLINE vfloat2 expk2f(vfloat2 d) { // exponential kernel like expkf, but returning a two-float pair; used by the hyperbolic functions and xexpm1f
+  vfloat u = vmul_vf_vf_vf(vadd_vf_vf_vf(d.x, d.y), vcast_vf_f(R_LN2f));
+  vint2 q = vrint_vi2_vf(u);
+  vfloat2 s, t;
+  // s = d - q * L2Uf - q * L2Lf; unlike expkf there is no dfnormalize step here.
+  s = dfadd2_vf2_vf2_vf(d, vmul_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf)));
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf)));
+
+  u = vcast_vf_f(0.00136324646882712841033936f);
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.00836596917361021041870117f));
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.0416710823774337768554688f));
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.166665524244308471679688f));
+  u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.499999850988388061523438f));
+  // t = 1 + s + s^2 * u, kept as a pair.
+  t = dfadd_vf2_vf2_vf2(s, dfmul_vf2_vf2_vf(dfsqu_vf2_vf2(s), u));
+
+  t = dfadd_vf2_vf_vf2(vcast_vf_f(1), t);
+  // The pair is scaled by vpow2i(q) rather than passed through vldexp.
+  return dfscale_vf2_vf2_vf(t, vpow2i_vf_vi2(q));
+}
+
+vfloat xsinhf(vfloat x) { // sinhf (README history 2.60): (e - 1/e) * 0.5 with e = expk2f(|x|)
+  vfloat y = vabs_vf_vf(x);
+  vfloat2 d = expk2f(vcast_vf2_vf_vf(y, vcast_vf_f(0)));
+  d = dfsub_vf2_vf2_vf2(d, dfrec_vf2_vf2(d));
+  y = vmul_vf_vf_vf(vadd_vf_vf_vf(d.x, d.y), vcast_vf_f(0.5));
+  // |x| > 89 or a NaN intermediate is replaced by INFINITYf; the sign of x and the NaN test on x follow.
+  y = vsel_vf_vm_vf_vf(vor_vm_vm_vm(vgt_vm_vf_vf(vabs_vf_vf(x), vcast_vf_f(89)),
+				    visnan_vm_vf(y)), vcast_vf_f(INFINITYf), y);
+  y = vmulsign_vf_vf_vf(y, x);
+  y = (vfloat)vor_vm_vm_vm(visnan_vm_vf(x), (vmask)y);
+
+  return y;
+}
+
+vfloat xcoshf(vfloat x) { // coshf (README history 2.60): (e + 1/e) * 0.5 with e = expk2f(|x|)
+  vfloat y = vabs_vf_vf(x);
+  vfloat2 d = expk2f(vcast_vf2_vf_vf(y, vcast_vf_f(0)));
+  d = dfadd_vf2_vf2_vf2(d, dfrec_vf2_vf2(d));
+  y = vmul_vf_vf_vf(vadd_vf_vf_vf(d.x, d.y), vcast_vf_f(0.5));
+  // |x| > 89 or a NaN intermediate is replaced by INFINITYf; no sign step here, unlike xsinhf.
+  y = vsel_vf_vm_vf_vf(vor_vm_vm_vm(vgt_vm_vf_vf(vabs_vf_vf(x), vcast_vf_f(89)),
+				    visnan_vm_vf(y)), vcast_vf_f(INFINITYf), y);
+  y = (vfloat)vor_vm_vm_vm(visnan_vm_vf(x), (vmask)y);
+
+  return y;
+}
+
+vfloat xtanhf(vfloat x) { // tanhf (README history 2.60): (d - e) / (d + e) with d = expk2f(|x|) and e = 1/d
+  vfloat y = vabs_vf_vf(x);
+  vfloat2 d = expk2f(vcast_vf2_vf_vf(y, vcast_vf_f(0)));
+  vfloat2 e = dfrec_vf2_vf2(d);
+  d = dfdiv_vf2_vf2_vf2(dfadd_vf2_vf2_vf2(d, dfneg_vf2_vf2(e)), dfadd_vf2_vf2_vf2(d, e));
+  y = vadd_vf_vf_vf(d.x, d.y);
+  // |x| > 8.664339742f or a NaN intermediate is replaced by 1.0f before the sign of x is applied.
+  y = vsel_vf_vm_vf_vf(vor_vm_vm_vm(vgt_vm_vf_vf(vabs_vf_vf(x), vcast_vf_f(8.664339742f)),
+				    visnan_vm_vf(y)), vcast_vf_f(1.0f), y);
+  y = vmulsign_vf_vf_vf(y, x);
+  y = (vfloat)vor_vm_vm_vm(visnan_vm_vf(x), (vmask)y);
+
+  return y;
+}
+
+static INLINE vfloat2 logk2f(vfloat2 d) { // logarithm kernel like logkf, but taking a two-float pair as its argument
+  vfloat2 x, x2, m;
+  vfloat t;
+  vint2 e;
+  // e is taken from the leading part d.x; the pair is rescaled with vpow2i(-e) instead of vldexp.
+  e = vilogbp1_vi2_vf(vmul_vf_vf_vf(d.x, vcast_vf_f(0.7071)));
+  m = dfscale_vf2_vf2_vf(d, vpow2i_vf_vi2(vneg_vi2_vi2(e)));
+  // x = (m - 1) / (m + 1) and x2 = x^2, both carried as pairs.
+  x = dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf2_vf(m, vcast_vf_f(-1)), dfadd2_vf2_vf2_vf(m, vcast_vf_f(1)));
+  x2 = dfsqu_vf2_vf2(x);
+
+  t = vcast_vf_f(0.2371599674224853515625f);
+  t = vmla_vf_vf_vf_vf(t, x2.x, vcast_vf_f(0.285279005765914916992188f));
+  t = vmla_vf_vf_vf_vf(t, x2.x, vcast_vf_f(0.400005519390106201171875f));
+  t = vmla_vf_vf_vf_vf(t, x2.x, vcast_vf_f(0.666666567325592041015625f));
+  // Result: (ln 2 as a two-float pair) * e + 2 * x + x^3 * t, the same combination as in logkf.
+  return dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf(vcast_vf2_vf_vf(vcast_vf_f(0.69314718246459960938f), vcast_vf_f(-1.904654323148236017e-09f)),
+					     vcast_vf_vi2(e)),
+			    dfadd2_vf2_vf2_vf2(dfscale_vf2_vf2_vf(x, vcast_vf_f(2)), dfmul_vf2_vf2_vf(dfmul_vf2_vf2_vf2(x2, x), t)));
+}
+
+vfloat xasinhf(vfloat x) { // inverse hyperbolic sine: logk2f(sqrt(y*y + 1) + y) with y = |x|, sign restored afterwards
+  vfloat y = vabs_vf_vf(x);
+  vfloat2 d = logk2f(dfadd_vf2_vf2_vf(dfsqrt_vf2_vf2(dfadd2_vf2_vf2_vf(dfmul_vf2_vf_vf(y, y),  vcast_vf_f(1))), y));
+  y = vadd_vf_vf_vf(d.x, d.y);
+  // Infinite x or a NaN intermediate is replaced by INFINITYf; then the sign of x and the NaN test on x are applied.
+  y = vsel_vf_vm_vf_vf(vor_vm_vm_vm(visinf_vm_vf(x), visnan_vm_vf(y)), vcast_vf_f(INFINITYf), y);
+  y = vmulsign_vf_vf_vf(y, x);
+  y = (vfloat)vor_vm_vm_vm(visnan_vm_vf(x), (vmask)y);
+
+  return y;
+}
+
+vfloat xacoshf(vfloat x) { // inverse hyperbolic cosine: logk2f(sqrt(x*x - 1) + x)
+  vfloat2 d = logk2f(dfadd2_vf2_vf2_vf(dfsqrt_vf2_vf2(dfadd2_vf2_vf2_vf(dfmul_vf2_vf_vf(x, x), vcast_vf_f(-1))), x));
+  vfloat y = vadd_vf_vf_vf(d.x, d.y);
+  // Infinite x or a NaN intermediate is replaced by INFINITYf.
+  y = vsel_vf_vm_vf_vf(vor_vm_vm_vm(visinf_vm_vf(x), visnan_vm_vf(y)), vcast_vf_f(INFINITYf), y);
+  // AND-NOT with the "x == 1" mask (presumably clearing the result to zero there - confirm).
+  y = (vfloat)vandnot_vm_vm_vm(veq_vm_vf_vf(x, vcast_vf_f(1.0f)), (vmask)y);
+  // The "x < 1" mask and the NaN test on x are OR-ed into the result bits.
+  y = (vfloat)vor_vm_vm_vm(vlt_vm_vf_vf(x, vcast_vf_f(1.0f)), (vmask)y);
+  y = (vfloat)vor_vm_vm_vm(visnan_vm_vf(x), (vmask)y);
+
+  return y;
+}
+
+vfloat xatanhf(vfloat x) { // inverse hyperbolic tangent: 0.5 * logk2f((1 + y) / (1 - y)) with y = |x|, sign restored afterwards
+  vfloat y = vabs_vf_vf(x);
+  vfloat2 d = logk2f(dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf_vf(vcast_vf_f(1), y), dfadd2_vf2_vf_vf(vcast_vf_f(1), vneg_vf_vf(y))));
+  y = (vfloat)vor_vm_vm_vm(vgt_vm_vf_vf(y, vcast_vf_f(1.0)), (vmask)vsel_vf_vm_vf_vf(veq_vm_vf_vf(y, vcast_vf_f(1.0)), vcast_vf_f(INFINITYf), vmul_vf_vf_vf(vadd_vf_vf_vf(d.x, d.y), vcast_vf_f(0.5)))); // y == 1 selects INFINITYf; the "y > 1" mask is OR-ed into the bits
+  // Masks for infinite x and for a NaN intermediate are OR-ed in, then the sign of x and the NaN test on x.
+  y = (vfloat)vor_vm_vm_vm(vor_vm_vm_vm(visinf_vm_vf(x), visnan_vm_vf(y)), (vmask)y);
+  y = vmulsign_vf_vf_vf(y, x);
+  y = (vfloat)vor_vm_vm_vm(visnan_vm_vf(x), (vmask)y);
+
+  return y;
+}
+
+vfloat xexp2f(vfloat a) { // exp2f (README history 2.60): expkf applied to a times ln 2
+  vfloat u = expkf(dfmul_vf2_vf2_vf(vcast_vf2_vf_vf(vcast_vf_f(0.69314718246459960938f), vcast_vf_f(-1.904654323148236017e-09f)), a)); // the constant pair sums to ln 2
+#ifdef __ARM_NEON__ // NEON: any a above 127 yields INFINITYf
+  u = vsel_vf_vm_vf_vf(vgt_vm_vf_vf(a, vcast_vf_f(127.0f)), vcast_vf_f(INFINITYf), u);
+#else
+  u = vsel_vf_vm_vf_vf(vispinf_vm_vf(a), vcast_vf_f(INFINITYf), u); // elsewhere only a == +infinity is special-cased
+#endif
+  u = (vfloat)vandnot_vm_vm_vm(visminf_vm_vf(a), (vmask)u); // AND-NOT with the "a is -infinity" mask
+  return u;
+}
+
+vfloat xexp10f(vfloat a) { // exp10f (README history 2.60): expkf applied to a times ln 10
+  vfloat u = expkf(dfmul_vf2_vf2_vf(vcast_vf2_vf_vf(vcast_vf_f(2.3025851249694824219f), vcast_vf_f(-3.1975436520781386207e-08f)), a)); // the constant pair sums to ln 10
+#ifdef __ARM_NEON__ // NEON: any a above 38 yields INFINITYf
+  u = vsel_vf_vm_vf_vf(vgt_vm_vf_vf(a, vcast_vf_f(38.0f)), vcast_vf_f(INFINITYf), u);
+#else
+  u = vsel_vf_vm_vf_vf(vispinf_vm_vf(a), vcast_vf_f(INFINITYf), u); // elsewhere only a == +infinity is special-cased
+#endif
+  u = (vfloat)vandnot_vm_vm_vm(visminf_vm_vf(a), (vmask)u); // AND-NOT with the "a is -infinity" mask
+  return u;
+}
+
+vfloat xexpm1f(vfloat a) { // exp(a) - 1, formed on the two-float result of expk2f before collapsing to one float
+  vfloat2 d = dfadd2_vf2_vf2_vf(expk2f(vcast_vf2_vf_vf(a, vcast_vf_f(0))), vcast_vf_f(-1.0));
+  vfloat x = vadd_vf_vf_vf(d.x, d.y);
+  x = vsel_vf_vm_vf_vf(vgt_vm_vf_vf(a, vcast_vf_f(88.0f)), vcast_vf_f(INFINITYf), x); // a > 88 yields INFINITYf
+  x = vsel_vf_vm_vf_vf(vlt_vm_vf_vf(a, vcast_vf_f(-0.15942385152878742116596338793538061065739925620174e+2f)), vcast_vf_f(-1), x); // a below about -15.94 yields exactly -1
+  return x;
+}
+
+vfloat xlog10f(vfloat a) { // log10f (README history 2.60): logkf(a) times a constant pair summing to log10(e)
+  vfloat2 d = dfmul_vf2_vf2_vf2(logkf(a), vcast_vf2_vf_vf(vcast_vf_f(0.43429449200630187988f), vcast_vf_f(-1.0103050118726031315e-08f)));
+  vfloat x = vadd_vf_vf_vf(d.x, d.y);
+  // Special inputs: +infinity, a < 0 (mask OR-ed into the bits), and a == 0 (-INFINITYf).
+  x = vsel_vf_vm_vf_vf(vispinf_vm_vf(a), vcast_vf_f(INFINITYf), x);
+  x = (vfloat)vor_vm_vm_vm(vgt_vm_vf_vf(vcast_vf_f(0), a), (vmask)x);
+  x = vsel_vf_vm_vf_vf(veq_vm_vf_vf(a, vcast_vf_f(0)), vcast_vf_f(-INFINITYf), x);
+
+  return x;
+}
+
+vfloat xlog1pf(vfloat a) { // log1pf (README history 2.60): logk2f applied to the two-float sum a + 1
+  vfloat2 d = logk2f(dfadd2_vf2_vf_vf(a, vcast_vf_f(1)));
+  vfloat x = vadd_vf_vf_vf(d.x, d.y);
+  // Special inputs: +infinity, a < -1 (mask OR-ed into the bits), and a == -1 (-INFINITYf).
+  x = vsel_vf_vm_vf_vf(vispinf_vm_vf(a), vcast_vf_f(INFINITYf), x);
+  x = (vfloat)vor_vm_vm_vm(vgt_vm_vf_vf(vcast_vf_f(-1), a), (vmask)x);
+  x = vsel_vf_vm_vf_vf(veq_vm_vf_vf(a, vcast_vf_f(-1)), vcast_vf_f(-INFINITYf), x);
+
+  return x;
+}
diff --git a/tester/Makefile b/tester/Makefile
new file mode 100644
index 00000000..8787cd11
--- /dev/null
+++ b/tester/Makefile
@@ -0,0 +1,16 @@
+all : tester testersp testeru1 testerspu1
+.PHONY : all clean
+tester : tester.c
+	gcc -Wall tester.c -lm -lmpfr -o tester
+
+testeru1 : testeru1.c
+	gcc -Wall testeru1.c -lm -lmpfr -o testeru1
+
+testersp : testersp.c
+	gcc -Wall testersp.c -lm -lmpfr -o testersp
+
+testerspu1 : testerspu1.c
+	gcc -Wall testerspu1.c -lm -lmpfr -o testerspu1
+# all and clean are declared phony above so that files with those names cannot mask them.
+clean :
+	rm -f *~ *.o tester testersp testeru1 testerspu1
diff --git a/tester/nonnumber.h b/tester/nonnumber.h
new file mode 100644
index 00000000..5d856fa9
--- /dev/null
+++ b/tester/nonnumber.h
@@ -0,0 +1,19 @@
+#if defined (__GNUC__) || defined (__INTEL_COMPILER) || defined (__clang__)
+#ifdef INFINITY
+#undef INFINITY
+#endif
+// Earlier definitions of INFINITY and NAN are dropped so that the builtin-based ones below are used.
+#ifdef NAN
+#undef NAN
+#endif
+
+#define NAN __builtin_nan("")
+#define NANf __builtin_nanf("")
+#define INFINITY __builtin_inf()
+#define INFINITYf __builtin_inff()
+#else
+// Other compilers: take NAN and INFINITY from <math.h> and supply the float-suffixed names as well.
+#include <math.h>
+#define NANf (NAN)
+#define INFINITYf (INFINITY)
+#endif
diff --git a/tester/tester.c b/tester/tester.c
new file mode 100644
index 00000000..858303ca
--- /dev/null
+++ b/tester/tester.c
@@ -0,0 +1,2851 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <errno.h>
+#include <inttypes.h>
+
+#include <mpfr.h>
+
+#include <unistd.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#include "nonnumber.h"
+
+#define POSITIVE_INFINITY INFINITY // INFINITY as provided by nonnumber.h / math.h, both included above
+#define NEGATIVE_INFINITY (-INFINITY)
+// Minimal boolean type for this file: a plain int together with the true/false macros below.
+typedef int boolean;
+
+#define true 1
+#define false 0
+
+void stop(char *mes) { // fatal-error helper: prints mes and a newline to stderr, then aborts the process
+  fprintf(stderr, "%s\n", mes);
+  abort();
+}
+
+int readln(int fd, char *buf, int cnt) {
+  // Reads one '\n'-terminated line (newline kept) of at most cnt-1 bytes from fd
+  // into buf and NUL-terminates it. Returns the number of bytes stored, or the
+  // read() result (0 at end of file, -1 on error) if a read does not deliver a byte.
+  int i, rcnt = 0;
+
+  if (cnt < 1) return -1;
+
+  while(cnt >= 2) {
+    i = read(fd, buf, 1);
+    if (i != 1) { *buf = '\0'; return i; }
+
+    rcnt++;
+    cnt--;
+    if (*buf++ == '\n') break;
+  }
+
+  *buf = '\0'; // buf points just past the last stored byte, which is still inside the caller's buffer
+  return rcnt;
+}
+
+int ptoc[2], ctop[2]; // descriptor pairs filled by pipe() in startChild: ptoc feeds the child's stdin, ctop carries the child's stdout
+int pid; // return value of fork() in startChild
+
+void startChild(const char *path, char *const argv[]) { // runs path/argv as a child whose stdin and stdout are pipes to this process
+  // Every system call is checked explicitly: assert() compiles to nothing under NDEBUG.
+  if (pipe(ptoc) != 0 || pipe(ctop) != 0) stop("startChild : pipe failed");
+
+  pid = fork();
+
+  if (pid == -1) stop("startChild : fork failed");
+
+  if (pid == 0) {
+    // child process
+    char buf0[1], buf1[1];
+    int i;
+
+    close(ptoc[1]);
+    close(ctop[0]);
+
+    i = dup2(ptoc[0], fileno(stdin));
+    if (i == -1) stop("startChild : dup2 on stdin failed");
+
+    i = dup2(ctop[1], fileno(stdout));
+    if (i == -1) stop("startChild : dup2 on stdout failed");
+
+    setvbuf(stdin, buf0, _IONBF,0);
+    setvbuf(stdout, buf1, _IONBF,0);
+
+    // fflush() is only defined for output streams, so stdin is not flushed here.
+    fflush(stdout);
+
+    execvp(path, argv);
+
+    fprintf(stderr, "execvp in startChild : %s\n", strerror(errno));
+
+    abort(); // execvp returned, so it failed: the child must never continue into the parent's code
+  }
+
+  // parent process
+
+  close(ptoc[0]);
+  close(ctop[1]);
+}
+
+double u2d(uint64_t u) {
+  // Reinterpret the 64-bit pattern as a double; memcpy copies the bytes
+  // verbatim, so no numeric conversion takes place.
+  double value;
+
+  memcpy(&value, &u, sizeof value);
+  return value;
+}
+
+uint64_t d2u(double d) {
+  // Return the raw 64-bit pattern of d; memcpy copies the bytes verbatim,
+  // so the value is reinterpreted rather than converted.
+  uint64_t bits;
+
+  memcpy(&bits, &d, sizeof bits);
+  return bits;
+}
+
+//
+
+boolean isPlusZero(double x) { return x == 0 && !signbit(x); } // zero with a clear sign bit
+boolean isMinusZero(double x) { return x == 0 && signbit(x) != 0; } // zero with the sign bit set
+boolean xisnan(double x) { return isnan(x) != 0; } // 1 for NaN, 0 otherwise
+double sign(double d) { if (d < 0) return -1; return 1; } // -1 for negative values, 1 for everything else (zeros and NaN included)
+
+boolean cmpDenorm(double x, double y) {
+  // NaN matches only NaN.
+  if (xisnan(x) || xisnan(y)) return xisnan(x) && xisnan(y);
+  // The isinf() results must agree; two infinities must then also be equal.
+  if (isinf(x) != isinf(y)) return false;
+  if (isinf(x)) return x == y;
+  // A zero reference value requires a zero of the same sign.
+  if (y == 0) {
+    return (isPlusZero(x) && isPlusZero(y)) || (isMinusZero(x) && isMinusZero(y));
+  }
+  // Both values are finite here and y is nonzero: only the sign() classification
+  // (negative versus not negative) is compared, not the magnitude.
+  return sign(x) == sign(y);
+}
+
+long double ulp(long double x) {
+  // Spacing 2^(e-53) for |x| = m * 2^e (frexpl convention), never below DBL_MIN.
+  int e = 0;
+  const long double mag = fabsl(x);
+
+  if (mag == 0) return DBL_MIN;
+
+  // frexpl() is called only for the binary exponent; the mantissa is discarded.
+  (void)frexpl(mag, &e);
+
+  return fmax(ldexp(1.0, e - 53), DBL_MIN);
+}
+
+double countULP(long double x, long double y) {
+  // Error of x relative to the reference y, measured in units of ulp(y);
+  // the codes 10000-10003 flag mismatches that cannot be expressed that way.
+  const double fx = x, fy = y;
+
+  if (xisnan(fx) || xisnan(fy)) return (xisnan(fx) && xisnan(fy)) ? 0 : 10000;
+
+  if (isinf(fx)) {
+    // Relaxed infinity handling
+    return (sign(fx) == sign(fy) && fabs(fy) > 1e+300) ? 0 : 10001;
+  }
+  // fx is finite from here on.
+
+  if (fy == 0) return fx == 0 ? 0 : 10002;
+
+  // An infinite reference paired with a finite result has no ulp distance.
+  if (isinf(fy)) return 10003;
+
+  return fabs((x - y) / ulp(y));
+}
+
+//
+
+double sinfr(double d) { // sin(d) evaluated by mpfr_sin, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_sin(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double sinlfr(long double d) { // sin(d) evaluated by mpfr_sin, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_sin(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+double cosfr(double d) { // cos(d) evaluated by mpfr_cos, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_cos(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double coslfr(long double d) { // cos(d) evaluated by mpfr_cos, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_cos(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+double tanfr(double d) { // tan(d) evaluated by mpfr_tan, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_tan(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double tanlfr(long double d) { // tan(d) evaluated by mpfr_tan, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_tan(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+double asinfr(double d) { // asin(d) evaluated by mpfr_asin, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_asin(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double asinlfr(long double d) { // asin(d) evaluated by mpfr_asin, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_asin(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+double acosfr(double d) { // acos(d) evaluated by mpfr_acos, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_acos(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double acoslfr(long double d) { // acos(d) evaluated by mpfr_acos, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_acos(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+double atanfr(double d) { // atan(d) evaluated by mpfr_atan, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_atan(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double atanlfr(long double d) { // atan(d) evaluated by mpfr_atan, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_atan(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+double atan2fr(double y, double x) { // atan2(y, x) evaluated by mpfr_atan2, rounded to nearest into a double
+  mpfr_t vx, vy;
+  mpfr_init(vx); mpfr_init(vy);
+  mpfr_set_d(vy, y, GMP_RNDN);
+  mpfr_set_d(vx, x, GMP_RNDN);
+  mpfr_atan2(vx, vy, vx, GMP_RNDN); // the result overwrites vx
+  const double rounded = mpfr_get_d(vx, GMP_RNDN);
+  mpfr_clear(vx); mpfr_clear(vy);
+  return rounded;
+}
+
+long double atan2lfr(long double y, long double x) { // atan2(y, x) evaluated by mpfr_atan2, rounded to nearest into a long double
+  mpfr_t vx, vy;
+  mpfr_init(vx); mpfr_init(vy);
+  mpfr_set_ld(vy, y, GMP_RNDN);
+  mpfr_set_ld(vx, x, GMP_RNDN);
+  mpfr_atan2(vx, vy, vx, GMP_RNDN); // the result overwrites vx
+  const long double rounded = mpfr_get_ld(vx, GMP_RNDN);
+  mpfr_clear(vx); mpfr_clear(vy);
+  return rounded;
+}
+
+double logfr(double d) { // log(d) evaluated by mpfr_log, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_log(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double loglfr(long double d) { // log(d) evaluated by mpfr_log, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_log(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+double expfr(double d) { // exp(d) evaluated by mpfr_exp, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_exp(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double explfr(long double d) { // exp(d) evaluated by mpfr_exp, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_exp(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+double powfr(double x, double y) { // x raised to y, evaluated by mpfr_pow, rounded to nearest into a double
+  mpfr_t vx, vy;
+  mpfr_init(vx); mpfr_init(vy);
+  mpfr_set_d(vx, x, GMP_RNDN);
+  mpfr_set_d(vy, y, GMP_RNDN);
+  mpfr_pow(vx, vx, vy, GMP_RNDN); // the result overwrites vx
+  const double rounded = mpfr_get_d(vx, GMP_RNDN);
+  mpfr_clear(vx); mpfr_clear(vy);
+  return rounded;
+}
+
+long double powlfr(long double x, long double y) { // x raised to y, evaluated by mpfr_pow, rounded to nearest into a long double
+  mpfr_t vx, vy;
+  mpfr_init(vx); mpfr_init(vy);
+  mpfr_set_ld(vx, x, GMP_RNDN);
+  mpfr_set_ld(vy, y, GMP_RNDN);
+  mpfr_pow(vx, vx, vy, GMP_RNDN); // the result overwrites vx
+  const long double rounded = mpfr_get_ld(vx, GMP_RNDN);
+  mpfr_clear(vx); mpfr_clear(vy);
+  return rounded;
+}
+
+double sinhfr(double d) { // sinh(d) evaluated by mpfr_sinh, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_sinh(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+double coshfr(double d) { // cosh(d) evaluated by mpfr_cosh, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_cosh(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+double tanhfr(double d) { // tanh(d) evaluated by mpfr_tanh, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_tanh(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+double asinhfr(double d) { // asinh(d) evaluated by mpfr_asinh, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_asinh(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+double acoshfr(double d) { // acosh(d) evaluated by mpfr_acosh, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_acosh(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+double atanhfr(double d) { // atanh(d) evaluated by mpfr_atanh, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_atanh(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double sinhlfr(long double d) { // sinh(d) evaluated by mpfr_sinh, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_sinh(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double coshlfr(long double d) { // cosh(d) evaluated by mpfr_cosh, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_cosh(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double tanhlfr(long double d) { // tanh(d) evaluated by mpfr_tanh, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_tanh(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double asinhlfr(long double d) { // asinh(d) evaluated by mpfr_asinh, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_asinh(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double acoshlfr(long double d) { // acosh(d) evaluated by mpfr_acosh, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_acosh(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double atanhlfr(long double d) { // atanh(d) evaluated by mpfr_atanh, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_atanh(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double sqrtlfr(long double d) { // sqrt(d) evaluated by mpfr_sqrt, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_sqrt(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+double sqrtfr(double d) { // sqrt(d) evaluated by mpfr_sqrt, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_sqrt(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+double cbrtfr(double d) { // cbrt(d) evaluated by mpfr_cbrt, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_cbrt(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double cbrtlfr(long double d) { // cbrt(d) evaluated by mpfr_cbrt, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_cbrt(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+double exp2fr(double d) { // 2 raised to d, evaluated by mpfr_exp2, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_exp2(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double exp2lfr(long double d) { // 2 raised to d, evaluated by mpfr_exp2, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_exp2(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+double exp10fr(double d) { // 10 raised to d, evaluated by mpfr_exp10, rounded to nearest into a double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_exp10(v, v, GMP_RNDN);
+  const double rounded = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+long double exp10lfr(long double d) { // 10 raised to d, evaluated by mpfr_exp10, rounded to nearest into a long double
+  mpfr_t v;
+  mpfr_init(v);
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_exp10(v, v, GMP_RNDN);
+  const long double rounded = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return rounded;
+}
+
+// Reference exp(d) - 1 for double. The argument is loaded into an MPFR
+// variable, mpfr_expm1 is applied in place, and the value is converted back;
+// every step rounds to nearest (GMP_RNDN). The variable is created with
+// mpfr_inits, so it has whatever MPFR default precision is in effect.
+double expm1fr(double d) {
+  mpfr_t acc;
+  double result;
+
+  mpfr_inits(acc, NULL);
+  mpfr_set_d(acc, d, GMP_RNDN);
+  mpfr_expm1(acc, acc, GMP_RNDN);
+  result = mpfr_get_d(acc, GMP_RNDN);
+  mpfr_clears(acc, NULL);
+
+  return result;
+}
+
+// Reference exp(d) - 1 for long double. The argument is loaded into an MPFR
+// variable, mpfr_expm1 is applied in place, and the value is converted back;
+// every step rounds to nearest (GMP_RNDN). The variable is created with
+// mpfr_inits, so it has whatever MPFR default precision is in effect.
+long double expm1lfr(long double d) {
+  mpfr_t acc;
+  long double result;
+
+  mpfr_inits(acc, NULL);
+  mpfr_set_ld(acc, d, GMP_RNDN);
+  mpfr_expm1(acc, acc, GMP_RNDN);
+  result = mpfr_get_ld(acc, GMP_RNDN);
+  mpfr_clears(acc, NULL);
+
+  return result;
+}
+
+// Reference base-10 logarithm for double. The argument is loaded into an MPFR
+// variable, mpfr_log10 is applied in place, and the value is converted back;
+// every step rounds to nearest (GMP_RNDN). The variable is created with
+// mpfr_inits, so it has whatever MPFR default precision is in effect.
+double log10fr(double d) {
+  mpfr_t acc;
+  double result;
+
+  mpfr_inits(acc, NULL);
+  mpfr_set_d(acc, d, GMP_RNDN);
+  mpfr_log10(acc, acc, GMP_RNDN);
+  result = mpfr_get_d(acc, GMP_RNDN);
+  mpfr_clears(acc, NULL);
+
+  return result;
+}
+
+// Reference base-10 logarithm for long double. The argument is loaded into an
+// MPFR variable, mpfr_log10 is applied in place, and the value is converted
+// back; every step rounds to nearest (GMP_RNDN). The variable is created with
+// mpfr_inits, so it has whatever MPFR default precision is in effect.
+long double log10lfr(long double d) {
+  mpfr_t acc;
+  long double result;
+
+  mpfr_inits(acc, NULL);
+  mpfr_set_ld(acc, d, GMP_RNDN);
+  mpfr_log10(acc, acc, GMP_RNDN);
+  result = mpfr_get_ld(acc, GMP_RNDN);
+  mpfr_clears(acc, NULL);
+
+  return result;
+}
+
+// Reference log(1 + d) for double. The argument is loaded into an MPFR
+// variable, mpfr_log1p is applied in place, and the value is converted back;
+// every step rounds to nearest (GMP_RNDN). The variable is created with
+// mpfr_inits, so it has whatever MPFR default precision is in effect.
+double log1pfr(double d) {
+  mpfr_t acc;
+  double result;
+
+  mpfr_inits(acc, NULL);
+  mpfr_set_d(acc, d, GMP_RNDN);
+  mpfr_log1p(acc, acc, GMP_RNDN);
+  result = mpfr_get_d(acc, GMP_RNDN);
+  mpfr_clears(acc, NULL);
+
+  return result;
+}
+
+// Reference log(1 + d) for long double. The argument is loaded into an MPFR
+// variable, mpfr_log1p is applied in place, and the value is converted back;
+// every step rounds to nearest (GMP_RNDN). The variable is created with
+// mpfr_inits, so it has whatever MPFR default precision is in effect.
+long double log1plfr(long double d) {
+  mpfr_t acc;
+  long double result;
+
+  mpfr_inits(acc, NULL);
+  mpfr_set_ld(acc, d, GMP_RNDN);
+  mpfr_log1p(acc, acc, GMP_RNDN);
+  result = mpfr_get_ld(acc, GMP_RNDN);
+  mpfr_clears(acc, NULL);
+
+  return result;
+}
+
+//
+
+// Pair of doubles. child_sincos() returns one of these with x holding the
+// first value decoded from the reply and y the second.
+typedef struct {
+  double x;
+  double y;
+} double2;
+
+// Sends the request "sin <x as hex>" on ptoc[1] and returns the value decoded
+// from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_sin(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "sin %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_sin");
+  if (readln(ctop[0], str, 255) < 1) stop("child_sin");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_sin");
+  return u2d(u);
+}
+
+// Sends the request "cos <x as hex>" on ptoc[1] and returns the value decoded
+// from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_cos(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "cos %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_cos");
+  if (readln(ctop[0], str, 255) < 1) stop("child_cos");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_cos");
+  return u2d(u);
+}
+
+// Sends the request "sincos <x as hex>" on ptoc[1] and decodes two hex values
+// from the reply line read on ctop[0]; the first goes to .x, the second to .y.
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not contain two hex values.
+double2 child_sincos(double x) {
+  char str[256];
+  uint64_t u = 0, v = 0;  // stay defined even if stop() were to return
+
+  sprintf(str, "sincos %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_sincos");
+  if (readln(ctop[0], str, 255) < 1) stop("child_sincos");
+  // Both fields are required; a short reply used to leave u and/or v
+  // uninitialized before conversion.
+  if (sscanf(str, "%" PRIx64 " %" PRIx64, &u, &v) != 2) stop("child_sincos");
+
+  double2 ret;
+  ret.x = u2d(u);
+  ret.y = u2d(v);
+  return ret;
+}
+
+// Sends the request "tan <x as hex>" on ptoc[1] and returns the value decoded
+// from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_tan(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "tan %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_tan");
+  if (readln(ctop[0], str, 255) < 1) stop("child_tan");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_tan");
+  return u2d(u);
+}
+
+// Sends the request "asin <x as hex>" on ptoc[1] and returns the value decoded
+// from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_asin(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "asin %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_asin");
+  if (readln(ctop[0], str, 255) < 1) stop("child_asin");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_asin");
+  return u2d(u);
+}
+
+// Sends the request "acos <x as hex>" on ptoc[1] and returns the value decoded
+// from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_acos(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "acos %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_acos");
+  if (readln(ctop[0], str, 255) < 1) stop("child_acos");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_acos");
+  return u2d(u);
+}
+
+// Sends the request "atan <x as hex>" on ptoc[1] and returns the value decoded
+// from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_atan(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "atan %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_atan");
+  if (readln(ctop[0], str, 255) < 1) stop("child_atan");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_atan");
+  return u2d(u);
+}
+
+// Sends the request "atan2 <y as hex> <x as hex>" on ptoc[1] (y first, then x)
+// and returns the value decoded from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_atan2(double y, double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "atan2 %" PRIx64 " %" PRIx64 "\n", d2u(y), d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_atan2");
+  if (readln(ctop[0], str, 255) < 1) stop("child_atan2");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_atan2");
+  return u2d(u);
+}
+
+// Sends the request "log <x as hex>" on ptoc[1] and returns the value decoded
+// from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_log(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "log %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_log");
+  if (readln(ctop[0], str, 255) < 1) stop("child_log");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_log");
+  return u2d(u);
+}
+
+// Sends the request "exp <x as hex>" on ptoc[1] and returns the value decoded
+// from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_exp(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "exp %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_exp");
+  if (readln(ctop[0], str, 255) < 1) stop("child_exp");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_exp");
+  return u2d(u);
+}
+
+// Sends the request "pow <x as hex> <y as hex>" on ptoc[1] (x first, then y)
+// and returns the value decoded from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_pow(double x, double y) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "pow %" PRIx64 " %" PRIx64 "\n", d2u(x), d2u(y));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_pow");
+  if (readln(ctop[0], str, 255) < 1) stop("child_pow");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_pow");
+  return u2d(u);
+}
+
+// Sends the request "sinh <x as hex>" on ptoc[1] and returns the value decoded
+// from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_sinh(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "sinh %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_sinh");
+  if (readln(ctop[0], str, 255) < 1) stop("child_sinh");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_sinh");
+  return u2d(u);
+}
+
+// Sends the request "cosh <x as hex>" on ptoc[1] and returns the value decoded
+// from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_cosh(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "cosh %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_cosh");
+  if (readln(ctop[0], str, 255) < 1) stop("child_cosh");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_cosh");
+  return u2d(u);
+}
+
+// Sends the request "tanh <x as hex>" on ptoc[1] and returns the value decoded
+// from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_tanh(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "tanh %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_tanh");
+  if (readln(ctop[0], str, 255) < 1) stop("child_tanh");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_tanh");
+  return u2d(u);
+}
+
+// Sends the request "asinh <x as hex>" on ptoc[1] and returns the value
+// decoded from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_asinh(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "asinh %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_asinh");
+  if (readln(ctop[0], str, 255) < 1) stop("child_asinh");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_asinh");
+  return u2d(u);
+}
+
+// Sends the request "acosh <x as hex>" on ptoc[1] and returns the value
+// decoded from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_acosh(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "acosh %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_acosh");
+  if (readln(ctop[0], str, 255) < 1) stop("child_acosh");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_acosh");
+  return u2d(u);
+}
+
+// Sends the request "atanh <x as hex>" on ptoc[1] and returns the value
+// decoded from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_atanh(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "atanh %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_atanh");
+  if (readln(ctop[0], str, 255) < 1) stop("child_atanh");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_atanh");
+  return u2d(u);
+}
+
+// Sends the request "sqrt <x as hex>" on ptoc[1] and returns the value decoded
+// from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_sqrt(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "sqrt %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_sqrt");
+  if (readln(ctop[0], str, 255) < 1) stop("child_sqrt");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_sqrt");
+  return u2d(u);
+}
+
+// Sends the request "cbrt <x as hex>" on ptoc[1] and returns the value decoded
+// from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_cbrt(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "cbrt %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_cbrt");
+  if (readln(ctop[0], str, 255) < 1) stop("child_cbrt");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_cbrt");
+  return u2d(u);
+}
+
+// Sends the request "exp2 <x as hex>" on ptoc[1] and returns the value decoded
+// from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_exp2(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "exp2 %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_exp2");
+  if (readln(ctop[0], str, 255) < 1) stop("child_exp2");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_exp2");
+  return u2d(u);
+}
+
+// Sends the request "exp10 <x as hex>" on ptoc[1] and returns the value
+// decoded from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_exp10(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "exp10 %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_exp10");
+  if (readln(ctop[0], str, 255) < 1) stop("child_exp10");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_exp10");
+  return u2d(u);
+}
+
+// Sends the request "expm1 <x as hex>" on ptoc[1] and returns the value
+// decoded from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_expm1(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "expm1 %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_expm1");
+  if (readln(ctop[0], str, 255) < 1) stop("child_expm1");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_expm1");
+  return u2d(u);
+}
+
+// Sends the request "log10 <x as hex>" on ptoc[1] and returns the value
+// decoded from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_log10(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "log10 %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_log10");
+  if (readln(ctop[0], str, 255) < 1) stop("child_log10");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_log10");
+  return u2d(u);
+}
+
+// Sends the request "log1p <x as hex>" on ptoc[1] and returns the value
+// decoded from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_log1p(double x) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "log1p %" PRIx64 "\n", d2u(x));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_log1p");
+  if (readln(ctop[0], str, 255) < 1) stop("child_log1p");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_log1p");
+  return u2d(u);
+}
+
+// Sends the request "ldexp <x as hex> <q as hex>" on ptoc[1] and returns the
+// value decoded from the hex reply line read on ctop[0].
+// NOTE(review): ptoc/ctop look like parent->child / child->parent pipes and
+// d2u/u2d like double<->uint64_t bit casts; confirm against their definitions.
+// NOTE(review): q goes through d2u() as well, so (assuming d2u takes a double)
+// the exponent is sent as the bit pattern of q converted to double; verify
+// that the receiving side decodes it the same way.
+// stop() is called if the request cannot be written in full, no reply line is
+// read, or the reply does not parse as a hex value.
+double child_ldexp(double x, int q) {
+  char str[256];
+  uint64_t u = 0;  // stays defined even if stop() were to return
+
+  sprintf(str, "ldexp %" PRIx64 " %" PRIx64 "\n", d2u(x), d2u(q));
+  size_t len = strlen(str);
+  if (write(ptoc[1], str, len) != (ssize_t)len) stop("child_ldexp");
+  if (readln(ctop[0], str, 255) < 1) stop("child_ldexp");
+  // An unparsable reply used to leave u uninitialized before u2d(u).
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_ldexp");
+  return u2d(u);
+}
+
+int allTestsPassed = 1;  // starts at 1; showResult() clears it to 0 when given a failing result
+
+// Reports the outcome of one check on stderr: " OK" when success is non-zero,
+// otherwise the " NG" marker. A failing outcome also clears allTestsPassed.
+void showResult(int success) {
+  const char *verdict = " OK";
+
+  if (!success) {
+    allTestsPassed = 0;
+    verdict = " NG **************";
+  }
+
+  fprintf(stderr, "%s\n", verdict);
+}
+
+void do_test() {
+  int i, j;
+
+  fprintf(stderr, "Denormal/nonnumber test atan2(y, x)\n\n");
+
+  fprintf(stderr, "If y is +0 and x is -0, +pi is returned ... ");
+  showResult(child_atan2(+0.0, -0.0) == M_PI);
+
+  fprintf(stderr, "If y is -0 and x is -0, -pi is returned ... ");
+  showResult(child_atan2(-0.0, -0.0) == -M_PI);
+
+  fprintf(stderr, "If y is +0 and x is +0, +0 is returned ... ");
+  showResult(isPlusZero(child_atan2(+0.0, +0.0)));
+
+  fprintf(stderr, "If y is -0 and x is +0, -0 is returned ... ");
+  showResult(isMinusZero(child_atan2(-0.0, +0.0)));
+
+  fprintf(stderr, "If y is positive infinity and x is negative infinity, +3*pi/4 is returned ... ");
+  showResult(child_atan2(POSITIVE_INFINITY, NEGATIVE_INFINITY) == 3*M_PI/4);
+
+  fprintf(stderr, "If y is negative infinity and x is negative infinity, -3*pi/4 is returned ... ");
+  showResult(child_atan2(NEGATIVE_INFINITY, NEGATIVE_INFINITY) == -3*M_PI/4);
+
+  fprintf(stderr, "If y is positive infinity and x is positive infinity, +pi/4 is returned ... ");
+  showResult(child_atan2(POSITIVE_INFINITY, POSITIVE_INFINITY) == M_PI/4);
+
+  fprintf(stderr, "If y is negative infinity and x is positive infinity, -pi/4 is returned ... ");
+  showResult(child_atan2(NEGATIVE_INFINITY, POSITIVE_INFINITY) == -M_PI/4);
+
+  {
+    fprintf(stderr, "If y is +0 and x is less than 0, +pi is returned ... ");
+
+    double ya[] = { +0.0 };
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+
+    boolean success = true;
+
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != M_PI) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is -0 and x is less than 0, -pi is returned ... ");
+
+    double ya[] = { -0.0 };
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != -M_PI) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is less than 0 and x is 0, -pi/2 is returned ... ");
+
+    double ya[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+    double xa[] = { +0.0, -0.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != -M_PI/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is greater than 0 and x is 0, pi/2 is returned ... ");
+
+
+    double ya[] = { 100000.5, 100000, 3, 2.5, 2, 1.5, 1.0, 0.5 };
+    double xa[] = { +0.0, -0.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != M_PI/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is greater than 0 and x is -0, pi/2 is returned ... ");
+
+    double ya[] = { 100000.5, 100000, 3, 2.5, 2, 1.5, 1.0, 0.5 };
+    double xa[] = { -0.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != M_PI/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is positive infinity, and x is finite, pi/2 is returned ... ");
+
+    double ya[] = { POSITIVE_INFINITY };
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != M_PI/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is negative infinity, and x is finite, -pi/2 is returned ... ");
+
+    double ya[] = { NEGATIVE_INFINITY };
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != -M_PI/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a finite value greater than 0, and x is negative infinity, +pi is returned ... ");
+
+    double ya[] = { 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    double xa[] = { NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != M_PI) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a finite value less than 0, and x is negative infinity, -pi is returned ... ");
+
+    double ya[] = { -0.5, -1.5, -2.0, -2.5, -3.0, -100000, -100000.5 };
+    double xa[] = { NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != -M_PI) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a finite value greater than 0, and x is positive infinity, +0 is returned ... ");
+
+    double ya[] = { 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    double xa[] = { POSITIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isPlusZero(child_atan2(ya[j], xa[i]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a finite value less than 0, and x is positive infinity, -0 is returned ... ");
+
+    double ya[] = { -0.5, -1.5, -2.0, -2.5, -3.0, -100000, -100000.5 };
+    double xa[] = { POSITIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isMinusZero(child_atan2(ya[j], xa[i]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is NaN, a NaN is returned ... ");
+
+    double ya[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5, NAN };
+    double xa[] = { NAN };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!xisnan(child_atan2(ya[j], xa[i]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a NaN, the result is a NaN ... ");
+
+    double ya[] = { NAN };
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5, NAN };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!xisnan(child_atan2(ya[j], xa[i]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  fprintf(stderr, "\nend of atan2 denormal/nonnumber test\n");
+
+  //
+
+  fprintf(stderr, "\nDenormal/nonnumber test pow(x, y)\n\n");
+
+  fprintf(stderr, "If x is +1 and y is a NaN, the result is 1.0 ... ");
+  showResult(child_pow(1, NAN) == 1.0);
+
+  fprintf(stderr, "If y is 0 and x is a NaN, the result is 1.0 ... ");
+  showResult(child_pow(NAN, 0) == 1.0);
+
+  fprintf(stderr, "If x is -1, and y is positive infinity, the result is 1.0 ... ");
+  showResult(child_pow(-1, POSITIVE_INFINITY) == 1.0);
+
+  fprintf(stderr, "If x is -1, and y is negative infinity, the result is 1.0 ... ");
+  showResult(child_pow(-1, NEGATIVE_INFINITY) == 1.0);
+
+  {
+    fprintf(stderr, "If x is a finite value less than 0, and y is a finite non-integer, a NaN is returned ... ");
+
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+    double ya[] = { -100000.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!xisnan(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is a NaN, the result is a NaN ... ");
+
+    double xa[] = { NAN };
+    double ya[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!xisnan(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a NaN, the result is a NaN ... ");
+
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    double ya[] = { NAN };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!xisnan(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is +0, and y is an odd integer greater than 0, the result is +0 ... ");
+
+    double xa[] = { +0.0 };
+    double ya[] = { 1, 3, 5, 7, 100001 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isPlusZero(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is -0, and y is an odd integer greater than 0, the result is -0 ... ");
+
+    double xa[] = { -0.0 };
+    double ya[] = { 1, 3, 5, 7, 100001 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isMinusZero(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is 0, and y greater than 0 and not an odd integer, the result is +0 ... ");
+
+    double xa[] = { +0.0, -0.0 };
+    double ya[] = { 0.5, 1.5, 2.0, 2.5, 4.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isPlusZero(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the absolute value of x is less than 1, and y is negative infinity, the result is positive infinity ... ");
+
+    double xa[] = { -0.999, -0.5, -0.0, +0.0, +0.5, +0.999 };
+    double ya[] = { NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_pow(xa[i], ya[j]) != POSITIVE_INFINITY) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the absolute value of x is greater than 1, and y is negative infinity, the result is +0 ... ");
+
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    double ya[] = { NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isPlusZero(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the absolute value of x is less than 1, and y is positive infinity, the result is +0 ... ");
+
+    double xa[] = { -0.999, -0.5, -0.0, +0.0, +0.5, +0.999 };
+    double ya[] = { POSITIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isPlusZero(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the absolute value of x is greater than 1, and y is positive infinity, the result is positive infinity ... ");
+
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    double ya[] = { POSITIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_pow(xa[i], ya[j]) != POSITIVE_INFINITY) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is negative infinity, and y is an odd integer less than 0, the result is -0 ... ");
+
+    double xa[] = { NEGATIVE_INFINITY };
+    double ya[] = { -100001, -5, -3, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isMinusZero(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is negative infinity, and y less than 0 and not an odd integer, the result is +0 ... ");
+
+    double xa[] = { NEGATIVE_INFINITY };
+    double ya[] = { -100000.5, -100000, -4, -2.5, -2, -1.5, -0.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isPlusZero(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is negative infinity, and y is an odd integer greater than 0, the result is negative infinity ... ");
+
+    double xa[] = { NEGATIVE_INFINITY };
+    double ya[] = { 1, 3, 5, 7, 100001 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_pow(xa[i], ya[j]) != NEGATIVE_INFINITY) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is negative infinity, and y greater than 0 and not an odd integer, the result is positive infinity ... ");
+
+    double xa[] = { NEGATIVE_INFINITY };
+    double ya[] = { 0.5, 1.5, 2, 2.5, 3.5, 4, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_pow(xa[i], ya[j]) != POSITIVE_INFINITY) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is positive infinity, and y less than 0, the result is +0 ... ");
+
+    double xa[] = { POSITIVE_INFINITY };
+    double ya[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isPlusZero(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is positive infinity, and y greater than 0, the result is positive infinity ... ");
+
+    double xa[] = { POSITIVE_INFINITY };
+    double ya[] = { 0.5, 1, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_pow(xa[i], ya[j]) != POSITIVE_INFINITY) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is +0, and y is an odd integer less than 0, +HUGE_VAL is returned ... ");
+
+    double xa[] = { +0.0 };
+    double ya[] = { -100001, -5, -3, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_pow(xa[i], ya[j]) != POSITIVE_INFINITY) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is -0, and y is an odd integer less than 0, -HUGE_VAL is returned ... ");
+
+    double xa[] = { -0.0 };
+    double ya[] = { -100001, -5, -3, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_pow(xa[i], ya[j]) != NEGATIVE_INFINITY) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is 0, and y is less than 0 and not an odd integer, +HUGE_VAL is returned ... ");
+
+    double xa[] = { +0.0, -0.0 };
+    double ya[] = { -100000.5, -100000, -4, -2.5, -2, -1.5, -0.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_pow(xa[i], ya[j]) != POSITIVE_INFINITY) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the result overflows, the functions return HUGE_VAL with the mathematically correct sign ... ");
+
+    double xa[] = { 1000, -1000 };
+    double ya[] = { 1000, 1000.5, 1001 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!cmpDenorm(child_pow(xa[i], ya[j]), powfr(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  fprintf(stderr, "\nEnd of pow denormal/nonnumber test\n\n");
+	
+  //
+
+  {
+    fprintf(stderr, "sin denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_sin(xa[i]), sinfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "sin in sincos denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      double2 q = child_sincos(xa[i]);
+      if (!cmpDenorm(q.x, sinfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "cos denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_cos(xa[i]), cosfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "cos in sincos denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      double2 q = child_sincos(xa[i]);
+      if (!cmpDenorm(q.y, cosfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "tan denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, M_PI/2, -M_PI/2 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_tan(xa[i]), tanfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "asin denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, 2, -2, 1, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_asin(xa[i]), asinfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_asin(xa[i]), asinfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "acos denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, 2, -2, 1, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_acos(xa[i]), acosfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "atan denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_atan(xa[i]), atanfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "log denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, 0, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_log(xa[i]), logfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "exp denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_exp(xa[i]), expfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_exp(xa[i]), expfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "sinh denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, +0.0, -0.0, POSITIVE_INFINITY, NEGATIVE_INFINITY, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_sinh(xa[i]), sinhfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_sinh(xa[i]), sinhfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "cosh denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, +0.0, -0.0, POSITIVE_INFINITY, NEGATIVE_INFINITY, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_cosh(xa[i]), coshfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_cosh(xa[i]), coshfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "tanh denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, +0.0, -0.0, POSITIVE_INFINITY, NEGATIVE_INFINITY, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_tanh(xa[i]), tanhfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_tanh(xa[i]), tanhfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "asinh denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, +0.0, -0.0, POSITIVE_INFINITY, NEGATIVE_INFINITY, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_asinh(xa[i]), asinhfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_asinh(xa[i]), asinhfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "acosh denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, +0.0, -0.0, 1.0, POSITIVE_INFINITY, NEGATIVE_INFINITY, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_acosh(xa[i]), acoshfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_acosh(xa[i]), acoshfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "atanh denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, +0.0, -0.0, 1.0, -1.0, POSITIVE_INFINITY, NEGATIVE_INFINITY, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_atanh(xa[i]), atanhfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_atanh(xa[i]), atanhfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+#if 0
+  {
+    fprintf(stderr, "sqrt denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, +0.0, -0.0, -1.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_sqrt(xa[i]), sqrtfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_sqrt(xa[i]), sqrtfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+#endif
+
+  {
+    fprintf(stderr, "cbrt denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, +0.0, -0.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_cbrt(xa[i]), cbrtfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_cbrt(xa[i]), cbrtfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "exp2 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_exp2(xa[i]), exp2fr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_exp2(xa[i]), exp2fr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "exp10 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_exp10(xa[i]), exp10fr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_exp10(xa[i]), exp10fr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "expm1 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_expm1(xa[i]), expm1fr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_expm1(xa[i]), expm1fr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "log10 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, 0, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_log10(xa[i]), log10fr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "log1p denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, 0, -1, -2 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_log1p(xa[i]), log1pfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_log1p(xa[i]), log1pfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  fprintf(stderr, "ldexp denormal/nonnumber test ... ");
+
+  {
+    boolean success = true;
+    for(i=-10000;i<=10000 && success;i++) {
+      double d = child_ldexp(1.0, i);
+      double c = ldexp(1.0, i);
+
+      boolean pass = (isfinite(c) && d == c) || cmpDenorm(c, d);
+      if (!pass) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", (double)i, d, c);
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  //
+
+  fprintf(stderr, "\nAccuracy test (max error in ulp)\n");
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_sin(d);
+      long double c = sinlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SIN;
+      }
+    }
+
+    for(d = -10000000;d < 10000000;d += 200.1) {
+      double q = child_sin(d);
+      long double c = sinlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SIN;
+      }
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      double start = u2d(d2u(M_PI_4 * i)-20);
+      double end = u2d(d2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2d(d2u(d)+1)) {
+	double q = child_sin(d);
+	long double c = sinlfr(d);
+	 double u = countULP(q, c);
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	  goto STOP_SIN;
+	}
+      }
+    }
+
+  STOP_SIN:
+
+    fprintf(stderr, "sin : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_cos(d);
+      long double c = coslfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = -10000000;d < 10000000;d += 200.1) {
+      double q = child_cos(d);
+      long double c = coslfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      double start = u2d(d2u(M_PI_4 * i)-20);
+      double end = u2d(d2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2d(d2u(d)+1)) {
+	double q = child_cos(d);
+	long double c = coslfr(d);
+	 double u = countULP(q, c);
+	max = fmax(max, u);
+      }
+    }
+
+    fprintf(stderr, "cos : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double2 q = child_sincos(d);
+      long double c = sinlfr(d);
+      double u = fabs((q.x - c) / ulp(c));
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q.x, (double)c, d, (double)ulp(c));
+	goto STOP_SIN2;
+      }
+    }
+
+    for(d = -10000000;d < 10000000;d += 200.1) {
+      double2 q = child_sincos(d);
+      long double c = sinlfr(d);
+      double u = fabs((q.x - c) / ulp(c));
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q.x, (double)c, d, (double)ulp(c));
+	goto STOP_SIN2;
+      }
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      double start = u2d(d2u(M_PI_4 * i)-20);
+      double end = u2d(d2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2d(d2u(d)+1)) {
+	double2 q = child_sincos(d);
+	long double c = sinlfr(d);
+	double u = fabs((q.x - c) / ulp(c));
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q.x, (double)c, d, (double)ulp(c));
+	  goto STOP_SIN2;
+	}
+      }
+    }
+
+  STOP_SIN2:
+
+    fprintf(stderr, "sin in sincos : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double2 q = child_sincos(d);
+      long double c = coslfr(d);
+      double u = fabs((q.y - c) / ulp(c));
+      max = fmax(max, u);
+    }
+
+    for(d = -10000000;d < 10000000;d += 200.1) {
+      double2 q = child_sincos(d);
+      long double c = coslfr(d);
+      double u = fabs((q.y - c) / ulp(c));
+      max = fmax(max, u);
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      double start = u2d(d2u(M_PI_4 * i)-20);
+      double end = u2d(d2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2d(d2u(d)+1)) {
+	double2 q = child_sincos(d);
+	long double c = coslfr(d);
+	double u = fabs((q.y - c) / ulp(c));
+	max = fmax(max, u);
+      }
+    }
+
+    fprintf(stderr, "cos in sincos : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_tan(d);
+      long double c = tanlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = -10000000;d < 10000000;d += 200.1) {
+      double q = child_tan(d);
+      long double c = tanlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      double start = u2d(d2u(M_PI_4 * i)-20);
+      double end = u2d(d2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2d(d2u(d)+1)) {
+	double q = child_tan(d);
+	long double c = tanlfr(d);
+	 double u = countULP(q, c);
+	max = fmax(max, u);
+      }
+    }
+
+    fprintf(stderr, "tan : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -1;d < 1;d += 0.00002) {
+      double q = child_asin(d);
+      long double c = asinlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ASIN;
+      }
+    }
+
+  STOP_ASIN:
+
+    fprintf(stderr, "asin : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -1;d < 1;d += 0.00002) {
+      double q = child_acos(d);
+      long double c = acoslfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ACOS;
+      }
+    }
+
+  STOP_ACOS:
+
+    fprintf(stderr, "acos : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_atan(d);
+      long double c = atanlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %g, d = %g\n", q, d);
+	goto STOP_ATAN;
+      }
+    }
+
+    for(d = -10000;d < 10000;d += 0.2) {
+      double q = child_atan(d);
+      long double c = atanlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %g, d = %g\n", q, d);
+	goto STOP_ATAN;
+      }
+    }
+
+  STOP_ATAN:
+
+    fprintf(stderr, "atan : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    double x, y, max = 0;
+
+    for(y = -10;y < 10;y += 0.05) {
+      for(x = -10;x < 10;x += 0.05) {
+	double q = child_atan2(y, x);
+	long double c = atan2lfr(y, x);
+	 double u = countULP(q, c);
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %g, y = %g, x = %g\n", q, y, x);
+	  goto STOP_ATAN2;
+	}
+      }
+    }
+
+    for(y = -100;y < 100;y += 0.51) {
+      for(x = -100;x < 100;x += 0.51) {
+	double q = child_atan2(y, x);
+	long double c = atan2lfr(y, x);
+	 double u = countULP(q, c);
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %g, y = %g, x = %g\n", q, y, x);
+	  goto STOP_ATAN2;
+	}
+      }
+    }
+
+  STOP_ATAN2:
+
+    fprintf(stderr, "atan2 : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = 0.0001;d < 10;d += 0.0001) {
+      double q = child_log(d);
+      long double c = loglfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = 0.0001;d < 10000;d += 0.1) {
+      double q = child_log(d);
+      long double c = loglfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    int i;
+    for(i = -1000;i <= 1000;i++) {
+      d = pow(2.1, i);
+      double q = child_log(d);
+      long double c = loglfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    fprintf(stderr, "log : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_exp(d);
+      long double c = explfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = -1000;d < 1000;d += 0.1) {
+      double q = child_exp(d);
+      long double c = explfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    fprintf(stderr, "exp : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double x, y, max = 0;
+
+    for(y = 0.1;y < 100;y += 0.2) {
+      for(x = -100;x < 100;x += 0.2) {
+	double q = child_pow(x, y);
+	long double c = powlfr(x, y);
+	double u = countULP(q, c);
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %g, x = %g, y = %g\n", q, x, y);
+	  goto STOP_POW;
+	}
+      }
+    }
+
+    double d;
+    for(d = -1000;d < 1000;d += 0.1) {
+      double q = child_pow(2.1, d);
+      long double c = powlfr(2.1, d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+
+      if (u > 1000) {
+	fprintf(stderr, "q = %g, c = %g, d = %g\n", q, (double)c, d);
+	goto STOP_POW;
+      }
+    }
+
+  STOP_POW:
+
+    fprintf(stderr, "pow : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10000;d < 10000;d += 0.2) {
+      double q = child_cbrt(d);
+      long double c = cbrtlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_CBRT;
+      }
+    }
+
+    int i;
+    for(i = -1000;i <= 1000;i++) {
+      d = pow(2.1, i);
+      double q = child_cbrt(d);
+      long double c = cbrtlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_CBRT;
+      }
+    }
+
+  STOP_CBRT:
+
+    fprintf(stderr, "cbrt : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+#if 0
+  {
+    double d, max = 0;
+
+    for(d = 0;d < 20000;d += 0.2) {
+      double q = child_sqrt(d);
+      long double c = sqrtlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SQRT;
+      }
+    }
+
+  STOP_SQRT:
+
+    fprintf(stderr, "sqrt : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+#endif
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_sinh(d);
+      long double c = sinhlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SINH;
+      }
+    }
+
+    for(d = -1000;d < 1000;d += 0.02) {
+      double q = child_sinh(d);
+      long double c = sinhlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SINH;
+      }
+    }
+
+  STOP_SINH:
+
+    fprintf(stderr, "sinh : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_cosh(d);
+      long double c = coshlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_COSH;
+      }
+    }
+
+    for(d = -1000;d < 1000;d += 0.02) {
+      double q = child_cosh(d);
+      long double c = coshlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_COSH;
+      }
+    }
+
+  STOP_COSH:
+
+    fprintf(stderr, "cosh : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_tanh(d);
+      long double c = tanhlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_TANH;
+      }
+    }
+
+    for(d = -1000;d < 1000;d += 0.02) {
+      double q = child_tanh(d);
+      long double c = tanhlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_TANH;
+      }
+    }
+
+  STOP_TANH:
+
+    fprintf(stderr, "tanh : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_asinh(d);
+      long double c = asinhlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ASINH;
+      }
+    }
+
+    for(d = -1000;d < 1000;d += 0.02) {
+      double q = child_asinh(d);
+      long double c = asinhlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ASINH;
+      }
+    }
+
+  STOP_ASINH:
+
+    fprintf(stderr, "asinh : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = 1;d < 10;d += 0.0002) {
+      double q = child_acosh(d);
+      long double c = acoshlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ACOSH;
+      }
+    }
+
+    for(d = 1;d < 1000;d += 0.02) {
+      double q = child_acosh(d);
+      long double c = acoshlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ACOSH;
+      }
+    }
+
+  STOP_ACOSH:
+
+    fprintf(stderr, "acosh : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_atanh(d);
+      long double c = atanhlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ATANH;
+      }
+    }
+
+    for(d = -1000;d < 1000;d += 0.02) {
+      double q = child_atanh(d);
+      long double c = atanhlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ATANH;
+      }
+    }
+
+  STOP_ATANH:
+
+    fprintf(stderr, "atanh : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_exp2(d);
+      long double c = exp2lfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = -1000;d < 1000;d += 0.02) {
+      double q = child_exp2(d);
+      long double c = exp2lfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    fprintf(stderr, "exp2 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_exp10(d);
+      long double c = exp10lfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = -300;d < 300;d += 0.01) {
+      double q = child_exp10(d);
+      long double c = exp10lfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    fprintf(stderr, "exp10 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_expm1(d);
+      long double c = expm1lfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 5) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\n", q, (double)c, d);
+	goto STOP_EXPM1;
+      }
+    }
+
+    for(d = -1000;d < 1000;d += 0.021) {
+      double q = child_expm1(d);
+      long double c = expm1lfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 5) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\n", q, (double)c, d);
+	goto STOP_EXPM1;
+      }
+    }
+
+    for(d = 0;d < 300;d += 0.021) {
+      double d2 = pow(10, -d);
+      double q = child_expm1(d2);
+      long double c = expm1lfr(d2);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 5) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\n", q, (double)c, d);
+	goto STOP_EXPM1;
+      }
+    }
+
+    for(d = 0;d < 300;d += 0.021) {
+      double d2 = -pow(10, -d);
+      double q = child_expm1(d2);
+      long double c = expm1lfr(d2);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 5) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\n", q, (double)c, d);
+	goto STOP_EXPM1;
+      }
+    }
+
+  STOP_EXPM1:
+
+    fprintf(stderr, "expm1 : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = 0.0001;d < 10;d += 0.0001) {
+      double q = child_log10(d);
+      long double c = log10lfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = 0.0001;d < 10000;d += 0.1) {
+      double q = child_log10(d);
+      long double c = log10lfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    fprintf(stderr, "log10 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = 0.0001;d < 10;d += 0.0001) {
+      double q = child_log1p(d);
+      long double c = log1plfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = 0.0001;d < 10000;d += 0.1) {
+      double q = child_log1p(d);
+      long double c = log1plfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = 0;d < 300;d += 0.02) {
+      double d2 = pow(10, -d);
+      double q = child_log1p(d2);
+      long double c = log1plfr(d2);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = 0;d < 300;d += 0.02) {
+      double d2 = -pow(10, -d);
+      double q = child_log1p(d2);
+      long double c = log1plfr(d2);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    fprintf(stderr, "log1p : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+}
+
+// Entry point of the tester.
+//
+// The command line after the program name is the command (plus its arguments)
+// to start as the child process under test. The MPFR default precision is set
+// to 128 bits before the child is started and do_test() is run.
+//
+// Returns 0 when allTestsPassed is set after do_test(), -1 otherwise
+// (including when no command to run was given).
+int main(int argc, char **argv) {
+  // Without at least one argument there is nothing to execute; argv2[0] would
+  // be NULL (or argv2 would be indexed at -1 when argc is 0).
+  if (argc < 2) {
+    fprintf(stderr, "Usage : %s <command to test> [<arguments> ...]\n", argc > 0 ? argv[0] : "tester");
+    return -1;
+  }
+
+  char *argv2[argc];
+  int i;
+
+  // Shift the arguments down by one so that argv2[0] is the child command,
+  // and terminate the vector with NULL as execvp expects.
+  for(i=1;i<argc;i++) argv2[i-1] = argv[i];
+  argv2[argc-1] = NULL;
+
+  mpfr_set_default_prec(128);
+
+  startChild(argv2[0], argv2);
+
+  do_test();
+
+  if (allTestsPassed) {
+    fprintf(stderr, "\n\n*** All tests passed\n");
+    return 0;
+  }
+
+  fprintf(stderr, "\n\n*** There were errors in some tests\n");
+  return -1;
+}
diff --git a/tester/testersp.c b/tester/testersp.c
new file mode 100644
index 00000000..a7e50129
--- /dev/null
+++ b/tester/testersp.c
@@ -0,0 +1,2949 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <inttypes.h>
+
+#include <mpfr.h>
+
+#include <unistd.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#include "nonnumber.h"
+
+// Single-precision constants: INFINITY and M_PI cast to float.
+#define POSITIVE_INFINITYf ((float)INFINITY)
+#define NEGATIVE_INFINITYf (-(float)INFINITY)
+#define M_PIf ((float)M_PI)
+
+// Infinities taken directly from the INFINITY macro, without a cast.
+#define POSITIVE_INFINITY (INFINITY)
+#define NEGATIVE_INFINITY (-INFINITY)
+
+// Minimal boolean type for this file: an int, with true/false defined as 1 and 0.
+typedef int boolean;
+
+#define true 1
+#define false 0
+
+// When nonzero, flushToZero() replaces values with magnitude below 1.2e-38
+// by a zero of the same sign; when zero, flushToZero() returns its input as is.
+int enableFlushToZero = 0;
+
+void stop(char *mes) {
+  fprintf(stderr, "%s\n", mes);
+  abort();
+}
+
+// Read one line from fd into buf, one byte at a time.
+//
+// fd  : file descriptor to read from
+// buf : destination buffer of at least cnt bytes
+// cnt : capacity of buf, including room for the terminating '\0'
+//
+// Reading stops after a '\n' has been stored or once cnt-1 bytes have been
+// stored, whichever comes first; the stored bytes (including the '\n', if one
+// was read) are then NUL-terminated inside the buffer.
+//
+// Returns the number of bytes stored (the newline counts), -1 if cnt < 1, or
+// the result of read() (0 or -1) if a byte could not be read. In the last
+// case the buffer is not terminated.
+int readln(int fd, char *buf, int cnt) {
+  int i, rcnt = 0;
+
+  if (cnt < 1) return -1;
+
+  while(cnt >= 2) {
+    i = read(fd, buf, 1);
+    if (i != 1) return i;
+
+    rcnt++;
+    cnt--;
+    // Advance past the byte just stored so that buf always points at the
+    // next free slot; the terminator below then never lands out of bounds.
+    if (*buf++ == '\n') break;
+  }
+
+  *buf = '\0';
+  return rcnt;
+}
+
+// Pipes created by startChild(): ptoc is duplicated onto the child's stdin
+// (parent to child), ctop onto the child's stdout (child to parent).
+// pid holds the return value of fork().
+int ptoc[2], ctop[2];
+int pid;
+
+// Start the program under test as a child process connected through two pipes.
+//
+// path : program to execute (looked up by execvp)
+// argv : NULL-terminated argument vector passed to execvp
+//
+// In the child, the read end of ptoc becomes stdin and the write end of ctop
+// becomes stdout, both switched to unbuffered mode before exec. In the parent,
+// the ends used by the child are closed, leaving ptoc[1] for writing to the
+// child and ctop[0] for reading from it.
+//
+// Failures of pipe() or fork() abort the tester with a diagnostic. Failures in
+// the child (dup2 or execvp) print a diagnostic and terminate only the child,
+// so that it can never fall through into the parent's code path.
+void startChild(const char *path, char *const argv[]) {
+  if (pipe(ptoc) != 0 || pipe(ctop) != 0) {
+    perror("pipe");
+    abort();
+  }
+
+  pid = fork();
+
+  if (pid == -1) {
+    perror("fork");
+    abort();
+  }
+
+  if (pid == 0) {
+    // child process
+    char buf0[1], buf1[1];
+
+    close(ptoc[1]);
+    close(ctop[0]);
+
+    // Wire the pipe ends to the child's standard input and output.
+    if (dup2(ptoc[0], fileno(stdin)) == -1 || dup2(ctop[1], fileno(stdout)) == -1) {
+      perror("dup2");
+      _exit(127);
+    }
+
+    setvbuf(stdin, buf0, _IONBF,0);
+    setvbuf(stdout, buf1, _IONBF,0);
+
+    fflush(stdin);
+    fflush(stdout);
+
+    execvp(path, argv);
+
+    // execvp returns only on failure; report it and leave without running
+    // any of the parent's logic in this process.
+    perror(path);
+    _exit(127);
+  }
+
+  // parent process
+
+  close(ptoc[0]);
+  close(ctop[1]);
+}
+
+// Reinterpret the 32 bits of u as a float (bit copy, no numeric conversion).
+float u2f(uint32_t u) {
+  float result;
+  memcpy(&result, &u, sizeof(result));
+  return result;
+}
+
+// Return the 32 bits of the float d as an unsigned integer (bit copy, no numeric conversion).
+uint32_t f2u(float d) {
+  uint32_t bits;
+  memcpy(&bits, &d, sizeof(bits));
+  return bits;
+}
+
+//
+
+// True iff x compares equal to zero and carries a positive sign (+0.0f).
+boolean isPlusZerof(float x) {
+  if (x != 0) return false;
+  return copysignf(1, x) == 1;
+}
+// True iff x compares equal to zero and carries a negative sign (-0.0f).
+boolean isMinusZerof(float x) {
+  if (x != 0) return false;
+  return copysignf(1, x) == -1;
+}
+// True iff x is NaN, detected by x not comparing equal to itself.
+boolean xisnanf(float x) {
+  return !(x == x);
+}
+// Returns -1 when d < 0 and 1 otherwise (so zeros of either sign and NaN give 1).
+float signf(float d) {
+  if (d < 0) return -1;
+  return 1;
+}
+
+// True iff x compares equal to zero and carries a positive sign (+0.0).
+boolean isPlusZero(double x) {
+  if (x != 0) return false;
+  return copysign(1, x) == 1;
+}
+// True iff x compares equal to zero and carries a negative sign (-0.0).
+boolean isMinusZero(double x) {
+  if (x != 0) return false;
+  return copysign(1, x) == -1;
+}
+// True iff x is NaN, detected by x not comparing equal to itself.
+boolean xisnan(double x) {
+  return !(x == x);
+}
+
+// When enableFlushToZero is set, replace any y with |y| < 1.2e-38 by a zero
+// carrying the sign of y; otherwise return y unchanged.
+double flushToZero(double y) {
+  if (!enableFlushToZero) return y;
+  return fabs(y) < 1.2e-38 ? copysign(0.0, y) : y;
+}
+
+// Compare a result x against an expected value y for the special-value tests.
+//
+// y is first passed through flushToZero(). The two values are then considered
+// matching when
+//   - both are NaN,
+//   - both are the same infinity,
+//   - y is zero and x is a zero of the same sign, or
+//   - both are finite, y is nonzero, and signf() gives the same result for both.
+// Returns true on a match and false otherwise.
+boolean cmpDenorm(float x, float y) {
+  y = flushToZero(y);
+
+  const boolean nanX = xisnanf(x), nanY = xisnanf(y);
+  if (nanX || nanY) return nanX && nanY;
+
+  // With an infinity on either side, only an identical infinity matches.
+  if (isinf(x) || isinf(y)) return x == y;
+
+  if (y == 0) {
+    return (isPlusZerof(x) && isPlusZerof(y)) || (isMinusZerof(x) && isMinusZerof(y));
+  }
+
+  // Both finite here: only the sign classification is compared.
+  return signf(x) == signf(y);
+}
+
+double ulp(double x) {
+  x = fabsf(x);
+  int exp;
+
+  if (x == 0) {
+    return FLT_MIN;
+  } else {
+    frexpf(x, &exp);
+  }
+
+  return fmaxf(ldexpf(1.0, exp-24), FLT_MIN);
+}
+
+double countULP(double x, double y) {
+  x = flushToZero(x);
+  y = flushToZero(y);
+  float fx = (float)x;
+  float fy = (float)y;
+  if (xisnan(fx) && xisnan(fy)) return 0;
+  if (xisnan(fx) || xisnan(fy)) return 10000;
+  if (isinf(fx)) {
+    if (signf(fx) == signf(fy) && fabs(fy) > 1e+37) return 0; // Relaxed infinity handling
+    return 10001;
+  }
+  if (fx == POSITIVE_INFINITY && fy == POSITIVE_INFINITY) return 0;
+  if (fx == NEGATIVE_INFINITY && fy == NEGATIVE_INFINITY) return 0;
+  if (fy == 0) {
+    if (fx == 0) return 0;
+    return 10002;
+  }
+  if (!xisnan(fx) && !xisnan(fy) && !isinf(fx) && !isinf(fy)) {
+    return fabs((x - y) / ulp(y));
+  }
+  return 10003;
+}
+
+//
+
+// Reference sin(d): evaluated with MPFR (round to nearest) and returned as a double.
+double sinfr(double d) {
+  mpfr_t frx;
+  // The variadic list must end with a null pointer of type mpfr_ptr.
+  mpfr_inits(frx, (mpfr_ptr) 0);
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_sin(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+// Reference sin(d) in long double: evaluated with MPFR (round to nearest).
+long double sinlfr(long double d) {
+  mpfr_t frx;
+  // The variadic list must end with a null pointer of type mpfr_ptr.
+  mpfr_inits(frx, (mpfr_ptr) 0);
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_sin(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+// Reference cos(d): evaluated with MPFR (round to nearest) and returned as a double.
+double cosfr(double d) {
+  mpfr_t frx;
+  // The variadic list must end with a null pointer of type mpfr_ptr.
+  mpfr_inits(frx, (mpfr_ptr) 0);
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_cos(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+// Reference cos(d) in long double: evaluated with MPFR (round to nearest).
+long double coslfr(long double d) {
+  mpfr_t frx;
+  // The variadic list must end with a null pointer of type mpfr_ptr.
+  mpfr_inits(frx, (mpfr_ptr) 0);
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_cos(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+// Reference tan(d): evaluated with MPFR (round to nearest) and returned as a double.
+double tanfr(double d) {
+  mpfr_t frx;
+  // The variadic list must end with a null pointer of type mpfr_ptr.
+  mpfr_inits(frx, (mpfr_ptr) 0);
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_tan(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+// Reference tan(d) in long double: evaluated with MPFR (round to nearest).
+long double tanlfr(long double d) {
+  mpfr_t frx;
+  // The variadic list must end with a null pointer of type mpfr_ptr.
+  mpfr_inits(frx, (mpfr_ptr) 0);
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_tan(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+// Reference asin(d): evaluated with MPFR (round to nearest) and returned as a double.
+double asinfr(double d) {
+  mpfr_t frx;
+  // The variadic list must end with a null pointer of type mpfr_ptr.
+  mpfr_inits(frx, (mpfr_ptr) 0);
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_asin(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+// Reference asin(d) in long double: evaluated with MPFR (round to nearest).
+long double asinlfr(long double d) {
+  mpfr_t frx;
+  // The variadic list must end with a null pointer of type mpfr_ptr.
+  mpfr_inits(frx, (mpfr_ptr) 0);
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_asin(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+// Reference acos(d): evaluated with MPFR (round to nearest) and returned as a double.
+double acosfr(double d) {
+  mpfr_t frx;
+  // The variadic list must end with a null pointer of type mpfr_ptr.
+  mpfr_inits(frx, (mpfr_ptr) 0);
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_acos(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+// Reference acos(d) in long double: evaluated with MPFR (round to nearest).
+long double acoslfr(long double d) {
+  mpfr_t frx;
+  // The variadic list must end with a null pointer of type mpfr_ptr.
+  mpfr_inits(frx, (mpfr_ptr) 0);
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_acos(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+// Reference atan(d): evaluated with MPFR (round to nearest) and returned as a double.
+double atanfr(double d) {
+  mpfr_t frx;
+  // The variadic list must end with a null pointer of type mpfr_ptr.
+  mpfr_inits(frx, (mpfr_ptr) 0);
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_atan(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+// Reference atan(d) in long double: evaluated with MPFR (round to nearest).
+long double atanlfr(long double d) {
+  mpfr_t frx;
+  // The variadic list must end with a null pointer of type mpfr_ptr.
+  mpfr_inits(frx, (mpfr_ptr) 0);
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_atan(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+// Reference atan2(y, x): evaluated with MPFR (round to nearest) and returned as a double.
+double atan2fr(double y, double x) {
+  mpfr_t frx, fry;
+  // The variadic list must end with a null pointer of type mpfr_ptr.
+  mpfr_inits(frx, fry, (mpfr_ptr) 0);
+  mpfr_set_d(fry, y, GMP_RNDN);
+  mpfr_set_d(frx, x, GMP_RNDN);
+  // Result is written back into frx.
+  mpfr_atan2(frx, fry, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, fry, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double atan2lfr(long double y, long double x) {
+  /* atan2(y, x) evaluated by MPFR at its active default precision, rounded to the nearest long double. */
+  mpfr_t frx, fry;
+  mpfr_init_set_ld(fry, y, GMP_RNDN);
+  mpfr_init_set_ld(frx, x, GMP_RNDN);
+  mpfr_atan2(frx, fry, frx, GMP_RNDN); /* result overwrites frx */
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, fry, (mpfr_ptr) 0); /* the varargs terminator must be a null mpfr_ptr, not a bare NULL */
+  return ret;
+}
+
+double logfr(double d) {
+  /* Natural logarithm of d evaluated by MPFR at its active default precision, rounded to the nearest double. */
+  mpfr_t frx;
+  mpfr_init_set_d(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_log(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+long double loglfr(long double d) {
+  /* Natural logarithm of d evaluated by MPFR at its active default precision, rounded to the nearest long double. */
+  mpfr_t frx;
+  mpfr_init_set_ld(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_log(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+double expfr(double d) {
+  /* e^d evaluated by MPFR at its active default precision, rounded to the nearest double. */
+  mpfr_t frx;
+  mpfr_init_set_d(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_exp(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+long double explfr(long double d) {
+  /* e^d evaluated by MPFR at its active default precision, rounded to the nearest long double. */
+  mpfr_t frx;
+  mpfr_init_set_ld(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_exp(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+double powfr(double x, double y) {
+  /* x raised to the power y, evaluated by MPFR at its active default precision, rounded to the nearest double. */
+  mpfr_t frx, fry;
+  mpfr_init_set_d(frx, x, GMP_RNDN);
+  mpfr_init_set_d(fry, y, GMP_RNDN);
+  mpfr_pow(frx, frx, fry, GMP_RNDN); /* result overwrites frx */
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, fry, (mpfr_ptr) 0); /* the varargs terminator must be a null mpfr_ptr, not a bare NULL */
+  return ret;
+}
+
+long double powlfr(long double x, long double y) {
+  /* x raised to the power y, evaluated by MPFR at its active default precision, rounded to the nearest long double. */
+  mpfr_t frx, fry;
+  mpfr_init_set_ld(frx, x, GMP_RNDN);
+  mpfr_init_set_ld(fry, y, GMP_RNDN);
+  mpfr_pow(frx, frx, fry, GMP_RNDN); /* result overwrites frx */
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, fry, (mpfr_ptr) 0); /* the varargs terminator must be a null mpfr_ptr, not a bare NULL */
+  return ret;
+}
+
+double sinhfr(double d) {
+  /* Hyperbolic sine of d evaluated by MPFR at its active default precision, rounded to the nearest double. */
+  mpfr_t frx;
+  mpfr_init_set_d(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_sinh(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+double coshfr(double d) {
+  /* Hyperbolic cosine of d evaluated by MPFR at its active default precision, rounded to the nearest double. */
+  mpfr_t frx;
+  mpfr_init_set_d(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_cosh(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+double tanhfr(double d) {
+  /* Hyperbolic tangent of d evaluated by MPFR at its active default precision, rounded to the nearest double. */
+  mpfr_t frx;
+  mpfr_init_set_d(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_tanh(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+double asinhfr(double d) {
+  /* Inverse hyperbolic sine of d evaluated by MPFR at its active default precision, rounded to the nearest double. */
+  mpfr_t frx;
+  mpfr_init_set_d(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_asinh(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+double acoshfr(double d) {
+  /* Inverse hyperbolic cosine of d evaluated by MPFR at its active default precision, rounded to the nearest double. */
+  mpfr_t frx;
+  mpfr_init_set_d(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_acosh(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+double atanhfr(double d) {
+  /* Inverse hyperbolic tangent of d evaluated by MPFR at its active default precision, rounded to the nearest double. */
+  mpfr_t frx;
+  mpfr_init_set_d(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_atanh(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+long double sinhlfr(long double d) {
+  /* Hyperbolic sine of d evaluated by MPFR at its active default precision, rounded to the nearest long double. */
+  mpfr_t frx;
+  mpfr_init_set_ld(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_sinh(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+long double coshlfr(long double d) {
+  /* Hyperbolic cosine of d evaluated by MPFR at its active default precision, rounded to the nearest long double. */
+  mpfr_t frx;
+  mpfr_init_set_ld(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_cosh(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+long double tanhlfr(long double d) {
+  /* Hyperbolic tangent of d evaluated by MPFR at its active default precision, rounded to the nearest long double. */
+  mpfr_t frx;
+  mpfr_init_set_ld(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_tanh(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+long double asinhlfr(long double d) {
+  /* Inverse hyperbolic sine of d evaluated by MPFR at its active default precision, rounded to the nearest long double. */
+  mpfr_t frx;
+  mpfr_init_set_ld(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_asinh(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+long double acoshlfr(long double d) {
+  /* Inverse hyperbolic cosine of d evaluated by MPFR at its active default precision, rounded to the nearest long double. */
+  mpfr_t frx;
+  mpfr_init_set_ld(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_acosh(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+long double atanhlfr(long double d) {
+  /* Inverse hyperbolic tangent of d evaluated by MPFR at its active default precision, rounded to the nearest long double. */
+  mpfr_t frx;
+  mpfr_init_set_ld(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_atanh(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+long double sqrtlfr(long double d) {
+  /* Square root of d evaluated by MPFR at its active default precision, rounded to the nearest long double. */
+  mpfr_t frx;
+  mpfr_init_set_ld(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_sqrt(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+double sqrtfr(double d) {
+  /* Square root of d evaluated by MPFR at its active default precision, rounded to the nearest double. */
+  mpfr_t frx;
+  mpfr_init_set_d(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_sqrt(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+double cbrtfr(double d) {
+  /* Cube root of d evaluated by MPFR at its active default precision, rounded to the nearest double. */
+  mpfr_t frx;
+  mpfr_init_set_d(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_cbrt(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+long double cbrtlfr(long double d) {
+  /* Cube root of d evaluated by MPFR at its active default precision, rounded to the nearest long double. */
+  mpfr_t frx;
+  mpfr_init_set_ld(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_cbrt(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+double exp2fr(double d) {
+  /* 2^d evaluated by MPFR at its active default precision, rounded to the nearest double. */
+  mpfr_t frx;
+  mpfr_init_set_d(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_exp2(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+long double exp2lfr(long double d) {
+  /* 2^d evaluated by MPFR at its active default precision, rounded to the nearest long double. */
+  mpfr_t frx;
+  mpfr_init_set_ld(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_exp2(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+double exp10fr(double d) {
+  /* 10^d evaluated by MPFR at its active default precision, rounded to the nearest double. */
+  mpfr_t frx;
+  mpfr_init_set_d(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_exp10(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+long double exp10lfr(long double d) {
+  /* 10^d evaluated by MPFR at its active default precision, rounded to the nearest long double. */
+  mpfr_t frx;
+  mpfr_init_set_ld(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_exp10(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+double expm1fr(double d) {
+  /* e^d - 1 evaluated by MPFR at its active default precision, rounded to the nearest double. */
+  mpfr_t frx;
+  mpfr_init_set_d(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_expm1(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+long double expm1lfr(long double d) {
+  /* e^d - 1 evaluated by MPFR at its active default precision, rounded to the nearest long double. */
+  mpfr_t frx;
+  mpfr_init_set_ld(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_expm1(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+double log10fr(double d) {
+  /* Base-10 logarithm of d evaluated by MPFR at its active default precision, rounded to the nearest double. */
+  mpfr_t frx;
+  mpfr_init_set_d(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_log10(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+long double log10lfr(long double d) {
+  /* Base-10 logarithm of d evaluated by MPFR at its active default precision, rounded to the nearest long double. */
+  mpfr_t frx;
+  mpfr_init_set_ld(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_log10(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+double log1pfr(double d) {
+  /* log(1 + d) evaluated by MPFR at its active default precision, rounded to the nearest double. */
+  mpfr_t frx;
+  mpfr_init_set_d(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_log1p(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+long double log1plfr(long double d) {
+  /* log(1 + d) evaluated by MPFR at its active default precision, rounded to the nearest long double. */
+  mpfr_t frx;
+  mpfr_init_set_ld(frx, d, GMP_RNDN); /* init + assign in one step; no varargs sentinel involved */
+  mpfr_log1p(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clear(frx);
+  return ret;
+}
+
+//
+
+typedef struct { /* pair of floats; child_sincosf returns its two decoded reply words in x and y */
+  float x, y;
+} float2;
+
+float child_sinf(float x) {
+  /* Writes "sinf <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "sinf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_sinf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_sinf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_cosf(float x) {
+  /* Writes "cosf <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "cosf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_cosf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_cosf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float2 child_sincosf(float x) {
+  /* Writes "sincosf <hex of f2u(x)>" to ptoc[1]; the line read from ctop[0] must carry two hex words. */
+  char str[256];
+  uint32_t u, v;
+  sprintf(str, "sincosf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_sincosf");
+  if (sscanf(str, "%x %x", &u, &v) != 2) stop("child_sincosf"); /* both words required; never decode an uninitialized u or v */
+
+  float2 ret;
+  ret.x = u2f(u); /* first reply word */
+  ret.y = u2f(v); /* second reply word */
+  return ret;
+}
+
+float child_tanf(float x) {
+  /* Writes "tanf <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "tanf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_tanf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_tanf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_asinf(float x) {
+  /* Writes "asinf <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "asinf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_asinf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_asinf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_acosf(float x) {
+  /* Writes "acosf <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "acosf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_acosf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_acosf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_atanf(float x) {
+  /* Writes "atanf <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "atanf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_atanf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_atanf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_atan2f(float y, float x) {
+  /* Writes "atan2f <hex of y> <hex of x>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "atan2f %x %x\n", f2u(y), f2u(x)); /* y is sent first, then x */
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_atan2f");
+  if (sscanf(str, "%x", &u) != 1) stop("child_atan2f"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_logf(float x) {
+  /* Writes "logf <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "logf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_logf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_logf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_expf(float x) {
+  /* Writes "expf <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "expf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_expf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_expf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_powf(float x, float y) {
+  /* Writes "powf <hex of x> <hex of y>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "powf %x %x\n", f2u(x), f2u(y)); /* base first, then exponent */
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_powf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_powf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_sinhf(float x) {
+  /* Writes "sinhf <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "sinhf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_sinhf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_sinhf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_coshf(float x) {
+  /* Writes "coshf <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "coshf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_coshf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_coshf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_tanhf(float x) {
+  /* Writes "tanhf <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "tanhf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_tanhf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_tanhf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_asinhf(float x) {
+  /* Writes "asinhf <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "asinhf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_asinhf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_asinhf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_acoshf(float x) {
+  /* Writes "acoshf <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "acoshf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_acoshf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_acoshf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_atanhf(float x) {
+  /* Writes "atanhf <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "atanhf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_atanhf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_atanhf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_sqrtf(float x) {
+  /* Writes "sqrtf <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "sqrtf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_sqrtf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_sqrtf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_cbrtf(float x) {
+  /* Writes "cbrtf <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "cbrtf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_cbrtf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_cbrtf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_exp2f(float x) {
+  /* Writes "exp2f <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "exp2f %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_exp2f");
+  if (sscanf(str, "%x", &u) != 1) stop("child_exp2f"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_exp10f(float x) {
+  /* Writes "exp10f <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "exp10f %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_exp10f");
+  if (sscanf(str, "%x", &u) != 1) stop("child_exp10f"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_expm1f(float x) {
+  /* Writes "expm1f <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "expm1f %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_expm1f");
+  if (sscanf(str, "%x", &u) != 1) stop("child_expm1f"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_log10f(float x) {
+  /* Writes "log10f <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "log10f %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_log10f");
+  if (sscanf(str, "%x", &u) != 1) stop("child_log10f"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_log1pf(float x) {
+  /* Writes "log1pf <hex of f2u(x)>" to ptoc[1], then returns u2f() of the hex word in the line read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "log1pf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_log1pf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_log1pf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+float child_ldexpf(float x, int q) {
+  /* Writes "ldexpf <hex of f2u(x)> <hex of f2u(q)>" to ptoc[1], then returns u2f() of the hex word read from ctop[0]. */
+  char str[256];
+  uint32_t u;
+  sprintf(str, "ldexpf %x %x\n", f2u(x), f2u(q)); /* NOTE(review): q goes through f2u like a float - confirm the reader decodes it the same way */
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_ldexpf"); /* message names this function (previously said child_powf) */
+  if (sscanf(str, "%x", &u) != 1) stop("child_ldexpf"); /* unparsable reply: never decode an uninitialized u */
+  return u2f(u);
+}
+
+int allTestsPassed = 1;
+/* Prints " OK" or the " NG" banner for one check on stderr; a failed check latches allTestsPassed to 0. */
+void showResult(int success) {
+  if (success) fprintf(stderr, "%s\n", " OK");
+  else { allTestsPassed = 0; fprintf(stderr, "%s\n", " NG **************"); }
+}
+
+void do_test() {
+  int i, j;
+
+  fprintf(stderr, "Denormal/nonnumber test atan2f(y, x)\n\n");
+
+  fprintf(stderr, "If y is +0 and x is -0, +pi is returned ... ");
+  showResult(child_atan2f(+0.0, -0.0) == M_PIf);
+
+  fprintf(stderr, "If y is -0 and x is -0, -pi is returned ... ");
+  showResult(child_atan2f(-0.0, -0.0) == -M_PIf);
+
+  fprintf(stderr, "If y is +0 and x is +0, +0 is returned ... ");
+  showResult(isPlusZerof(child_atan2f(+0.0, +0.0)));
+
+  fprintf(stderr, "If y is -0 and x is +0, -0 is returned ... ");
+  showResult(isMinusZerof(child_atan2f(-0.0, +0.0)));
+
+  fprintf(stderr, "If y is positive infinity and x is negative infinity, +3*pi/4 is returned ... ");
+  showResult(child_atan2f(POSITIVE_INFINITYf, NEGATIVE_INFINITYf) == 3*M_PIf/4);
+
+  fprintf(stderr, "If y is negative infinity and x is negative infinity, -3*pi/4 is returned ... ");
+  showResult(child_atan2f(NEGATIVE_INFINITYf, NEGATIVE_INFINITYf) == -3*M_PIf/4);
+
+  fprintf(stderr, "If y is positive infinity and x is positive infinity, +pi/4 is returned ... ");
+  showResult(child_atan2f(POSITIVE_INFINITYf, POSITIVE_INFINITYf) == M_PIf/4);
+
+  fprintf(stderr, "If y is negative infinity and x is positive infinity, -pi/4 is returned ... ");
+  showResult(child_atan2f(NEGATIVE_INFINITYf, POSITIVE_INFINITYf) == -M_PIf/4);
+
+  {
+    fprintf(stderr, "If y is +0 and x is less than 0, +pi is returned ... ");
+
+    float ya[] = { +0.0 };
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+
+    boolean success = true;
+
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != M_PIf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is -0 and x is less than 0, -pi is returned ... ");
+
+    float ya[] = { -0.0 };
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != -M_PIf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is less than 0 and x is 0, -pi/2 is returned ... ");
+
+    float ya[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+    float xa[] = { +0.0, -0.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != -M_PIf/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is greater than 0 and x is 0, pi/2 is returned ... ");
+
+
+    float ya[] = { 100000.5, 100000, 3, 2.5, 2, 1.5, 1.0, 0.5 };
+    float xa[] = { +0.0, -0.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != M_PIf/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is greater than 0 and x is -0, pi/2 is returned ... ");
+
+    float ya[] = { 100000.5, 100000, 3, 2.5, 2, 1.5, 1.0, 0.5 };
+    float xa[] = { -0.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != M_PIf/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is positive infinity, and x is finite, pi/2 is returned ... ");
+
+    float ya[] = { POSITIVE_INFINITYf };
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != M_PIf/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is negative infinity, and x is finite, -pi/2 is returned ... ");
+
+    float ya[] = { NEGATIVE_INFINITYf };
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != -M_PIf/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a finite value greater than 0, and x is negative infinity, +pi is returned ... ");
+
+    float ya[] = { 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    float xa[] = { NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != M_PIf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a finite value less than 0, and x is negative infinity, -pi is returned ... ");
+
+    float ya[] = { -0.5, -1.5, -2.0, -2.5, -3.0, -100000, -100000.5 };
+    float xa[] = { NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != -M_PIf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a finite value greater than 0, and x is positive infinity, +0 is returned ... ");
+
+    float ya[] = { 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    float xa[] = { POSITIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isPlusZerof(child_atan2f(ya[j], xa[i]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a finite value less than 0, and x is positive infinity, -0 is returned ... ");
+
+    float ya[] = { -0.5, -1.5, -2.0, -2.5, -3.0, -100000, -100000.5 };
+    float xa[] = { POSITIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isMinusZerof(child_atan2f(ya[j], xa[i]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is NaN, a NaN is returned ... ");
+
+    float ya[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5, NANf };
+    float xa[] = { NANf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!xisnanf(child_atan2f(ya[j], xa[i]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a NaN, the result is a NaN ... ");
+
+    float ya[] = { NANf };
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5, NANf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!xisnanf(child_atan2f(ya[j], xa[i]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  fprintf(stderr, "\nend of atan2f denormal/nonnumber test\n\n");
+
+  //
+
+  fprintf(stderr, "\nDenormal/nonnumber test pow(x, y)\n\n");
+
+  fprintf(stderr, "If x is +1 and y is a NaN, the result is 1.0 ... ");
+  showResult(child_powf(1, NANf) == 1.0);
+
+  fprintf(stderr, "If y is 0 and x is a NaN, the result is 1.0 ... ");
+  showResult(child_powf(NANf, 0) == 1.0);
+
+  fprintf(stderr, "If x is -1, and y is positive infinity, the result is 1.0 ... ");
+  showResult(child_powf(-1, POSITIVE_INFINITYf) == 1.0);
+
+  fprintf(stderr, "If x is -1, and y is negative infinity, the result is 1.0 ... ");
+  showResult(child_powf(-1, NEGATIVE_INFINITYf) == 1.0);
+
+  {
+    fprintf(stderr, "If x is a finite value less than 0, and y is a finite non-integer, a NaN is returned ... ");
+
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+    float ya[] = { -100000.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!xisnanf(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is a NaN, the result is a NaN ... ");
+
+    float xa[] = { NANf };
+    float ya[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!xisnanf(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a NaN, the result is a NaN ... ");
+
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    float ya[] = { NANf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!xisnanf(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is +0, and y is an odd integer greater than 0, the result is +0 ... ");
+
+    float xa[] = { +0.0 };
+    float ya[] = { 1, 3, 5, 7, 100001 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isPlusZerof(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is -0, and y is an odd integer greater than 0, the result is -0 ... ");
+
+    float xa[] = { -0.0 };
+    float ya[] = { 1, 3, 5, 7, 100001 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isMinusZerof(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is 0, and y greater than 0 and not an odd integer, the result is +0 ... ");
+
+    float xa[] = { +0.0, -0.0 };
+    float ya[] = { 0.5, 1.5, 2.0, 2.5, 4.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isPlusZerof(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the absolute value of x is less than 1, and y is negative infinity, the result is positive infinity ... ");
+
+    float xa[] = { -0.999, -0.5, -0.0, +0.0, +0.5, +0.999 };
+    float ya[] = { NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_powf(xa[i], ya[j]) != POSITIVE_INFINITYf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the absolute value of x is greater than 1, and y is negative infinity, the result is +0 ... ");
+
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    float ya[] = { NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isPlusZerof(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the absolute value of x is less than 1, and y is positive infinity, the result is +0 ... ");
+
+    float xa[] = { -0.999, -0.5, -0.0, +0.0, +0.5, +0.999 };
+    float ya[] = { POSITIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isPlusZerof(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the absolute value of x is greater than 1, and y is positive infinity, the result is positive infinity ... ");
+
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    float ya[] = { POSITIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_powf(xa[i], ya[j]) != POSITIVE_INFINITYf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is negative infinity, and y is an odd integer less than 0, the result is -0 ... ");
+
+    float xa[] = { NEGATIVE_INFINITYf };
+    float ya[] = { -100001, -5, -3, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isMinusZerof(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is negative infinity, and y less than 0 and not an odd integer, the result is +0 ... ");
+
+    float xa[] = { NEGATIVE_INFINITYf };
+    float ya[] = { -100000.5, -100000, -4, -2.5, -2, -1.5, -0.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isPlusZerof(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is negative infinity, and y is an odd integer greater than 0, the result is negative infinity ... ");
+
+    float xa[] = { NEGATIVE_INFINITYf };
+    float ya[] = { 1, 3, 5, 7, 100001 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_powf(xa[i], ya[j]) != NEGATIVE_INFINITYf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is negative infinity, and y greater than 0 and not an odd integer, the result is positive infinity ... ");
+
+    float xa[] = { NEGATIVE_INFINITYf };
+    float ya[] = { 0.5, 1.5, 2, 2.5, 3.5, 4, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_powf(xa[i], ya[j]) != POSITIVE_INFINITYf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is positive infinity, and y less than 0, the result is +0 ... ");
+
+    float xa[] = { POSITIVE_INFINITYf };
+    float ya[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isPlusZerof(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is positive infinity, and y greater than 0, the result is positive infinity ... ");
+
+    float xa[] = { POSITIVE_INFINITYf };
+    float ya[] = { 0.5, 1, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_powf(xa[i], ya[j]) != POSITIVE_INFINITYf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is +0, and y is an odd integer less than 0, +HUGE_VAL is returned ... ");
+
+    float xa[] = { +0.0 };
+    float ya[] = { -100001, -5, -3, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_powf(xa[i], ya[j]) != POSITIVE_INFINITYf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is -0, and y is an odd integer less than 0, -HUGE_VAL is returned ... ");
+
+    float xa[] = { -0.0 };
+    float ya[] = { -100001, -5, -3, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_powf(xa[i], ya[j]) != NEGATIVE_INFINITYf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is 0, and y is less than 0 and not an odd integer, +HUGE_VAL is returned ... ");
+
+    float xa[] = { +0.0, -0.0 };
+    float ya[] = { -100000.5, -100000, -4, -2.5, -2, -1.5, -0.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_powf(xa[i], ya[j]) != POSITIVE_INFINITYf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the result overflows, the functions return HUGE_VAL with the mathematically correct sign ... ");
+
+    float xa[] = { 1000, -1000 };
+    float ya[] = { 1000, 1000.5, 1001 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!cmpDenorm(child_powf(xa[i], ya[j]), powfr(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  fprintf(stderr, "\nEnd of pow denormal/nonnumber test\n\n");
+	
+  //
+
+  {
+    fprintf(stderr, "sinf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_sinf(xa[i]), sinfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_sinf(xa[i]), sinfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "sin in sincosf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      float2 q = child_sincosf(xa[i]);
+      if (!cmpDenorm(q.x, sinfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], q.x, sinfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "cosf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_cosf(xa[i]), cosfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_cosf(xa[i]), cosfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "cos in sincosf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      float2 q = child_sincosf(xa[i]);
+      if (!cmpDenorm(q.y, cosfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], q.y, cosfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "tanf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, M_PIf/2, -M_PIf/2 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_tanf(xa[i]), tanfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "asinf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 2, -2, 1, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_asinf(xa[i]), asinfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_asinf(xa[i]), asinfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "acosf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 2, -2, 1, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_acosf(xa[i]), acosfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "atanf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_atanf(xa[i]), atanfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_atanf(xa[i]), atanfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "logf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 0, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_logf(xa[i]), logfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "expf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, -2000, 2000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_expf(xa[i]), expfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_expf(xa[i]), expfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "sinhf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, +0.0, -0.0, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_sinhf(xa[i]), sinhfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_sinhf(xa[i]), sinhfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "coshf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, +0.0, -0.0, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_coshf(xa[i]), coshfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_coshf(xa[i]), coshfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "tanhf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, +0.0, -0.0, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_tanhf(xa[i]), tanhfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_tanhf(xa[i]), tanhfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "asinhf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, +0.0, -0.0, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_asinhf(xa[i]), asinhfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_asinhf(xa[i]), asinhfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "acoshf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, +0.0, -0.0, 1.0, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_acoshf(xa[i]), acoshfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_acoshf(xa[i]), acoshfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "atanhf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, +0.0, -0.0, 1.0, -1.0, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_atanhf(xa[i]), atanhfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_atanhf(xa[i]), atanhfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "sqrtf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, +0.0, -0.0, -1.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_sqrtf(xa[i]), sqrtfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_sqrtf(xa[i]), sqrtfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "cbrtf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, +0.0, -0.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_cbrtf(xa[i]), cbrtfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_cbrtf(xa[i]), cbrtfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "exp2f denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_exp2f(xa[i]), exp2fr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_exp2f(xa[i]), exp2fr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "exp10f denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_exp10f(xa[i]), exp10fr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_exp10f(xa[i]), exp10fr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "expm1f denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_expm1f(xa[i]), expm1fr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_expm1f(xa[i]), expm1fr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "log10f denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 0, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_log10f(xa[i]), log10fr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "log1pf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 0, -1, -2 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_log1pf(xa[i]), log1pfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_log1pf(xa[i]), log1pfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  fprintf(stderr, "ldexpf denormal/nonnumber test ... ");
+
+  {
+    boolean success = true;
+    for(i=-10000;i<=10000 && success;i++) {
+      float d = child_ldexpf(1.0f, i);
+      float c = ldexpf(1.0f, i);
+
+      boolean pass = (isfinite(c) && d == c) || cmpDenorm(d, c);
+      if (!pass) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", (double)i, d, c);
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  //
+
+  fprintf(stderr, "\nAccuracy test (max error in ulp)\n");
+
+  //
+
+  //
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float q = child_sinf(d);
+      double c = sinlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SIN;
+      }
+    }
+
+    for(d = -10000;d < 10000;d += 0.201) {
+      float q = child_sinf(d);
+      double c = sinlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SIN;
+      }
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      float start = u2f(f2u(M_PI_4 * i)-20);
+      float end = u2f(f2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2f(f2u(d)+1)) {
+	float q = child_sinf(d);
+	double c = sinlfr(flushToZero(d));
+	double u = countULP(q, c);
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	  goto STOP_SIN;
+	}
+      }
+    }
+
+  STOP_SIN:
+
+    fprintf(stderr, "sinf : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float q = child_cosf(d);
+      double c = coslfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = -10000;d < 10000;d += 0.201) {
+      float q = child_cosf(d);
+      double c = coslfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      float start = u2f(f2u(M_PI_4 * i)-20);
+      float end = u2f(f2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2f(f2u(d)+1)) {
+	float q = child_cosf(d);
+	double c = coslfr(flushToZero(d));
+	double u = countULP(q, c);
+	max = fmax(max, u);
+      }
+    }
+
+    fprintf(stderr, "cosf : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float2 q = child_sincosf(d);
+      long double c = sinlfr(d);
+      double u = fabs((q.x - c) / ulp(c));
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q.x, (double)c, d, (double)ulp(c));
+	goto STOP_SIN2;
+      }
+    }
+
+    for(d = -10000;d < 10000;d += 0.201) {
+      float2 q = child_sincosf(d);
+      long double c = sinlfr(d);
+      double u = fabs((q.x - c) / ulp(c));
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q.x, (double)c, d, (double)ulp(c));
+	goto STOP_SIN2;
+      }
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      float start = u2f(f2u(M_PI_4 * i)-20);
+      float end = u2f(f2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2f(f2u(d)+1)) {
+	float2 q = child_sincosf(d);
+	double c = sinlfr(d);
+	double u = fabs((q.x - c) / ulp(c));
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q.x, (double)c, d, (double)ulp(c));
+	  goto STOP_SIN2;
+	}
+      }
+    }
+
+  STOP_SIN2:
+
+    fprintf(stderr, "sin in sincosf : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float2 q = child_sincosf(d);
+      double c = coslfr(d);
+      double u = fabs((q.y - c) / ulp(c));
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q.y, (double)c, d, (double)ulp(c));
+	goto STOP_COS2;
+      }
+    }
+
+    for(d = -10000;d < 10000;d += 0.201) {
+      float2 q = child_sincosf(d);
+      double c = coslfr(d);
+      double u = fabs((q.y - c) / ulp(c));
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q.y, (double)c, d, (double)ulp(c));
+	goto STOP_COS2;
+      }
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      float start = u2f(f2u(M_PI_4 * i)-20);
+      float end = u2f(f2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2f(f2u(d)+1)) {
+	float2 q = child_sincosf(d);
+	double c = coslfr(d);
+	double u = fabs((q.y - c) / ulp(c));
+	max = fmax(max, u);
+      }
+    }
+
+  STOP_COS2:
+
+    fprintf(stderr, "cos in sincosf : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float q = child_tanf(d);
+      double c = tanlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = -10000;d < 10000;d += 0.201) {
+      float q = child_tanf(d);
+      double c = tanlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      float start = u2f(f2u(M_PI_4 * i)-20);
+      float end = u2f(f2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2f(f2u(d)+1)) {
+	float q = child_tanf(d);
+	double c = tanlfr(flushToZero(d));
+	double u = countULP(q, c);
+	max = fmax(max, u);
+      }
+    }
+
+    fprintf(stderr, "tanf : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -1;d < 1;d += 0.00002) {
+      float q = child_asinf(d);
+      double c = asinlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ASIN;
+      }
+    }
+
+  STOP_ASIN:
+
+    fprintf(stderr, "asinf : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -1;d < 1;d += 0.00002) {
+      float q = child_acosf(d);
+      double c = acoslfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ACOS;
+      }
+    }
+
+  STOP_ACOS:
+
+    fprintf(stderr, "acosf : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float q = child_atanf(d);
+      double c = atanlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %g, d = %g\n", q, d);
+	goto STOP_ATAN;
+      }
+    }
+
+    for(d = -10000;d < 10000;d += 0.201) {
+      float q = child_atanf(d);
+      double c = atanlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %g, d = %g\n", q, d);
+	goto STOP_ATAN;
+      }
+    }
+
+  STOP_ATAN:
+
+    fprintf(stderr, "atanf : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    float x, y, max = 0;
+
+    for(y = -10;y < 10;y += 0.051) {
+      for(x = -10;x < 10;x += 0.052) {
+	float q = child_atan2f(y, x);
+	double c = atan2lfr(flushToZero(y), flushToZero(x));
+	double u = countULP(q, c);
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %g, y = %g, x = %g\n", q, y, x);
+	  goto STOP_ATAN2;
+	}
+      }
+    }
+
+    for(y = -100;y < 100;y += 0.51) {
+      for(x = -100;x < 100;x += 0.52) {
+	float q = child_atan2f(y, x);
+	double c = atan2lfr(flushToZero(y), flushToZero(x));
+	double u = countULP(q, c);
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %g, y = %g, x = %g\n", q, y, x);
+	  goto STOP_ATAN2;
+	}
+      }
+    }
+
+  STOP_ATAN2:
+
+    fprintf(stderr, "atan2f : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = 0.0001;d < 10;d += 0.0001) {
+      float q = child_logf(d);
+      double c = loglfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_LOG;
+      }
+    }
+
+    for(d = 0.0001;d < 10000;d += 0.1) {
+      float q = child_logf(d);
+      double c = loglfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_LOG;
+      }
+    }
+
+    int i;
+    for(i = -1000;i <= 1000;i++) {
+      d = pow(1.1, i);
+      float q = child_logf(d);
+      double c = loglfr(flushToZero(d));
+      double u = countULP(q, c);
+      if (flushToZero(d * 0.1) == 0.0 && q == NEGATIVE_INFINITYf) u = 0;
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_LOG;
+      }
+    }
+
+  STOP_LOG:
+
+    fprintf(stderr, "logf : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float q = child_expf(d);
+      double c = explfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = -1000;d < 1000;d += 0.1) {
+      float q = child_expf(d);
+      double c = explfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    fprintf(stderr, "expf : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float x, y, max = 0;
+
+    for(y = 0.1;y < 100;y += 0.21) {
+      for(x = -100;x < 100;x += 0.22) {
+	float q = child_powf(x, y);
+	double c = powlfr(flushToZero(x), flushToZero(y));
+	double u = countULP(q, c);
+	max = fmax(max, u);
+	if (u > 10) {
+	  fprintf(stderr, "q = %g, c = %g, x = %g, y = %g\n", q, c, x, y);
+	  goto STOP_POW;
+	}
+      }
+    }
+
+    float d;
+    for(d = -1000;d < 1000;d += 0.1) {
+      float q = child_powf(2.1f, d);
+      double c = powlfr(2.1f, flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+
+      if (u > 10) {
+	fprintf(stderr, "q = %g, c = %g, d = %g\n", q, c, d);
+	goto STOP_POW;
+      }
+    }
+
+  STOP_POW:
+
+    fprintf(stderr, "powf : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = 0;d < 20000;d += 0.2) {
+      float q = child_sqrtf(d);
+      double c = sqrtlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SQRT;
+      }
+    }
+
+    int i;
+    for(i = -1000;i <= 1000;i++) {
+      d = pow(1.1, i);
+      float q = child_sqrtf(d);
+      double c = sqrtlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SQRT;
+      }
+    }
+
+  STOP_SQRT:
+
+    fprintf(stderr, "sqrtf : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10000;d < 10000;d += 0.2) {
+      float q = child_cbrtf(d);
+      double c = cbrtlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_CBRT;
+      }
+    }
+
+    int i;
+    for(i = -1000;i <= 1000;i++) {
+      d = pow(1.1, i);
+      float q = child_cbrtf(d);
+      double c = cbrtlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_CBRT;
+      }
+    }
+
+  STOP_CBRT:
+
+    fprintf(stderr, "cbrtf : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float q = child_sinhf(d);
+      double c = sinhlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SINH;
+      }
+    }
+
+    for(d = -100;d < 100;d += 0.02) {
+      float q = child_sinhf(d);
+      double c = sinhlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SINH;
+      }
+    }
+
+  STOP_SINH:
+
+    fprintf(stderr, "sinhf : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float q = child_coshf(d);
+      double c = coshlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_COSH;
+      }
+    }
+
+    for(d = -100;d < 100;d += 0.02) {
+      float q = child_coshf(d);
+      double c = coshlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_COSH;
+      }
+    }
+
+  STOP_COSH:
+
+    fprintf(stderr, "coshf : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float q = child_tanhf(d);
+      double c = tanhlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_TANH;
+      }
+    }
+
+  STOP_TANH:
+
+    fprintf(stderr, "tanhf : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float q = child_asinhf(d);
+      double c = asinhlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ASINH;
+      }
+    }
+
+    for(d = -1000;d < 1000;d += 0.02) {
+      float q = child_asinhf(d);
+      double c = asinhlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ASINH;
+      }
+    }
+
+  STOP_ASINH:
+
+    fprintf(stderr, "asinhf : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = 1;d < 10;d += 0.0002) {
+      float q = child_acoshf(d);
+      double c = acoshlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ACOSH;
+      }
+    }
+
+    for(d = 1;d < 1000;d += 0.02) {
+      float q = child_acoshf(d);
+      double c = acoshlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ACOSH;
+      }
+    }
+
+  STOP_ACOSH:
+
+    fprintf(stderr, "acoshf : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float q = child_atanhf(d);
+      double c = atanhlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ATANH;
+      }
+    }
+
+    for(d = -1000;d < 1000;d += 0.023) {
+      float q = child_atanhf(d);
+      double c = atanhlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ATANH;
+      }
+    }
+
+  STOP_ATANH:
+
+    fprintf(stderr, "atanhf : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = 0.0001;d < 10;d += 0.0001) {
+      float q = child_log10f(d);
+      double c = log10lfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = 0.0001;d < 10000;d += 0.1) {
+      float q = child_log10f(d);
+      double c = log10lfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    fprintf(stderr, "log10f : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+
+  {
+    float d, max = 0;
+
+    for(d = 0.0001;d < 10;d += 0.0001) {
+      float q = child_log1pf(d);
+      double c = log1plfr(flushToZero(d));
+      double u = countULP(q, c);
+      if (u > 10) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_LOG1P;
+      }
+      max = fmax(max, u);
+    }
+
+    for(d = 0.0001;d < 10000;d += 0.1) {
+      float q = child_log1pf(d);
+      double c = log1plfr(flushToZero(d));
+      double u = countULP(q, c);
+      if (u > 10) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_LOG1P;
+      }
+      max = fmax(max, u);
+    }
+
+    for(d = 0;d < 300;d += 0.02) {
+      float d2 = pow(10, -d);
+      float q = child_log1pf(d2);
+      double c = log1plfr(flushToZero(d2));
+      double u = countULP(q, c);
+      if (flushToZero(d2 * 0.1) == 0.0 && q == 0.0) u = 0;
+      if (u > 10) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d2, (double)ulp(c));
+	goto STOP_LOG1P;
+      }
+      max = fmax(max, u);
+    }
+
+    for(d = 0;d < 300;d += 0.02) {
+      float d2 = -pow(10, -d);
+      float q = child_log1pf(d2);
+      double c = log1plfr(flushToZero(d2));
+      double u = countULP(q, c);
+      if (flushToZero(d2 * 0.1) == 0.0 && q == 0.0) u = 0;
+      if (u > 10) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d2, (double)ulp(c));
+	goto STOP_LOG1P;
+      }
+      max = fmax(max, u);
+    }
+
+  STOP_LOG1P:
+
+    fprintf(stderr, "log1pf : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float q = child_exp2f(d);
+      double c = exp2lfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_EXP2;
+      }
+    }
+
+    for(d = -120;d < 1000;d += 0.023) {
+      float q = child_exp2f(d);
+      double c = exp2lfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_EXP2;
+      }
+    }
+
+  STOP_EXP2:
+
+    fprintf(stderr, "exp2f : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float q = child_exp10f(d);
+      double c = exp10lfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_EXP10;
+      }
+    }
+
+    for(d = -35;d < 1000;d += 0.023) {
+      float q = child_exp10f(d);
+      double c = exp10lfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_EXP10;
+      }
+    }
+
+  STOP_EXP10:
+
+    fprintf(stderr, "exp10f : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float q = child_expm1f(d);
+      double c = expm1lfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_EXPM1;
+      }
+    }
+
+    for(d = -1000;d < 1000;d += 0.023) {
+      float q = child_expm1f(d);
+      double c = expm1lfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_EXPM1;
+      }
+    }
+
+    for(d = 0;d < 300;d += 0.02) {
+      float d2 = pow(10, -d);
+      float q = child_expm1f(d2);
+      double c = expm1lfr(flushToZero(d2));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_EXPM1;
+      }
+    }
+
+    for(d = 0;d < 300;d += 0.02) {
+      float d2 = -pow(10, -d);
+      float q = child_expm1f(d2);
+      double c = expm1lfr(flushToZero(d2));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_EXPM1;
+      }
+    }
+
+  STOP_EXPM1:
+
+    fprintf(stderr, "expm1f : %lf ... ", max);
+
+    showResult(max < 5);
+  }
+}
+
+/* Usage: <tester> [--flushtozero ...] <program under test> [args ...]
+ * Leading --flushtozero options set enableFlushToZero; the remaining words are
+ * handed to startChild() as the command to run, then do_test() is executed.
+ * Returns 0 if allTestsPassed is set afterwards (or -1 otherwise / on misuse). */
+int main(int argc, char **argv) {
+  char *argv2[argc + 1]; /* +1: never zero-length, always room for the NULL */
+  int i, a2s;
+
+  for(a2s=1;a2s<argc;a2s++) {
+    if (strcmp(argv[a2s], "--flushtozero") != 0) break;
+    enableFlushToZero = 1;
+  }
+
+  if (a2s >= argc) { /* no program given: argv2[0] would be NULL */
+    fprintf(stderr, "Usage : %s [--flushtozero] <program under test> [args ...]\n",
+            argc > 0 ? argv[0] : "tester");
+    return -1;
+  }
+
+  for(i=a2s;i<argc;i++) argv2[i-a2s] = argv[i];
+  argv2[argc-a2s] = NULL;
+
+  mpfr_set_default_prec(128);
+  startChild(argv2[0], argv2);
+  do_test();
+
+  if (allTestsPassed) {
+    fprintf(stderr, "\n\n*** All tests passed%s\n", enableFlushToZero ? " (flush to zero)" : "");
+  } else {
+    fprintf(stderr, "\n\n*** There were errors in some tests\n");
+  }
+  return allTestsPassed ? 0 : -1;
+}
diff --git a/tester/testerspu1.c b/tester/testerspu1.c
new file mode 100644
index 00000000..90d4c831
--- /dev/null
+++ b/tester/testerspu1.c
@@ -0,0 +1,2476 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <inttypes.h>
+
+#include <mpfr.h>
+
+#include <unistd.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#include "nonnumber.h"
+
+#define POSITIVE_INFINITYf ((float)INFINITY)
+#define NEGATIVE_INFINITYf (-(float)INFINITY)
+#define M_PIf ((float)M_PI)
+
+#define POSITIVE_INFINITY (INFINITY)
+#define NEGATIVE_INFINITY (-INFINITY)
+
+typedef int boolean;
+
+#define true 1
+#define false 0
+
+int enableFlushToZero = 0; // nonzero: flushToZero() turns |y| < 1.2e-38 into a zero carrying y's sign
+
+void stop(char *mes) { // fatal-error exit for the tester; does not return
+  fprintf(stderr, "%s\n", mes); // mes plus a newline goes to stderr
+  abort();
+}
+
+int readln(int fd, char *buf, int cnt) {
+  // Reads one line from fd into buf (capacity cnt bytes), keeping the '\n' and
+  // NUL-terminating it. Returns the number of non-newline bytes stored plus one,
+  // read()'s result (0 or -1) if the input ends first, or -1 if cnt < 1.
+  int i, rcnt = 0;
+  if (cnt < 1) return -1;
+
+  while(cnt >= 2) {
+    i = read(fd, buf, 1);
+    if (i != 1) return i; // EOF or error: buf is left unterminated
+    if (*buf++ == '\n') break;
+    rcnt++;
+    cnt--;
+  }
+
+  // buf points just past the last byte stored, so the terminator neither skips
+  // a slot when the buffer fills up nor lands beyond the cnt bytes provided.
+  *buf = '\0';
+  return rcnt + 1;
+}
+
+int ptoc[2], ctop[2]; // pipe fd pairs filled in by startChild(): parent-to-child and child-to-parent
+int pid; // fork() result stored by startChild()
+
+void startChild(const char *path, char *const argv[]) {
+  // Forks and execs `path` with `argv`, wiring the child's stdin/stdout to two
+  // pipes: the parent writes requests to ptoc[1] and reads replies from ctop[0].
+  // Any failure is fatal; explicit checks are used because assert() vanishes
+  // under NDEBUG, which would let a failed child fall through into parent code.
+  if (pipe(ptoc) != 0 || pipe(ctop) != 0) stop("startChild : pipe() failed");
+
+  pid = fork();
+  if (pid == -1) stop("startChild : fork() failed");
+
+  if (pid == 0) {
+    // child process
+    char buf0[1], buf1[1];
+
+    close(ptoc[1]);
+    close(ctop[0]);
+
+    if (dup2(ptoc[0], fileno(stdin)) == -1) { perror("dup2(stdin)"); _exit(127); }
+    if (dup2(ctop[1], fileno(stdout)) == -1) { perror("dup2(stdout)"); _exit(127); }
+
+    setvbuf(stdin, buf0, _IONBF,0);
+    setvbuf(stdout, buf1, _IONBF,0);
+
+    fflush(stdin);
+    fflush(stdout);
+
+    execvp(path, argv);
+
+    // execvp() only returns on failure; leave without running the parent's code.
+    perror(path);
+    _exit(127);
+  }
+
+  // parent process
+
+  close(ptoc[0]);
+  close(ctop[1]);
+}
+
+float u2f(uint32_t u) { // reinterprets the 32-bit pattern u as a float; no numeric conversion
+  float f;
+
+  // Byte-wise copy of u's object representation into the result (float is
+  // assumed to be exactly as wide as uint32_t).
+  memcpy(&f, &u, sizeof(f));
+  return f;
+}
+
+uint32_t f2u(float d) { // returns the raw 32-bit pattern of d; no numeric conversion
+  uint32_t bits;
+
+  // Byte-wise copy of d's object representation (float is assumed to be
+  // exactly as wide as uint32_t).
+  memcpy(&bits, &d, sizeof(bits));
+  return bits;
+}
+
+//
+
+boolean isPlusZerof(float x) { return x == 0 && copysignf(1, x) == 1; } // true only for +0.0f
+boolean isMinusZerof(float x) { return x == 0 && copysignf(1, x) == -1; } // true only for -0.0f
+boolean xisnanf(float x) { return x != x; } // NaN is the only value that compares unequal to itself
+float signf(float d) { return d < 0 ? -1 : 1; } // -1 for d < 0, otherwise 1 (so 1 for both zeros and for NaN)
+
+boolean isPlusZero(double x) { return x == 0 && copysign(1, x) == 1; } // true only for +0.0
+boolean isMinusZero(double x) { return x == 0 && copysign(1, x) == -1; } // true only for -0.0
+boolean xisnan(double x) { return x != x; } // NaN is the only value that compares unequal to itself
+
+double flushToZero(double y) { // with enableFlushToZero set, |y| < 1.2e-38 becomes a zero carrying y's sign
+  const int tiny = fabs(y) < 1.2e-38; // false for NaN, so NaN is returned unchanged
+  return (enableFlushToZero && tiny) ? copysign(0.0, y) : y;
+}
+
+boolean cmpDenorm(float x, float y) {
+  // Class-level comparison of a result x against the reference y, which is
+  // passed through flushToZero() first: NaN-ness, infinities, signed zeros and
+  // signf() values must agree; finite nonzero magnitudes are not compared.
+  y = flushToZero(y);
+  const boolean xNaN = xisnanf(x), yNaN = xisnanf(y);
+  if (xNaN || yNaN) return xNaN && yNaN;
+
+  // An infinity only matches the identical infinity.
+  if (isinf(x) || isinf(y)) return isinf(x) && isinf(y) && x == y;
+
+  // A zero reference demands a zero result with the same sign bit.
+  if (y == 0) return (isPlusZerof(x) && isPlusZerof(y)) || (isMinusZerof(x) && isMinusZerof(y));
+  return signf(x) == signf(y);
+}
+
+double ulp(double x) { // spacing of single-precision floats at |x|, never reported below FLT_MIN
+  const float mag = fabsf(x); // x is narrowed to float here
+  int e;
+
+  // Zero has no exponent to extract; report the floor value directly.
+  if (mag == 0) return FLT_MIN;
+
+  // frexpf() gives mag = m * 2^e with 0.5 <= m < 1, so with float's 24-bit
+  // significand the neighbouring floats near mag are 2^(e-24) apart.
+  frexpf(mag, &e);
+  return fmaxf(ldexpf(1.0, e-24), FLT_MIN);
+}
+
+double countULP(double x, double y) {
+  // Error of the tested result x against the reference y, in single-precision
+  // ulps of y; special values are classified after narrowing both to float.
+  // 10000..10003 are sentinels for mismatches no ulp count can express.
+  y = flushToZero(y);
+  float fx = (float)x;
+  float fy = (float)y;
+  if (xisnan(fx) && xisnan(fy)) return 0;
+  if (xisnan(fx) || xisnan(fy)) return 10000; // exactly one of them is NaN
+  if (isinf(fx)) {
+    // Relaxed infinity handling: an infinite result is accepted when the
+    // reference has the same sign and a magnitude above 1e+37.
+    if (signf(fx) == signf(fy) && fabs(fy) > 1e+37) return 0;
+    return 10001;
+  }
+  // fx is finite from here on, so no "both infinite" case remains to test.
+  if (fy == 0) return fx == 0 ? 0 : 10002; // zero reference: only a zero result matches
+  if (isinf(fy)) return 10003; // finite result, infinite reference
+
+  return fabs((x - y) / ulp(y));
+}
+
+//
+
+double sinfr(double d) { // reference sin(d): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_sin(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double sinlfr(long double d) { // reference sin(d): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_sin(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double cosfr(double d) { // reference cos(d): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_cos(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double coslfr(long double d) { // reference cos(d): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_cos(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double tanfr(double d) { // reference tan(d): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_tan(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double tanlfr(long double d) { // reference tan(d): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_tan(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double asinfr(double d) { // reference asin(d): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_asin(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double asinlfr(long double d) { // reference asin(d): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_asin(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double acosfr(double d) { // reference acos(d): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_acos(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double acoslfr(long double d) { // reference acos(d): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_acos(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double atanfr(double d) { // reference atan(d): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_atan(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double atanlfr(long double d) { // reference atan(d): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_atan(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double atan2fr(double y, double x) { // reference atan2(y, x): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx, fry;
+  mpfr_inits(frx, fry, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(fry, y, GMP_RNDN);
+  mpfr_set_d(frx, x, GMP_RNDN);
+  mpfr_atan2(frx, fry, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, fry, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double atan2lfr(long double y, long double x) { // reference atan2(y, x): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx, fry;
+  mpfr_inits(frx, fry, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(fry, y, GMP_RNDN);
+  mpfr_set_ld(frx, x, GMP_RNDN);
+  mpfr_atan2(frx, fry, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, fry, (mpfr_ptr) 0);
+  return ret;
+}
+
+double logfr(double d) { // reference natural logarithm of d: evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_log(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double loglfr(long double d) { // reference natural logarithm of d: evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_log(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double expfr(double d) { // reference exp(d): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_exp(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double explfr(long double d) { // reference exp(d): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_exp(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double powfr(double x, double y) { // reference pow(x, y): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx, fry;
+  mpfr_inits(frx, fry, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, x, GMP_RNDN);
+  mpfr_set_d(fry, y, GMP_RNDN);
+  mpfr_pow(frx, frx, fry, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, fry, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double powlfr(long double x, long double y) { // reference pow(x, y): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx, fry;
+  mpfr_inits(frx, fry, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, x, GMP_RNDN);
+  mpfr_set_ld(fry, y, GMP_RNDN);
+  mpfr_pow(frx, frx, fry, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, fry, (mpfr_ptr) 0);
+  return ret;
+}
+
+double sinhfr(double d) { // reference sinh(d): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_sinh(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double coshfr(double d) { // reference cosh(d): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_cosh(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double tanhfr(double d) { // reference tanh(d): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_tanh(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double asinhfr(double d) { // reference asinh(d): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_asinh(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double acoshfr(double d) { // reference acosh(d): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_acosh(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double atanhfr(double d) { // reference atanh(d): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_atanh(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double sinhlfr(long double d) { // reference sinh(d): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_sinh(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double coshlfr(long double d) { // reference cosh(d): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_cosh(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double tanhlfr(long double d) { // reference tanh(d): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_tanh(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double asinhlfr(long double d) { // reference asinh(d): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_asinh(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double acoshlfr(long double d) { // reference acosh(d): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_acosh(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double atanhlfr(long double d) { // reference atanh(d): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_atanh(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double sqrtlfr(long double d) { // reference sqrt(d): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_sqrt(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double sqrtfr(double d) { // reference sqrt(d): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_sqrt(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double cbrtfr(double d) { // reference cube root of d: evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_cbrt(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double cbrtlfr(long double d) { // reference cube root of d: evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_cbrt(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double exp2fr(double d) { // reference 2^d: evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_exp2(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double exp2lfr(long double d) { // reference 2^d: evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_exp2(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double exp10fr(double d) { // reference 10^d: evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_exp10(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double exp10lfr(long double d) { // reference 10^d: evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_exp10(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double expm1fr(double d) { // reference exp(d) - 1: evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_expm1(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double expm1lfr(long double d) { // reference exp(d) - 1: evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_expm1(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double log10fr(double d) { // reference log10(d): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_log10(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double log10lfr(long double d) { // reference log10(d): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_log10(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+double log1pfr(double d) { // reference log(1 + d): evaluated by MPFR, rounded to the nearest double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_d(frx, d, GMP_RNDN);
+  mpfr_log1p(frx, frx, GMP_RNDN);
+  double ret = mpfr_get_d(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+long double log1plfr(long double d) { // reference log(1 + d): evaluated by MPFR, rounded to the nearest long double
+  mpfr_t frx;
+  mpfr_inits(frx, (mpfr_ptr) 0); // list terminator typed as mpfr_ptr, as mpfr_inits() requires
+  mpfr_set_ld(frx, d, GMP_RNDN);
+  mpfr_log1p(frx, frx, GMP_RNDN);
+  long double ret = mpfr_get_ld(frx, GMP_RNDN);
+  mpfr_clears(frx, (mpfr_ptr) 0);
+  return ret;
+}
+
+//
+
+typedef struct { // pair of floats; child_sincosf() returns its two reply values in x and y
+  float x, y;
+} float2;
+
+float child_sinf(float x) {
+  // Sends "sinf_u1 <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "sinf_u1 %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_sinf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_sinf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_cosf(float x) {
+  // Sends "cosf_u1 <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "cosf_u1 %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_cosf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_cosf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float2 child_sincosf(float x) {
+  // Sends "sincosf_u1 <bits of x in hex>" to the child; the reply carries two
+  // hex bit patterns, returned in order as ret.x and ret.y.
+  char str[256];
+  uint32_t u, v;
+
+  sprintf(str, "sincosf_u1 %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_sincosf");
+  if (sscanf(str, "%x %x", &u, &v) != 2) stop("child_sincosf"); // both values must parse
+
+  float2 ret = { u2f(u), u2f(v) };
+  return ret;
+}
+
+float child_tanf(float x) {
+  // Sends "tanf_u1 <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "tanf_u1 %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_tanf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_tanf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_asinf(float x) {
+  // Sends "asinf_u1 <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "asinf_u1 %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_asinf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_asinf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_acosf(float x) {
+  // Sends "acosf_u1 <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "acosf_u1 %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_acosf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_acosf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_atanf(float x) {
+  // Sends "atanf_u1 <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "atanf_u1 %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_atanf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_atanf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_atan2f(float y, float x) {
+  // Sends "atan2f_u1 <bits of y> <bits of x>" (hex) to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "atan2f_u1 %x %x\n", f2u(y), f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_atan2f");
+  if (sscanf(str, "%x", &u) != 1) stop("child_atan2f"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_logf(float x) {
+  // Sends "logf_u1 <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "logf_u1 %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_logf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_logf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_expf(float x) {
+  // Sends "expf <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "expf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_expf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_expf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_powf(float x, float y) {
+  // Sends "powf <bits of x> <bits of y>" (hex) to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "powf %x %x\n", f2u(x), f2u(y));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_powf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_powf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_sinhf(float x) {
+  // Sends "sinhf <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "sinhf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_sinhf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_sinhf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_coshf(float x) {
+  // Sends "coshf <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "coshf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_coshf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_coshf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_tanhf(float x) {
+  // Sends "tanhf <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "tanhf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_tanhf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_tanhf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_asinhf(float x) {
+  // Sends "asinhf <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "asinhf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_asinhf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_asinhf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_acoshf(float x) {
+  // Sends "acoshf <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "acoshf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_acoshf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_acoshf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_atanhf(float x) {
+  // Sends "atanhf <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "atanhf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_atanhf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_atanhf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_sqrtf(float x) {
+  // Sends "sqrtf <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "sqrtf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_sqrtf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_sqrtf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_cbrtf(float x) {
+  // Sends "cbrtf_u1 <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "cbrtf_u1 %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_cbrtf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_cbrtf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_exp2f(float x) {
+  // Sends "exp2f <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "exp2f %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_exp2f");
+  if (sscanf(str, "%x", &u) != 1) stop("child_exp2f"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_exp10f(float x) {
+  // Sends "exp10f <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "exp10f %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_exp10f");
+  if (sscanf(str, "%x", &u) != 1) stop("child_exp10f"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_expm1f(float x) {
+  // Sends "expm1f <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "expm1f %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_expm1f");
+  if (sscanf(str, "%x", &u) != 1) stop("child_expm1f"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_log10f(float x) {
+  // Sends "log10f <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "log10f %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_log10f");
+  if (sscanf(str, "%x", &u) != 1) stop("child_log10f"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_log1pf(float x) {
+  // Sends "log1pf <bits of x in hex>" to the child and decodes its hex reply.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "log1pf %x\n", f2u(x));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_log1pf");
+  if (sscanf(str, "%x", &u) != 1) stop("child_log1pf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+float child_ldexpf(float x, int q) {
+  // Sends "ldexpf <bits of x> <bits of (float)q>" in hex; f2u() takes a float, so q is converted first.
+  char str[256];
+  uint32_t u;
+  sprintf(str, "ldexpf %x %x\n", f2u(x), f2u(q));
+  write(ptoc[1], str, strlen(str));
+  if (readln(ctop[0], str, 255) < 1) stop("child_ldexpf"); // failure report names this function
+  if (sscanf(str, "%x", &u) != 1) stop("child_ldexpf"); // a malformed reply would leave u unset
+  return u2f(u);
+}
+
+int allTestsPassed = 1; // cleared by showResult() as soon as one test reports failure
+
+void showResult(int success) { // prints the verdict of one test to stderr
+  if (!success) allTestsPassed = 0; // a failure is never reset by later successes
+  fprintf(stderr, "%s\n", success ? " OK" : " NG **************");
+}
+
+void do_test() {
+  int i, j;
+
+  fprintf(stderr, "Denormal/nonnumber test atan2f_u1(y, x)\n\n");
+
+  fprintf(stderr, "If y is +0 and x is -0, +pi is returned ... ");
+  showResult(child_atan2f(+0.0, -0.0) == M_PIf);
+
+  fprintf(stderr, "If y is -0 and x is -0, -pi is returned ... ");
+  showResult(child_atan2f(-0.0, -0.0) == -M_PIf);
+
+  fprintf(stderr, "If y is +0 and x is +0, +0 is returned ... ");
+  showResult(isPlusZerof(child_atan2f(+0.0, +0.0)));
+
+  fprintf(stderr, "If y is -0 and x is +0, -0 is returned ... ");
+  showResult(isMinusZerof(child_atan2f(-0.0, +0.0)));
+
+  fprintf(stderr, "If y is positive infinity and x is negative infinity, +3*pi/4 is returned ... ");
+  showResult(child_atan2f(POSITIVE_INFINITYf, NEGATIVE_INFINITYf) == 3*M_PIf/4);
+
+  fprintf(stderr, "If y is negative infinity and x is negative infinity, -3*pi/4 is returned ... ");
+  showResult(child_atan2f(NEGATIVE_INFINITYf, NEGATIVE_INFINITYf) == -3*M_PIf/4);
+
+  fprintf(stderr, "If y is positive infinity and x is positive infinity, +pi/4 is returned ... ");
+  showResult(child_atan2f(POSITIVE_INFINITYf, POSITIVE_INFINITYf) == M_PIf/4);
+
+  fprintf(stderr, "If y is negative infinity and x is positive infinity, -pi/4 is returned ... ");
+  showResult(child_atan2f(NEGATIVE_INFINITYf, POSITIVE_INFINITYf) == -M_PIf/4);
+
+  {
+    fprintf(stderr, "If y is +0 and x is less than 0, +pi is returned ... ");
+
+    float ya[] = { +0.0 };
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+
+    boolean success = true;
+
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != M_PIf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is -0 and x is less than 0, -pi is returned ... ");
+
+    float ya[] = { -0.0 };
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != -M_PIf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is less than 0 and x is 0, -pi/2 is returned ... ");
+
+    float ya[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+    float xa[] = { +0.0, -0.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != -M_PIf/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is greater than 0 and x is 0, pi/2 is returned ... ");
+
+
+    float ya[] = { 100000.5, 100000, 3, 2.5, 2, 1.5, 1.0, 0.5 };
+    float xa[] = { +0.0, -0.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != M_PIf/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is greater than 0 and x is -0, pi/2 is returned ... ");
+
+    float ya[] = { 100000.5, 100000, 3, 2.5, 2, 1.5, 1.0, 0.5 };
+    float xa[] = { -0.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != M_PIf/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is positive infinity, and x is finite, pi/2 is returned ... ");
+
+    float ya[] = { POSITIVE_INFINITYf };
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != M_PIf/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is negative infinity, and x is finite, -pi/2 is returned ... ");
+
+    float ya[] = { NEGATIVE_INFINITYf };
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != -M_PIf/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a finite value greater than 0, and x is negative infinity, +pi is returned ... ");
+
+    float ya[] = { 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    float xa[] = { NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != M_PIf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a finite value less than 0, and x is negative infinity, -pi is returned ... ");
+
+    float ya[] = { -0.5, -1.5, -2.0, -2.5, -3.0, -100000, -100000.5 };
+    float xa[] = { NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_atan2f(ya[j], xa[i]) != -M_PIf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a finite value greater than 0, and x is positive infinity, +0 is returned ... ");
+
+    float ya[] = { 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    float xa[] = { POSITIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isPlusZerof(child_atan2f(ya[j], xa[i]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a finite value less than 0, and x is positive infinity, -0 is returned ... ");
+
+    float ya[] = { -0.5, -1.5, -2.0, -2.5, -3.0, -100000, -100000.5 };
+    float xa[] = { POSITIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isMinusZerof(child_atan2f(ya[j], xa[i]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is NaN, a NaN is returned ... ");
+
+    float ya[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5, NANf };
+    float xa[] = { NANf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!xisnanf(child_atan2f(ya[j], xa[i]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a NaN, the result is a NaN ... ");
+
+    float ya[] = { NANf };
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5, NANf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!xisnanf(child_atan2f(ya[j], xa[i]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  fprintf(stderr, "\nend of atan2f denormal/nonnumber test\n\n");
+
+  //
+
+#if 0
+  fprintf(stderr, "\nDenormal/nonnumber test pow(x, y)\n\n");
+
+  fprintf(stderr, "If x is +1 and y is a NaN, the result is 1.0 ... ");
+  showResult(child_powf(1, NANf) == 1.0);
+
+  fprintf(stderr, "If y is 0 and x is a NaN, the result is 1.0 ... ");
+  showResult(child_powf(NANf, 0) == 1.0);
+
+  fprintf(stderr, "If x is -1, and y is positive infinity, the result is 1.0 ... ");
+  showResult(child_powf(-1, POSITIVE_INFINITYf) == 1.0);
+
+  fprintf(stderr, "If x is -1, and y is negative infinity, the result is 1.0 ... ");
+  showResult(child_powf(-1, NEGATIVE_INFINITYf) == 1.0);
+
+  {
+    fprintf(stderr, "If x is a finite value less than 0, and y is a finite non-integer, a NaN is returned ... ");
+
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+    float ya[] = { -100000.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!xisnanf(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is a NaN, the result is a NaN ... ");
+
+    float xa[] = { NANf };
+    float ya[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!xisnanf(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a NaN, the result is a NaN ... ");
+
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    float ya[] = { NANf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!xisnanf(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is +0, and y is an odd integer greater than 0, the result is +0 ... ");
+
+    float xa[] = { +0.0 };
+    float ya[] = { 1, 3, 5, 7, 100001 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isPlusZerof(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is -0, and y is an odd integer greater than 0, the result is -0 ... ");
+
+    float xa[] = { -0.0 };
+    float ya[] = { 1, 3, 5, 7, 100001 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isMinusZerof(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is 0, and y greater than 0 and not an odd integer, the result is +0 ... ");
+
+    float xa[] = { +0.0, -0.0 };
+    float ya[] = { 0.5, 1.5, 2.0, 2.5, 4.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isPlusZerof(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the absolute value of x is less than 1, and y is negative infinity, the result is positive infinity ... ");
+
+    float xa[] = { -0.999, -0.5, -0.0, +0.0, +0.5, +0.999 };
+    float ya[] = { NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_powf(xa[i], ya[j]) != POSITIVE_INFINITYf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the absolute value of x is greater than 1, and y is negative infinity, the result is +0 ... ");
+
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    float ya[] = { NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isPlusZerof(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the absolute value of x is less than 1, and y is positive infinity, the result is +0 ... ");
+
+    float xa[] = { -0.999, -0.5, -0.0, +0.0, +0.5, +0.999 };
+    float ya[] = { POSITIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isPlusZerof(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the absolute value of x is greater than 1, and y is positive infinity, the result is positive infinity ... ");
+
+    float xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    float ya[] = { POSITIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_powf(xa[i], ya[j]) != POSITIVE_INFINITYf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is negative infinity, and y is an odd integer less than 0, the result is -0 ... ");
+
+    float xa[] = { NEGATIVE_INFINITYf };
+    float ya[] = { -100001, -5, -3, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isMinusZerof(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is negative infinity, and y less than 0 and not an odd integer, the result is +0 ... ");
+
+    float xa[] = { NEGATIVE_INFINITYf };
+    float ya[] = { -100000.5, -100000, -4, -2.5, -2, -1.5, -0.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isPlusZerof(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is negative infinity, and y is an odd integer greater than 0, the result is negative infinity ... ");
+
+    float xa[] = { NEGATIVE_INFINITYf };
+    float ya[] = { 1, 3, 5, 7, 100001 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_powf(xa[i], ya[j]) != NEGATIVE_INFINITYf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is negative infinity, and y greater than 0 and not an odd integer, the result is positive infinity ... ");
+
+    float xa[] = { NEGATIVE_INFINITYf };
+    float ya[] = { 0.5, 1.5, 2, 2.5, 3.5, 4, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_powf(xa[i], ya[j]) != POSITIVE_INFINITYf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is positive infinity, and y less than 0, the result is +0 ... ");
+
+    float xa[] = { POSITIVE_INFINITYf };
+    float ya[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!isPlusZerof(child_powf(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is positive infinity, and y greater than 0, the result is positive infinity ... ");
+
+    float xa[] = { POSITIVE_INFINITYf };
+    float ya[] = { 0.5, 1, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_powf(xa[i], ya[j]) != POSITIVE_INFINITYf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is +0, and y is an odd integer less than 0, +HUGE_VAL is returned ... ");
+
+    float xa[] = { +0.0 };
+    float ya[] = { -100001, -5, -3, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_powf(xa[i], ya[j]) != POSITIVE_INFINITYf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is -0, and y is an odd integer less than 0, -HUGE_VAL is returned ... ");
+
+    float xa[] = { -0.0 };
+    float ya[] = { -100001, -5, -3, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_powf(xa[i], ya[j]) != NEGATIVE_INFINITYf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is 0, and y is less than 0 and not an odd integer, +HUGE_VAL is returned ... ");
+
+    float xa[] = { +0.0, -0.0 };
+    float ya[] = { -100000.5, -100000, -4, -2.5, -2, -1.5, -0.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (child_powf(xa[i], ya[j]) != POSITIVE_INFINITYf) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the result overflows, the functions return HUGE_VAL with the mathematically correct sign ... ");
+
+    float xa[] = { 1000, -1000 };
+    float ya[] = { 1000, 1000.5, 1001 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
+	if (!cmpDenorm(child_powf(xa[i], ya[j]), powfr(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  fprintf(stderr, "\nEnd of pow denormal/nonnumber test\n\n");
+#endif
+	
+  //
+
+  {
+    fprintf(stderr, "sinf_u1 denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_sinf(xa[i]), sinfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_sinf(xa[i]), sinfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "sin in sincosf_u1 denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      float2 q = child_sincosf(xa[i]);
+      if (!cmpDenorm(q.x, sinfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], q.x, sinfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "cosf_u1 denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_cosf(xa[i]), cosfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_cosf(xa[i]), cosfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "cos in sincosf_u1 denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      float2 q = child_sincosf(xa[i]);
+      if (!cmpDenorm(q.y, cosfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], q.y, cosfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "tanf_u1 denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, M_PIf/2, -M_PIf/2 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_tanf(xa[i]), tanfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "asinf_u1 denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 2, -2, 1, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_asinf(xa[i]), asinfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_asinf(xa[i]), asinfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "acosf_u1 denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 2, -2, 1, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_acosf(xa[i]), acosfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "atanf_u1 denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_atanf(xa[i]), atanfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_atanf(xa[i]), atanfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "logf_u1 denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 0, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_logf(xa[i]), logfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+#if 0
+  {
+    fprintf(stderr, "expf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, -2000, 2000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_expf(xa[i]), expfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_expf(xa[i]), expfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "sinhf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, +0.0, -0.0, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_sinhf(xa[i]), sinhfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_sinhf(xa[i]), sinhfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "coshf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, +0.0, -0.0, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_coshf(xa[i]), coshfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_coshf(xa[i]), coshfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "tanhf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, +0.0, -0.0, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_tanhf(xa[i]), tanhfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_tanhf(xa[i]), tanhfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "asinhf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, +0.0, -0.0, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_asinhf(xa[i]), asinhfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_asinhf(xa[i]), asinhfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "acoshf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, +0.0, -0.0, 1.0, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_acoshf(xa[i]), acoshfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_acoshf(xa[i]), acoshfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "atanhf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, +0.0, -0.0, 1.0, -1.0, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_atanhf(xa[i]), atanhfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_atanhf(xa[i]), atanhfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "sqrtf_u1 denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, +0.0, -0.0, -1.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_sqrtf(xa[i]), sqrtfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_sqrtf(xa[i]), sqrtfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+#endif
+
+  {
+    fprintf(stderr, "cbrtf_u1 denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, +0.0, -0.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_cbrtf(xa[i]), cbrtfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_cbrtf(xa[i]), cbrtfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+#if 0
+  {
+    fprintf(stderr, "exp2f denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_exp2f(xa[i]), exp2fr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_exp2f(xa[i]), exp2fr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "exp10f denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_exp10f(xa[i]), exp10fr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_exp10f(xa[i]), exp10fr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "expm1f denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_expm1f(xa[i]), expm1fr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_expm1f(xa[i]), expm1fr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "log10f denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 0, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_log10f(xa[i]), log10fr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "log1pf denormal/nonnumber test ... ");
+
+    float xa[] = { NANf, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, 0, -1, -2 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
+      if (!cmpDenorm(child_log1pf(xa[i]), log1pfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_log1pf(xa[i]), log1pfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  fprintf(stderr, "ldexpf denormal/nonnumber test ... ");
+
+  {
+    boolean success = true;
+    for(i=-10000;i<=10000 && success;i++) {
+      float d = child_ldexpf(1.0f, i);
+      float c = ldexpf(1.0f, i);
+
+      boolean pass = (isfinite(c) && d == c) || cmpDenorm(c, d);
+      if (!pass) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", (double)i, d, c);
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+#endif
+
+  //
+
+  fprintf(stderr, "\nAccuracy test (max error in ulp)\n");
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float q = child_sinf(d);
+      double c = sinlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SIN;
+      }
+    }
+
+    for(d = -10000;d < 10000;d += 0.201) {
+      float q = child_sinf(d);
+      double c = sinlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SIN;
+      }
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      float start = u2f(f2u(M_PI_4 * i)-20);
+      float end = u2f(f2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2f(f2u(d)+1)) {
+	float q = child_sinf(d);
+	double c = sinlfr(flushToZero(d));
+	double u = countULP(q, c);
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	  goto STOP_SIN;
+	}
+      }
+    }
+
+  STOP_SIN:
+
+    fprintf(stderr, "sinf_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float q = child_cosf(d);
+      double c = coslfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = -10000;d < 10000;d += 0.201) {
+      float q = child_cosf(d);
+      double c = coslfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      float start = u2f(f2u(M_PI_4 * i)-20);
+      float end = u2f(f2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2f(f2u(d)+1)) {
+	float q = child_cosf(d);
+	double c = coslfr(flushToZero(d));
+	double u = countULP(q, c);
+	max = fmax(max, u);
+      }
+    }
+
+    fprintf(stderr, "cosf_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float2 q = child_sincosf(d);
+      long double c = sinlfr(d);
+      double u = fabs((q.x - c) / ulp(c));
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q.x, (double)c, d, (double)ulp(c));
+	goto STOP_SIN2;
+      }
+    }
+
+    for(d = -10000;d < 10000;d += 0.201) {
+      float2 q = child_sincosf(d);
+      long double c = sinlfr(d);
+      double u = fabs((q.x - c) / ulp(c));
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q.x, (double)c, d, (double)ulp(c));
+	goto STOP_SIN2;
+      }
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      float start = u2f(f2u(M_PI_4 * i)-20);
+      float end = u2f(f2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2f(f2u(d)+1)) {
+	float2 q = child_sincosf(d);
+	double c = sinlfr(d);
+	double u = fabs((q.x - c) / ulp(c));
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q.x, (double)c, d, (double)ulp(c));
+	  goto STOP_SIN2;
+	}
+      }
+    }
+
+  STOP_SIN2:
+
+    fprintf(stderr, "sin in sincosf_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float2 q = child_sincosf(d);
+      double c = coslfr(d);
+      double u = fabs((q.y - c) / ulp(c));
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q.y, (double)c, d, (double)ulp(c));
+	goto STOP_COS2;
+      }
+    }
+
+    for(d = -10000;d < 10000;d += 0.201) {
+      float2 q = child_sincosf(d);
+      double c = coslfr(d);
+      double u = fabs((q.y - c) / ulp(c));
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q.y, (double)c, d, (double)ulp(c));
+	goto STOP_COS2;
+      }
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      float start = u2f(f2u(M_PI_4 * i)-20);
+      float end = u2f(f2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2f(f2u(d)+1)) {
+	float2 q = child_sincosf(d);
+	double c = coslfr(d);
+	double u = fabs((q.y - c) / ulp(c));
+	max = fmax(max, u);
+      }
+    }
+
+  STOP_COS2:
+
+    fprintf(stderr, "cos in sincosf_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float q = child_tanf(d);
+      double c = tanlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = -10000;d < 10000;d += 0.201) {
+      float q = child_tanf(d);
+      double c = tanlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      float start = u2f(f2u(M_PI_4 * i)-20);
+      float end = u2f(f2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2f(f2u(d)+1)) {
+	float q = child_tanf(d);
+	double c = tanlfr(flushToZero(d));
+	double u = countULP(q, c);
+	max = fmax(max, u);
+      }
+    }
+
+    fprintf(stderr, "tanf_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -1;d < 1;d += 0.00002) {
+      float q = child_asinf(d);
+      double c = asinlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ASIN;
+      }
+    }
+
+  STOP_ASIN:
+
+    fprintf(stderr, "asinf_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -1;d < 1;d += 0.00002) {
+      float q = child_acosf(d);
+      double c = acoslfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ACOS;
+      }
+    }
+
+  STOP_ACOS:
+
+    fprintf(stderr, "acosf_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      float q = child_atanf(d);
+      double c = atanlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %g, d = %g\n", q, d);
+	goto STOP_ATAN;
+      }
+    }
+
+    for(d = -10000;d < 10000;d += 0.201) {
+      float q = child_atanf(d);
+      double c = atanlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %g, d = %g\n", q, d);
+	goto STOP_ATAN;
+      }
+    }
+
+  STOP_ATAN:
+
+    fprintf(stderr, "atanf_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float x, y, max = 0;
+
+    for(y = -10;y < 10;y += 0.051) {
+      for(x = -10;x < 10;x += 0.052) {
+	float q = child_atan2f(y, x);
+	double c = atan2lfr(flushToZero(y), flushToZero(x));
+	double u = countULP(q, c);
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %g, y = %g, x = %g\n", q, y, x);
+	  goto STOP_ATAN2;
+	}
+      }
+    }
+
+    for(y = -100;y < 100;y += 0.51) {
+      for(x = -100;x < 100;x += 0.52) {
+	float q = child_atan2f(y, x);
+	double c = atan2lfr(flushToZero(y), flushToZero(x));
+	double u = countULP(q, c);
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %g, y = %g, x = %g\n", q, y, x);
+	  goto STOP_ATAN2;
+	}
+      }
+    }
+
+  STOP_ATAN2:
+
+    fprintf(stderr, "atan2f_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = 0.0001;d < 10;d += 0.0001) {
+      float q = child_logf(d);
+      double c = loglfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_LOG;
+      }
+    }
+
+    for(d = 0.0001;d < 10000;d += 0.1) {
+      float q = child_logf(d);
+      double c = loglfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_LOG;
+      }
+    }
+
+    int i;
+    for(i = -1000;i <= 1000;i++) {
+      d = pow(1.1, i);
+      float q = child_logf(d);
+      double c = loglfr(flushToZero(d));
+      double u = countULP(q, c);
+      if (flushToZero(d * 0.1) == 0.0 && q == NEGATIVE_INFINITYf) u = 0;
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\ni=%d\n", q, (double)c, d, (double)ulp(c), i);
+	goto STOP_LOG;
+      }
+    }
+
+  STOP_LOG:
+
+    fprintf(stderr, "logf_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    float d, max = 0;
+
+    for(d = -10000;d < 10000;d += 0.2) {
+      float q = child_cbrtf(d);
+      double c = cbrtlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_CBRT;
+      }
+    }
+
+    int i;
+    for(i = -1000;i <= 1000;i++) {
+      d = pow(1.1, i);
+      float q = child_cbrtf(d);
+      double c = cbrtlfr(flushToZero(d));
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_CBRT;
+      }
+    }
+
+  STOP_CBRT:
+
+    fprintf(stderr, "cbrtf_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+}
+/* Usage : <tester> [--flushtozero ...] <child command> [child args ...] */
+int main(int argc, char **argv) {
+  char *argv2[argc + 1]; /* the child's arguments plus the terminating NULL */
+  int i, a2s;
+
+  /* Leading "--flushtozero" words are options for this tester; the rest is the child command line. */
+  for(a2s=1;a2s<argc;a2s++) {
+    if (strcmp(argv[a2s], "--flushtozero") != 0) break;
+    enableFlushToZero = 1;
+  }
+
+  /* With no child command, argv2[0] would be NULL and be handed to startChild() as the program to run. */
+  if (a2s >= argc) {
+    fprintf(stderr, "Usage : %s [--flushtozero] <child command> [args ...]\n", argc > 0 ? argv[0] : "tester");
+    return -1;
+  }
+
+  for(i=a2s;i<argc;i++) argv2[i-a2s] = argv[i];
+  argv2[argc-a2s] = NULL;
+
+  mpfr_set_default_prec(128); /* default MPFR precision, in bits */
+  startChild(argv2[0], argv2);
+  do_test();
+
+  if (allTestsPassed) {
+    fprintf(stderr, "\n\n*** All tests passed");
+    if (enableFlushToZero) fprintf(stderr, " (flush to zero)");
+    fprintf(stderr, "\n");
+  } else {
+    fprintf(stderr, "\n\n*** There were errors in some tests\n");
+  }
+
+  return allTestsPassed ? 0 : -1;
+}
diff --git a/tester/testeru1.c b/tester/testeru1.c
new file mode 100644
index 00000000..9e2d5c00
--- /dev/null
+++ b/tester/testeru1.c
@@ -0,0 +1,2833 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <errno.h>
+#include <inttypes.h>
+
+#include <mpfr.h>
+
+#include <unistd.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#include "nonnumber.h"
+
+#define POSITIVE_INFINITY INFINITY
+#define NEGATIVE_INFINITY (-INFINITY)
+
+typedef int boolean;
+
+#define true 1
+#define false 0
+
+void stop(char *mes) {
+  fprintf(stderr, "%s\n", mes);
+  abort();
+}
+
+int readln(int fd, char *buf, int cnt) {
+  // Reads one line from fd, a byte at a time, into buf (capacity cnt bytes).
+  // A '\n' is stored and counted; on success buf is NUL-terminated and the
+  // number of stored bytes is returned. A line that does not fit is cut after
+  // cnt-1 bytes. Returns -1 if cnt < 1; if read() does not deliver a byte its
+  // result (0 or -1) is returned and buf is left unterminated.
+  int i, rcnt = 0;
+
+  if (cnt < 1) return -1;
+
+  while(cnt >= 2) {
+    i = read(fd, buf, 1);
+    if (i != 1) return i;
+    rcnt++;
+    cnt--;
+    if (*buf++ == '\n') break;
+  }
+  *buf = '\0'; // buf already points one past the last stored byte: no slot is skipped
+  return rcnt;
+}
+
+int ptoc[2], ctop[2]; // pipes created by startChild: ptoc feeds the child's stdin, ctop carries its stdout
+int pid; // fork() result stored by startChild
+
+void startChild(const char *path, char *const argv[]) {
+  // Forks and execs `path` with its stdin/stdout connected to the ptoc/ctop pipes;
+  // the parent keeps ptoc[1] (requests) and ctop[0] (replies). Any failure aborts.
+  if (pipe(ptoc) != 0 || pipe(ctop) != 0 || (pid = fork()) == -1) {
+    fprintf(stderr, "pipe/fork in startChild : %s\n", strerror(errno));
+    abort(); // explicit check: a bare assert() disappears under NDEBUG
+  }
+
+  if (pid == 0) {
+    // child process
+    char buf0[1], buf1[1];
+    int i;
+
+    close(ptoc[1]);
+    close(ctop[0]);
+
+    i = dup2(ptoc[0], fileno(stdin));
+    assert(i != -1);
+
+    i = dup2(ctop[1], fileno(stdout));
+    assert(i != -1);
+
+    setvbuf(stdin, buf0, _IONBF,0);
+    setvbuf(stdout, buf1, _IONBF,0);
+
+    fflush(stdout); // stdin is not flushed: fflush on an input stream is undefined in ISO C
+
+    execvp(path, argv);
+
+    // Only reached when exec failed. abort() instead of assert(0) keeps the
+    // failure fatal under NDEBUG, so the child can never continue as a second parent.
+    fprintf(stderr, "execvp in startChild : %s\n", strerror(errno));
+    abort();
+  }
+
+  // parent process
+
+  close(ptoc[0]);
+  close(ctop[1]);
+}
+
+double u2d(uint64_t u) {
+  // Bit-for-bit reinterpretation of a 64-bit pattern as a double
+  // (no numeric conversion takes place).
+  double value;
+
+  memcpy(&value, &u, sizeof(value));
+  return value;
+}
+
+uint64_t d2u(double d) {
+  // Bit-for-bit view of a double as a 64-bit unsigned integer
+  // (the value is not converted numerically).
+  uint64_t bits;
+
+  memcpy(&bits, &d, sizeof(bits));
+  return bits;
+}
+
+//
+
+boolean isPlusZero(double x) { return x == 0 && !signbit(x); } // zero with the sign bit clear
+boolean isMinusZero(double x) { return x == 0 && signbit(x) != 0; } // zero with the sign bit set
+boolean xisnan(double x) { return !(x == x); } // only a NaN compares unequal to itself
+double sign(double d) { if (d < 0) return -1; return 1; } // -1 below zero, otherwise 1 (also for zeros and NaN)
+
+boolean cmpDenorm(double x, double y) {
+  // True when x and y fall in the same special-value class: both NaN, the same
+  // infinity, the same signed zero (when y is zero), or finite with equal sign().
+  const boolean nanx = xisnan(x), nany = xisnan(y);
+
+  if (nanx || nany) return nanx && nany;
+  if (isinf(x) != isinf(y)) return false;
+  if (isinf(x) || isinf(y)) return x == y; // two infinities: equal only with the same sign
+  if (y == 0) {
+    // a zero y must be matched by a zero x of the same sign
+    return (isPlusZero(x) && isPlusZero(y)) || (isMinusZero(x) && isMinusZero(y));
+  }
+  return sign(x) == sign(y);
+}
+
+long double ulp(long double x) {
+  // Returns 2^(e-53), where e is the binary exponent frexpl reports for |x|,
+  // but never less than DBL_MIN; a zero x yields DBL_MIN.
+  int e;
+  const long double mag = fabsl(x);
+
+  if (mag == 0) return DBL_MIN;
+
+  frexpl(mag, &e); // only the exponent is needed; the fraction is discarded
+  const double step = ldexp(1.0, e - 53);
+  return fmax(step, DBL_MIN);
+}
+
+double countULP(long double x, long double y) {
+  // Error of x against the reference y, measured in units of ulp(y).
+  // Results >= 10000 are sentinels for mismatched special values:
+  // 10000 NaN on one side only, 10001 unexpected infinite x, 10002 nonzero x
+  // for a zero y, 10003 infinite y with a finite x.
+  double fx = x;
+  double fy = y;
+
+  if (xisnan(fx) && xisnan(fy)) return 0;
+  if (xisnan(fx) || xisnan(fy)) return 10000;
+  if (isinf(fx)) {
+    if (sign(fx) == sign(fy) && fabs(fy) > 1e+300) return 0; // Relaxed infinity handling
+    return 10001;
+  }
+
+  // fx is finite from here on, so no "both infinite" case can remain.
+  if (fy == 0) return fx == 0 ? 0 : 10002;
+  if (isinf(fy)) return 10003;
+  return fabs((x - y) / ulp(y));
+}
+
+//
+
+double sinfr(double d) { // reference sin(d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_sin(v, v, GMP_RNDN); // in place: v = sin(v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double sinlfr(long double d) { // reference sin(d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_sin(v, v, GMP_RNDN); // in place: v = sin(v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double cosfr(double d) { // reference cos(d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_cos(v, v, GMP_RNDN); // in place: v = cos(v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double coslfr(long double d) { // reference cos(d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_cos(v, v, GMP_RNDN); // in place: v = cos(v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double tanfr(double d) { // reference tan(d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_tan(v, v, GMP_RNDN); // in place: v = tan(v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double tanlfr(long double d) { // reference tan(d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_tan(v, v, GMP_RNDN); // in place: v = tan(v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double asinfr(double d) { // reference asin(d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_asin(v, v, GMP_RNDN); // in place: v = asin(v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double asinlfr(long double d) { // reference asin(d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_asin(v, v, GMP_RNDN); // in place: v = asin(v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double acosfr(double d) { // reference acos(d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_acos(v, v, GMP_RNDN); // in place: v = acos(v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double acoslfr(long double d) { // reference acos(d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_acos(v, v, GMP_RNDN); // in place: v = acos(v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double atanfr(double d) { // reference atan(d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_atan(v, v, GMP_RNDN); // in place: v = atan(v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double atanlfr(long double d) { // reference atan(d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_atan(v, v, GMP_RNDN); // in place: v = atan(v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double atan2fr(double y, double x) { // reference atan2(y, x), evaluated with MPFR
+  mpfr_t vx, vy;
+  mpfr_inits(vx, vy, NULL);
+  mpfr_set_d(vy, y, GMP_RNDN);
+  mpfr_set_d(vx, x, GMP_RNDN);
+  mpfr_atan2(vx, vy, vx, GMP_RNDN); // vx = atan2(vy, vx); vx doubles as the output
+  const double result = mpfr_get_d(vx, GMP_RNDN);
+  mpfr_clears(vx, vy, NULL);
+  return result;
+}
+
+long double atan2lfr(long double y, long double x) { // reference atan2(y, x) in long double, via MPFR
+  mpfr_t vx, vy;
+  mpfr_inits(vx, vy, NULL);
+  mpfr_set_ld(vy, y, GMP_RNDN);
+  mpfr_set_ld(vx, x, GMP_RNDN);
+  mpfr_atan2(vx, vy, vx, GMP_RNDN); // vx = atan2(vy, vx); vx doubles as the output
+  const long double result = mpfr_get_ld(vx, GMP_RNDN);
+  mpfr_clears(vx, vy, NULL);
+  return result;
+}
+
+double logfr(double d) { // reference log(d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_log(v, v, GMP_RNDN); // in place: v = log(v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double loglfr(long double d) { // reference log(d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_log(v, v, GMP_RNDN); // in place: v = log(v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double expfr(double d) { // reference exp(d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_exp(v, v, GMP_RNDN); // in place: v = exp(v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double explfr(long double d) { // reference exp(d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_exp(v, v, GMP_RNDN); // in place: v = exp(v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double powfr(double x, double y) { // reference pow(x, y), evaluated with MPFR
+  mpfr_t base, expo;
+  mpfr_inits(base, expo, NULL);
+  mpfr_set_d(base, x, GMP_RNDN);
+  mpfr_set_d(expo, y, GMP_RNDN);
+  mpfr_pow(base, base, expo, GMP_RNDN); // base = base^expo; base doubles as the output
+  const double result = mpfr_get_d(base, GMP_RNDN);
+  mpfr_clears(base, expo, NULL);
+  return result;
+}
+
+long double powlfr(long double x, long double y) { // reference pow(x, y) in long double, via MPFR
+  mpfr_t base, expo;
+  mpfr_inits(base, expo, NULL);
+  mpfr_set_ld(base, x, GMP_RNDN);
+  mpfr_set_ld(expo, y, GMP_RNDN);
+  mpfr_pow(base, base, expo, GMP_RNDN); // base = base^expo; base doubles as the output
+  const long double result = mpfr_get_ld(base, GMP_RNDN);
+  mpfr_clears(base, expo, NULL);
+  return result;
+}
+
+double sinhfr(double d) { // reference sinh(d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_sinh(v, v, GMP_RNDN); // in place: v = sinh(v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double coshfr(double d) { // reference cosh(d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_cosh(v, v, GMP_RNDN); // in place: v = cosh(v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double tanhfr(double d) { // reference tanh(d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_tanh(v, v, GMP_RNDN); // in place: v = tanh(v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double asinhfr(double d) { // reference asinh(d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_asinh(v, v, GMP_RNDN); // in place: v = asinh(v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double acoshfr(double d) { // reference acosh(d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_acosh(v, v, GMP_RNDN); // in place: v = acosh(v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double atanhfr(double d) { // reference atanh(d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_atanh(v, v, GMP_RNDN); // in place: v = atanh(v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double sinhlfr(long double d) { // reference sinh(d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_sinh(v, v, GMP_RNDN); // in place: v = sinh(v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double coshlfr(long double d) { // reference cosh(d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_cosh(v, v, GMP_RNDN); // in place: v = cosh(v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double tanhlfr(long double d) { // reference tanh(d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_tanh(v, v, GMP_RNDN); // in place: v = tanh(v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double asinhlfr(long double d) { // reference asinh(d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_asinh(v, v, GMP_RNDN); // in place: v = asinh(v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double acoshlfr(long double d) { // reference acosh(d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_acosh(v, v, GMP_RNDN); // in place: v = acosh(v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double atanhlfr(long double d) { // reference atanh(d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_atanh(v, v, GMP_RNDN); // in place: v = atanh(v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double sqrtlfr(long double d) { // reference sqrt(d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_sqrt(v, v, GMP_RNDN); // in place: v = sqrt(v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double sqrtfr(double d) { // reference sqrt(d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_sqrt(v, v, GMP_RNDN); // in place: v = sqrt(v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double cbrtfr(double d) { // reference cbrt(d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_cbrt(v, v, GMP_RNDN); // in place: v = cbrt(v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double cbrtlfr(long double d) { // reference cbrt(d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_cbrt(v, v, GMP_RNDN); // in place: v = cbrt(v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double exp2fr(double d) { // reference 2^d, evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_exp2(v, v, GMP_RNDN); // in place: v = 2^v
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double exp2lfr(long double d) { // reference 2^d in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_exp2(v, v, GMP_RNDN); // in place: v = 2^v
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double exp10fr(double d) { // reference 10^d, evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_exp10(v, v, GMP_RNDN); // in place: v = 10^v
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double exp10lfr(long double d) { // reference 10^d in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_exp10(v, v, GMP_RNDN); // in place: v = 10^v
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double expm1fr(double d) { // reference exp(d) - 1, evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_expm1(v, v, GMP_RNDN); // in place: v = exp(v) - 1
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double expm1lfr(long double d) { // reference exp(d) - 1 in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_expm1(v, v, GMP_RNDN); // in place: v = exp(v) - 1
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double log10fr(double d) { // reference log10(d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_log10(v, v, GMP_RNDN); // in place: v = log10(v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double log10lfr(long double d) { // reference log10(d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_log10(v, v, GMP_RNDN); // in place: v = log10(v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+double log1pfr(double d) { // reference log(1 + d), evaluated with MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_d(v, d, GMP_RNDN);
+  mpfr_log1p(v, v, GMP_RNDN); // in place: v = log(1 + v)
+  const double result = mpfr_get_d(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+long double log1plfr(long double d) { // reference log(1 + d) in long double, via MPFR
+  mpfr_t v;
+  mpfr_init(v); // default precision
+  mpfr_set_ld(v, d, GMP_RNDN);
+  mpfr_log1p(v, v, GMP_RNDN); // in place: v = log(1 + v)
+  const long double result = mpfr_get_ld(v, GMP_RNDN);
+  mpfr_clear(v);
+  return result;
+}
+
+//
+
+typedef struct {
+  double x, y; // child_sincos stores the first value of the reply in x and the second in y
+} double2;
+
+double child_sin(double x) {
+  // Sends "sin_u1 <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "sin_u1 %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_sin");
+  if (readln(ctop[0], str, 255) < 1) stop("child_sin");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_sin"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_cos(double x) {
+  // Sends "cos_u1 <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "cos_u1 %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_cos");
+  if (readln(ctop[0], str, 255) < 1) stop("child_cos");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_cos"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double2 child_sincos(double x) {
+  // Sends "sincos_u1 <hex bits of x>" to the child and decodes the two hex
+  // words of its one-line reply into ret.x and ret.y, in that order.
+  char str[256];
+  uint64_t u, v;
+  double2 ret;
+  sprintf(str, "sincos_u1 %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_sincos");
+  if (readln(ctop[0], str, 255) < 1) stop("child_sincos");
+  if (sscanf(str, "%" PRIx64 " %" PRIx64, &u, &v) != 2) stop("child_sincos"); // both words must parse
+  ret.x = u2d(u);
+  ret.y = u2d(v);
+  return ret;
+}
+
+double child_tan(double x) {
+  // Sends "tan_u1 <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "tan_u1 %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_tan");
+  if (readln(ctop[0], str, 255) < 1) stop("child_tan");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_tan"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_asin(double x) {
+  // Sends "asin_u1 <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "asin_u1 %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_asin_");
+  if (readln(ctop[0], str, 255) < 1) stop("child_asin_");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_asin_"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_acos(double x) {
+  // Sends "acos_u1 <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "acos_u1 %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_acos");
+  if (readln(ctop[0], str, 255) < 1) stop("child_acos");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_acos"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_atan(double x) {
+  // Sends "atan_u1 <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "atan_u1 %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_atan");
+  if (readln(ctop[0], str, 255) < 1) stop("child_atan");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_atan"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_atan2(double y, double x) {
+  // Sends "atan2_u1 <hex bits of y> <hex bits of x>" and decodes the one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "atan2_u1 %" PRIx64 " %" PRIx64 "\n", d2u(y), d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_atan2");
+  if (readln(ctop[0], str, 255) < 1) stop("child_atan2");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_atan2"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_log(double x) {
+  // Sends "log_u1 <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "log_u1 %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_log");
+  if (readln(ctop[0], str, 255) < 1) stop("child_log");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_log"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_exp(double x) {
+  // Sends "exp <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "exp %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_exp");
+  if (readln(ctop[0], str, 255) < 1) stop("child_exp");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_exp"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_pow(double x, double y) {
+  // Sends "pow <hex bits of x> <hex bits of y>" and decodes the one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "pow %" PRIx64 " %" PRIx64 "\n", d2u(x), d2u(y));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_pow");
+  if (readln(ctop[0], str, 255) < 1) stop("child_pow");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_pow"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_sinh(double x) {
+  // Sends "sinh <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "sinh %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_sinh");
+  if (readln(ctop[0], str, 255) < 1) stop("child_sinh");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_sinh"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_cosh(double x) {
+  // Sends "cosh <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "cosh %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_cosh");
+  if (readln(ctop[0], str, 255) < 1) stop("child_cosh");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_cosh"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_tanh(double x) {
+  // Sends "tanh <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "tanh %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_tanh");
+  if (readln(ctop[0], str, 255) < 1) stop("child_tanh");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_tanh"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_asinh(double x) {
+  // Sends "asinh <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "asinh %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_asinh");
+  if (readln(ctop[0], str, 255) < 1) stop("child_asinh");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_asinh"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_acosh(double x) {
+  // Sends "acosh <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "acosh %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_acosh");
+  if (readln(ctop[0], str, 255) < 1) stop("child_acosh");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_acosh"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_atanh(double x) {
+  // Sends "atanh <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "atanh %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_atanh");
+  if (readln(ctop[0], str, 255) < 1) stop("child_atanh");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_atanh"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_sqrt(double x) {
+  // Sends "sqrt <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "sqrt %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_sqrt");
+  if (readln(ctop[0], str, 255) < 1) stop("child_sqrt");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_sqrt"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_cbrt(double x) {
+  // Sends "cbrt_u1 <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "cbrt_u1 %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_cbrt");
+  if (readln(ctop[0], str, 255) < 1) stop("child_cbrt");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_cbrt"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_exp2(double x) {
+  // Sends "exp2 <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "exp2 %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_exp2");
+  if (readln(ctop[0], str, 255) < 1) stop("child_exp2");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_exp2"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_exp10(double x) {
+  // Sends "exp10 <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "exp10 %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_exp10");
+  if (readln(ctop[0], str, 255) < 1) stop("child_exp10");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_exp10"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_expm1(double x) {
+  // Sends "expm1 <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "expm1 %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_expm1");
+  if (readln(ctop[0], str, 255) < 1) stop("child_expm1");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_expm1"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_log10(double x) {
+  // Sends "log10 <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "log10 %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_log10");
+  if (readln(ctop[0], str, 255) < 1) stop("child_log10");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_log10"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_log1p(double x) {
+  // Sends "log1p <hex bits of x>" to the child and decodes its one-line hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "log1p %" PRIx64 "\n", d2u(x));
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_log1p");
+  if (readln(ctop[0], str, 255) < 1) stop("child_log1p");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_log1p"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+double child_ldexp(double x, int q) {
+  // Sends "ldexp <hex bits of x> <hex bits of (double)q>" and decodes the hex reply.
+  char str[256];
+  uint64_t u;
+  sprintf(str, "ldexp %" PRIx64 " %" PRIx64 "\n", d2u(x), d2u(q)); // d2u takes a double, so q is converted first
+  if (write(ptoc[1], str, strlen(str)) < 0) stop("child_ldexp");
+  if (readln(ctop[0], str, 255) < 1) stop("child_ldexp");
+  if (sscanf(str, "%" PRIx64, &u) != 1) stop("child_ldexp"); // u would otherwise be read unset
+  return u2d(u);
+}
+
+int allTestsPassed = 1; // stays 1 until showResult sees a failed check
+void showResult(int success) { // print one check's verdict on stderr and record failures
+  const char *verdict = success ? " OK" : " NG **************";
+  if (!success) allTestsPassed = 0;
+  fprintf(stderr, "%s\n", verdict);
+}
+
+void do_test() {
+  int i, j;
+
+  fprintf(stderr, "Denormal/nonnumber test atan2_u1(y, x)\n\n");
+
+  fprintf(stderr, "If y is +0 and x is -0, +pi is returned ... ");
+  showResult(child_atan2(+0.0, -0.0) == M_PI);
+
+  fprintf(stderr, "If y is -0 and x is -0, -pi is returned ... ");
+  showResult(child_atan2(-0.0, -0.0) == -M_PI);
+
+  fprintf(stderr, "If y is +0 and x is +0, +0 is returned ... ");
+  showResult(isPlusZero(child_atan2(+0.0, +0.0)));
+
+  fprintf(stderr, "If y is -0 and x is +0, -0 is returned ... ");
+  showResult(isMinusZero(child_atan2(-0.0, +0.0)));
+
+  fprintf(stderr, "If y is positive infinity and x is negative infinity, +3*pi/4 is returned ... ");
+  showResult(child_atan2(POSITIVE_INFINITY, NEGATIVE_INFINITY) == 3*M_PI/4);
+
+  fprintf(stderr, "If y is negative infinity and x is negative infinity, -3*pi/4 is returned ... ");
+  showResult(child_atan2(NEGATIVE_INFINITY, NEGATIVE_INFINITY) == -3*M_PI/4);
+
+  fprintf(stderr, "If y is positive infinity and x is positive infinity, +pi/4 is returned ... ");
+  showResult(child_atan2(POSITIVE_INFINITY, POSITIVE_INFINITY) == M_PI/4);
+
+  fprintf(stderr, "If y is negative infinity and x is positive infinity, -pi/4 is returned ... ");
+  showResult(child_atan2(NEGATIVE_INFINITY, POSITIVE_INFINITY) == -M_PI/4);
+
+  {
+    fprintf(stderr, "If y is +0 and x is less than 0, +pi is returned ... ");
+
+    double ya[] = { +0.0 };
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+
+    boolean success = true;
+
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != M_PI) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is -0 and x is less than 0, -pi is returned ... ");
+
+    double ya[] = { -0.0 };
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != -M_PI) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is less than 0 and x is 0, -pi/2 is returned ... ");
+
+    double ya[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+    double xa[] = { +0.0, -0.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != -M_PI/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is greater than 0 and x is 0, pi/2 is returned ... ");
+
+
+    double ya[] = { 100000.5, 100000, 3, 2.5, 2, 1.5, 1.0, 0.5 };
+    double xa[] = { +0.0, -0.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != M_PI/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is greater than 0 and x is -0, pi/2 is returned ... ");
+
+    double ya[] = { 100000.5, 100000, 3, 2.5, 2, 1.5, 1.0, 0.5 };
+    double xa[] = { -0.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != M_PI/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is positive infinity, and x is finite, pi/2 is returned ... ");
+
+    double ya[] = { POSITIVE_INFINITY };
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != M_PI/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is negative infinity, and x is finite, -pi/2 is returned ... ");
+
+    double ya[] = { NEGATIVE_INFINITY };
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != -M_PI/2) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a finite value greater than 0, and x is negative infinity, +pi is returned ... ");
+
+    double ya[] = { 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    double xa[] = { NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != M_PI) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a finite value less than 0, and x is negative infinity, -pi is returned ... ");
+
+    double ya[] = { -0.5, -1.5, -2.0, -2.5, -3.0, -100000, -100000.5 };
+    double xa[] = { NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_atan2(ya[j], xa[i]) != -M_PI) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a finite value greater than 0, and x is positive infinity, +0 is returned ... ");
+
+    double ya[] = { 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    double xa[] = { POSITIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isPlusZero(child_atan2(ya[j], xa[i]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a finite value less than 0, and x is positive infinity, -0 is returned ... ");
+
+    double ya[] = { -0.5, -1.5, -2.0, -2.5, -3.0, -100000, -100000.5 };
+    double xa[] = { POSITIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isMinusZero(child_atan2(ya[j], xa[i]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is NaN, a NaN is returned ... ");
+
+    double ya[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5, NAN };
+    double xa[] = { NAN };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!xisnan(child_atan2(ya[j], xa[i]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a NaN, the result is a NaN ... ");
+
+    double ya[] = { NAN };
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5, NAN };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!xisnan(child_atan2(ya[j], xa[i]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  fprintf(stderr, "\nend of atan2_u1 denormal/nonnumber test\n");
+
+  //
+
+#if 0
+  fprintf(stderr, "\nDenormal/nonnumber test pow(x, y)\n\n");
+
+  fprintf(stderr, "If x is +1 and y is a NaN, the result is 1.0 ... ");
+  showResult(child_pow(1, NAN) == 1.0);
+
+  fprintf(stderr, "If y is 0 and x is a NaN, the result is 1.0 ... ");
+  showResult(child_pow(NAN, 0) == 1.0);
+
+  fprintf(stderr, "If x is -1, and y is positive infinity, the result is 1.0 ... ");
+  showResult(child_pow(-1, POSITIVE_INFINITY) == 1.0);
+
+  fprintf(stderr, "If x is -1, and y is negative infinity, the result is 1.0 ... ");
+  showResult(child_pow(-1, NEGATIVE_INFINITY) == 1.0);
+
+  {
+    fprintf(stderr, "If x is a finite value less than 0, and y is a finite non-integer, a NaN is returned ... ");
+
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+    double ya[] = { -100000.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!xisnan(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is a NaN, the result is a NaN ... ");
+
+    double xa[] = { NAN };
+    double ya[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!xisnan(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If y is a NaN, the result is a NaN ... ");
+
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    double ya[] = { NAN };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!xisnan(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is +0, and y is an odd integer greater than 0, the result is +0 ... ");
+
+    double xa[] = { +0.0 };
+    double ya[] = { 1, 3, 5, 7, 100001 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isPlusZero(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is -0, and y is an odd integer greater than 0, the result is -0 ... ");
+
+    double xa[] = { -0.0 };
+    double ya[] = { 1, 3, 5, 7, 100001 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isMinusZero(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is 0, and y greater than 0 and not an odd integer, the result is +0 ... ");
+
+    double xa[] = { +0.0, -0.0 };
+    double ya[] = { 0.5, 1.5, 2.0, 2.5, 4.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isPlusZero(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the absolute value of x is less than 1, and y is negative infinity, the result is positive infinity ... ");
+
+    double xa[] = { -0.999, -0.5, -0.0, +0.0, +0.5, +0.999 };
+    double ya[] = { NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_pow(xa[i], ya[j]) != POSITIVE_INFINITY) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the absolute value of x is greater than 1, and y is negative infinity, the result is +0 ... ");
+
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    double ya[] = { NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isPlusZero(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the absolute value of x is less than 1, and y is positive infinity, the result is +0 ... ");
+
+    double xa[] = { -0.999, -0.5, -0.0, +0.0, +0.5, +0.999 };
+    double ya[] = { POSITIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isPlusZero(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the absolute value of x is greater than 1, and y is positive infinity, the result is positive infinity ... ");
+
+    double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+    double ya[] = { POSITIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_pow(xa[i], ya[j]) != POSITIVE_INFINITY) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is negative infinity, and y is an odd integer less than 0, the result is -0 ... ");
+
+    double xa[] = { NEGATIVE_INFINITY };
+    double ya[] = { -100001, -5, -3, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isMinusZero(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is negative infinity, and y less than 0 and not an odd integer, the result is +0 ... ");
+
+    double xa[] = { NEGATIVE_INFINITY };
+    double ya[] = { -100000.5, -100000, -4, -2.5, -2, -1.5, -0.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isPlusZero(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is negative infinity, and y is an odd integer greater than 0, the result is negative infinity ... ");
+
+    double xa[] = { NEGATIVE_INFINITY };
+    double ya[] = { 1, 3, 5, 7, 100001 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_pow(xa[i], ya[j]) != NEGATIVE_INFINITY) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is negative infinity, and y greater than 0 and not an odd integer, the result is positive infinity ... ");
+
+    double xa[] = { NEGATIVE_INFINITY };
+    double ya[] = { 0.5, 1.5, 2, 2.5, 3.5, 4, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_pow(xa[i], ya[j]) != POSITIVE_INFINITY) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is positive infinity, and y less than 0, the result is +0 ... ");
+
+    double xa[] = { POSITIVE_INFINITY };
+    double ya[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!isPlusZero(child_pow(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is positive infinity, and y greater than 0, the result is positive infinity ... ");
+
+    double xa[] = { POSITIVE_INFINITY };
+    double ya[] = { 0.5, 1, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_pow(xa[i], ya[j]) != POSITIVE_INFINITY) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is +0, and y is an odd integer less than 0, +HUGE_VAL is returned ... ");
+
+    double xa[] = { +0.0 };
+    double ya[] = { -100001, -5, -3, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_pow(xa[i], ya[j]) != POSITIVE_INFINITY) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is -0, and y is an odd integer less than 0, -HUGE_VAL is returned ... ");
+
+    double xa[] = { -0.0 };
+    double ya[] = { -100001, -5, -3, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_pow(xa[i], ya[j]) != NEGATIVE_INFINITY) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If x is 0, and y is less than 0 and not an odd integer, +HUGE_VAL is returned ... ");
+
+    double xa[] = { +0.0, -0.0 };
+    double ya[] = { -100000.5, -100000, -4, -2.5, -2, -1.5, -0.5 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (child_pow(xa[i], ya[j]) != POSITIVE_INFINITY) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "If the result overflows, the functions return HUGE_VAL with the mathematically correct sign ... ");
+
+    double xa[] = { 1000, -1000 };
+    double ya[] = { 1000, 1000.5, 1001 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
+	if (!cmpDenorm(child_pow(xa[i], ya[j]), powfr(xa[i], ya[j]))) {
+	  success = false;
+	  break;
+	}
+      }
+    }
+
+    showResult(success);
+  }
+
+  fprintf(stderr, "\nEnd of pow denormal/nonnumber test\n\n");
+#endif
+
+  //
+
+  {
+    fprintf(stderr, "sin_u1 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_sin(xa[i]), sinfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "sin in sincos_u1 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      double2 q = child_sincos(xa[i]);
+      if (!cmpDenorm(q.x, sinfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "cos_u1 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_cos(xa[i]), cosfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "cos in sincos_u1 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      double2 q = child_sincos(xa[i]);
+      if (!cmpDenorm(q.y, cosfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "tan_u1 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, M_PI/2, -M_PI/2 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_tan(xa[i]), tanfr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "asin_u1 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, 2, -2, 1, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_asin(xa[i]), asinfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_asin(xa[i]), asinfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "acos_u1 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, 2, -2, 1, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_acos(xa[i]), acosfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_acos(xa[i]), acosfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "atan_u1 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_atan(xa[i]), atanfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_atan(xa[i]), atanfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "log_u1 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, 0, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_log(xa[i]), logfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_log(xa[i]), logfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+#if 0
+  {
+    fprintf(stderr, "exp denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_exp(xa[i]), expfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_exp(xa[i]), expfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "sinh denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, +0.0, -0.0, POSITIVE_INFINITY, NEGATIVE_INFINITY, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_sinh(xa[i]), sinhfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_sinh(xa[i]), sinhfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "cosh denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, +0.0, -0.0, POSITIVE_INFINITY, NEGATIVE_INFINITY, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_cosh(xa[i]), coshfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_cosh(xa[i]), coshfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "tanh denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, +0.0, -0.0, POSITIVE_INFINITY, NEGATIVE_INFINITY, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_tanh(xa[i]), tanhfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_tanh(xa[i]), tanhfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "asinh denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, +0.0, -0.0, POSITIVE_INFINITY, NEGATIVE_INFINITY, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_asinh(xa[i]), asinhfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_asinh(xa[i]), asinhfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "acosh denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, +0.0, -0.0, 1.0, POSITIVE_INFINITY, NEGATIVE_INFINITY, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_acosh(xa[i]), acoshfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_acosh(xa[i]), acoshfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "atanh denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, +0.0, -0.0, 1.0, -1.0, POSITIVE_INFINITY, NEGATIVE_INFINITY, 10000, -10000 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_atanh(xa[i]), atanhfr(xa[i]))) {
+	fprintf(stderr, "\nxa = %.20g, d = %.20g, c = %.20g", xa[i], child_atanh(xa[i]), atanhfr(xa[i]));
+	success = false;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "sqrt denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, +0.0, -0.0, -1.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_sqrt(xa[i]), sqrtfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_sqrt(xa[i]), sqrtfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+#endif
+
+  {
+    fprintf(stderr, "cbrt_u1 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, +0.0, -0.0 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_cbrt(xa[i]), cbrtfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_cbrt(xa[i]), cbrtfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+#if 0
+  {
+    fprintf(stderr, "exp2 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_exp2(xa[i]), exp2fr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_exp2(xa[i]), exp2fr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "exp10 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_exp10(xa[i]), exp10fr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_exp10(xa[i]), exp10fr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "expm1 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_expm1(xa[i]), expm1fr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_expm1(xa[i]), expm1fr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "log10 denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, 0, -1 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_log10(xa[i]), log10fr(xa[i]))) {
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  {
+    fprintf(stderr, "log1p denormal/nonnumber test ... ");
+
+    double xa[] = { NAN, POSITIVE_INFINITY, NEGATIVE_INFINITY, 0, -1, -2 };
+
+    boolean success = true;
+    for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
+      if (!cmpDenorm(child_log1p(xa[i]), log1pfr(xa[i]))) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", xa[i], child_log1p(xa[i]), log1pfr(xa[i]));
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+
+  fprintf(stderr, "ldexp denormal/nonnumber test ... ");
+
+  {
+    boolean success = true;
+    for(i=-10000;i<=10000 && success;i++) {
+      double d = child_ldexp(1.0, i);
+      double c = ldexp(1.0, i);
+
+      boolean pass = (isfinite(c) && d == c) || cmpDenorm(c, d);
+      if (!pass) {
+	fprintf(stderr, "xa = %.20g, d = %.20g, c = %.20g\n", (double)i, d, c);
+	success = false;
+	break;
+      }
+    }
+
+    showResult(success);
+  }
+#endif
+
+  //
+
+  fprintf(stderr, "\nAccuracy test (max error in ulp)\n");
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_sin(d);
+      long double c = sinlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SIN;
+      }
+    }
+
+    for(d = -10000000;d < 10000000;d += 200.1) {
+      double q = child_sin(d);
+      long double c = sinlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SIN;
+      }
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      double start = u2d(d2u(M_PI_4 * i)-20);
+      double end = u2d(d2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2d(d2u(d)+1)) {
+	double q = child_sin(d);
+	long double c = sinlfr(d);
+	double u = countULP(q, c);
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	  goto STOP_SIN;
+	}
+      }
+    }
+
+  STOP_SIN:
+
+    fprintf(stderr, "sin_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_cos(d);
+      long double c = coslfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = -10000000;d < 10000000;d += 200.1) {
+      double q = child_cos(d);
+      long double c = coslfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      double start = u2d(d2u(M_PI_4 * i)-20);
+      double end = u2d(d2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2d(d2u(d)+1)) {
+	double q = child_cos(d);
+	long double c = coslfr(d);
+	double u = countULP(q, c);
+	max = fmax(max, u);
+      }
+    }
+
+    fprintf(stderr, "cos_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double2 q = child_sincos(d);
+      long double c = sinlfr(d);
+      double u = fabs((q.x - c) / ulp(c));
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q.x, (double)c, d, (double)ulp(c));
+	goto STOP_SIN2;
+      }
+    }
+
+    for(d = -10000000;d < 10000000;d += 200.1) {
+      double2 q = child_sincos(d);
+      long double c = sinlfr(d);
+      double u = fabs((q.x - c) / ulp(c));
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q.x, (double)c, d, (double)ulp(c));
+	goto STOP_SIN2;
+      }
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      double start = u2d(d2u(M_PI_4 * i)-20);
+      double end = u2d(d2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2d(d2u(d)+1)) {
+	double2 q = child_sincos(d);
+	long double c = sinlfr(d);
+	double u = fabs((q.x - c) / ulp(c));
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q.x, (double)c, d, (double)ulp(c));
+	  goto STOP_SIN2;
+	}
+      }
+    }
+
+  STOP_SIN2:
+
+    fprintf(stderr, "sin in sincos_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double2 q = child_sincos(d);
+      long double c = coslfr(d);
+      double u = fabs((q.y - c) / ulp(c));
+      max = fmax(max, u);
+    }
+
+    for(d = -10000000;d < 10000000;d += 200.1) {
+      double2 q = child_sincos(d);
+      long double c = coslfr(d);
+      double u = fabs((q.y - c) / ulp(c));
+      max = fmax(max, u);
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      double start = u2d(d2u(M_PI_4 * i)-20);
+      double end = u2d(d2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2d(d2u(d)+1)) {
+	double2 q = child_sincos(d);
+	long double c = coslfr(d);
+	double u = fabs((q.y - c) / ulp(c));
+	max = fmax(max, u);
+      }
+    }
+
+    fprintf(stderr, "cos in sincos_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_tan(d);
+      long double c = tanlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = -10000000;d < 10000000;d += 200.1) {
+      double q = child_tan(d);
+      long double c = tanlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    int i;
+
+    for(i=1;i<10000;i++) {
+      double start = u2d(d2u(M_PI_4 * i)-20);
+      double end = u2d(d2u(M_PI_4 * i)+20);
+
+      for(d = start;d <= end;d = u2d(d2u(d)+1)) {
+	double q = child_tan(d);
+	long double c = tanlfr(d);
+	double u = countULP(q, c);
+	max = fmax(max, u);
+      }
+    }
+
+    fprintf(stderr, "tan_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -1;d < 1;d += 0.00002) {
+      double q = child_asin(d);
+      long double c = asinlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ASIN;
+      }
+    }
+
+  STOP_ASIN:
+
+    fprintf(stderr, "asin_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -1;d < 1;d += 0.00002) {
+      double q = child_acos(d);
+      long double c = acoslfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ACOS;
+      }
+    }
+
+  STOP_ACOS:
+
+    fprintf(stderr, "acos_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_atan(d);
+      long double c = atanlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %g, d = %g\n", q, d);
+	goto STOP_ATAN;
+      }
+    }
+
+    for(d = -10000;d < 10000;d += 0.2) {
+      double q = child_atan(d);
+      long double c = atanlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %g, d = %g\n", q, d);
+	goto STOP_ATAN;
+      }
+    }
+
+  STOP_ATAN:
+
+    fprintf(stderr, "atan_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double x, y, max = 0;
+
+    for(y = -10;y < 10;y += 0.05) {
+      for(x = -10;x < 10;x += 0.05) {
+	double q = child_atan2(y, x);
+	long double c = atan2lfr(y, x);
+	double u = countULP(q, c);
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %g, y = %g, x = %g\n", q, y, x);
+	  goto STOP_ATAN2;
+	}
+      }
+    }
+
+    for(y = -100;y < 100;y += 0.51) {
+      for(x = -100;x < 100;x += 0.51) {
+	double q = child_atan2(y, x);
+	long double c = atan2lfr(y, x);
+	double u = countULP(q, c);
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %g, y = %g, x = %g\n", q, y, x);
+	  goto STOP_ATAN2;
+	}
+      }
+    }
+
+  STOP_ATAN2:
+
+    fprintf(stderr, "atan2_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = 0.0001;d < 10;d += 0.0001) {
+      double q = child_log(d);
+      long double c = loglfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = 0.0001;d < 10000;d += 0.1) {
+      double q = child_log(d);
+      long double c = loglfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    int i;
+    for(i = -1000;i <= 1000;i++) {
+      d = pow(2.1, i);
+      double q = child_log(d);
+      long double c = loglfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    fprintf(stderr, "log_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+#if 0
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_exp(d);
+      long double c = explfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = -1000;d < 1000;d += 0.02) {
+      double q = child_exp(d);
+      long double c = explfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    fprintf(stderr, "exp : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+#endif
+
+#if 0
+  {
+    double x, y, max = 0;
+
+    for(y = 0.1;y < 100;y += 0.2) {
+      for(x = -100;x < 100;x += 0.2) {
+	double q = child_pow(x, y);
+	long double c = powlfr(x, y);
+	double u = countULP(q, c);
+	max = fmax(max, u);
+	if (u > 1000) {
+	  fprintf(stderr, "q = %g, x = %g, y = %g\n", q, x, y);
+	  goto STOP_POW;
+	}
+      }
+    }
+
+  STOP_POW:
+
+    fprintf(stderr, "pow : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+#if 0
+  {
+    double d, max = 0;
+
+    for(d = 0;d < 20000;d += 0.2) {
+      double q = child_sqrt(d);
+      long double c = sqrtlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SQRT;
+      }
+    }
+
+  STOP_SQRT:
+
+    fprintf(stderr, "sqrt : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+#endif
+
+  {
+    double d, max = 0;
+
+    for(d = -10000;d < 10000;d += 0.2) {
+      double q = child_cbrt(d);
+      long double c = cbrtlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_CBRT;
+      }
+    }
+
+    int i;
+    for(i = -1000;i <= 1000;i++) {
+      d = pow(2.1, i);
+      double q = child_cbrt(d);
+      long double c = cbrtlfr(d);
+       double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_CBRT;
+      }
+    }
+
+  STOP_CBRT:
+
+    fprintf(stderr, "cbrt_u1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_sinh(d);
+      long double c = sinhlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SINH;
+      }
+    }
+
+    for(d = -1000;d < 1000;d += 0.02) {
+      double q = child_sinh(d);
+      long double c = sinhlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_SINH;
+      }
+    }
+
+  STOP_SINH:
+
+    fprintf(stderr, "sinh : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_cosh(d);
+      long double c = coshlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_COSH;
+      }
+    }
+
+    for(d = -1000;d < 1000;d += 0.02) {
+      double q = child_cosh(d);
+      long double c = coshlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_COSH;
+      }
+    }
+
+  STOP_COSH:
+
+    fprintf(stderr, "cosh : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_tanh(d);
+      long double c = tanhlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_TANH;
+      }
+    }
+
+    for(d = -1000;d < 1000;d += 0.02) {
+      double q = child_tanh(d);
+      long double c = tanhlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_TANH;
+      }
+    }
+
+  STOP_TANH:
+
+    fprintf(stderr, "tanh : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_asinh(d);
+      long double c = asinhlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ASINH;
+      }
+    }
+
+    for(d = -1000;d < 1000;d += 0.02) {
+      double q = child_asinh(d);
+      long double c = asinhlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ASINH;
+      }
+    }
+
+  STOP_ASINH:
+
+    fprintf(stderr, "asinh : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = 1;d < 10;d += 0.0002) {
+      double q = child_acosh(d);
+      long double c = acoshlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ACOSH;
+      }
+    }
+
+    for(d = 1;d < 1000;d += 0.02) {
+      double q = child_acosh(d);
+      long double c = acoshlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ACOSH;
+      }
+    }
+
+  STOP_ACOSH:
+
+    fprintf(stderr, "acosh : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_atanh(d);
+      long double c = atanhlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ATANH;
+      }
+    }
+
+    for(d = -1000;d < 1000;d += 0.02) {
+      double q = child_atanh(d);
+      long double c = atanhlfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+      if (u > 1000) {
+	fprintf(stderr, "q = %.20g\nc = %.20g\nd = %.20g\nulp = %g\n", q, (double)c, d, (double)ulp(c));
+	goto STOP_ATANH;
+      }
+    }
+
+  STOP_ATANH:
+
+    fprintf(stderr, "atanh : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+#endif
+
+#if 0
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_exp2(d);
+      long double c = exp2lfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = -1000;d < 1000;d += 0.02) {
+      double q = child_exp2(d);
+      long double c = exp2lfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    fprintf(stderr, "exp2 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_exp10(d);
+      long double c = exp10lfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = -300;d < 300;d += 0.01) {
+      double q = child_exp10(d);
+      long double c = exp10lfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    fprintf(stderr, "exp10 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = -10;d < 10;d += 0.0002) {
+      double q = child_expm1(d);
+      long double c = expm1lfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = -1000;d < 1000;d += 0.02) {
+      double q = child_expm1(d);
+      long double c = expm1lfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = 0;d < 300;d += 0.02) {
+      double d2 = pow(10, -d);
+      double q = child_expm1(d2);
+      long double c = expm1lfr(d2);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = 0;d < 300;d += 0.02) {
+      double d2 = -pow(10, -d);
+      double q = child_expm1(d2);
+      long double c = expm1lfr(d2);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    fprintf(stderr, "expm1 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = 0.0001;d < 10;d += 0.0001) {
+      double q = child_log10(d);
+      long double c = log10lfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = 0.0001;d < 10000;d += 0.1) {
+      double q = child_log10(d);
+      long double c = log10lfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    fprintf(stderr, "log10 : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+
+  {
+    double d, max = 0;
+
+    for(d = 0.0001;d < 10;d += 0.0001) {
+      double q = child_log1p(d);
+      long double c = log1plfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = 0.0001;d < 10000;d += 0.1) {
+      double q = child_log1p(d);
+      long double c = log1plfr(d);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = 0;d < 300;d += 0.02) {
+      double d2 = pow(10, -d);
+      double q = child_log1p(d2);
+      long double c = log1plfr(d2);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    for(d = 0;d < 300;d += 0.02) {
+      double d2 = -pow(10, -d);
+      double q = child_log1p(d2);
+      long double c = log1plfr(d2);
+      double u = countULP(q, c);
+      max = fmax(max, u);
+    }
+
+    fprintf(stderr, "log1p : %lf ... ", max);
+
+    showResult(max < 1);
+  }
+#endif
+}
+
+/* Tester entry point: argv[1..] is forwarded as the child's command line; returns 0 iff all tests passed. */
+int main(int argc, char **argv) {
+  int i;
+  if (argc < 2) {  /* no child command: argv2[0] would be NULL, and argc == 0 would write argv2[-1] */
+    fprintf(stderr, "Usage : %s <child command> [args...]\n", argc > 0 ? argv[0] : "tester");
+    return -1;
+  }
+
+  char *argv2[argc];  /* argc-1 forwarded arguments plus the terminating NULL */
+  for(i=1;i<argc;i++) argv2[i-1] = argv[i];
+  argv2[argc-1] = NULL;
+
+  mpfr_set_default_prec(128);  /* default MPFR precision, in bits */
+  startChild(argv2[0], argv2);  /* NOTE(review): presumably spawns the implementation under test -- confirm */
+  do_test();
+
+  if (allTestsPassed) {
+    fprintf(stderr, "\n\n*** All tests passed\n");
+    return 0;
+  }
+  fprintf(stderr, "\n\n*** There were errors in some tests\n");
+  return -1;
+}