diff --git a/Makefile b/Makefile index 703faec3..97a75d4e 100644 --- a/Makefile +++ b/Makefile @@ -77,6 +77,7 @@ distclean : clean +"$(MAKE)" --directory=include distclean +"$(MAKE)" --directory=lib distclean +"$(MAKE)" --directory=src distclean + rm -rf debian/libsleef3 # rm -f debian/debhelper-build-stamp # rm -f debian/files # rm -f debian/libsleef3.debhelper.log diff --git a/README.md b/README.md index 28f76782..acbfcab3 100644 --- a/README.md +++ b/README.md @@ -1,117 +1,15 @@ [![Build Status](https://travis-ci.org/shibatch/sleef.svg?branch=master)](https://travis-ci.org/shibatch/sleef) In this library, functions for evaluating some elementary functions -are implemented. The algorithm is intended for efficient evaluation -utilizing SIMD instruction sets like SSE or AVX, but it is also fast -using usual scalar operations. - -The package contains a few directories in which implementation in the -corresponding languages are contained. You can run "make test" in -order to test the functions in each directory. +are implemented. The library also includes DFT subroutines. The software is distributed under the Boost Software License, Version 1.0. See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt. Contributions to this project are accepted under the same license. -Copyright Naoki Shibata and contributors 2010 - 2017. - - -Main download page : http://shibatch.sourceforge.net/ - --- - -Compiling library with Microsoft Visual C++ - -Below is the instruction for compiling SLEEF with Microsoft Visual -C++. Only 64bit architecture is supported. Only DLLs are built. - - -1. Install Visual Studio 2015 or later, along with Cygwin -2. Copy vcvars64.bat to a working directory. - This file is usually in the following directory. - C:\Program Files (x86)\MSVCCommunity2015\VC\bin\amd64 -3. Add the following line at the end of vcvars64.bat - if "%SHELL%"=="/bin/bash" c:\cygwin64\bin\bash.exe -4. Execute vcvars64.bat within the Cygwin bash shell. -5. 
Go to sleef-3.X directory -6. Run "make -f Makefile.vc" - --- - - -History - -3.0 -* New API is defined -* Functions for DFT are added -* sincospi functions are added -* gencoef now supports single, extended and quad precision in addition to double precision -* Linux, Windows and Mac OS X are supported -* GCC, Clang, Intel Compiler, Microsoft Visual C++ are supported -* The library can be compiled as DLLs -* Files needed for creating a debian package are now included - - -2.121 -* Renamed LICENSE_1_0.txt to LICENSE.txt -2.120 -* Relicensed to Boost Software License Version 1.0 - -2.110 -* The valid range of argument is extended for trig functions -* Specification of each functions regarding to the domain and accuracy is added -* A coefficient generation tool is added -* New testing tools are introduced -* Following functions returned incorrect values when the argument is very large or small : exp, pow, asinh, acosh -* SIMD xsin and xcos returned values more than 1 when FMA is enabled -* Pure C cbrt returned incorrect values when the argument is negative -* tan_u1 returned values with more than 1 ulp of error on rare occasions -* Removed support for Java language(because no one seems using this) - -2.100 Added support for AVX-512F and Clang Extended Vectors. - -2.90 Added ilogbf. All the reported bugs(listed below) are fixed. -* Log function returned incorrect values when the argument is very small. -* Signs of returned values were incorrect when the argument is signed zero. -* Tester incorrectly counted ULP in some cases. -* ilogb function returned incorrect values in some cases. - -2.80 Added support for ARM NEON. Added higher accuracy single -precision functions : sinf_u1, cosf_u1, sincosf_u1, tanf_u1, asinf_u1, -acosf_u1, atanf_u1, atan2f_u1, logf_u1, and cbrtf_u1. - -2.70 Added higher accuracy functions : sin_u1, cos_u1, sincos_u1, -tan_u1, asin_u1, acos_u1, atan_u1, atan2_u1, log_u1, and -cbrt_u1. 
These functions evaluate the corresponding function with at -most 1 ulp of error. - -2.60 Added the remaining single precision functions : powf, sinhf, -coshf, tanhf, exp2f, exp10f, log10f, log1pf. Added support for FMA4 -(for AMD Bulldozer). Added more test cases. Fixed minor bugs (which -degraded accuracy in some rare cases). - -2.50 Added support for AVX2. SLEEF now compiles with ICC. - -2.40 Fixed incorrect denormal/nonnumber handling in ldexp, ldexpf, -sinf and cosf. Removed support for Go language. - -2.31 Added sincosf. - -2.30 Added single precision functions : sinf, cosf, tanf, asinf, -acosf, atanf, logf, expf, atan2f and cbrtf. - -2.20 Added exp2, exp10, expm1, log10, log1p, and cbrt. - -2.10 asin() and acos() are back. Added ilogb() and ldexp(). Added -hyperbolic functions. Eliminated dependency on frexp, ldexp, fabs, -isnan and isinf. - -2.00 All of the algorithm has been updated. Both accuracy and speed -are improved since version 1.10. Denormal number handling is also -improved. - -1.10 AVX support is added. Accuracy tester is added. +Copyright Naoki Shibata and contributors 2010 - 2017. 
-1.00 Initial release +Main Page : http://sleef.org/ +GitHub Repo : https://github.com/shibatch/sleef diff --git a/config.mk b/config.mk index 844fd07f..957e68cb 100644 --- a/config.mk +++ b/config.mk @@ -1,5 +1,5 @@ export SOVERSION=3 -export MINORVERSION=0 +export MINORVERSION=1 export MAXBUTWIDTH=3 diff --git a/debian/changelog b/debian/changelog index 58993818..307ca863 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,74 +1,5 @@ -libsleef3 (3.1-2) xenial; urgency=medium - - * hypot is now ok - * Added nextafter, frexp and fmod to sleedp.c - - -- Naoki Shibata Tue, 07 Feb 2017 19:24:12 +0900 - -libsleef3 (3.1-1-ubuntu1) xenial; urgency=medium - - * Fixed bug in atan2_u1 - * Tester2 has better coverage - - -- Naoki Shibata Tue, 07 Feb 2017 19:24:12 +0900 - -libsleef3 (3.1-0-0) xenial; urgency=medium - - * This is version 3.1 - - -- Naoki Shibata Tue, 07 Feb 2017 19:24:12 +0900 - -libsleef3 (3.0-4-ubuntu1) xenial; urgency=medium - - * Hello - - -- Naoki Shibata Tue, 07 Feb 2017 19:24:12 +0900 - -libsleef3 (3.0-3-ubuntu0) xenial; urgency=medium - - * Yeah - - -- Naoki Shibata Tue, 07 Feb 2017 19:17:20 +0900 - -libsleef3 (3.0-1ubuntu9) xenial; urgency=medium - - * Hello, hello - - -- Naoki Shibata Tue, 07 Feb 2017 18:43:05 +0900 - -libsleef3 (3.0-1ubuntu8) xenial; urgency=medium - - * It rejects saying contents are same - - -- Naoki Shibata Tue, 07 Feb 2017 18:36:54 +0900 - -libsleef3 (3.0-1ubuntu7) xenial; urgency=medium - - * Still trying - - -- Naoki Shibata Tue, 07 Feb 2017 18:20:34 +0900 - -libsleef3 (3.0-1ubuntu6) xenial; urgency=medium - - * Still trying - * Changed e-mail address - - -- Naoki Shibata Tue, 07 Feb 2017 18:19:47 +0900 - -libsleef3 (3.0-1ubuntu3) xenial; urgency=medium - - * It did not upload correctly. I don't know why. 
- - -- Naoki Shibata Tue, 07 Feb 2017 18:11:19 +0900 - -libsleef3 (3.0-1ubuntu1) xenial; urgency=medium - - * Modified dependency - - -- Naoki Shibata Tue, 07 Feb 2017 17:52:12 +0900 - -libsleef3 (3.0-1) xenial; urgency=medium +libsleef3 (3.1-1) xenial; urgency=medium * Initial release. - -- Naoki Shibata Sat, 31 Dec 2016 02:45:16 +0900 + -- Naoki Shibata Sat, 1 Jul 2017 12:00:00 +0900 diff --git a/debian/compat b/debian/compat index ec635144..f599e28b 100644 --- a/debian/compat +++ b/debian/compat @@ -1 +1 @@ -9 +10 diff --git a/debian/rules b/debian/rules index cbe925d7..bfc48c7f 100644 --- a/debian/rules +++ b/debian/rules @@ -1,3 +1,6 @@ #!/usr/bin/make -f %: dh $@ + +override_dh_auto_test: + echo Skipping test diff --git a/doc/SPECIFICATION b/doc/SPECIFICATION deleted file mode 100644 index e016d759..00000000 --- a/doc/SPECIFICATION +++ /dev/null @@ -1,129 +0,0 @@ - -Below is the specification of each function. - -All functions should handle nonnumber arguments correctly. If there is -an domain error, a non-number is returned according to the -specification of the math functions in C language, unless otherwise -noted. - -RANGEMAXD, RANGEMAXS and RANGEMAXS2 are 1e+14, 5e+9 and 39000, -respectively. - - -xsin, xcos, xsincos : If the argument is in [-RANGEMAXD, RANGEMAXD], -these functions return values with 3.5 ULPs of error bound. If the -argument is out of this range, the functions return arbitrary -values. The returned value is in [-1, 1], regardless of the argument. - -xsinf, xcosf, xsincosf : If the argument is in [-RANGEMAXS2, -RANGEMAXS2], these functions return values with 3.5 ULPs of error -bound. If the argument is out of this range, the functions return -arbitrary values. The returned value is in [-1, 1], regardless of the -argument. - -xsin_u1, xcos_u1, xsincos_u1 : If the argument is in [-RANGEMAXD, -RANGEMAXD], these functions return values with 1 ULP of error -bound. If the argument is out of this range, the functions return -arbitrary values. 
The returned value is in [-1, 1], regardless of the -argument. - -xsinf_u1, xcosf_u1, xsincosf_u1 : If the argument is in [-RANGEMAXS, -RANGEMAXS], these functions return values with 1 ULP of error -bound. If the argument is out of this range, the functions return -arbitrary values. The returned value is in [-1, 1], regardless of the -argument. - -xtan : If the argument is in [-1e+7, 1e+7], xtan returns a value with -5 ULP of error bound. If the argument is in [-RANGEMAXD, RANGEMAXD], -xtan returns value with 3.5 ULPs of error bound. If the argument is -out of this range, xtan returns an arbitrary value except NaN. - -xtanf : If the argument is in [-RANGEMAXS2, RANGEMAXS2], xtanf returns a -value with 3.5 ULPs of error bound. If the argument is out of this -range, xtanf returns an arbitrary value except NaN. - -xtan_u1 : If the argument is in [-RANGEMAXD, RANGEMAXD], xtan_u1 -should return value with 1 ULP of error bound. If the argument is out -of this range, xtan_u1 returns an arbitrary value except NaN. - -xtanf_u1 : If the argument is in [-RANGEMAXS, RANGEMAXS], xtanf_u1 -should return value with 1 ULP of error bound. If the argument is out -of this range, xtanf_u1 returns an arbitrary value except NaN. - -xlog, xlogf : These functions should return values with 3.5 ULP of -error bound. - -xlog_u1, xlogf_u1, xlog10, xlog10f : These functions should return -values with 1 ULP of error bound. - -xlog1p : If the argument is in [-1, 1e+307], this function should -return a value with 1 ULP of error bound. If the argument is higher -than 1e+307, this function returns either +Inf or a value with 1 -ULP of error bound. - -xlog1pf : If the argument is in [-1, 1e+38], this function should -return a value with 1 ULP of error bound. If the argument is higher -than 1e+38, this function returns either +Inf or a value with 1 ULP -of error bound. - -xexp, xexpf, xexp2, xexp2f, xexp10, xexp10f, xexpm1, xexpm1f : These -functions should return values with 1 ULP of error bound. 
- -xpow, xpowf : These functions should return values with 1 ULP of error -bound. - -xcbrt, xcbrtf : These functions should return values with 3.5 ULPs of -error bound. - -xcbrt_u1, xcbrtf_u1 : These functions should return values with 1 ULP -of error bound. - -xasin, xasinf, xacos, xacosf, xatan, xatanf, xatan2, xatan2f : These -functions should return values with 3.5 ULPs of error bound. - -xasin_u1, xasinf_u1, xacos_u1, xacosf_u1, xatan_u1, xatanf_u1 : These -functions should return values with 1 ULP of error bound. - -xatan2_u1 : This function should return a value with max(1 ULP, -DBL_MIN) of error bound. - -xatan2f_u1 : This function should return a value with max(1 ULP, -FLT_MIN) of error bound. - -xsinh, xcosh : If the argument is in [-709, 709], these functions -should return values with 1 ULP of error bound. If the argument is out -of this range, these functions return either an infinity with -correct sign or a value with 1 ULP of error bound. - -xsinhf, xcoshf : If the argument is in [-88.5, 88.5], these functions -should return values with 1 ULP of error bound. If the argument is out -of this range, these functions return either an infinity with -correct sign or a value with 1 ULP of error bound. - -xtanh, xatanh : These functions should return values with 1 ULP of -error bound. - -xtanhf, xatanhf : These functions should return values with 1.0001 ULP -of error bound. - -xasinh : If the argument is in [-sqrt(DBL_MAX), sqrt(DBL_MAX)], this -function should return a value with 1 ULP of error bound. If the -argument is out of this range, this function returns either an -infinity with correct sign or a value with 1 ULP of error bound. - -xasinhf : If the argument is in [-sqrt(FLT_MAX), sqrt(FLT_MAX)], this -function should return a value with 1.0001 ULP of error bound. If the -argument is out of this range, this function returns either an -infinity with correct sign or a value with 1.0001 ULP of error bound. 
- -xacosh : If the argument is in [-sqrt(DBL_MAX), sqrt(DBL_MAX)], this -function should return a value with 1 ULP of error bound. If the -argument is higher than sqrt(DBL_MAX), this function returns either -an infinity or a value with 1 ULP of error bound. - -xacoshf : If the argument is in [-sqrt(FLT_MAX), sqrt(FLT_MAX)], this -function should return a value with 1.0001 ULP of error bound. If the -argument is higher than sqrt(FLT_MAX), this function returns either -an infinity or a value with 1.0001 ULP of error bound. - - diff --git a/doc/html/aarch32.xhtml b/doc/html/aarch32.xhtml new file mode 100644 index 00000000..d48dbc6e --- /dev/null +++ b/doc/html/aarch32.xhtml @@ -0,0 +1,1243 @@ + + + + + + + + + + +SLEEF Documentation + + +

SLEEF Documentation - Math library reference

+ +

Table of contents

+ + + +

Data types for AArch32 architecture

+ +

Sleef_float32x4_t_2

+ +

Description

+ +

+Sleef_float32x4_t_2 is a data type for storing two float32x4_t values, +which is defined in sleef.h as follows: +

+ +
typedef struct {
+  float32x4_t x, y;
+} Sleef_float32x4_t_2;
+
+ + +

Trigonometric Functions

+ +

Vectorized single precision sine function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_sinf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sinf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision sine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_sinf4_u35neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sinf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision cosine function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_cosf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_cosf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision cosine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_cosf4_u35neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_cosf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized double precision combined sine and cosine function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+Sleef_float64x2_t_2 Sleef_sincosd2_u10neon(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sincos_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision combined sine and cosine function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+Sleef_float32x4_t_2 Sleef_sincosf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sincosf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision combined sine and cosine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+Sleef_float32x4_t_2 Sleef_sincosf4_u35neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sincosf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision sine function with 0.506 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_sinpif4_u05neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sinpif_u05. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision cosine function with 0.506 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_cospif4_u05neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_cospif_u05. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision combined sine and cosine function with 0.506 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+Sleef_float32x4_t_2 Sleef_sincospif4_u05neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sincospif_u05. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision combined sine and cosine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+Sleef_float32x4_t_2 Sleef_sincospif4_u35neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sincospif_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision tangent function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_tanf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_tanf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision tangent function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_tanf4_u35neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_tanf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +

Power, exponential, and logarithmic function

+ +

Vectorized single precision power function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_powf4_u10neon(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_powf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision natural logarithmic function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_logf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_logf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision natural logarithmic function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_logf4_u35neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_logf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision base-10 logarithmic function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_log10f4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_log10f_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision logarithm of one plus argument with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_log1pf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_log1pf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision base-e exponential function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_expf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_expf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision base-2 exponential function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_exp2f4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_exp2f_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision base-10 exponential function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_exp10f4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_exp10f_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision base-e exponential function minus 1 with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_expm1f4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_expm1f_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision square root function with 0.5001 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_sqrtf4_u05neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sqrtf_u05. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision square root function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_sqrtf4_u35neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sqrtf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision cubic root function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_cbrtf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_cbrtf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision cubic root function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_cbrtf4_u35neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_cbrtf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision 2D Euclidean distance function with 0.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_hypotf4_u05neon(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_hypotf_u05. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision 2D Euclidean distance function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_hypotf4_u35neon(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_hypotf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ + +

Inverse Trigonometric Functions

+ +

Vectorized single precision arc sine function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_asinf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_asinf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision arc sine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_asinf4_u35neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_asinf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision arc cosine function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_acosf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_acosf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision arc cosine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_acosf4_u35neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_acosf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision arc tangent function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_atanf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_atanf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision arc tangent function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_atanf4_u35neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_atanf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision arc tangent function of two variables with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_atan2f4_u10neon(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_atan2f_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision arc tangent function of two variables with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_atan2f4_u35neon(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_atan2f_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ + + +

Hyperbolic function and inverse hyperbolic function

+ +

Vectorized single precision hyperbolic sine function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_sinhf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sinhf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision hyperbolic cosine function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_coshf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_coshf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision hyperbolic tangent function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_tanhf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_tanhf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision inverse hyperbolic sine function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_asinhf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_asinhf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision inverse hyperbolic cosine function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_acoshf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_acoshf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision inverse hyperbolic tangent function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_atanhf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_atanhf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ + +

Error and gamma function

+ +

Vectorized single precision error function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_erff4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_erff_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision complementary error function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_erfcf4_u15neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_erfcf_u15. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision gamma function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_tgammaf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_tgammaf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision log gamma function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_lgammaf4_u10neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_lgammaf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ + +

Nearest integer function

+ +

Vectorized single precision function for rounding to integer towards zero

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_truncf4_neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_truncf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision function for rounding to integer towards negative infinity

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_floorf4_neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_floorf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision function for rounding to integer towards positive infinity

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_ceilf4_neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_ceilf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision function for rounding to nearest integer

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_roundf4_neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_roundf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision function for rounding to nearest integer

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_rintf4_neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_rintf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ + +

Other function

+ +

Vectorized single precision function for fused multiply-accumulation

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_fmaf4_neon(float32x4_t a, float32x4_t b, float32x4_t c);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fmaf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+ +

Vectorized single precision FP remainder

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_fmodf4_neon(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fmodf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision function for obtaining fractional component of an FP number

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_frfrexpf4_neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_frfrexpf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+ +

Vectorized single precision signed integral and fractional values

+ +

Synopsis

+ +

+#include <sleef.h>
+
+Sleef_float32x4_t_2 Sleef_modff4_neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_modff. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision function for calculating the absolute value

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_fabsf4_neon(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fabsf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision function for copying signs

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_copysignf4_neon(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_copysignf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision function for determining maximum of two values

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_fmaxf4_neon(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fmaxf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision function for determining minimum of two values

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_fminf4_neon(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fminf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision function to calculate positive difference of two values

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_fdimf4_neon(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fdimf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ +
+

Vectorized single precision function for obtaining the next representable FP value

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_nextafterf4_neon(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_nextafterf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

+ + + + + diff --git a/doc/html/aarch64.xhtml b/doc/html/aarch64.xhtml new file mode 100644 index 00000000..e57e300c --- /dev/null +++ b/doc/html/aarch64.xhtml @@ -0,0 +1,2436 @@ + + + + + + + + + + +SLEEF Documentation + + +

SLEEF Documentation - Math library reference

+ +

Table of contents

+ + + +

Data types for AArch64 architecture

+ +

Sleef_float32x4_t_2

+ +

Description

+ +

+Sleef_float32x4_t_2 is a data type for storing two float32x4_t values, +which is defined in sleef.h as follows: +

+ +
typedef struct {
+  float32x4_t x, y;
+} Sleef_float32x4_t_2;
+
+ +
+ +

Sleef_float64x2_t_2

+ +

Description

+ +

+Sleef_float64x2_t_2 is a data type for storing two float64x2_t values, +which is defined in sleef.h as follows: +

+ +
typedef struct {
+  float64x2_t x, y;
+} Sleef_float64x2_t_2;
+
+ +

Trigonometric Functions

+ +

Vectorized double precision sine function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_sind2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sin_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision sine function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_sinf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sinf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision sine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_sind2_u35advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sin_u35 with the same accuracy specification. +

+ +
+

Vectorized single precision sine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_sinf4_u35advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sinf_u35 with the same accuracy specification. +

+ +
+

Vectorized double precision cosine function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_cosd2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_cos_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision cosine function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_cosf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_cosf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision cosine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_cosd2_u35advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_cos_u35 with the same accuracy specification. +

+ +
+

Vectorized single precision cosine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_cosf4_u35advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_cosf_u35 with the same accuracy specification. +

+ +
+

Vectorized double precision combined sine and cosine function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+Sleef_float64x2_t_2 Sleef_sincosd2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sincos_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision combined sine and cosine function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+Sleef_float32x4_t_2 Sleef_sincosf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sincosf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision combined sine and cosine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+Sleef_float64x2_t_2 Sleef_sincosd2_u35advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sincos_u35 with the same accuracy specification. +

+ +
+

Vectorized single precision combined sine and cosine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+Sleef_float32x4_t_2 Sleef_sincosf4_u35advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sincosf_u35 with the same accuracy specification. +

+ +
+

Vectorized double precision sine function with 0.506 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_sinpid2_u05advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sinpi_u05 with the same accuracy specification. +

+ +
+

Vectorized single precision sine function with 0.506 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_sinpif4_u05advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sinpif_u05 with the same accuracy specification. +

+ +
+

Vectorized double precision cosine function with 0.506 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_cospid2_u05advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_cospi_u05 with the same accuracy specification. +

+ +
+

Vectorized single precision cosine function with 0.506 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_cospif4_u05advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_cospif_u05 with the same accuracy specification. +

+ +
+

Vectorized double precision combined sine and cosine function with 0.506 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+Sleef_float64x2_t_2 Sleef_sincospid2_u05advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sincospi_u05 with the same accuracy specification. +

+ +
+

Vectorized single precision combined sine and cosine function with 0.506 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+Sleef_float32x4_t_2 Sleef_sincospif4_u05advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sincospif_u05 with the same accuracy specification. +

+ +
+

Vectorized double precision combined sine and cosine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+Sleef_float64x2_t_2 Sleef_sincospid2_u35advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sincospi_u35 with the same accuracy specification. +

+ +
+

Vectorized single precision combined sine and cosine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+Sleef_float32x4_t_2 Sleef_sincospif4_u35advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sincospif_u35 with the same accuracy specification. +

+ +
+

Vectorized double precision tangent function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_tand2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_tan_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision tangent function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_tanf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_tanf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision tangent function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_tand2_u35advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_tan_u35 with the same accuracy specification. +

+ +
+

Vectorized single precision tangent function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_tanf4_u35advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_tanf_u35 with the same accuracy specification. +

+ +

Power, exponential, and logarithmic function

+ +

Vectorized double precision power function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_powd2_u10advsimd(float64x2_t a, float64x2_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_pow_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision power function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_powf4_u10advsimd(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_powf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision natural logarithmic function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_logd2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_log_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision natural logarithmic function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_logf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_logf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision natural logarithmic function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_logd2_u35advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_log_u35 with the same accuracy specification. +

+ +
+

Vectorized single precision natural logarithmic function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_logf4_u35advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_logf_u35 with the same accuracy specification. +

+ +
+

Vectorized double precision base-10 logarithmic function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_log10d2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_log10_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision base-10 logarithmic function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_log10f4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_log10f_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision logarithm of one plus argument with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_log1pd2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_log1p_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision logarithm of one plus argument with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_log1pf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_log1pf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision base-e exponential function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_expd2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_exp_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision base-e exponential function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_expf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_expf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision base-2 exponential function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_exp2d2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_exp2_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision base-2 exponential function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_exp2f4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_exp2f_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision base-10 exponential function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_exp10d2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_exp10_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision base-10 exponential function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_exp10f4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_exp10f_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision base-e exponential function minus 1 with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_expm1d2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_expm1_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision base-e exponential function minus 1 with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_expm1f4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_expm1f_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision square root function with 0.5001 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_sqrtd2_u05advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sqrt_u05 with the same accuracy specification. +

+ +
+

Vectorized single precision square root function with 0.5001 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_sqrtf4_u05advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sqrtf_u05 with the same accuracy specification. +

+ +
+

Vectorized double precision square root function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_sqrtd2_u35advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sqrt_u35 with the same accuracy specification. +

+ +
+

Vectorized single precision square root function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_sqrtf4_u35advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sqrtf_u35 with the same accuracy specification. +

+ +
+

Vectorized double precision cubic root function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_cbrtd2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_cbrt_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision cubic root function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_cbrtf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_cbrtf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision cubic root function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_cbrtd2_u35advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_cbrt_u35 with the same accuracy specification. +

+ +
+

Vectorized single precision cubic root function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_cbrtf4_u35advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_cbrtf_u35 with the same accuracy specification. +

+ +
+

Vectorized double precision 2D Euclidean distance function with 0.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_hypotd2_u05advsimd(float64x2_t a, float64x2_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_hypot_u05 with the same accuracy specification. +

+ +
+

Vectorized single precision 2D Euclidean distance function with 0.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_hypotf4_u05advsimd(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_hypotf_u05 with the same accuracy specification. +

+ +
+

Vectorized double precision 2D Euclidean distance function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_hypotd2_u35advsimd(float64x2_t a, float64x2_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_hypot_u35 with the same accuracy specification. +

+ +
+

Vectorized single precision 2D Euclidean distance function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_hypotf4_u35advsimd(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_hypotf_u35 with the same accuracy specification. +

+ + +

Inverse Trigonometric Functions

+ +

Vectorized double precision arc sine function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_asind2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_asin_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision arc sine function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_asinf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_asinf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision arc sine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_asind2_u35advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_asin_u35 with the same accuracy specification. +

+ +
+

Vectorized single precision arc sine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_asinf4_u35advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_asinf_u35 with the same accuracy specification. +

+ +
+

Vectorized double precision arc cosine function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_acosd2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_acos_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision arc cosine function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_acosf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_acosf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision arc cosine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_acosd2_u35advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_acos_u35 with the same accuracy specification. +

+ +
+

Vectorized single precision arc cosine function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_acosf4_u35advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_acosf_u35 with the same accuracy specification. +

+ +
+

Vectorized double precision arc tangent function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_atand2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_atan_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision arc tangent function with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_atanf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_atanf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision arc tangent function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_atand2_u35advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_atan_u35 with the same accuracy specification. +

+ +
+

Vectorized single precision arc tangent function with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_atanf4_u35advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_atanf_u35 with the same accuracy specification. +

+ +
+

Vectorized double precision arc tangent function of two variables with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_atan2d2_u10advsimd(float64x2_t a, float64x2_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_atan2_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision arc tangent function of two variables with 1.0 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_atan2f4_u10advsimd(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_atan2f_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision arc tangent function of two variables with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_atan2d2_u35advsimd(float64x2_t a, float64x2_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_atan2_u35 with the same accuracy specification. +

+ +
+

Vectorized single precision arc tangent function of two variables with 3.5 ULP error bound

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_atan2f4_u35advsimd(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_atan2f_u35 with the same accuracy specification. +

+ + + +

Hyperbolic function and inverse hyperbolic function

+ +

Vectorized double precision hyperbolic sine function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_sinhd2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sinh_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision hyperbolic sine function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_sinhf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_sinhf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision hyperbolic cosine function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_coshd2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_cosh_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision hyperbolic cosine function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_coshf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_coshf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision hyperbolic tangent function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_tanhd2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_tanh_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision hyperbolic tangent function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_tanhf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_tanhf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision inverse hyperbolic sine function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_asinhd2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_asinh_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision inverse hyperbolic sine function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_asinhf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_asinhf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision inverse hyperbolic cosine function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_acoshd2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_acosh_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision inverse hyperbolic cosine function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_acoshf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_acoshf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision inverse hyperbolic tangent function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_atanhd2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_atanh_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision inverse hyperbolic tangent function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_atanhf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_atanhf_u10 with the same accuracy specification. +

+ + +

Error and gamma function

+ +

Vectorized double precision error function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_erfd2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_erf_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision error function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_erff4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_erff_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision complementary error function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_erfcd2_u15advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_erfc_u15 with the same accuracy specification. +

+ +
+

Vectorized single precision complementary error function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_erfcf4_u15advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_erfcf_u15 with the same accuracy specification. +

+ +
+

Vectorized double precision gamma function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_tgammad2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_tgamma_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision gamma function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_tgammaf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_tgammaf_u10 with the same accuracy specification. +

+ +
+

Vectorized double precision log gamma function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_lgammad2_u10advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_lgamma_u10 with the same accuracy specification. +

+ +
+

Vectorized single precision log gamma function

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_lgammaf4_u10advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_lgammaf_u10 with the same accuracy specification. +

+ + +

Nearest integer function

+ +

Vectorized double precision function for rounding to integer towards zero

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_truncd2_advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_trunc with the same accuracy specification. +

+ +
+

Vectorized single precision function for rounding to integer towards zero

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_truncf4_advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_truncf with the same accuracy specification. +

+ +
+

Vectorized double precision function for rounding to integer towards negative infinity

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_floord2_advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_floor with the same accuracy specification. +

+ +
+

Vectorized single precision function for rounding to integer towards negative infinity

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_floorf4_advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_floorf with the same accuracy specification. +

+ +
+

Vectorized double precision function for rounding to integer towards positive infinity

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_ceild2_advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_ceil with the same accuracy specification. +

+ +
+

Vectorized single precision function for rounding to integer towards positive infinity

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_ceilf4_advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_ceilf with the same accuracy specification. +

+ +
+

Vectorized double precision function for rounding to nearest integer

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_roundd2_advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_round with the same accuracy specification. +

+ +
+

Vectorized single precision function for rounding to nearest integer

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_roundf4_advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_roundf with the same accuracy specification. +

+ +
+

Vectorized double precision function for rounding to nearest integer

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_rintd2_advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_rint with the same accuracy specification. +

+ +
+

Vectorized single precision function for rounding to nearest integer

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_rintf4_advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_rintf with the same accuracy specification. +

+ + +

Other function

+ +

Vectorized double precision function for fused multiply-accumulation

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_fmad2_advsimd(float64x2_t a, float64x2_t b, float64x2_t c);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fma with the same accuracy specification. +

+ +
+

Vectorized single precision function for fused multiply-accumulation

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_fmaf4_advsimd(float32x4_t a, float32x4_t b, float32x4_t c);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fmaf with the same accuracy specification. +

+ +
+ +

Vectorized double precision FP remainder

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_fmodd2_advsimd(float64x2_t a, float64x2_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fmod with the same accuracy specification. +

+ +
+

Vectorized single precision FP remainder

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_fmodf4_advsimd(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fmodf with the same accuracy specification. +

+ +
+

Vectorized double precision function for multiplying by integral power of 2

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_ldexpd2_advsimd(float64x2_t a, int32x2_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_ldexp with the same accuracy specification. +

+ +
+

Vectorized double precision function for obtaining fractional component of an FP number

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_frfrexpd2_advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_frfrexp with the same accuracy specification. +

+ +
+

Vectorized single precision function for obtaining fractional component of an FP number

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_frfrexpf4_advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_frfrexpf with the same accuracy specification. +

+ +
+

Vectorized double precision function for obtaining integral component of an FP number

+ +

Synopsis

+ +

+#include <sleef.h>
+
+int32x2_t Sleef_expfrexpd2_advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_expfrexp with the same accuracy specification. +

+ +
+ +

Vectorized double precision function for getting integer exponent

+ +

Synopsis

+ +

+#include <sleef.h>
+
+int32x2_t Sleef_ilogbd2_advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_ilogb with the same accuracy specification. +

+ +
+

Vectorized double precision signed integral and fractional values

+ +

Synopsis

+ +

+#include <sleef.h>
+
+Sleef_float64x2_t_2 Sleef_modfd2_advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_modf with the same accuracy specification. +

+ +
+

Vectorized single precision signed integral and fractional values

+ +

Synopsis

+ +

+#include <sleef.h>
+
+Sleef_float32x4_t_2 Sleef_modff4_advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_modff with the same accuracy specification. +

+ +
+

Vectorized double precision function for calculating the absolute value

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_fabsd2_advsimd(float64x2_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fabs with the same accuracy specification. +

+ +
+

Vectorized single precision function for calculating the absolute value

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_fabsf4_advsimd(float32x4_t a);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fabsf with the same accuracy specification. +

+ +
+

Vectorized double precision function for copying signs

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_copysignd2_advsimd(float64x2_t a, float64x2_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_copysign with the same accuracy specification. +

+ +
+

Vectorized single precision function for copying signs

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_copysignf4_advsimd(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_copysignf with the same accuracy specification. +

+ +
+

Vectorized double precision function for determining maximum of two values

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_fmaxd2_advsimd(float64x2_t a, float64x2_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fmax with the same accuracy specification. +

+ +
+

Vectorized single precision function for determining maximum of two values

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_fmaxf4_advsimd(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fmaxf with the same accuracy specification. +

+ +
+

Vectorized double precision function for determining minimum of two values

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_fmind2_advsimd(float64x2_t a, float64x2_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fmin with the same accuracy specification. +

+ +
+

Vectorized single precision function for determining minimum of two values

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_fminf4_advsimd(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fminf with the same accuracy specification. +

+ +
+

Vectorized double precision function to calculate positive difference of two values

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_fdimd2_advsimd(float64x2_t a, float64x2_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fdim with the same accuracy specification. +

+ +
+

Vectorized single precision function to calculate positive difference of two values

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_fdimf4_advsimd(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_fdimf with the same accuracy specification. +

+ +
+

Vectorized double precision function for obtaining the next representable FP value

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float64x2_t Sleef_nextafterd2_advsimd(float64x2_t a, float64x2_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_nextafter with the same accuracy specification. +

+ +
+

Vectorized single precision function for obtaining the next representable FP value

+ +

Synopsis

+ +

+#include <sleef.h>
+
+float32x4_t Sleef_nextafterf4_advsimd(float32x4_t a, float32x4_t b);
+
+Link with -lsleef. +

+ +

Description

+ +

+This is the vectorized function of Sleef_nextafterf with the same accuracy specification. +

+ + + + + diff --git a/doc/html/additional.xhtml b/doc/html/additional.xhtml new file mode 100644 index 00000000..dd72d1f2 --- /dev/null +++ b/doc/html/additional.xhtml @@ -0,0 +1,206 @@ + + + + + + + + + + +SLEEF Documentation + + +

SLEEF Documentation - Additional Notes

+ +

Table of contents

+ + + +

Additional Notes

+ +

How the dispatcher works

+ +

+ Fig. 7.1 shows simplified code of our dispatcher. There is only + one exported function mainFunc. When + mainFunc is called for the first + time, dispatcherMain is called internally, + since funcPtr is initialized to the pointer to + dispatcherMain (line 14). It then detects if the + CPU supports SSE 4.1 (line 7), and + rewrites funcPtr to a pointer to the function + that utilizes SSE 4.1 or SSE 2, depending on the result of CPU + feature detection (line 10). When + mainFunc is called for the second time, it does + not execute + dispatcherMain. It just executes the function + pointed to by the pointer stored in funcPtr during + the execution of + dispatcherMain. +

+ +

+ There are a few advantages in our dispatcher. The first advantage is + that it does not require any compiler-specific extension. The second + advantage is simplicity. There are only 18 lines of simple + code. Since the dispatchers are completely separated for each + function, there is not much room for bugs to get in. +

+ +

+ The third advantage is low overhead. You might think that the + overhead is one function call including execution of prologue and + epilogue. However, since modern compilers eliminate redundant + execution of the prologue, epilogue and return instruction, the + actual overhead is just one jmp instruction. This is very fast since + it is not conditional. +

+ +

+ The fourth advantage is thread safety. There is only one variable + shared among threads, which is funcPtr. There are + only two possible values for this pointer variable. The first value + is the pointer to the dispatcherMain, and the + second value is the pointer to either funcSSE2 + or funcSSE4, depending on the availability of + extensions. Once funcPtr is substituted with the + pointer to funcSSE2 + or funcSSE4, it will not be changed in the + future. It is obvious that the code works in all the cases. +

+ + +
+static double (*funcPtr)(double arg);
+
+static double dispatcherMain(double arg) {
+    double (*p)(double arg) = funcSSE2;
+
+#if the compiler supports SSE4.1
+    if (SSE4.1 is available on the CPU) p = funcSSE4;
+#endif
+
+    funcPtr = p;
+    return (*funcPtr)(arg);
+}
+
+static double (*funcPtr)(double arg) = dispatcherMain;
+
+double mainFunc(double arg) {
+    return (*funcPtr)(arg);
+}
+
+

+ Fig. 7.1: Simplified code of our dispatcher +

+ + +

ULP, gradual underflow and flush-to-zero mode

+ +

+ ULP stands for "unit in the last place", which is sometimes used for + measuring accuracy of calculations. 1 ULP is basically the distance + between the two closest floating point numbers, which depends on the + exponent of the FP number. The accuracy of calculations by reputable + math libraries is usually between 0.5 and 1 ULP. Here, the accuracy + means the largest error of calculation, which only happens in the + worst case. SLEEF math library provides multiple accuracy choices + for some math functions. Many functions have 3.5-ULP and 1-ULP + versions, and 3.5-ULP versions are significantly faster than 1-ULP + versions. If you care more about execution speed than accuracy, it + is advised to use the 3.5-ULP versions along with -ffast-math or + "unsafe math optimization" options for the compiler. +

+ +

+ In the IEEE 754 standard, underflow does not happen abruptly when the + exponent becomes zero. Instead, denormal numbers are produced, which + have less precision, and this is sometimes called gradual + underflow. On some implementations which are not IEEE-754 conformant, + flush-to-zero mode is used since it is easier to implement. In + flush-to-zero mode, numbers smaller than the smallest normalized + number cannot be represented, and they are replaced with zero. Because + of this, the accuracy of calculation may be influenced in some + cases. The smallest normalized number can be referred to with + DBL_MIN for double precision, and FLT_MIN for single precision. The + naming of these macros is a little bit confusing because DBL_MIN is + not the smallest positive double precision number. +

+ +

About sincospi

+ +

+ The sincospi series of functions evaluates sin( + πa ) and cos( + πa ) simultaneously. These functions are + added to SLEEF as of version 3.0. There are a few reasons that I + added these functions. +

+ +

+ C standards include specifications for functions that evaluate + trigonometric functions. In order to do calculations for evaluating + these functions, reduction of an argument is required. This involves + a multiple precision multiplication with π, + which requires many operations of addition and multiplication. This + is slow especially if accurate evaluation is required. By designing + the function in a way that the argument is pre-multiplied + by π, this reduction can be eliminated. This + leads to faster and more accurate evaluation. +

+ +

+ The second reason is that sincospi functions are handy for + implementing an FFT library. FFT libraries need to evaluate + trigonometric functions for generating twiddle factors that are used in + the butterfly operations. Since the butterfly operations are + repeatedly applied, the error in twiddle factors accumulates. Thus, we + want to make the error in twiddle factors as small as possible. In an + FFT of power-of-two size, twiddle factors are + sin( πm / + 2^n ) where m + and n are integers. If we just use the usual + trigonometric functions defined in the C standards with the + same precision as that used for butterfly operations, we already + have error when calculating arguments, since + πm / 2^n cannot + be represented as a floating point value without error. On the + other hand, if we use the sincospi function, the argument can be + accurately represented by a radix 2 FP number. Thus, we can + calculate twiddle factors with better accuracy. +

+ +

+ The third reason is that sinpi is needed internally to implement + gamma functions. +

+ +

About the logo

+ +

+ It is a soup ladle. +

+ +
+ +

+ + logo + +
+ Fig. 7.2: SLEEF logo +

+ + + + + diff --git a/doc/html/apple-touch-icon.png b/doc/html/apple-touch-icon.png new file mode 100644 index 00000000..a5e05dfe Binary files /dev/null and b/doc/html/apple-touch-icon.png differ diff --git a/doc/html/bench_s1.png b/doc/html/bench_s1.png new file mode 100644 index 00000000..39051e5b Binary files /dev/null and b/doc/html/bench_s1.png differ diff --git a/doc/html/bench_s2.png b/doc/html/bench_s2.png new file mode 100644 index 00000000..62cf1d58 Binary files /dev/null and b/doc/html/bench_s2.png differ diff --git a/doc/html/bench_s3.png b/doc/html/bench_s3.png new file mode 100644 index 00000000..64e84f58 Binary files /dev/null and b/doc/html/bench_s3.png differ diff --git a/doc/html/bench_s4.png b/doc/html/bench_s4.png new file mode 100644 index 00000000..4e148182 Binary files /dev/null and b/doc/html/bench_s4.png differ diff --git a/doc/html/benchmark.xhtml b/doc/html/benchmark.xhtml new file mode 100644 index 00000000..9bf09d4a --- /dev/null +++ b/doc/html/benchmark.xhtml @@ -0,0 +1,122 @@ + + + + + + + + + +SLEEF Documentation - Benchmark Results + + +

SLEEF Documentation - Benchmark Results

+ +

Table of contents

+ + + +

Benchmark results

+ +

+ These graphs show a comparison of the execution time between + SLEEF-3.1 and Intel + SVML. +

+ +

+ The execution time of each function is measured by executing each + function 10^8 times and taking the average time. Each time a + function is executed, a uniformly distributed random number is set + to each element of the argument vector (each element is set to a + different value). The ranges of the random number for each + function are shown below. Argument vectors are generated before + the measurement, and the time to generate random argument vectors + is not included in the execution time. +

+ +
+ +
    +
  • Trigonometric functions : [0, 6.28] and [0, 10^6] for + double-precision functions. [0, 6.28] and [0, 30000] for + single-precision functions.
  • +
  • Log : [0, 10^300] and [0, 10^38] for double-precision + functions and single-precision functions, respectively.
  • +
  • Exp : [-700, 700] and [-100, 100] for double-precision + functions and single-precision functions, respectively.
  • +
  • Pow : [-30, 30] for both the first and the second + arguments.
  • +
  • Asin : [-1, 1]
  • +
  • Atan : [-10, 10]
  • +
  • Atan2 : [-10, 10] for both the first and the second + arguments.
  • +
+ +
+ +

+ The accuracy of SVML functions can be chosen by compiler options, + not the function names. "-fimf-max-error=1.0" option is specified + to icc to obtain the 1-ulp-accuracy results, and + "-fimf-max-error=5.0" option is used for the 5-ulp-accuracy + results. +

+ +

+ Those results are measured on a PC with Intel Core i7-6700 CPU @ + 3.40GHz with Turbo Boost turned off. The CPU should be always + running at 3.4GHz during the measurement. +

+ +

+ Click graphs to magnify. +

+ +

 

+ +

+ + Performance graph for DP trigonometric functions + +
+ Fig. 6.1: Execution time of double precision trigonometric functions +

+ +

+ + Performance graph for SP trigonometric functions + +
+ Fig. 6.2: Execution time of single precision trigonometric functions +

+ +

+ + Performance graph for other DP functions + +
+ Fig. 6.3: Execution time of double precision log, exp, pow and inverse trigonometric functions +

+ +

+ + Performance graph for other SP functions + +
+ Fig. 6.4: Execution time of single precision log, exp, pow and inverse trigonometric functions +

+ + + + + diff --git a/doc/html/compile.xhtml b/doc/html/compile.xhtml new file mode 100644 index 00000000..d1c0cab3 --- /dev/null +++ b/doc/html/compile.xhtml @@ -0,0 +1,135 @@ + + + + + + + + + +SLEEF Documentation + + +

SLEEF Documentation - Compiling and installing the library

+ +

Table of contents

+ + + +

Compiling and installing the library

+ +

Compiling and installing library on Linux

+ +

+ You need to install libmpfr and OpenMP(libmpfr is only required to + build the tester, and it is not linked to the library.) Change + directory to sleef-3.X directory and run make. The built headers + and libraries will be located under include and lib directories. +

+ +

+ You can run make install using sudo command to install the library + and header. Those files are installed under /usr/lib and + /usr/include. You can run make uninstall to uninstall those files. +

+ +
+$ sudo apt-get install libmpfr-dev libgomp1-dev gcc
+$ cd sleef-3.X
+$ make
+$ sudo make install
+
+

+ Fig. 2.1: Commands for compiling SLEEF +

+ + +

Compiling library with Microsoft Visual C++

+ +

+Below are the instructions for compiling SLEEF with Microsoft Visual C++ +2015. +

+ +
    +
  1. Install Visual Studio 2015 or later, along with Cygwin
  2. +
  3. Copy vcvars64.bat to a working directory. This file is usually + in the following directory. +
    C:\Program Files (x86)\MSVCCommunity2015\VC\bin\amd64
  4. +
  5. Add the following line at the end of the copy of vcvars64.bat +
    if "%SHELL%"=="/bin/bash" c:\cygwin64\bin\bash.exe
  6. +
  7. Execute the copy of vcvars64.bat within the Cygwin bash + shell.
  8. +
  9. Go to sleef-3.X directory and run "make -f Makefile.vc"
  10. +
+ +

Compiling and running "Hello SLEEF!"

+ +

+ Now, let's try compiling the source code shown in Fig. 2.2. +

+ +
+#include <stdio.h>
+#include <x86intrin.h>
+#include <sleef.h>
+
+int main(int argc, char **argv) {
+  double a[] = {2, 10};
+  double b[] = {3, 20};
+
+  __m128d va, vb, vc;
+  
+  va = _mm_loadu_pd(a);
+  vb = _mm_loadu_pd(b);
+
+  vc = Sleef_powd2_u10(va, vb);
+
+  double c[2];
+
+  _mm_storeu_pd(c, vc);
+
+  printf("pow(%g, %g) = %g\n", a[0], b[0], c[0]);
+  printf("pow(%g, %g) = %g\n", a[1], b[1], c[1]);
+}
+
+

+ Fig. 2.2: Source code for testing +

+ + + +

+ Fig. 2.3 shows typical commands for compiling and executing the hello + code on Linux computers. +

+ +
+$ gcc hellox86.c -o hellox86 -lsleef
+$ ./hellox86
+pow(2, 3) = 8
+pow(10, 20) = 1e+20
+
+

+ Fig. 2.3: Commands for compiling and executing hellox86.c +

+ +

+ You may need to set LD_LIBRARY_PATH environment variable + appropriately. If you are trying to execute the program on Mac OSX + or Windows, try copying the DLLs to the current directory. +

+ + + + + diff --git a/doc/html/convention.png b/doc/html/convention.png new file mode 100644 index 00000000..a886a5a9 Binary files /dev/null and b/doc/html/convention.png differ diff --git a/doc/html/default.css b/doc/html/default.css deleted file mode 100644 index 2945c06c..00000000 --- a/doc/html/default.css +++ /dev/null @@ -1,120 +0,0 @@ -body {margin-left: 1.0cm; padding-left: 0.1cm; margin-right: 1.0cm; padding-right: 0.1cm; margin-top: 1.0cm; padding-top: 0.1cm; margin-bottom: 1.0cm; padding-bottom: 0.1cm; } -h1 {font-size:1.8em; font-family: arial, sansserif; font-weight: bold; font-style: italic; margin-top: 0.8cm; } -h2 {font-size:1.6em; font-family: arial, sansserif; font-weight: bold; margin-top: 1.8cm; margin-bottom: 0.5cm; font-style: normal; } - -h3 { - font-size:1.2em; - font-family: arial, sansserif; - font-weight: bold; - margin: 0.7cm; - font-style: normal; -} - -h4 {font-family: arial, sansserif; font-weight: bold; margin-top: 0.3cm; margin-bottom: 0.3cm; } -p {font-family: "Times New Roman", times, serif; margin-top: 0.3cm; margin-left: 0.5cm; margin-bottom: 0.3cm;} -p.dir {font-family: arial, sansserif; margin-top: 0cm; margin-bottom: 0cm;} -dl { margin-left: 0.5cm; } -dt { font-weight: bold; } -a:link { margin-left: 0cm; color: black; text-decoration: none; } -a:visited { margin-left: 0cm; color: black; text-decoration: none; } -a:hover { margin-left: 0cm; color: black; text-decoration: underline; } -a:article { margin-left: 0cm; color: black; text-decoration: none; } -a.underlined:link { text-decoration: underline; } -ul.disc { list-style-type: disc; font-size:0.9em; font-family: arial, sansserif; font-weight: normal; } -ul.circle {list-style-type: circle; font-size:0.9em; font-family: arial, sansserif; } -ul.square {list-style-type: square; font-family: times, serif;} -ul.none { list-style-type: none; font-size:1.25em; font-family: arial, sansserif; font-weight: bold; } - -i {font-family: "Times New Roman", times, serif; font-weight: bold; 
color:#a00000; } -i.math {font-family: "Times New Roman", times, serif; font-weight: normal; font-style:normal; color:#000000; } -i.var {font-family: "Times New Roman", times, serif; font-weight: normal; color:#000000; } -b {font-family: arial, sansserif; font-weight: normal; color:#0050a0; } -b.func {font-family: arial, sansserif; font-weight: normal; color:#008040; } -ol.level1 { font-family: arial, sansserif; font-weight: bold; font-style: italic; font-size:1.5em; } -ol.level2 { font-family: "Times New Roman", serif; font-weight: normal; font-style: normal; font-size:0.85em; margin-top: 0.2cm; margin-bottom: 0.5cm; } -table.figure { margin-left:auto; margin-right:auto; margin-top:1.0cm; margin-bottom:1.0cm; } - -td.caption { font-family: arial, sansserif; font-size: 75%; color: black; } -td { font-family: times, serif; } - -table.lt { border-collapse: collapse; border-style: none; } -td.lt- { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-width: 1px; border-style: none; padding-left=0.2cm; padding-right=0.2cm; } -td.lt-r { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-right-style: solid; border-width: 1px; border-color: black; } -td.lt-l { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-left-style: solid; border-width: 1px; border-color: black; } -td.lt-lr { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-right-style: solid; border-left-style: solid; border-width: 1px; border-color: black; } -td.lt-b { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-bottom-style: solid; border-width: 1px; border-color: black; } -td.lt-hl { margin: 0px; border-style: none; border-bottom-style: solid; border-width: 1px; border-color: black; height: 2px; } -td.lt-bl { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; 
border-bottom-style: solid; border-left-style: solid; border-width: 1px; border-color: black; } -td.lt-br { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-bottom-style: solid; border-right-style: solid; border-width: 1px; border-color: black; } -td.lt-blr { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-bottom-style: solid; border-left-style: solid; border-right-style: solid; border-width: 1px; border-color: black; } - -p.funcname { font-family: arial, sansserif; font-size:1.2em; font-weight: normal; margin-top: 0.3cm; margin-bottom: 0.3cm; margin-left: 0.1cm; margin-right: 0.2cm; padding-left: 0.0cm; padding-right: 0.1cm; } -p.header { font-family: arial, sansserif; font-size:1.1em; font-weight: bold; margin-top: 1.0cm; margin-bottom: 0.3cm; margin-left: 0.4cm; margin-right: 0.2cm; padding-left: 0.1cm; padding-right: 0.1cm; } -p.synopsis { font-family: arial, sansserif; font-size:1.0em; font-weight: normal; margin-top: 0.3cm; margin-bottom: 0.3cm; margin-left: 0.6cm; margin-right: 0.2cm; padding-left: 0.1cm; padding-right: 0.1cm; } - -p.description { - font-family: times, serif; - font-size:1.0em; - font-weight: normal; - margin-top: 0.3cm; - margin-bottom: 0.3cm; - margin-left: 0.6cm; - margin-right: 0.2cm; - padding-left: 0.1cm; - padding-right: 0.1cm; -} - -hr { margin-top: 0.8cm; margin-bottom: 0.5cm; padding-top: 0cm; padding-bottom: 0cm; } - -pre.command { - margin: 0.5cm 1.5cm 0.5cm 1.5cm; - padding: 1em; - border:0px; - font-family: arial, sansserif; - font-size: 12pt; - font-weight: normal; - background-color:#fbf2ef; - box-shadow: 3px 3px 3px #aaa; -} - -pre.white { - font-family: arial, sansserif; - font-size:1.0em; - font-weight: normal; - background-color:white; - overflow: auto; - - margin: 0.6cm; - padding: 0.1cm; -} - -pre.code { - font-family:arial, sansserif; - font-size:9pt; - font-weight: normal; - background-color:#fbf8ef; - box-shadow: 3px 3px 3px 
#aaa; - overflow: auto; - - margin: 1.0cm 1.5cm 1.0cm 1.5cm; - padding: 1em 1em 2em 1.1em; - counter-reset: line; -} - -code { - font-family:"Consolas", arial, sansserif; - font-size:9pt; - counter-increment:line; -} - -code:before { - content: counter(line); - display: inline-block; - border-right: 1px solid #c0a0a0; - padding: 0 0.5em 0 0.5em; - margin-right: 0.5em; - min-width: 2em; - text-align: right; - font-size:9pt; -} - diff --git a/doc/html/dft.xhtml b/doc/html/dft.xhtml index e3b689f8..d58b11b0 100644 --- a/doc/html/dft.xhtml +++ b/doc/html/dft.xhtml @@ -4,32 +4,37 @@ - -SLEEF API Reference + + + + +SLEEF Documentation -

SLEEF API Reference - DFT library reference

+

SLEEF Documentation - DFT library reference

Table of contents

-
    +

    Tutorial

    -

    +

    I now explain how to use this DFT library by referring to an example source code shown below. This source code is @@ -113,8 +118,13 @@ exit(success); } +

    + Fig. 4.1: Test code for DFT subroutines +

    + +
    -

    +

    As shown in the first line, you can compile the source code with the following command, after you install the library.

    @@ -122,17 +132,17 @@
    gcc tutorial.c -lsleef -lsleefdft -lm
     
    -

    +

    This program takes one integer argument n. It executes forward complex transform with size 2n using a naive transform and the library. If the two results match, it prints OK.

    -

    +

    For the first execution, this program takes a few seconds to finish. This is because the library measures computation speed with - many different configurations to find the best exectuion plan. The + many different configurations to find the best execution plan. The best plan is saved to "plan.txt", as specified in line 28. Later executions will finish instantly as the library reads the plan from this file. Instead of specifying the file name in the program, the @@ -142,7 +152,7 @@ specified at line 30.

    -

    +

    This library executes transforms using the most suitable SIMD instructions available on the computer, in addition to multi-threading. In order to make the computation efficient, the @@ -157,21 +167,21 @@ memory region yourself, and pass the pointer to the library.

    -

    - The real and imaginary parts of the kth number - are stored in (2k)-th and - (2k+1)-th elements of the input and output array, +

    + The real and imaginary parts of the kth number + are stored in (2k)-th and + (2k+1)-th elements of the input and output array, respectively. At line 54, the transform is executed by the library. You can specify the same array as the input and output.

    -

    +

    Under src/dft-tester directory, there are other examples showing how to execute transforms in a way that you get equivalent results to other libraries.

    -

    Compatibility with other libraries

    +

    Function reference

    @@ -184,15 +194,15 @@ #include <stdlib.h>
    #include <sleef.h>

    -void * Sleef_malloc(size_t z);
    +void * Sleef_malloc(size_t z);

    Link with -lsleef.

    Description

    -

    - Sleef_malloc allocates z bytes of aligned +

    + Sleef_malloc allocates z bytes of aligned memory region, and return the pointer to that region. The returned pointer points an address that can be accessed by all SIMD load and store instructions available on that computer. Memory regions @@ -210,15 +220,15 @@ Link with -lsleef. #include <stdlib.h>
    #include <sleef.h>

    -void Sleef_free(void *ptr);
    +void Sleef_free(void *ptr);

    Link with -lsleef.

    Description

    -

    - A memory region pointed by ptr that is allocated +

    + A memory region pointed by ptr that is allocated by Sleef_malloc can be freed with Sleef_free.

    @@ -233,24 +243,29 @@ with Sleef_free. #include <stdint.h>
    #include <sleefdft.h>

    -void SleefDFT_setPlanFilePath(const char *path, const char *arch, uint64_t mode);
    +void SleefDFT_setPlanFilePath(const char *path, const char *arch, uint64_t mode);

    Link with -lsleefdft -lsleef.

    Description

    -

    +

    File name for storing execution plan can be specified by this -function. If NULL is specified as path, the file name is read +function. If NULL is specified as path, the file name is read from SLEEFDFTPLAN environment variable. A string for identifying system micro architecture can be also given. The library will automatically detect the micro architecture if NULL is given -as arch. Management options for the plan file can be specified -by the mode parameter, as shown below. +as arch. Management options for the plan file can be specified +by the mode parameter, as shown below.

    - +
    + +
    + + +
    Table 4.2: Mode flags for SleefDFT_setPlanFilePath
    @@ -293,24 +308,29 @@ by the mode parameter, as shown below. #include <stdint.h>
    #include <sleefdft.h>

    -struct SleefDFT *SleefDFT_double_init1d(uint32_t n, const double *in, double *out, uint64_t mode);
    -struct SleefDFT *SleefDFT_float_init1d(uint32_t n, const float *in, float *out, uint64_t mode);
    -struct SleefDFT *SleefDFT_longdouble_init1d(uint32_t n, const long double *in, long double *out, uint64_t mode);
    +struct SleefDFT *SleefDFT_double_init1d(uint32_t n, const double *in, double *out, uint64_t mode);
    +struct SleefDFT *SleefDFT_float_init1d(uint32_t n, const float *in, float *out, uint64_t mode);
    +struct SleefDFT *SleefDFT_longdouble_init1d(uint32_t n, const long double *in, long double *out, uint64_t mode);

    Link with -lsleefdft -lsleef.

    Description

    -

    - These functions generates and initilizes the tables that is used for +

    + These functions generate and initialize the tables that are used for 1D transform, and return the pointer. Size of transform can be - specified by n. Currently, power-of-two sizes can be only - specified. The list of the flags that can be passed to mode + specified by n. Currently, only power-of-two sizes can be + specified. The list of the flags that can be passed to mode is shown below.

    -
    +
    + +
    + + + @@ -369,7 +389,7 @@ Link with -lsleefdft -lsleef.

    Return value

    -

    +

    These functions return a pointer to the data that is used for 1D DFT computation, or NULL if an error occurred.

    @@ -384,26 +404,26 @@ Link with -lsleefdft -lsleef. #include <stdint.h>
    #include <sleefdft.h>

    -struct SleefDFT *SleefDFT_double_init2d(uint32_t n, uint32_t m, const double *in, double *out, uint64_t mode);
    -struct SleefDFT *SleefDFT_float_init2d(uint32_t n, uint32_t m, const float *in, float *out, uint64_t mode);
    -struct SleefDFT *SleefDFT_longdouble_init2d(uint32_t n, uint32_t m, const long double *in, long double *out, uint64_t mode);
    +struct SleefDFT *SleefDFT_double_init2d(uint32_t n, uint32_t m, const double *in, double *out, uint64_t mode);
    +struct SleefDFT *SleefDFT_float_init2d(uint32_t n, uint32_t m, const float *in, float *out, uint64_t mode);
    +struct SleefDFT *SleefDFT_longdouble_init2d(uint32_t n, uint32_t m, const long double *in, long double *out, uint64_t mode);

    Link with -lsleefdft -lsleef.

    Description

    -

    +

    These functions generate and initialize the tables that are used for 2D transform, and return the pointer. Size of transform can be - specified by n. Currently, power-of-two sizes can be only - specified. The list of the flags that can be passed to mode + specified by n. Currently, only power-of-two sizes can be + specified. The list of the flags that can be passed to mode is shown below.

    Return value

    -

    +

    These functions return a pointer to the data that is used for 2D DFT computation, or NULL if an error occurred.

    @@ -418,16 +438,20 @@ Link with -lsleefdft -lsleef. #include <stdint.h>
    #include <sleefdft.h>

    -void SleefDFT_double_execute(struct SleefDFT *ptr, const double *in, double *out);
    -void SleefDFT_float_execute(struct SleefDFT *ptr, const float *in, float *out);
    -void SleefDFT_longdouble_execute(struct SleefDFT *ptr, const long double *in, long double *out);
    +void SleefDFT_double_execute(struct SleefDFT *ptr, const double *in, double *out);
    +void SleefDFT_float_execute(struct SleefDFT *ptr, const float *in, float *out);
    +void SleefDFT_longdouble_execute(struct SleefDFT *ptr, const long double *in, long double *out);

    Link with -lsleefdft -lsleef.

    Description

    -

    +

    + ptr is a pointer to the +plan. in and out must be +pointers returned from Sleef_malloc function. You can specify the same +pointer to in and out.


    @@ -440,16 +464,20 @@ Link with -lsleefdft -lsleef. #include <stdint.h>
    #include <sleefdft.h>

    -void SleefDFT_dispose(struct SleefDFT *ptr);
    +void SleefDFT_dispose(struct SleefDFT *ptr);

    Link with -lsleefdft -lsleef.

    Description

    -

    +

    + This function frees a plan returned + by SleefDFT_double_init1d, SleefDFT_float_init1d, SleefDFT_longdouble_init1d, SleefDFT_double_init2d, SleefDFT_float_init2d, + or SleefDFT_longdouble_init2d functions.

    + diff --git a/doc/html/favicon.png b/doc/html/favicon.png new file mode 100644 index 00000000..31151a76 Binary files /dev/null and b/doc/html/favicon.png differ diff --git a/doc/html/hellox86.c b/doc/html/hellox86.c new file mode 100644 index 00000000..31fc61d4 --- /dev/null +++ b/doc/html/hellox86.c @@ -0,0 +1,22 @@ +#include +#include +#include + +int main(int argc, char **argv) { + double a[] = {2, 10}; + double b[] = {3, 20}; + + __m128d va, vb, vc; + + va = _mm_loadu_pd(a); + vb = _mm_loadu_pd(b); + + vc = Sleef_powd2_u10(va, vb); + + double c[2]; + + _mm_storeu_pd(c, vc); + + printf("pow(%g, %g) = %g\n", a[0], b[0], c[0]); + printf("pow(%g, %g) = %g\n", a[1], b[1], c[1]); +} diff --git a/doc/html/index.xhtml b/doc/html/index.xhtml index 388c9d30..871f5d48 100644 --- a/doc/html/index.xhtml +++ b/doc/html/index.xhtml @@ -4,114 +4,356 @@ - -SLEEF API Reference + + + +SLEEF Documentation -

    SLEEF API Reference - Introduction

    +

    SLEEF Documentation - Introductionlogo

    + +

    Table of contents

    -
      +

      Overview

      -

      About ULP

      +

      + SLEEF stands for SIMD Library for Evaluating Elementary + Functions. It implements vectorized versions of all C99 real + floating point math functions. It can utilize SIMD instructions of + modern processors. SLEEF is designed to fully utilize SIMD + computation by reducing the use of conditional branches and + scatter/gather memory access. Our benchmarks show that the performance of + SLEEF is comparable to that of the best commercial library. Unlike + vendor-tuned libraries, SLEEF is portable : it can be easily ported + to other architectures by writing a helper file, which is a thin + abstraction layer of SIMD intrinsics. SLEEF is also designed to work + with various operating systems and compilers. It also includes + vectorized DFT subroutines. +

      -

      - ULP stands for "unit in the last place", which is sometimes used for - measuring accuracy of calculations. 1 ULP is basically the distance - between the two closest floating point number, which depends on the - exponent of the FP number. The accuracy of calculations by reputable - math libraries is usually between 0.5 and 1 ULP. Here, the accuracy - means the largest error of calculation, which happens in the worst - case. +

      + The library contains subroutines for all C99 real FP math functions + in double precision and single precision. Different accuracy of the + results can be chosen for a subset of the elementary functions; for + this subset there are versions with up to 1 ulp error (which is the + maximum error, not the average) and versions with a few ulp + error. Obviously, less accurate versions are faster. For non-finite + inputs and outputs, the library should return the same results as + libm as specified in the C99 standard. The library is rigorously + tested if the evaluation error is within the designed limit. The + library is tested against high-precision evaluation + using the libmpfr + library. Especially, we carefully checked the error of the + trigonometric functions when the arguments are close to an integral + multiple of π/2.

      -

      - SLEEF math library provides multiple accuracy choices for some math - functions. Many functions have 3.5-ULP and 1-ULP versions, and - 3.5-ULP versions are significantly faster than 1-ULP versions. If - you care more about execution speed than accuracy, it is advised to - use the 3.5-ULP versions along with "unsafe math optimization" - options for the compiler. +

      Supported environments

      + +

      + This library currently supports several SIMD architectures :

      -

      About sincospi

      +
      -

      - The sincospi series of functions evaluates sin( - πa ) and cos( - πa ) simultaneously. These functions - are added to SLEEF as of version 3.0. Below, I explain the two - reasons that I added these functions. +

        +
      • x86 - SSE2, SSE4.1, AVX, FMA4, AVX2+FMA3, AVX512F
      • +
      • AArch64 - Advanced SIMD
      • +
      • AArch32 - NEON
      • +
      + +
      + +

      + In addition to the SIMD implementation, a Pure C (scalar) version is + provided. For x86 architecture, the library provides dispatchers + that automatically choose the best subroutines for the computer on which the + library is run. The supported combinations of the architecture, + operating system and compiler are shown in Table 1.1.

      -

      - C standards include specifications for functions that evaluate - trigonometric functions. In order to do calculations for evaluating - these functions, reduction of an argument is required. This involves - a multiple precision multiplication with π, - which requires many operations of addition and multiplication. This - is slow especially if accurate evaluation is required. By designing - the function in a way that the argument is pre-multiplied - by π, this reduction can be elminated. This - leads to faster and more accurate evaluation. +

      + +
    Table 4.3: Mode flags for SleefDFT_double_init
    @@ -360,7 +380,7 @@ Link with -lsleefdft -lsleef. - +
    SLEEF_MODE_NO_MTMultithreading will be disabled in the compuation for transforms.Multithreading will be disabled in the computation for transforms.
    + + + + + + +
    Table 1.1: Environment support matrix
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    GCCClangIntel CompilerMSVC
    x86 (64bit), LinuxSupportedSupportedSupported(*1)N/A
    x86 (32bit), LinuxSupported(*2)Supported(*2)N/A
    AArch64, LinuxSupportedSupportedN/AN/A
    AArch32, LinuxSupported(*3)Supported(*3)N/AN/A
    x86 (64bit), OS XSupportedSupportedN/AN/A
    x86 (64bit), WindowsSupported(Cygwin)(*4)Supported(Cygwin)(*4)Supported(*5)
    GenericSupported(*6)Supported(*6)
    +
    + +
    + +

    + The supported compiler versions are as follows.

    -

    - The second reason is that sincospi functions are handy for - implementing an FFT library. FFT libraries need to evaluate - trigonometric functions for generating twiddle factors that is used in - the butterfly operations. Since the butterfly operations are - repeatedly applied, the error in twiddle factors accumulates. Thus, we - want to make the error in twiddle factors as small as possible. In an - FFT of power-of-two size, twiddle factors are - sin( πm / - 2n ) where m - and n are integer. If we just use the usual - trigonometric functions defined in the C standards with the - precision same as that used for butterfly operations, we already - have error when calculating arguments, since - πm / 2n cannot - be represented as a floating point value without error. On the - other hand, if we use sincospi function, the argument can be - accuratedly represented by a radix 2 FP number. Thus, we can - calculate twiddle factors with better accuracy. +
    + +

      +
    • GCC : version 5 and later
    • +
    • Clang : version 3.9 and later
    • +
    • Intel Compiler : ICC version 17
    • +
    • MSVC : Visual Studio 2015
    • +
    + +
    + +

    + *1 FMA4 is not supported by Intel Compiler.

    -

    Supported environments

    +

    + *2 SSE2 is required to run the scalar functions on 32-bit x86 + architecture. x87 is not supported. +

    -

    Compiling and installing the library

    +

    + *3 NEON has only single precision support. The computation results + are not in full accuracy since NEON is not IEEE 754-compliant. +

    -

    Compatibility with other libraries

    +

    + *4 AVX functions are not supported for Cygwin, since AVX is not + supported by Cygwin ABI. +

    -

    Porting

    +

    + *5 AVX512F is not supported by Visual Studio 2015. +

    +

    + *6 Generic architecture is supported through GCC/Clang vector + extensions. IEEE 754-compliant 64-bit and 32-bit calculation is + required. +

    + +
    + +

    + All functions in the library are thread safe unless otherwise noted. +

    +

    Credit

    +
      +
    • The main developer + is Naoki Shibata + at Nara Institute of Science and Technology.
    • +
    • Francesco Petrogalli at ARM Ltd. contributed the helper for AArch64 + (helperadvsimd.h). He also reviewed the code, gave precious comments + and suggestions.
    • +
    • Hal Finkel at Argonne Leadership Computing Facility is now + working on importing and adapting SLEEF + as an LLVM runtime. He also gave precious comments.
    • +
    +

    License

    +

    +SLEEF is distributed +under Boost Software +License Version 1.0. +

    +

    History

    +

    3.1 (Released on July 19, 2017)

    +
      +
    • Added AArch64 support
    • +
    • Implemented the remaining C99 math functions : lgamma, tgamma, + erf, erfc, fabs, copysign, fmax, fmin, fdim, trunc, floor, ceil, + round, rint, modf, ldexp, nextafter, frexp, hypot, and fmod.
    • +
    • Added dispatcher for x86 functions
    • +
    • Improved reduction of trigonometric functions
    • +
    • Added support for 32-bit x86, Cygwin, etc.
    • +
    • Improved tester
    • +
    • Etc.
    • +
    + +

    3.0 (Released on Feb. 7, 2017)

    +
      +
    • New API is defined
    • +
    • Functions for DFT are added
    • +
    • sincospi functions are added
    • +
    • gencoef now supports single, extended and quad precision in addition to double precision
    • +
    • Linux, Windows and Mac OS X are supported
    • +
    • GCC, Clang, Intel Compiler, Microsoft Visual C++ are supported
    • +
    • The library can be compiled as DLLs
    • +
    • Files needed for creating a debian package are now included
    • +
    + +

    2.120 (Released on Jan. 30, 2017)

    +
      +
    • Relicensed to Boost Software License Version 1.0
    • +
    + +

    2.110 (Released on Dec. 11, 2016)

    +
      +
    • The valid range of argument is extended for trig functions
    • +
    • Specification of each functions regarding to the domain and accuracy is added
    • +
    • A coefficient generation tool is added
    • +
    • New testing tools are introduced
    • +
    • Following functions returned incorrect values when the argument is very large or small : exp, pow, asinh, acosh
    • +
    • SIMD xsin and xcos returned values more than 1 when FMA is enabled
    • +
    • Pure C cbrt returned incorrect values when the argument is negative
    • +
    • tan_u1 returned values with more than 1 ulp of error on rare occasions
    • +
    • Removed support for Java language(because no one seems using this)
    • +
    + + + + + + + Download sleef + + + + diff --git a/doc/html/misc.xhtml b/doc/html/misc.xhtml index 56802ff7..d702e2f1 100644 --- a/doc/html/misc.xhtml +++ b/doc/html/misc.xhtml @@ -4,16 +4,19 @@ - -SLEEF API Reference + + + +SLEEF Documentation -

    SLEEF API Reference - Introduction

    +

    SLEEF Documentation - Other tools included in the package

    Table of contents

    - -

    Overview

    +

    Libm tester

    + +

    + SLEEF has two kinds of testers, and each kind of tester has its own role. +

    + +

    + The first kind of testers is separated into tester and iut (which + stands for Implementation Under Test.) Those two are built as + separate executables, and communicate with each other using a + pipe. The role for this tester is to perform a perfunctory set of + tests to check if the build is correct. It also performs + regression tests. Since the tester executable and the iut executable + are separated, the iut can be implemented in an exotic + language. It is also possible to perform a test over the network. +

    + +

    + The second kind of tester is designed to run continuously. It + repeatedly generates random arguments for each function, and + compares the results of each function to the results calculated + with the corresponding function in libmpfr. This tester is expected + to find bugs if it is run for a sufficiently long time. +

    -

    Gencoef

    -

    -With this small tool, the coefficients for polynomial approximation -used in kernels can be generated. +

    DFT tester

    + +

    + The DFT has its own tester. This tester compares the results + computed by SLEEF DFT with a naive implementation. +

    + + +

    Gencoef

    + +

    + Gencoef is a small tool for generating the coefficients for + polynomial approximation used in the kernels.

    -

    -In order to change the configurations, please edit gencoefdp.c. In the -beginning of the file, specifications of the parameters for generating -coefficients are listed. Enable one of them by changing #if. Then, run -make to compile the source code. Run the gencoef, and it will show the -generated coefficients in a few minutes. +

    + In order to change the configurations, please edit gencoefdp.c. In + the beginning of the file, specifications of the parameters for + generating coefficients are listed. Enable one of them by changing + #if. Then, run make to compile the source code. Run the gencoef, and + it will show the generated coefficients in a few minutes. It may + take longer time depending on the settings.

    -

    -There are two phases of the program. The first phase is the regression -for minimizing the maximum relative error. This problem can be reduced -to a linear programming problem, and the Simplex method is used in -this implementation. This requires multi-precision calculation, and -the implementation uses the MPFR library to do this. In this phase, -only a small number of values (specified by S macro, usually 40 or so) -of the function to approximate are sampled within the argument -range. The function to approximate can be given by FRFUNC -function. Specifying higher values for S does not always give better -results. +

    + There are two phases of the program. The first phase is the + regression for minimizing the maximum relative error. This problem + can be reduced to a linear programming problem, and the Simplex + method is used in this implementation. This requires multi-precision + calculation, and the implementation uses the MPFR library to do + this. In this phase, it uses only a small number of values + (specified by macro S, usually less than 100) within the input + domain of the kernel function to approximate the function. The + function to approximate is given by FRFUNC function. Specifying + higher values for S does not always give better results.

    -

    -The second phase is to optimize the coefficients so that it gives good -accuracy with double precision calculation. In this phase, it checks -10000 points (specified by Q macro) within the specified argument -range to see if the polynomial gives good error bound. In some cases, -the last few terms have to be calculated in higher precision in order -to achieve 1 ULP overall accuracy, and this implementation can take -care of that. The L parameter specifies the number of high precision -coefficients. +

    + The second phase is to optimize the coefficients so that it gives + good accuracy with double precision calculation. In this phase, it + checks 10000 points (specified by macro Q) within the specified + argument range to see if the polynomial gives good error bounds. In + some cases, the last few terms have to be calculated in higher + precision in order to achieve 1 ULP or less overall accuracy, and + this implementation can take care of that. The L parameter specifies + the number of high precision coefficients.

    -

    -In some cases, it is desirable to fix the last few coefficients to -values like 1. This can be specified if you define FIXCOEF0 -macro. This sometimes does not work, however. In this case, you need -to specify the function to approximate as shown in the definition for -cos. +

    + In some cases, it is desirable to fix the last few coefficients to + values like 1 or 0.5. This can be specified if you define FIXCOEF0 + macro.

    -

    -Finding a set of good parameters is not a straightforward process. You -usually need many iterations of trial and error. +

    + Finding a set of good parameters is not a straightforward process.

    + + diff --git a/doc/html/purec.xhtml b/doc/html/purec.xhtml index 62636d2c..610933cd 100644 --- a/doc/html/purec.xhtml +++ b/doc/html/purec.xhtml @@ -4,16 +4,20 @@ - -SLEEF API Reference + + + + +SLEEF Documentation -

    SLEEF API Reference - Math library reference

    +

    SLEEF Documentation - Math library reference

    Table of contents

    -

    Data types

    -

    Sleef_double2

    +

    Sleef_double2

    Description

    -

    -Sleef_double2 is a generic data type for storing two double-precision -floating point values, which is defined in <sleef.h> as follows: +

    +Sleef_double2 is a generic data type for storing +two double-precision floating point values, which is defined in +<sleef.h> as follows:

    typedef struct {
    @@ -56,12 +65,12 @@ floating point values, which is defined in <sleef.h> as follows:
     
     
    -

    Sleef_float2

    +

    Sleef_float2

    Description

    -

    -Sleef_float2 is a generic data type for storing two single-precision +

    +Sleef_float2 is a generic data type for storing two single-precision floating point values, which is defined in <sleef.h> as follows:

    @@ -72,12 +81,12 @@ floating point values, which is defined in <sleef.h> as follows:
    -

    Sleef_longdouble2

    +

    Sleef_longdouble2

    Description

    -

    -Sleef_longdouble2 is a generic data type for storing two +

    +Sleef_longdouble2 is a generic data type for storing two extended-precision (80-bit) floating point values, which is defined in <sleef.h> as follows:

    @@ -89,28 +98,28 @@ extended-precision (80-bit) floating point values, which is defined in

    Trigonometric Functions

    -

    Sleef_sin_u10, Sleef_sinf_u10 - sine functions with 1.0 ULP error bound

    +

    Sleef_sin_u10, Sleef_sinf_u10 - sine functions with 1.0 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_sin_u10(double a);
    -float Sleef_sinf_u10(float a);
    - +double Sleef_sin_u10(double a);
    +float Sleef_sinf_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluates the sine function of a value - in a. The error bound of the returned value is 1.0 ULP - if a is in [-1e+14, 1e+14] + in a. The error bound of the returned value is 1.0 ULP + if a is in [-1e+14, 1e+14] for the double-precision function or [-5e+9, 5e+9] - for the single-precision function. If a is a finite + for the single-precision function. If a is a finite value out of this range, an arbitrary value within [-1, 1] is returned. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions @@ -119,28 +128,28 @@ Link with -lsleef.


    -

    Sleef_sin_u35, Sleef_sinf_u35 - sine functions with 3.5 ULP error bound

    +

    Sleef_sin_u35, Sleef_sinf_u35 - sine functions with 3.5 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_sin_u35(double a);
    -float Sleef_sinf_u35(float a);
    - +double Sleef_sin_u35(double a);
    +float Sleef_sinf_u35(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluates the sine function of a value - in a. The error bound of the returned value is 3.5 ULP - if a is in [-1e+14, 1e+14] + in a. The error bound of the returned value is 3.5 ULP + if a is in [-1e+14, 1e+14] for the double-precision function or [-39000, 39000] - for the single-precision function. If a is a finite + for the single-precision function. If a is a finite value out of this range, an arbitrary value within [-1, 1] is returned. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions @@ -149,28 +158,28 @@ Link with -lsleef.


    -

    Sleef_cos_u10, Sleef_cosf_u10 - cosine functions with 1.0 ULP error bound

    +

    Sleef_cos_u10, Sleef_cosf_u10 - cosine functions with 1.0 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_cos_u10(double a);
    -float Sleef_cosf_u10(float a);
    - +double Sleef_cos_u10(double a);
    +float Sleef_cosf_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluates the cosine function of a value - in a. The error bound of the returned value is 1.0 ULP - if a is in [-1e+14, 1e+14] + in a. The error bound of the returned value is 1.0 ULP + if a is in [-1e+14, 1e+14] for the double-precision function or [-5e+9, 5e+9] - for the single-precision function. If a is a finite + for the single-precision function. If a is a finite value out of this range, an arbitrary value within [-1, 1] is returned. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions @@ -179,28 +188,28 @@ Link with -lsleef.


    -

    Sleef_cos_u35, Sleef_cosf_u35 - cosine functions with 3.5 ULP error bound

    +

    Sleef_cos_u35, Sleef_cosf_u35 - cosine functions with 3.5 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_cos_u35(double a);
    -float Sleef_cosf_u35(float a);
    - +double Sleef_cos_u35(double a);
    +float Sleef_cosf_u35(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluates the cosine function of a value - in a. The error bound of the returned value is 3.5 ULP - if a is in [-1e+14, 1e+14] + in a. The error bound of the returned value is 3.5 ULP + if a is in [-1e+14, 1e+14] for the double-precision function or [-39000, 39000] - for the single-precision function. If a is a finite + for the single-precision function. If a is a finite value out of this range, an arbitrary value within [-1, 1] is returned. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions @@ -209,200 +218,265 @@ Link with -lsleef.


    -

    Sleef_tan_u10, Sleef_tanf_u10 - tangent functions with 1.0 ULP error bound

    +

    Sleef_sincos_u10, Sleef_sincosf_u10 - evaluate sine and cosine functions simultaneously with 1.0 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_tan_u10(double a);
    -float Sleef_tanf_u10(float a);
    - +Sleef_double2 Sleef_sincos_u10(double a)
    +Sleef_float2 Sleef_sincosf_u10(float a)
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - These functions evaluates the tangent function of a value - in a. The error bound of the returned value is 1.0 ULP - if a is in [-1e+14, 1e+14] +

    + Evaluates the sine and cosine functions of a value in a at a + time, and store the two values in x and y elements in + the returned value, respectively. The error bound of the returned + values is 1.0 ULP if a is in [-1e+14, 1e+14] for the double-precision function or [-5e+9, 5e+9] - for the single-precision function. If a is a finite + for the single-precision function. If a is a finite value out of this range, an arbitrary value within [-1, 1] is - returned. These functions treat the non-number arguments and return - non-numbers as specified in the C99 specification. These functions - do not set errno nor raise an exception. + returned. If a is a NaN or infinity, a NaN is returned.


    -

    Sleef_tan_u35, Sleef_tanf_u35 - tangent functions with 3.5 ULP error bound

    +

    Sleef_sincos_u35, Sleef_sincosf_u35 - evaluate sine and cosine functions simultaneously with 3.5 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_tan_u35(double a);
    -float Sleef_tanf_u35(float a);
    - +Sleef_double2 Sleef_sincos_u35(double a)
    +Sleef_float2 Sleef_sincosf_u35(float a)
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - These functions evaluates the tangent function of a value - in a. The error bound of the returned value is 3.5 ULP - if a is in [-1e+14, 1e+14] +

    + Evaluates the sine and cosine functions of a value in a at + once, and stores the two values in the x and y elements of + the returned value, respectively. The error bound of the returned + values is 3.5 ULP if a is in [-1e+14, 1e+14] for the double-precision function or [-39000, 39000] - for the single-precision function. If a is a finite + for the single-precision function. If a is a finite value out of this range, an arbitrary value within [-1, 1] is - returned. These functions treat the non-number arguments and return - non-numbers as specified in the C99 specification. These functions - do not set errno nor raise an exception. + returned. If a is a NaN or infinity, a NaN is returned.


    -

    Sleef_sincos_u10, Sleef_sincosf_u10 - evaluate sine and cosine functions simultaneously with 1.0 ULP error bound

    +

    Sleef_sincospi_u05, Sleef_sincospif_u05, Sleef_sincospil_u05 - evaluate sin( πa ) and cos( πa ) for given a simultaneously with 0.506 ULP error bound

    Synopsis

    #include <sleef.h>

    -Sleef_double2 Sleef_sincos_u10(double a)
    -Sleef_float2 Sleef_sincosf_u10(float a)
    - +Sleef_double2 Sleef_sincospi_u05(double a)
    +Sleef_float2 Sleef_sincospif_u05(float a)
    +Sleef_longdouble2 Sleef_sincospil_u05(long double a)

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - Evaluates the sine and cosine functions of a value in a at a - time, and store the two values in x and y elements in - the returned value, respectively. The error bound of the returned - values is 1.0 ULP if a is in [-1e+14, 1e+14] - for the double-precision function or [-5e+9, 5e+9] - for the single-precision function. If a is a finite - value out of this range, an arbitrary value within [-1, 1] is - returned. If a is a NaN or infinity, a NaN is returned. +

    + Evaluates the sine and cosine functions of + πa at once, and stores the two values + in the x and y elements of the + returned value, respectively. The error bound of the returned values + is max(0.506 ULP, DBL_MIN) if a is in [-1e+9, + 1e+9] for the double-precision function, or max(0.506 ULP, FLT_MIN) if + a is in [-1e+7, 1e+7] for the single-precision + function. If a is a finite value out of this + range, an arbitrary value within [-1, 1] is + returned. If a is a NaN or infinity, a NaN is + returned.


    -

    Sleef_sincos_u35, Sleef_sincosf_u35 - evaluate sine and cosine functions simultaneously with 3.5 ULP error bound

    +

    Sleef_sincospi_u35, Sleef_sincospif_u35, Sleef_sincospil_u35 - evaluate sin( πa ) and cos( πa ) for given a simultaneously with 3.5 ULP error bound

    Synopsis

    #include <sleef.h>

    -Sleef_double2 Sleef_sincos_u35(double a)
    -Sleef_float2 Sleef_sincosf_u35(float a)
    - +Sleef_double2 Sleef_sincospi_u35(double a)
    +Sleef_float2 Sleef_sincospif_u35(float a)
    +Sleef_longdouble2 Sleef_sincospil_u35(long double a)

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - Evaluates the sine and cosine functions of a value in a at a - time, and store the two values in x and y elements in - the returned value, respectively. The error bound of the returned - values is 3.5 ULP if a is in [-1e+14, 1e+14] - for the double-precision function or [-39000, 39000] - for the single-precision function. If a is a finite - value out of this range, an arbitrary value within [-1, 1] is - returned. If a is a NaN or infinity, a NaN is returned. +

    + Evaluates the sine and cosine functions of + πa at once, and stores the two values in the x + and y elements of the returned value, respectively. The error + bound of the returned values is 3.5 ULP if a is in [-1e+9, + 1e+9] for the double-precision function or [-1e+7, 1e+7] for the + single-precision function. If a is a finite value out of this + range, an arbitrary value within [-1, 1] is returned. If a is + a NaN or infinity, a NaN is returned.


    -

    Sleef_sincospi_u05, Sleef_sincospif_u05, Sleef_sincospil_u05 - evaluate sin( πa ) and cos( πa ) for given a simultaneously with 0.505 ULP error bound

    +

    Sleef_sinpi_u05, Sleef_sinpif_u05 - evaluate sin( πa ) for given a with 0.506 ULP error bound

    Synopsis

    #include <sleef.h>

    -Sleef_double2 Sleef_sincospi_u05(double a)
    -Sleef_float2 Sleef_sincospif_u05(float a)
    -Sleef_longdouble2 Sleef_sincospil_u05(long double a)
    +double Sleef_sinpi_u05(double a);
    +float Sleef_sinpif_u05(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - Evaluates the sine and cosine functions of - πa at a time, and store the two values in x - and y elements in the returned value, respectively. The error - bound of the returned values is 0.505 ULP if a is in [-1e+9, - 1e+9] for double-precision function or [-1e+7, 1e+7] for the - single-precision function. If a is a finite value out of this - range, an arbitrary value within [-1, 1] is returned. If a is - a NaN or infinity, a NaN is returned. +

    + These functions evaluate the sine function of + πa. The error bound of the returned value + is max(0.506 ULP, DBL_MIN) if a is in [-1e+9, + 1e+9] for the double-precision function, or max(0.506 ULP, FLT_MIN) if + a is in [-1e+7, 1e+7] for the single-precision + function. If a is a finite value out of this + range, an arbitrary value within [-1, 1] is + returned. If a is a NaN or infinity, a NaN is + returned.


    -

    Sleef_sincospi_u35, Sleef_sincospif_u35, Sleef_sincospil_u35 - evaluate sin( πa ) and cos( πa ) for given a simultaneously with 3.5 ULP error bound

    +

    Sleef_cospi_u05, Sleef_cospif_u05 - evaluate cos( πa ) for given a with 0.506 ULP error bound

    Synopsis

    #include <sleef.h>

    -Sleef_double2 Sleef_sincospi_u35(double a)
    -Sleef_float2 Sleef_sincospif_u35(float a)
    -Sleef_longdouble2 Sleef_sincospil_u35(long double a)
    +double Sleef_cospi_u05(double a);
    +float Sleef_cospif_u05(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - Evaluates the sine and cosine functions of - πa at a time, and store the two values in x - and y elements in the returned value, respectively. The error - bound of the returned values is 3.5 ULP if a is in [-1e+9, - 1e+9] for double-precision function or [-1e+7, 1e+7] for the - single-precision function. If a is a finite value out of this - range, an arbitrary value within [-1, 1] is returned. If a is - a NaN or infinity, a NaN is returned. +

    + These functions evaluate the cosine function of + πa. The error bound of the returned value + is max(0.506 ULP, DBL_MIN) if a is in [-1e+9, + 1e+9] for the double-precision function, or max(0.506 ULP, FLT_MIN) if + a is in [-1e+7, 1e+7] for the single-precision + function. If a is a finite value out of this + range, an arbitrary value within [-1, 1] is + returned. If a is a NaN or infinity, a NaN is + returned. +

    + + +
    + +

    Sleef_tan_u10, Sleef_tanf_u10 - tangent functions with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_tan_u10(double a);
    +float Sleef_tanf_u10(float a);
    + +
    +Link with -lsleef. +

    + +

    Description

    + +

    + These functions evaluate the tangent function of a value + in a. The error bound of the returned value is 1.0 ULP + if a is in [-1e+14, 1e+14] + for the double-precision function or [-5e+9, 5e+9] + for the single-precision function. If a is a finite + value out of this range, an arbitrary value within [-1, 1] is + returned. These functions treat the non-number arguments and return + non-numbers as specified in the C99 specification. These functions + do not set errno nor raise an exception.

    +
    + +

    Sleef_tan_u35, Sleef_tanf_u35 - tangent functions with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_tan_u35(double a);
    +float Sleef_tanf_u35(float a);
    + +
    +Link with -lsleef. +

    + +

    Description

    + +

    + These functions evaluate the tangent function of a value + in a. The error bound of the returned value is 3.5 ULP + if a is in [-1e+14, 1e+14] + for the double-precision function or [-39000, 39000] + for the single-precision function. If a is a finite + value out of this range, an arbitrary value within [-1, 1] is + returned. These functions treat the non-number arguments and return + non-numbers as specified in the C99 specification. These functions + do not set errno nor raise an exception. +

    + +

    Power, exponential, and logarithmic functions

    -

    Sleef_pow_u10, Sleef_powf_u10 - power functions

    +

    Sleef_pow_u10, Sleef_powf_u10 - power functions

    Synopsis

    #include <sleef.h>

    -double Sleef_pow_u10(double x, double y);
    -float Sleef_powf_u10(float x, float y);
    +double Sleef_pow_u10(double x, double y);
    +float Sleef_powf_u10(float x, float y);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - These functions return the value of x raised to the power - of y. The error bound of the returned value is 1.0 ULP. These +

    + These functions return the value of x raised to the power + of y. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception. @@ -410,50 +484,76 @@ Link with -lsleef.


    -

    Sleef_exp_u10, Sleef_expf_u10 - base-e exponential functions

    +

    Sleef_log_u10, Sleef_logf_u10 - natural logarithmic functions with 1.0 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_exp_u10(double a);
    -float Sleef_expf_u10(float a);
    - +double Sleef_log_u10(double a);
    +float Sleef_logf_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - These functions return the value of e raised - to a. The error bound of the returned value is 1.0 ULP. - These functions treat the non-number arguments and return - non-numbers as specified in the C99 specification. These functions - do not set errno nor raise an exception. +

    + These functions return the natural logarithm of a. The error + bound of the returned value is 1.0 ULP. These functions treat the + non-number arguments and return non-numbers as specified in the C99 + specification. These functions do not set errno nor raise an + exception. +

    + +
    + +

    Sleef_log_u35, Sleef_logf_u35 - natural logarithmic functions with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_log_u35(double a);
    +float Sleef_logf_u35(float a);
    + +
    +Link with -lsleef. +

    + +

    Description

    + +

    + These functions return the natural logarithm of a. The error + bound of the returned value is 3.5 ULP. These functions treat the + non-number arguments and return non-numbers as specified in the C99 + specification. These functions do not set errno nor raise an + exception.


    -

    Sleef_log_u10, Sleef_logf_u10 - natural logarithmic functions with 1.0 ULP error bound

    +

    Sleef_log10_u10, Sleef_log10f_u10 - base-10 logarithmic functions

    Synopsis

    #include <sleef.h>

    -double Sleef_log_u10(double a);
    -float Sleef_logf_u10(float a);
    - +double Sleef_log10_u10(double a);
    +float Sleef_log10f_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - These functions return the natural logarithm of a. The error +

    + These functions return the base-10 logarithm of a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an @@ -462,25 +562,25 @@ Link with -lsleef.


    -

    Sleef_log_u35, Sleef_logf_u35 - natural logarithmic functions with 3.5 ULP error bound

    +

    Sleef_log1p_u10, Sleef_log1pf_u10 - logarithm of one plus argument

    Synopsis

    #include <sleef.h>

    -double Sleef_log_u35(double a);
    -float Sleef_logf_u35(float a);
    - +double Sleef_log1p_u10(double a);
    +float Sleef_log1pf_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - These functions return the natural logarithm of a. The error - bound of the returned value is 3.5 ULP. These functions treat the +

    + These functions return the natural logarithm of (1+a). The error + bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception. @@ -488,24 +588,50 @@ Link with -lsleef.


    -

    Sleef_exp2_u10, Sleef_exp2f_u10 - base-2 exponential functions

    +

    Sleef_exp_u10, Sleef_expf_u10 - base-e exponential functions

    Synopsis

    #include <sleef.h>

    -double Sleef_exp2_u10(double a);
    -float Sleef_exp2f_u10(float a);
    - +double Sleef_exp_u10(double a);
    +float Sleef_expf_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - These functions return 2 raised to a. The error bound of the +

    + These functions return the value of e raised + to a. The error bound of the returned value is 1.0 ULP. + These functions treat the non-number arguments and return + non-numbers as specified in the C99 specification. These functions + do not set errno nor raise an exception. +

    + +
    + +

    Sleef_exp2_u10, Sleef_exp2f_u10 - base-2 exponential functions

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_exp2_u10(double a);
    +float Sleef_exp2f_u10(float a);
    + +
    +Link with -lsleef. +

    + +

    Description

    + +

    + These functions return 2 raised to a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an @@ -514,24 +640,24 @@ Link with -lsleef.


    -

    Sleef_exp10_u10, Sleef_exp10f_u10 - base-10 exponential functions

    +

    Sleef_exp10_u10, Sleef_exp10f_u10 - base-10 exponential functions

    Synopsis

    #include <sleef.h>

    -double Sleef_exp10_u10(double a);
    -float Sleef_exp10f_u10(float a);
    - +double Sleef_exp10_u10(double a);
    +float Sleef_exp10f_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - These functions return 10 raised to a. The error bound of the +

    + These functions return 10 raised to a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an @@ -540,25 +666,25 @@ Link with -lsleef.


    -

    Sleef_expm1_u10, Sleef_expm1f_u10 - base-e exponential functions minus 1

    +

    Sleef_expm1_u10, Sleef_expm1f_u10 - base-e exponential functions minus 1

    Synopsis

    #include <sleef.h>

    -double Sleef_expm1_u10(double a);
    -float Sleef_expm1f_u10(float a);
    - +double Sleef_expm1_u10(double a);
    +float Sleef_expm1f_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - These functions return the value one less than e - raised to a. The error bound of the returned value is 1.0 +

    + These functions return the value one less than e + raised to a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception. @@ -566,76 +692,72 @@ Link with -lsleef.


    -

    Sleef_log10_u10, Sleef_log10f_u10 - base-10 logarithmic functions

    +

    Sleef_sqrt_u05, Sleef_sqrtf_u05 - square root function with 0.5001 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_log10_u10(double a);
    -float Sleef_log10f_u10(float a);
    - +double Sleef_sqrt_u05(double x);
    +float Sleef_sqrtf_u05(float x);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - These functions return the base-10 logarithm of a. The error - bound of the returned value is 1.0 ULP. These functions treat the - non-number arguments and return non-numbers as specified in the C99 - specification. These functions do not set errno nor raise an - exception. +

    + These functions return the value as specified in the C99 + specification of sqrt and sqrtf functions. The error bound of the + returned value is 0.5001 ULP. These functions do not set errno nor + raise an exception.


    -

    Sleef_log1p_u10, Sleef_log1pf_u10 - logarithm of one plus argument

    +

    Sleef_sqrtf_u35 - square root function with 3.5 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_log1p_u10(double a);
    -float Sleef_log1pf_u10(float a);
    - + +float Sleef_sqrtf_u35(float x);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - These functions return the natural logarithm of (1+a). The error - bound of the returned value is 1.0 ULP. These functions treat the - non-number arguments and return non-numbers as specified in the C99 - specification. These functions do not set errno nor raise an - exception. +

    + This function returns the value as specified in the C99 + specification of the sqrtf function. The error bound of the + returned value is 3.5 ULP. This function does not set errno nor + raise an exception.


    -

    Sleef_cbrt_u10, Sleef_cbrtf_u10 - cube root function with 1.0 ULP error bound

    +

    Sleef_cbrt_u10, Sleef_cbrtf_u10 - cube root function with 1.0 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_cbrt_u10(double a);
    -float Sleef_cbrtf_u10(float a);
    - +double Sleef_cbrt_u10(double a);
    +float Sleef_cbrtf_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - These functions return the real cube root of a. The error +

    + These functions return the real cube root of a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an @@ -644,52 +766,99 @@ Link with -lsleef.


    -

    Sleef_cbrt_u35, Sleef_cbrtf_u35 - cube root function with 3.5 ULP error bound

    +

    Sleef_cbrt_u35, Sleef_cbrtf_u35 - cube root function with 3.5 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_cbrt_u35(double a);
    -float Sleef_cbrtf_u35(float a);
    - +double Sleef_cbrt_u35(double a);
    +float Sleef_cbrtf_u35(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - These functions return the real cube root of a. The error +

    + These functions return the real cube root of a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.

    +
    + +

    Sleef_hypot_u05, Sleef_hypotf_u05 - 2D Euclidean distance function with 0.5001 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_hypot_u05(double x, double y);
    +float Sleef_hypotf_u05(float x, float y);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    + These functions return the value as specified in the C99 + specification of hypot and hypotf functions. The error bound of the + returned value is 0.5001 ULP. These functions do not set errno nor + raise an exception. +

    + +
    + +

    Sleef_hypot_u35, Sleef_hypotf_u35 - 2D Euclidean distance function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_hypot_u35(double x, double y);
    +float Sleef_hypotf_u35(float x, float y);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    + These functions return the value as specified in the C99 + specification of hypot and hypotf functions. The error bound of the + returned value is 3.5 ULP. These functions do not set errno nor + raise an exception. +

    Inverse Trigonometric Functions

    -

    Sleef_asin_u10, Sleef_asinf_u10 - arc sine functions with 1.0 ULP error bound

    +

    Sleef_asin_u10, Sleef_asinf_u10 - arc sine functions with 1.0 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_asin_u10(double a);
    -float Sleef_asinf_u10(float a);
    - +double Sleef_asin_u10(double a);
    +float Sleef_asinf_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluates the arc sine function of a value - in a. The error bound of the returned value is 1.0 ULP. These + in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception. @@ -697,25 +866,25 @@ Link with -lsleef.


    -

    Sleef_asin_u35, Sleef_asinf_u35 - arc sine functions with 3.5 ULP error bound

    +

    Sleef_asin_u35, Sleef_asinf_u35 - arc sine functions with 3.5 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_asin_u35(double a);
    -float Sleef_asinf_u35(float a);
    - +double Sleef_asin_u35(double a);
    +float Sleef_asinf_u35(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluates the arc sine function of a value - in a. The error bound of the returned value is 3.5 ULP. These + in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception. @@ -723,25 +892,25 @@ Link with -lsleef.


    -

    Sleef_acos_u10, Sleef_acosf_u10 - arc cosine functions with 1.0 ULP error bound

    +

    Sleef_acos_u10, Sleef_acosf_u10 - arc cosine functions with 1.0 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_acos_u10(double a);
    -float Sleef_acosf_u10(float a);
    - +double Sleef_acos_u10(double a);
    +float Sleef_acosf_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluates the arc cosine function of a value - in a. The error bound of the returned value is 1.0 ULP. These + in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception. @@ -749,25 +918,25 @@ Link with -lsleef.


    -

    Sleef_acos_u35, Sleef_acosf_u35 - arc cosine functions with 3.5 ULP error bound

    +

    Sleef_acos_u35, Sleef_acosf_u35 - arc cosine functions with 3.5 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_acos_u35(double a);
    -float Sleef_acosf_u35(float a);
    - +double Sleef_acos_u35(double a);
    +float Sleef_acosf_u35(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluates the arc cosine function of a value - in a. The error bound of the returned value is 3.5 ULP. These + in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception. @@ -775,25 +944,25 @@ Link with -lsleef.


    -

    Sleef_atan_u10, Sleef_atanf_u10 - arc tangent functions with 1.0 ULP error bound

    +

    Sleef_atan_u10, Sleef_atanf_u10 - arc tangent functions with 1.0 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_atan_u10(double a);
    -float Sleef_atanf_u10(float a);
    - +double Sleef_atan_u10(double a);
    +float Sleef_atanf_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluates the arc tangent function of a value - in a. The error bound of the returned value is 1.0 ULP. These + in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception. @@ -801,25 +970,25 @@ Link with -lsleef.


    -

    Sleef_atan_u35, Sleef_atanf_u35 - arc tangent functions with 3.5 ULP error bound

    +

    Sleef_atan_u35, Sleef_atanf_u35 - arc tangent functions with 3.5 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_atan_u35(double a);
    -float Sleef_atanf_u35(float a);
    - +double Sleef_atan_u35(double a);
    +float Sleef_atanf_u35(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluates the arc tangent function of a value - in a. The error bound of the returned value is 3.5 ULP. These + in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception. @@ -827,54 +996,55 @@ Link with -lsleef.


    -

    Sleef_atan2_u10, Sleef_atan2f_u10 - arc tangent functions of two variables with 1.0 ULP error bound

    +

    Sleef_atan2_u10, Sleef_atan2f_u10 - arc tangent functions of two variables with 1.0 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_atan2_u10(double y, double x);
    -float Sleef_atan2f_u10(float y, float x);
    - +double Sleef_atan2_u10(double y, double x);
    +float Sleef_atan2f_u10(float y, float x);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluates the arc tangent function - of y/x. The quadrant of the result is determined - according to the signs of x and y. The error bound of - the returned value is 1.0 ULP. These functions treat the non-number - arguments and return non-numbers as specified in the C99 - specification. These functions do not set errno nor raise an - exception. + of (y / x). The quadrant of the + result is determined according to the signs of x + and y. The error bounds of the returned values + are max(1.0 ULP, DBL_MIN) and max(1.0 ULP, FLT_MIN), + respectively. These functions treat the non-number arguments and + return non-numbers as specified in the C99 specification. These + functions do not set errno nor raise an exception.


    -

    Sleef_atan2_u35, Sleef_atan2f_u35 - arc tangent functions of two variables with 3.5 ULP error bound

    +

    Sleef_atan2_u35, Sleef_atan2f_u35 - arc tangent functions of two variables with 3.5 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_atan2_u35(double y, double x);
    -float Sleef_atan2f_u35(float y, float x);
    - +double Sleef_atan2_u35(double y, double x);
    +float Sleef_atan2f_u35(float y, float x);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluates the arc tangent function - of y/x. The quadrant of the result is determined - according to the signs of x and y. The error bound of + of (y / x). The quadrant of the result is determined + according to the signs of x and y. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an @@ -883,27 +1053,27 @@ Link with -lsleef.

    Hyperbolic functions and inverse hyperbolic functions

    -

    Sleef_sinh_u10, Sleef_sinhf_u10 - hyperbolic sine functions

    +

    Sleef_sinh_u10, Sleef_sinhf_u10 - hyperbolic sine functions

    Synopsis

    #include <sleef.h>

    -double Sleef_sinh_u10(double a);
    -float Sleef_sinhf_u10(float a);
    - +double Sleef_sinh_u10(double a);
    +float Sleef_sinhf_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluates the hyperbolic sine function of a value - in a. The error bound of the returned value is 1.0 ULP - if a is in [-709, 709] for the double-precision function or - [-88.5, 88.5] for the single-precision function . If a is a + in a. The error bound of the returned value is 1.0 ULP + if a is in [-709, 709] for the double-precision function or + [-88.5, 88.5] for the single-precision function . If a is a finite value out of this range, infinity with a correct sign or a correct value with 1.0 ULP error bound is returned. These functions treat the non-number arguments and return non-numbers as specified @@ -913,27 +1083,27 @@ Link with -lsleef.


    -

    Sleef_cosh_u10, Sleef_coshf_u10 - hyperbolic cosine functions

    +

    Sleef_cosh_u10, Sleef_coshf_u10 - hyperbolic cosine functions

    Synopsis

    #include <sleef.h>

    -double Sleef_cosh_u10(double a);
    -float Sleef_coshf_u10(float a);
    - +double Sleef_cosh_u10(double a);
    +float Sleef_coshf_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluates the hyperbolic cosine function of a value - in a. The error bound of the returned value is 1.0 ULP - if a is in [-709, 709] for the double-precision function or - [-88.5, 88.5] for the single-precision function . If a is a + in a. The error bound of the returned value is 1.0 ULP + if a is in [-709, 709] for the double-precision function or + [-88.5, 88.5] for the single-precision function . If a is a finite value out of this range, infinity with a correct sign or a correct value with 1.0 ULP error bound is returned. These functions treat the non-number arguments and return non-numbers as specified @@ -943,25 +1113,25 @@ Link with -lsleef.


    -

    Sleef_tanh_u10, Sleef_tanhf_u10 - hyperbolic tangent functions

    +

    Sleef_tanh_u10, Sleef_tanhf_u10 - hyperbolic tangent functions

    Synopsis

    #include <sleef.h>

    -double Sleef_tanh_u10(double a);
    -float Sleef_tanhf_u10(float a);
    - +double Sleef_tanh_u10(double a);
    +float Sleef_tanhf_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluates the hyperbolic tangent function of a value - in a. The error bound of the returned value is 1.0 ULP for + in a. The error bound of the returned value is 1.0 ULP for the double-precision function or 1.0001 ULP for the single-precision function. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions @@ -970,28 +1140,28 @@ Link with -lsleef.


    -

    Sleef_asinh_u10, Sleef_asinhf_u10 - inverse hyperbolic sine functions

    +

    Sleef_asinh_u10, Sleef_asinhf_u10 - inverse hyperbolic sine functions

    Synopsis

    #include <sleef.h>

    -double Sleef_asinh_u10(double a);
    -float Sleef_asinhf_u10(float a);
    - +double Sleef_asinh_u10(double a);
    +float Sleef_asinhf_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluates the inverse hyperbolic sine function of a - value in a. The error bound of the returned value is 1.0 ULP - if a is in [-1.34e+154, 1.34e+154] for the double-precision - function or 1.001 ULP if a is in [-1.84e+19, 1.84e+19] for - the single-precision function . If a is a finite value out of + value in a. The error bound of the returned value is 1.0 ULP + if a is in [-1.34e+154, 1.34e+154] for the double-precision + function or 1.001 ULP if a is in [-1.84e+19, 1.84e+19] for + the single-precision function . If a is a finite value out of this range, infinity with a correct sign or a correct value with 1.0 ULP error bound is returned. These functions treat the non-number arguments and return non-numbers as specified in the C99 @@ -1001,28 +1171,28 @@ Link with -lsleef.


    -

    Sleef_acosh_u10, Sleef_acoshf_u10 - inverse hyperbolic cosine functions

    +

    Sleef_acosh_u10, Sleef_acoshf_u10 - inverse hyperbolic cosine functions

    Synopsis

    #include <sleef.h>

    -double Sleef_acosh_u10(double a);
    -float Sleef_acoshf_u10(float a);
    - +double Sleef_acosh_u10(double a);
    +float Sleef_acoshf_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluate the inverse hyperbolic cosine function of - a value in a. The error bound of the returned value is 1.0 - ULP if a is in [-1.34e+154, 1.34e+154] for the - double-precision function or 1.001 ULP if a is in [-1.84e+19, - 1.84e+19] for the single-precision function . If a is a + a value in a. The error bound of the returned value is 1.0 + ULP if a is in [-1.34e+154, 1.34e+154] for the + double-precision function or 1.001 ULP if a is in [-1.84e+19, + 1.84e+19] for the single-precision function. If a is a finite value out of this range, infinity with a correct sign or a correct value with 1.0 ULP error bound is returned. These functions treat the non-number arguments and return non-numbers as specified @@ -1032,25 +1202,25 @@ Link with -lsleef.


    -

    Sleef_atanh_u10, Sleef_atanhf_u10 - inverse hyperbolic tangent functions

    +

    Sleef_atanh_u10, Sleef_atanhf_u10 - inverse hyperbolic tangent functions

    Synopsis

    #include <sleef.h>

    -double Sleef_atanh_u10(double a);
    -float Sleef_atanhf_u10(float a);
    - +double Sleef_atanh_u10(double a);
    +float Sleef_atanhf_u10(float a);
    +
    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    These functions evaluate the inverse hyperbolic tangent function of - a value in a. The error bound of the returned value is 1.0 + a value in a. The error bound of the returned value is 1.0 ULP for the double-precision function or 1.0001 ULP for the single-precision function. These functions treat the non-number arguments and return non-numbers as specified in the C99 @@ -1058,410 +1228,532 @@ Link with -lsleef. exception.

    -

    Other functions

    +

    Error and gamma functions

    -

    Sleef_fabs, Sleef_fabsf - absolute value

    +

    Sleef_erf_u10, Sleef_erff_u10 - error functions with 1.0 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_fabs(double x);
    -float Sleef_fabsf(float x);
    +double Sleef_erf_u10(double x);
    +float Sleef_erff_u10(float x);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    + These functions return the value as specified in the C99 + specification of erf and erff functions. The error bound of the + returned value is 1.0 ULP. These functions do not set errno nor + raise an exception.


    -

    Sleef_fmax, Sleef_fmaxf - maximum of two numbers

    +

    Sleef_erfc_u15, Sleef_erfcf_u15 - complementary error functions

    Synopsis

    #include <sleef.h>

    -double Sleef_fmax(double x, double y);
    -float Sleef_fmaxf(float x, float y);
    +double Sleef_erfc_u15(double x);
    +float Sleef_erfcf_u15(float x);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    + These functions return the value as specified in the C99 + specification of erfc and erfcf functions. The error bound of the + returned value for the DP function is max(1.5 ULP, DBL_MIN) if the + argument is less than 26.2, and max(2.5 ULP, DBL_MIN) otherwise. For the SP + function, the error bound is max(1.5 ULP, FLT_MIN). These functions + do not set errno nor raise an exception.


    -

    Sleef_fmin, Sleef_fminf - minimum of two numbers

    +

    Sleef_tgamma_u10, Sleef_tgammaf_u10 - gamma functions with 1.0 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_fmin(double x, double y);
    -float Sleef_fminf(float x, float y);
    +double Sleef_tgamma_u10(double x);
    +float Sleef_tgammaf_u10(float x);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    + These functions return the value as specified in the C99 + specification of tgamma and tgammaf functions. The error bound of the + returned value is 1.0 ULP. These functions do not set errno nor + raise an exception.


    -

    Sleef_fdim, Sleef_fdimf - positive difference

    +

    Sleef_lgamma_u10, Sleef_lgammaf_u10 - log gamma functions with 1.0 ULP error bound

    Synopsis

    #include <sleef.h>

    -double Sleef_fdim(double x, double y);
    -float Sleef_fdimf(float x, float y);
    +double Sleef_lgamma_u10(double x);
    +float Sleef_lgammaf_u10(float x);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    + These functions return the value as specified in the C99 + specification of lgamma and lgammaf functions. The error bound of + the returned value is 1.0 ULP if the argument is positive. If the + argument is larger than 2e+305 for the DP function and 4e+36 for the + SP function, it may return infinity instead of the correct value. + The error bound is max(1 ULP, 1e-15) for the DP function and max(1 + ULP, 1e-8) for the SP function, if the argument is negative. + These functions do not set errno nor raise an exception. +

    + + +

    Nearest integer functions

    + +

    Sleef_trunc, Sleef_truncf - round to integer towards zero

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_trunc(double x);
    +float Sleef_truncf(float x);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +These functions return the value as specified in the C99 specification +of trunc and truncf functions. These functions do not set errno nor +raise an exception.


    -

    Sleef_copysign, Sleef_copysignf - copy sign of a number

    +

    Sleef_floor, Sleef_floorf - round to integer towards minus infinity

    Synopsis

    #include <sleef.h>

    -double Sleef_copysign(double x, double y);
    -float Sleef_copysignf(float x, float y);
    +double Sleef_floor(double x);
    +float Sleef_floorf(float x);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    +These functions return the value as specified +in the C99 specification of floor and floorf functions. These +functions do not set errno nor raise an exception.


    -

    Sleef_nextafter, Sleef_nextafterf - find the next representable FP value

    +

    Sleef_ceil, Sleef_ceilf - round to integer towards plus infinity

    Synopsis

    #include <sleef.h>

    -double Sleef_nextafter(double x, double y);
    -float Sleef_nextafterf(float x, float y);
    +double Sleef_ceil(double x);
    +float Sleef_ceilf(float x);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    +These functions return the value as specified in +the C99 specification of ceil and ceilf functions. These functions +do not set errno nor raise an exception.


    -

    Sleef_ldexp, Sleef_ldexpf - multiply by integral power of 2

    +

    Sleef_round, Sleef_roundf - round to integer away from zero

    Synopsis

    #include <sleef.h>

    -double Sleef_ldexp(double m, int x);
    -float Sleef_ldexpf(float m, int x);
    +double Sleef_round(double x);
    +float Sleef_roundf(float x);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    - These functions return the result of multiplying m by 2 - raised to the power x. These functions treat the non-number - arguments and return non-numbers as specified in the C99 - specification. These functions do not set errno nor raise an - exception. +

    + These functions return the value as specified in the C99 + specification of round and roundf functions. These functions do not + set errno nor raise an exception.


    -

    Sleef_frfrexp, Sleef_frfrexpf - fractional component of an FP number

    +

    Sleef_rint, Sleef_rintf - round to integer, ties round to even

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_rint(double x);
    +float Sleef_rintf(float x);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    + These functions return the value as specified in the C99 + specification of rint and rintf functions. These functions do not + set errno nor raise an exception. +

    + +

    Other functions

    + +

    Sleef_fma, Sleef_fmaf - fused multiply and accumulate

    Synopsis

    #include <sleef.h>

    -double Sleef_frfrexp(double x);
    -float Sleef_frfrexpf(float x);
    +double Sleef_fma(double x, double y, double z);
    +float Sleef_fmaf(float x, float y, float z);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    +These functions compute (x × y + z) without +rounding, and then return the rounded value of the result. These +functions may return infinity with a correct sign if the absolute +value of the correct return value is greater than 1e+300 and 1e+33, +respectively. The error bounds of the returned values are 0.5 ULP and +max(0.50001 ULP, FLT_MIN), respectively.


    -

    Sleef_expfrexp, Sleef_expfrexpf - exponent of an FP number

    +

    Sleef_fmod, Sleef_fmodf - FP remainder

    Synopsis

    #include <sleef.h>

    -int Sleef_expfrexp(double x);
    -int Sleef_expfrexpf(float x);
    +double Sleef_fmod(double x, double y);
    +float Sleef_fmodf(float x, float y);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    + These functions return the value as specified in the C99 + specification of fmod and fmodf functions, if |x / y| is + smaller than 1e+300 and 1e+38, respectively. The returned value is + undefined, otherwise. These functions do not set errno nor raise an + exception.


    -

    Sleef_modf, Sleef_modff - integral and fractional value of FP number

    +

    Sleef_ldexp, Sleef_ldexpf - multiply by integral power of 2

    Synopsis

    #include <sleef.h>

    -Sleef_double2 Sleef_modf(double x);
    -Sleef_float2 Sleef_modff(float x);
    +double Sleef_ldexp(double m, int x);
    +float Sleef_ldexpf(float m, int x);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    + These functions return the result of multiplying m by 2 + raised to the power x. These functions treat the non-number + arguments and return non-numbers as specified in the C99 + specification. These functions do not set errno nor raise an + exception.


    -

    Sleef_trunc, Sleef_truncf - round to integer towards zero

    +

    Sleef_frfrexp, Sleef_frfrexpf - fractional component of an FP number

    Synopsis

    #include <sleef.h>

    -double Sleef_trunc(double x);
    -float Sleef_truncf(float x);
    +double Sleef_frfrexp(double x);
    +float Sleef_frfrexpf(float x);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    + These functions return the value as specified in the C99 + specification of frexp and frexpf functions. These functions do not + set errno nor raise an exception.


    -

    Sleef_floor, Sleef_floorf - round to integer towards minus infinity

    +

    Sleef_expfrexp, Sleef_expfrexpf - exponent of an FP number

    Synopsis

    #include <sleef.h>

    -double Sleef_floor(double x);
    -float Sleef_floorf(float x);
    +int Sleef_expfrexp(double x);
    +int Sleef_expfrexpf(float x);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    + These functions return the exponent returned by frexp and frexpf + functions as specified in the C99 specification. These functions do + not set errno nor raise an exception.


    -

    Sleef_ceil, Sleef_ceilf - round to integer towards plus infinity

    +

    Sleef_ilogb, Sleef_ilogbf - integer exponent of an FP number

    Synopsis

    #include <sleef.h>

    -double Sleef_ceil(double x);
    -float Sleef_ceilf(float x);
    +int Sleef_ilogb(double x);
    +int Sleef_ilogbf(float x);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    + These functions return the value as specified in the C99 + specification of ilogb and ilogbf functions. These functions do not + set errno nor raise an exception.


    -

    Sleef_round, Sleef_roundf - round to integer away from zero

    +

    Sleef_modf, Sleef_modff - integral and fractional value of FP number

    Synopsis

    #include <sleef.h>

    -double Sleef_round(double x);
    -float Sleef_roundf(float x);
    +Sleef_double2 Sleef_modf(double x);
    +Sleef_float2 Sleef_modff(float x);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    + These functions return the value as specified in the C99 + specification of modf and modff functions. These functions do not + set errno nor raise an exception.


    -

    Sleef_rint, Sleef_rintf - round to integer, ties round to even

    +

    Sleef_fabs, Sleef_fabsf - absolute value

    Synopsis

    #include <sleef.h>

    -double Sleef_rint(double x);
    -float Sleef_rintf(float x);
    +double Sleef_fabs(double x);
    +float Sleef_fabsf(float x);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    + These functions return the value as specified in the C99 + specification of fabs and fabsf functions. These functions do not + set errno nor raise an exception.


    -

    Sleef_fma, Sleef_fmaf - fused multiply and accumulate

    +

    Sleef_fmax, Sleef_fmaxf - maximum of two numbers

    Synopsis

    #include <sleef.h>

    -double Sleef_fma(double x, double y, double z);
    -float Sleef_fmaf(float x, float y, float z);
    +double Sleef_fmax(double x, double y);
    +float Sleef_fmaxf(float x, float y);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    +These functions return the value as specified in +the C99 specification of fmax and fmaxf functions. These functions +do not set errno nor raise an exception.


    -

    Sleef_sqrt_u05, Sleef_sqrtf_u05 - square root

    +

    Sleef_fmin, Sleef_fminf - minimum of two numbers

    Synopsis

    #include <sleef.h>

    -double Sleef_sqrt_u05(double x);
    -float Sleef_sqrt_u05(float x);
    +double Sleef_fmin(double x, double y);
    +float Sleef_fminf(float x, float y);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    +These functions return the value as specified in +the C99 specification of fmin and fminf functions. These functions +do not set errno nor raise an exception.


    -

    Sleef_hypot_u05, Sleef_hypotf_u05 - find hypotenuse when other two sides are provided with 0.5 ULP error bound

    +

    Sleef_fdim, Sleef_fdimf - positive difference

    Synopsis

    #include <sleef.h>

    -double Sleef_hypot_u05(double x, double y);
    -float Sleef_hypot_u05(float x, float y);
    +double Sleef_fdim(double x, double y);
    +float Sleef_fdimf(float x, float y);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    +These functions return the value as specified in +the C99 specification of fdim and fdimf functions. These functions +do not set errno nor raise an exception.


    -

    Sleef_hypot_u35, Sleef_hypotf_u35 - find hypotenuse when other two sides are provided with 3.5 ULP error bound

    +

    Sleef_copysign, Sleef_copysignf - copy sign of a number

    Synopsis

    #include <sleef.h>

    -double Sleef_hypot_u35(double x, double y);
    -float Sleef_hypot_u35(float x, float y);
    +double Sleef_copysign(double x, double y);
    +float Sleef_copysignf(float x, float y);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    +These functions return the value as specified in +the C99 specification of copysign and copysignf functions. These functions +do not set errno nor raise an exception.


    -

    Sleef_fmod, Sleef_fmodf - FP remainder

    +

    Sleef_nextafter, Sleef_nextafterf - find the next representable FP value

    Synopsis

    #include <sleef.h>

    -double Sleef_fmod(double x, double y);
    -float Sleef_fmodf(float x, float y);
    +double Sleef_nextafter(double x, double y);
    +float Sleef_nextafterf(float x, float y);

    -Link with -lsleef. +Link with -lsleef.

    Description

    -

    +

    +These functions return the value as specified in +the C99 specification of nextafter and nextafterf functions. These functions +do not set errno nor raise an exception.

    + + diff --git a/doc/html/sleef.css b/doc/html/sleef.css new file mode 100644 index 00000000..e9d0b80e --- /dev/null +++ b/doc/html/sleef.css @@ -0,0 +1,63 @@ +p.funcname { + font-family: "Times New Roman", times, serif; + font-size:1.2em; + font-weight: normal; + + margin-top: 0.3cm; + margin-bottom: 0.3cm; + + margin-left: 0.0cm; + text-indent:0pt; +} + +p.header { + font-family: arial, sansserif; + font-size:1.1em; + font-weight: bold; + margin-top: 1.0cm; + margin-bottom: 0.3cm; + + margin-left: 0.0cm; + text-indent:0pt; +} + +p.synopsis { + font-family: Ubuntu, arial, sansserif; + font-size:1.0em; + font-weight: normal; + margin-top: 0.3cm; + margin-bottom: 0.3cm; + margin-left: 0.6cm; + margin-right: 0.2cm; + padding-left: 0.1cm; + padding-right: 0.1cm; + text-indent:0pt; +} + +pre.white { + font-family: Ubuntu, arial, sansserif; +} + +i.var { + font-family: "Times New Roman", times, serif; + font-weight: bold; + color:#a00000; +} + +i.math { + font-family: "Times New Roman", times, serif; + font-weight: normal; + font-style:normal; +} + +b.func { + font-family: Ubuntu, arial, sansserif; + font-weight: normal; + color:#008040; +} + +b.type { + font-family: Ubuntu, arial, sansserif; + font-weight: normal; + color:#0050a0; +} diff --git a/doc/html/sleeflogo2.png b/doc/html/sleeflogo2.png new file mode 100644 index 00000000..4b60764f Binary files /dev/null and b/doc/html/sleeflogo2.png differ diff --git a/doc/html/texlike.css b/doc/html/texlike.css new file mode 100644 index 00000000..943a2c31 --- /dev/null +++ b/doc/html/texlike.css @@ -0,0 +1,250 @@ +body { + margin-left: 1.5cm; + border-left: 0.0cm; + padding-left: 0.0cm; + + margin-right: 1.5cm; + border-right: 0.0cm; + padding-right: 0.0cm; + + margin-top: 1.0cm; + padding-top: 0.1cm; + margin-bottom: 2.0cm; + padding-bottom: 0.1cm; + + font-size:12.5pt; +} + +h1 { + font-family: arial, sansserif; + font-weight: bold; + font-style: italic; + font-size:1.8em; + + margin-top: 0.8cm; + + margin-left: 
0.0cm; +} + +h2 { + font-family: arial, sansserif; + font-weight: bold; + font-style: normal; + font-size:1.6em; + + margin-top: 1.5cm; + margin-bottom: 0.5cm; + + margin-left: 0.0cm; +} + +h3 { + font-family: arial, sansserif; + font-weight: bold; + font-style: normal; + font-size:1.2em; + + margin-top: 0.9cm; + margin-bottom: 0.5cm; + + margin-left: 0.0cm; +} + +h4 { + font-family: arial, sansserif; + font-weight: bold; + font-style: normal; + + margin-top: 0.7cm; + margin-left: 0.0cm; + margin-bottom: 0.2cm; + padding-bottom: 0.0cm; +} + +p { + font-family: "Times New Roman", times, serif; + font-weight: normal; + font-style: normal; + + margin-top: 0.0cm; + padding-top: 0.0cm; + margin-bottom: 0.0cm; + padding-bottom: 0.0cm; + + text-indent:16pt; + + margin-left: 0.0cm; +} + +p.noindent { + text-indent:0pt; +} + +span.normal { + font-family: "Times New Roman", times, serif; + font-weight: normal; + font-style: normal; +} + +ul { + list-style-type: disc; + + font-family: "Times New Roman", times, serif; + font-weight: normal; + font-style: normal; + + margin-top: 0.0cm; + padding-top: 0.0cm; + margin-bottom: 0.0cm; + padding-bottom: 0.0cm; + margin-left: 0.8cm; + padding-left: 0.0cm; +} + +ul.circle { + list-style-type: circle; +} + +ul.square { + list-style-type: square; +} + +ul.none { + list-style-type: none; + margin-left: 0.0cm; +} + +ol { + font-family: "Times New Roman", serif; + font-weight: normal; + font-style: normal; + + margin-left: 0.8cm; + padding-left: 0.0cm; +} + + +a:link { + margin-left: 0cm; + color: black; + text-decoration: none; +} + +a:visited { + margin-left: 0cm; + color: black; + text-decoration: none; +} + +a:hover { + margin-left: 0cm; + color: black; + text-decoration: underline; +} + +a:article { + margin-left: 0cm; + color: black; + text-decoration: none; +} + +a.underlined:link { + text-decoration: underline; +} + +a.nothing:hover { + text-decoration: none; +} + +i { + font-family: "Times New Roman", times, serif; + font-weight: 
normal; +} + +b { + font-family: arial, sansserif; + font-weight: normal; +} + +hr { + margin-top: 0.8cm; + margin-bottom: 0.5cm; + padding-top: 0cm; + padding-bottom: 0cm; +} + +// + +table { + margin-left:auto; + margin-right:auto; +} + +td.caption { font-family: times, serif; color: black; } +td { font-family: times, serif; } + +table.lt { border-collapse: collapse; border-style: none; } +td.lt- { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-width: 1px; border-style: none; padding-left=0.2cm; padding-right=0.2cm; } +td.lt-r { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-right-style: solid; border-width: 1px; border-color: black; } +td.lt-l { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-left-style: solid; border-width: 1px; border-color: black; } +td.lt-lr { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-right-style: solid; border-left-style: solid; border-width: 1px; border-color: black; } +td.lt-b { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-bottom-style: solid; border-width: 1px; border-color: black; } +td.lt-hl { margin: 0px; border-style: none; border-bottom-style: solid; border-width: 1px; border-color: black; height: 2px; } +td.lt-bl { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-bottom-style: solid; border-left-style: solid; border-width: 1px; border-color: black; } +td.lt-br { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-bottom-style: solid; border-right-style: solid; border-width: 1px; border-color: black; } +td.lt-blr { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-bottom-style: solid; border-left-style: solid; border-right-style: solid; border-width: 1px; border-color: black; } + 
+// + +pre.command { + margin: 0.5cm 1.5cm 0.5cm 1.5cm; + padding: 1em; + border:0px; + font-family: arial, sansserif; + font-size: 12pt; + font-weight: normal; + background-color:#fbf2ef; + box-shadow: 3px 3px 3px #aaa; +} + +pre.white { + font-family: arial, sansserif; + font-size:1.0em; + font-weight: normal; + background-color:white; + overflow: auto; + + margin: 0.6cm; + margin-top: 1.0cm; + padding: 0.1cm; +} + +pre.code { + font-family:arial, sansserif; + font-size:9pt; + font-weight: normal; + background-color:#fbf8ef; + box-shadow: 3px 3px 3px #aaa; + overflow: auto; + + margin: 1.0cm 1.5cm 1.0cm 1.5cm; + padding: 1em 1em 2em 1.1em; + counter-reset: line; +} + +code { + font-family:"Consolas", arial, sansserif; + font-size:9pt; + counter-increment:line; +} + +code:before { + content: counter(line); + display: inline-block; + border-right: 1px solid #c0a0a0; + padding: 0 0.5em 0 0.5em; + margin-right: 0.5em; + min-width: 2em; + text-align: right; + font-size:9pt; +} diff --git a/doc/html/x86.xhtml b/doc/html/x86.xhtml index 704f80ea..77b81608 100644 --- a/doc/html/x86.xhtml +++ b/doc/html/x86.xhtml @@ -4,187 +4,3767 @@ - -SLEEF API Reference + + + + +SLEEF Documentation -

    SLEEF API Reference - Math library reference

    +

    SLEEF Documentation - Math library reference

    Table of contents

    -
      + -

      Data types and functions for x86 architectures

      +

      Function naming convention

      -

      Data types

      +

      +The naming convention for the vectorized math functions is shown in +Fig. 3.1. The function name is a concatenation of the following items, +in this order. +

      + +
      + +
        +
      • String "Sleef_".
      • +
      • Name of the corresponding double precision function in math.h.
      • +
      • Data type specifier of a vector element, "d" and "f" for double and single precision functions, respectively.
      • +
      • The number of elements in a vector.
      • +
      • Accuracy specifier, a concatenation of the string "_u" and 10 times + the maximum error for the typical input domain in ULP (two digits). + This field is omitted from the name if the function is expected to + always return the correctly rounded value.
      • +
      • Vector extension specifier.
      • +
          +
        • (Nothing) : Dispatcher + automatically chooses the fastest available vector extension
        • +
        • sse2 : SSE2
        • +
        • sse4 : SSE4.1
        • +
        • avx2128 : AVX2+FMA3 instructions utilized for 128 bit computation
        • +
        • avx : AVX
        • +
        • fma4 : AMD FMA4
        • +
        • avx2 : AVX2+FMA3
        • +
        • avx512f : AVX512F
        • +
        +
      + +

      + naming convention +
      + Fig. 3.1: Naming convention of vectorized functions +

      + +

      Data types for x86 architecture

      -

      Sleef___m128_2

      +

      Sleef___m128_2

      Description

      -

      -Sleef___m128_2 is a data type for storing two __m128 values, -which is defined in <sleef.h> as follows: +

      +Sleef___m128_2 is a data type for storing two __m128 values, +which is defined in sleef.h as follows:

      -
      typedef struct {
      +
      typedef struct {
         __m128 x, y;
       } Sleef___m128_2;
       

      -

      Sleef___m128d_2

      +

      Sleef___m128d_2

      Description

      -

      -Sleef___m128d_2 is a data type for storing two __m128d values, -which is defined in <sleef.h> as follows: +

      +Sleef___m128d_2 is a data type for storing two __m128d values, +which is defined in sleef.h as follows:

      -
      typedef struct {
      +
      typedef struct {
         __m128d x, y;
       } Sleef___m128d_2;
       

      -

      Sleef___m256_2

      +

      Sleef___m256_2

      Description

      -

      -Sleef___m256_2 is a data type for storing two __m256 values, -which is defined in <sleef.h> as follows: +

      +Sleef___m256_2 is a data type for storing two __m256 values, +which is defined in sleef.h as follows:

      -
      typedef struct {
      +
      typedef struct {
         __m256 x, y;
       } Sleef___m256_2;
       

      -

      Sleef___m256d_2

      +

      Sleef___m256d_2

      Description

      -

      -Sleef___m256d_2 is a data type for storing two __m256d values, -which is defined in <sleef.h> as follows: +

      +Sleef___m256d_2 is a data type for storing two __m256d values, +which is defined in sleef.h as follows:

      -
      typedef struct {
      +
      typedef struct {
         __m256d x, y;
       } Sleef___m256d_2;
       

      -

      Sleef___m512_2

      +

      Sleef___m512_2

      Description

      -

      -Sleef___m512_2 is a data type for storing two __m512 values, -which is defined in <sleef.h> as follows: +

      +Sleef___m512_2 is a data type for storing two __m512 values, +which is defined in sleef.h as follows:

      -
      typedef struct {
      +
      typedef struct {
         __m512 x, y;
       } Sleef___m512_2;
       

      -

      Sleef___m512d_2

      +

      Sleef___m512d_2

      Description

      -

      -Sleef___m512d_2 is a data type for storing two __m512d values, -which is defined in <sleef.h> as follows: +

      +Sleef___m512d_2 is a data type for storing two __m512d values, +which is defined in sleef.h as follows:

      -
      typedef struct {
      +
      typedef struct {
         __m512d x, y;
       } Sleef___m512d_2;
       
      -

      SSE2 Double-Precision Functions

      +

      Trigonometric Functions

      + +

      Vectorized double precision sine functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_sind2_u10(__m128d a);
      +__m128d Sleef_sind2_u10sse2(__m128d a);
      +__m128d Sleef_sind2_u10sse4(__m128d a);
      +__m128d Sleef_sind2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_sind4_u10(__m256d a);
      +__m256d Sleef_sind4_u10avx(__m256d a);
      +__m256d Sleef_sind4_u10fma4(__m256d a);
      +__m256d Sleef_sind4_u10avx2(__m256d a);
      +
      +__m512d Sleef_sind8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sin_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision sine functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_sinf4_u10(__m128 a);
      +__m128 Sleef_sinf4_u10sse2(__m128 a);
      +__m128 Sleef_sinf4_u10sse4(__m128 a);
      +__m128 Sleef_sinf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_sinf8_u10(__m256 a);
      +__m256 Sleef_sinf8_u10avx(__m256 a);
      +__m256 Sleef_sinf8_u10fma4(__m256 a);
      +__m256 Sleef_sinf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_sinf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sinf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision sine functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_sind2_u35(__m128d a);
      +__m128d Sleef_sind2_u35sse2(__m128d a);
      +__m128d Sleef_sind2_u35sse4(__m128d a);
      +__m128d Sleef_sind2_u35avx2128(__m128d a);
      +
      +__m256d Sleef_sind4_u35(__m256d a);
      +__m256d Sleef_sind4_u35avx(__m256d a);
      +__m256d Sleef_sind4_u35fma4(__m256d a);
      +__m256d Sleef_sind4_u35avx2(__m256d a);
      +
      +__m512d Sleef_sind8_u35avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sin_u35 with the same accuracy specification. +

      + +
      +

      Vectorized single precision sine functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_sinf4_u35(__m128 a);
      +__m128 Sleef_sinf4_u35sse2(__m128 a);
      +__m128 Sleef_sinf4_u35sse4(__m128 a);
      +__m128 Sleef_sinf4_u35avx2128(__m128 a);
      +
      +__m256 Sleef_sinf8_u35(__m256 a);
      +__m256 Sleef_sinf8_u35avx(__m256 a);
      +__m256 Sleef_sinf8_u35fma4(__m256 a);
      +__m256 Sleef_sinf8_u35avx2(__m256 a);
      +
      +__m512 Sleef_sinf16_u35avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sinf_u35 with the same accuracy specification. +

      + +
      +

      Vectorized double precision cosine functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_cosd2_u10(__m128d a);
      +__m128d Sleef_cosd2_u10sse2(__m128d a);
      +__m128d Sleef_cosd2_u10sse4(__m128d a);
      +__m128d Sleef_cosd2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_cosd4_u10(__m256d a);
      +__m256d Sleef_cosd4_u10avx(__m256d a);
      +__m256d Sleef_cosd4_u10fma4(__m256d a);
      +__m256d Sleef_cosd4_u10avx2(__m256d a);
      +
      +__m512d Sleef_cosd8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_cos_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision cosine functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_cosf4_u10(__m128 a);
      +__m128 Sleef_cosf4_u10sse2(__m128 a);
      +__m128 Sleef_cosf4_u10sse4(__m128 a);
      +__m128 Sleef_cosf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_cosf8_u10(__m256 a);
      +__m256 Sleef_cosf8_u10avx(__m256 a);
      +__m256 Sleef_cosf8_u10fma4(__m256 a);
      +__m256 Sleef_cosf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_cosf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_cosf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision cosine functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_cosd2_u35(__m128d a);
      +__m128d Sleef_cosd2_u35sse2(__m128d a);
      +__m128d Sleef_cosd2_u35sse4(__m128d a);
      +__m128d Sleef_cosd2_u35avx2128(__m128d a);
      +
      +__m256d Sleef_cosd4_u35(__m256d a);
      +__m256d Sleef_cosd4_u35avx(__m256d a);
      +__m256d Sleef_cosd4_u35fma4(__m256d a);
      +__m256d Sleef_cosd4_u35avx2(__m256d a);
      +
      +__m512d Sleef_cosd8_u35avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_cos_u35 with the same accuracy specification. +

      + +
      +

      Vectorized single precision cosine functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_cosf4_u35(__m128 a);
      +__m128 Sleef_cosf4_u35sse2(__m128 a);
      +__m128 Sleef_cosf4_u35sse4(__m128 a);
      +__m128 Sleef_cosf4_u35avx2128(__m128 a);
      +
      +__m256 Sleef_cosf8_u35(__m256 a);
      +__m256 Sleef_cosf8_u35avx(__m256 a);
      +__m256 Sleef_cosf8_u35fma4(__m256 a);
      +__m256 Sleef_cosf8_u35avx2(__m256 a);
      +
      +__m512 Sleef_cosf16_u35avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_cosf_u35 with the same accuracy specification. +

      + +
      +

      Vectorized double precision combined sine and cosine functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +Sleef___m128d_2 Sleef_sincosd2_u10(__m128d a);
      +Sleef___m128d_2 Sleef_sincosd2_u10sse2(__m128d a);
      +Sleef___m128d_2 Sleef_sincosd2_u10sse4(__m128d a);
      +Sleef___m128d_2 Sleef_sincosd2_u10avx2128(__m128d a);
      +
      +Sleef___m256d_2 Sleef_sincosd4_u10(__m256d a);
      +Sleef___m256d_2 Sleef_sincosd4_u10avx(__m256d a);
      +Sleef___m256d_2 Sleef_sincosd4_u10fma4(__m256d a);
      +Sleef___m256d_2 Sleef_sincosd4_u10avx2(__m256d a);
      +
      +Sleef___m512d_2 Sleef_sincosd8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sincos_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision combined sine and cosine functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +Sleef___m128_2 Sleef_sincosf4_u10(__m128 a);
      +Sleef___m128_2 Sleef_sincosf4_u10sse2(__m128 a);
      +Sleef___m128_2 Sleef_sincosf4_u10sse4(__m128 a);
      +Sleef___m128_2 Sleef_sincosf4_u10avx2128(__m128 a);
      +
      +Sleef___m256_2 Sleef_sincosf8_u10(__m256 a);
      +Sleef___m256_2 Sleef_sincosf8_u10avx(__m256 a);
      +Sleef___m256_2 Sleef_sincosf8_u10fma4(__m256 a);
      +Sleef___m256_2 Sleef_sincosf8_u10avx2(__m256 a);
      +
      +Sleef___m512_2 Sleef_sincosf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sincosf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision combined sine and cosine functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +Sleef___m128d_2 Sleef_sincosd2_u35(__m128d a);
      +Sleef___m128d_2 Sleef_sincosd2_u35sse2(__m128d a);
      +Sleef___m128d_2 Sleef_sincosd2_u35sse4(__m128d a);
      +Sleef___m128d_2 Sleef_sincosd2_u35avx2128(__m128d a);
      +
      +Sleef___m256d_2 Sleef_sincosd4_u35(__m256d a);
      +Sleef___m256d_2 Sleef_sincosd4_u35avx(__m256d a);
      +Sleef___m256d_2 Sleef_sincosd4_u35fma4(__m256d a);
      +Sleef___m256d_2 Sleef_sincosd4_u35avx2(__m256d a);
      +
      +Sleef___m512d_2 Sleef_sincosd8_u35avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sincos_u35 with the same accuracy specification. +

      + +
      +

      Vectorized single precision combined sine and cosine functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +Sleef___m128_2 Sleef_sincosf4_u35(__m128 a);
      +Sleef___m128_2 Sleef_sincosf4_u35sse2(__m128 a);
      +Sleef___m128_2 Sleef_sincosf4_u35sse4(__m128 a);
      +Sleef___m128_2 Sleef_sincosf4_u35avx2128(__m128 a);
      +
      +Sleef___m256_2 Sleef_sincosf8_u35(__m256 a);
      +Sleef___m256_2 Sleef_sincosf8_u35avx(__m256 a);
      +Sleef___m256_2 Sleef_sincosf8_u35fma4(__m256 a);
      +Sleef___m256_2 Sleef_sincosf8_u35avx2(__m256 a);
      +
      +Sleef___m512_2 Sleef_sincosf16_u35avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sincosf_u35 with the same accuracy specification. +

      + +
      +

      Vectorized double precision sine functions with 0.506 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_sinpid2_u05(__m128d a);
      +__m128d Sleef_sinpid2_u05sse2(__m128d a);
      +__m128d Sleef_sinpid2_u05sse4(__m128d a);
      +__m128d Sleef_sinpid2_u05avx2128(__m128d a);
      +
      +__m256d Sleef_sinpid4_u05(__m256d a);
      +__m256d Sleef_sinpid4_u05avx(__m256d a);
      +__m256d Sleef_sinpid4_u05fma4(__m256d a);
      +__m256d Sleef_sinpid4_u05avx2(__m256d a);
      +
      +__m512d Sleef_sinpid8_u05avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sinpi_u05 with the same accuracy specification. +

      + +
      +

      Vectorized single precision sine functions with 0.506 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_sinpif4_u05(__m128 a);
      +__m128 Sleef_sinpif4_u05sse2(__m128 a);
      +__m128 Sleef_sinpif4_u05sse4(__m128 a);
      +__m128 Sleef_sinpif4_u05avx2128(__m128 a);
      +
      +__m256 Sleef_sinpif8_u05(__m256 a);
      +__m256 Sleef_sinpif8_u05avx(__m256 a);
      +__m256 Sleef_sinpif8_u05fma4(__m256 a);
      +__m256 Sleef_sinpif8_u05avx2(__m256 a);
      +
      +__m512 Sleef_sinpif16_u05avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sinpif_u05 with the same accuracy specification. +

      + +
      +

      Vectorized double precision cosine functions with 0.506 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_cospid2_u05(__m128d a);
      +__m128d Sleef_cospid2_u05sse2(__m128d a);
      +__m128d Sleef_cospid2_u05sse4(__m128d a);
      +__m128d Sleef_cospid2_u05avx2128(__m128d a);
      +
      +__m256d Sleef_cospid4_u05(__m256d a);
      +__m256d Sleef_cospid4_u05avx(__m256d a);
      +__m256d Sleef_cospid4_u05fma4(__m256d a);
      +__m256d Sleef_cospid4_u05avx2(__m256d a);
      +
      +__m512d Sleef_cospid8_u05avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_cospi_u05 with the same accuracy specification. +

      + +
      +

      Vectorized single precision cosine functions with 0.506 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_cospif4_u05(__m128 a);
      +__m128 Sleef_cospif4_u05sse2(__m128 a);
      +__m128 Sleef_cospif4_u05sse4(__m128 a);
      +__m128 Sleef_cospif4_u05avx2128(__m128 a);
      +
      +__m256 Sleef_cospif8_u05(__m256 a);
      +__m256 Sleef_cospif8_u05avx(__m256 a);
      +__m256 Sleef_cospif8_u05fma4(__m256 a);
      +__m256 Sleef_cospif8_u05avx2(__m256 a);
      +
      +__m512 Sleef_cospif16_u05avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_cospif_u05 with the same accuracy specification. +

      + +
      +

      Vectorized double precision combined sine and cosine functions with 0.506 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +Sleef___m128d_2 Sleef_sincospid2_u05(__m128d a);
      +Sleef___m128d_2 Sleef_sincospid2_u05sse2(__m128d a);
      +Sleef___m128d_2 Sleef_sincospid2_u05sse4(__m128d a);
      +Sleef___m128d_2 Sleef_sincospid2_u05avx2128(__m128d a);
      +
      +Sleef___m256d_2 Sleef_sincospid4_u05(__m256d a);
      +Sleef___m256d_2 Sleef_sincospid4_u05avx(__m256d a);
      +Sleef___m256d_2 Sleef_sincospid4_u05fma4(__m256d a);
      +Sleef___m256d_2 Sleef_sincospid4_u05avx2(__m256d a);
      +
      +Sleef___m512d_2 Sleef_sincospid8_u05avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sincospi_u05 with the same accuracy specification. +

      + +
      +

      Vectorized single precision combined sine and cosine functions with 0.506 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +Sleef___m128_2 Sleef_sincospif4_u05(__m128 a);
      +Sleef___m128_2 Sleef_sincospif4_u05sse2(__m128 a);
      +Sleef___m128_2 Sleef_sincospif4_u05sse4(__m128 a);
      +Sleef___m128_2 Sleef_sincospif4_u05avx2128(__m128 a);
      +
      +Sleef___m256_2 Sleef_sincospif8_u05(__m256 a);
      +Sleef___m256_2 Sleef_sincospif8_u05avx(__m256 a);
      +Sleef___m256_2 Sleef_sincospif8_u05fma4(__m256 a);
      +Sleef___m256_2 Sleef_sincospif8_u05avx2(__m256 a);
      +
      +Sleef___m512_2 Sleef_sincospif16_u05avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sincospif_u05 with the same accuracy specification. +

      + +
      +

      Vectorized double precision combined sine and cosine functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +Sleef___m128d_2 Sleef_sincospid2_u35(__m128d a);
      +Sleef___m128d_2 Sleef_sincospid2_u35sse2(__m128d a);
      +Sleef___m128d_2 Sleef_sincospid2_u35sse4(__m128d a);
      +Sleef___m128d_2 Sleef_sincospid2_u35avx2128(__m128d a);
      +
      +Sleef___m256d_2 Sleef_sincospid4_u35(__m256d a);
      +Sleef___m256d_2 Sleef_sincospid4_u35avx(__m256d a);
      +Sleef___m256d_2 Sleef_sincospid4_u35fma4(__m256d a);
      +Sleef___m256d_2 Sleef_sincospid4_u35avx2(__m256d a);
      +
      +Sleef___m512d_2 Sleef_sincospid8_u35avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sincospi_u35 with the same accuracy specification. +

      -

      __m128d Sleef_sind2_u10sse2(__m128d a)

      +
      +

      Vectorized single precision combined sine and cosine functions with 3.5 ULP error bound

      Synopsis

      -__m128d Sleef_sind2_u10sse2(__m128d a)
      #include <sleef.h>
      -Required instruction set : SSE2 +
      +Sleef___m128_2 Sleef_sincospif4_u35(__m128 a);
      +Sleef___m128_2 Sleef_sincospif4_u35sse2(__m128 a);
      +Sleef___m128_2 Sleef_sincospif4_u35sse4(__m128 a);
      +Sleef___m128_2 Sleef_sincospif4_u35avx2128(__m128 a);
      +
      +Sleef___m256_2 Sleef_sincospif8_u35(__m256 a);
      +Sleef___m256_2 Sleef_sincospif8_u35avx(__m256 a);
      +Sleef___m256_2 Sleef_sincospif8_u35fma4(__m256 a);
      +Sleef___m256_2 Sleef_sincospif8_u35avx2(__m256 a);
      +
      +Sleef___m512_2 Sleef_sincospif16_u35avx512f(__m512 a);
      +
      +Link with -lsleef.

      Description

      -

      - Evaluates the sine function of a packed double-precision values - in a. The error bound for each returned value is 1.0 ULP if - the corresponding element in a is in [-1e+14, 1e+14]. If the - element in a is out of this range, an arbitrary value within - [-1, 1] is returned. If the element in a is a NaN or - infinity, a NaN is returned. +

      +These are the vectorized functions of Sleef_sincospif_u35 with the same accuracy specification.


      +

      Vectorized double precision tangent functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_tand2_u10(__m128d a);
      +__m128d Sleef_tand2_u10sse2(__m128d a);
      +__m128d Sleef_tand2_u10sse4(__m128d a);
      +__m128d Sleef_tand2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_tand4_u10(__m256d a);
      +__m256d Sleef_tand4_u10avx(__m256d a);
      +__m256d Sleef_tand4_u10fma4(__m256d a);
      +__m256d Sleef_tand4_u10avx2(__m256d a);
      +
      +__m512d Sleef_tand8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      -

      Sleef_double2 Sleef_sincos_u10(double a)

      +

      +These are the vectorized functions of Sleef_tan_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision tangent functions with 1.0 ULP error bound

      Synopsis

      -Sleef___m128d_2 Sleef_sincosd2_u10sse2(__m128d a)
      #include <sleef.h>
      -Required instruction set : SSE2 +
      +__m128 Sleef_tanf4_u10(__m128 a);
      +__m128 Sleef_tanf4_u10sse2(__m128 a);
      +__m128 Sleef_tanf4_u10sse4(__m128 a);
      +__m128 Sleef_tanf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_tanf8_u10(__m256 a);
      +__m256 Sleef_tanf8_u10avx(__m256 a);
      +__m256 Sleef_tanf8_u10fma4(__m256 a);
      +__m256 Sleef_tanf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_tanf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef.

      Description

      -

      - Evaluates the sine and cosine functions of a packed double-precision - values in a at a time, and store the two sets of values - in x and y elements in the returned value, - respectively. The error bound for the returned values is 1.0 ULP if - the corresponding element in a is in [-1e+14, 1e+14]. If the - element in a is out of this range, an arbitrary value within - [-1, 1] is returned. If the element in a is a NaN or - infinity, a NaN is returned. +

      +These are the vectorized functions of Sleef_tanf_u10 with the same accuracy specification.


      +

      Vectorized double precision tangent functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_tand2_u35(__m128d a);
      +__m128d Sleef_tand2_u35sse2(__m128d a);
      +__m128d Sleef_tand2_u35sse4(__m128d a);
      +__m128d Sleef_tand2_u35avx2128(__m128d a);
      +
      +__m256d Sleef_tand4_u35(__m256d a);
      +__m256d Sleef_tand4_u35avx(__m256d a);
      +__m256d Sleef_tand4_u35fma4(__m256d a);
      +__m256d Sleef_tand4_u35avx2(__m256d a);
      +
      +__m512d Sleef_tand8_u35avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_tan_u35 with the same accuracy specification. +

      + +
      +

      Vectorized single precision tangent functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_tanf4_u35(__m128 a);
      +__m128 Sleef_tanf4_u35sse2(__m128 a);
      +__m128 Sleef_tanf4_u35sse4(__m128 a);
      +__m128 Sleef_tanf4_u35avx2128(__m128 a);
      +
      +__m256 Sleef_tanf8_u35(__m256 a);
      +__m256 Sleef_tanf8_u35avx(__m256 a);
      +__m256 Sleef_tanf8_u35fma4(__m256 a);
      +__m256 Sleef_tanf8_u35avx2(__m256 a);
      +
      +__m512 Sleef_tanf16_u35avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_tanf_u35 with the same accuracy specification. +

      + +

      Power, exponential, and logarithmic functions

      + +

      Vectorized double precision power functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_powd2_u10(__m128d a, __m128d b);
      +__m128d Sleef_powd2_u10sse2(__m128d a, __m128d b);
      +__m128d Sleef_powd2_u10sse4(__m128d a, __m128d b);
      +__m128d Sleef_powd2_u10avx2128(__m128d a, __m128d b);
      +
      +__m256d Sleef_powd4_u10(__m256d a, __m256d b);
      +__m256d Sleef_powd4_u10avx(__m256d a, __m256d b);
      +__m256d Sleef_powd4_u10fma4(__m256d a, __m256d b);
      +__m256d Sleef_powd4_u10avx2(__m256d a, __m256d b);
      +
      +__m512d Sleef_powd8_u10avx512f(__m512d a, __m512d b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_pow_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision power functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_powf4_u10(__m128 a, __m128 b);
      +__m128 Sleef_powf4_u10sse2(__m128 a, __m128 b);
      +__m128 Sleef_powf4_u10sse4(__m128 a, __m128 b);
      +__m128 Sleef_powf4_u10avx2128(__m128 a, __m128 b);
      +
      +__m256 Sleef_powf8_u10(__m256 a, __m256 b);
      +__m256 Sleef_powf8_u10avx(__m256 a, __m256 b);
      +__m256 Sleef_powf8_u10fma4(__m256 a, __m256 b);
      +__m256 Sleef_powf8_u10avx2(__m256 a, __m256 b);
      +
      +__m512 Sleef_powf16_u10avx512f(__m512 a, __m512 b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_powf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision natural logarithmic functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_logd2_u10(__m128d a);
      +__m128d Sleef_logd2_u10sse2(__m128d a);
      +__m128d Sleef_logd2_u10sse4(__m128d a);
      +__m128d Sleef_logd2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_logd4_u10(__m256d a);
      +__m256d Sleef_logd4_u10avx(__m256d a);
      +__m256d Sleef_logd4_u10fma4(__m256d a);
      +__m256d Sleef_logd4_u10avx2(__m256d a);
      +
      +__m512d Sleef_logd8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_log_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision natural logarithmic functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_logf4_u10(__m128 a);
      +__m128 Sleef_logf4_u10sse2(__m128 a);
      +__m128 Sleef_logf4_u10sse4(__m128 a);
      +__m128 Sleef_logf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_logf8_u10(__m256 a);
      +__m256 Sleef_logf8_u10avx(__m256 a);
      +__m256 Sleef_logf8_u10fma4(__m256 a);
      +__m256 Sleef_logf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_logf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_logf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision natural logarithmic functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_logd2_u35(__m128d a);
      +__m128d Sleef_logd2_u35sse2(__m128d a);
      +__m128d Sleef_logd2_u35sse4(__m128d a);
      +__m128d Sleef_logd2_u35avx2128(__m128d a);
      +
      +__m256d Sleef_logd4_u35(__m256d a);
      +__m256d Sleef_logd4_u35avx(__m256d a);
      +__m256d Sleef_logd4_u35fma4(__m256d a);
      +__m256d Sleef_logd4_u35avx2(__m256d a);
      +
      +__m512d Sleef_logd8_u35avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_log_u35 with the same accuracy specification. +

      + +
      +

      Vectorized single precision natural logarithmic functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_logf4_u35(__m128 a);
      +__m128 Sleef_logf4_u35sse2(__m128 a);
      +__m128 Sleef_logf4_u35sse4(__m128 a);
      +__m128 Sleef_logf4_u35avx2128(__m128 a);
      +
      +__m256 Sleef_logf8_u35(__m256 a);
      +__m256 Sleef_logf8_u35avx(__m256 a);
      +__m256 Sleef_logf8_u35fma4(__m256 a);
      +__m256 Sleef_logf8_u35avx2(__m256 a);
      +
      +__m512 Sleef_logf16_u35avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_logf_u35 with the same accuracy specification. +

      + +
      +

      Vectorized double precision base-10 logarithmic functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_log10d2_u10(__m128d a);
      +__m128d Sleef_log10d2_u10sse2(__m128d a);
      +__m128d Sleef_log10d2_u10sse4(__m128d a);
      +__m128d Sleef_log10d2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_log10d4_u10(__m256d a);
      +__m256d Sleef_log10d4_u10avx(__m256d a);
      +__m256d Sleef_log10d4_u10fma4(__m256d a);
      +__m256d Sleef_log10d4_u10avx2(__m256d a);
      +
      +__m512d Sleef_log10d8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_log10_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision base-10 logarithmic functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_log10f4_u10(__m128 a);
      +__m128 Sleef_log10f4_u10sse2(__m128 a);
      +__m128 Sleef_log10f4_u10sse4(__m128 a);
      +__m128 Sleef_log10f4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_log10f8_u10(__m256 a);
      +__m256 Sleef_log10f8_u10avx(__m256 a);
      +__m256 Sleef_log10f8_u10fma4(__m256 a);
      +__m256 Sleef_log10f8_u10avx2(__m256 a);
      +
      +__m512 Sleef_log10f16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_log10f_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision logarithm of one plus argument with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_log1pd2_u10(__m128d a);
      +__m128d Sleef_log1pd2_u10sse2(__m128d a);
      +__m128d Sleef_log1pd2_u10sse4(__m128d a);
      +__m128d Sleef_log1pd2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_log1pd4_u10(__m256d a);
      +__m256d Sleef_log1pd4_u10avx(__m256d a);
      +__m256d Sleef_log1pd4_u10fma4(__m256d a);
      +__m256d Sleef_log1pd4_u10avx2(__m256d a);
      +
      +__m512d Sleef_log1pd8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_log1p_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision logarithm of one plus argument with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_log1pf4_u10(__m128 a);
      +__m128 Sleef_log1pf4_u10sse2(__m128 a);
      +__m128 Sleef_log1pf4_u10sse4(__m128 a);
      +__m128 Sleef_log1pf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_log1pf8_u10(__m256 a);
      +__m256 Sleef_log1pf8_u10avx(__m256 a);
      +__m256 Sleef_log1pf8_u10fma4(__m256 a);
      +__m256 Sleef_log1pf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_log1pf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_log1pf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision base-e exponential functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_expd2_u10(__m128d a);
      +__m128d Sleef_expd2_u10sse2(__m128d a);
      +__m128d Sleef_expd2_u10sse4(__m128d a);
      +__m128d Sleef_expd2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_expd4_u10(__m256d a);
      +__m256d Sleef_expd4_u10avx(__m256d a);
      +__m256d Sleef_expd4_u10fma4(__m256d a);
      +__m256d Sleef_expd4_u10avx2(__m256d a);
      +
      +__m512d Sleef_expd8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_exp_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision base-e exponential functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_expf4_u10(__m128 a);
      +__m128 Sleef_expf4_u10sse2(__m128 a);
      +__m128 Sleef_expf4_u10sse4(__m128 a);
      +__m128 Sleef_expf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_expf8_u10(__m256 a);
      +__m256 Sleef_expf8_u10avx(__m256 a);
      +__m256 Sleef_expf8_u10fma4(__m256 a);
      +__m256 Sleef_expf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_expf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_expf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision base-2 exponential functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_exp2d2_u10(__m128d a);
      +__m128d Sleef_exp2d2_u10sse2(__m128d a);
      +__m128d Sleef_exp2d2_u10sse4(__m128d a);
      +__m128d Sleef_exp2d2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_exp2d4_u10(__m256d a);
      +__m256d Sleef_exp2d4_u10avx(__m256d a);
      +__m256d Sleef_exp2d4_u10fma4(__m256d a);
      +__m256d Sleef_exp2d4_u10avx2(__m256d a);
      +
      +__m512d Sleef_exp2d8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_exp2_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision base-2 exponential functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_exp2f4_u10(__m128 a);
      +__m128 Sleef_exp2f4_u10sse2(__m128 a);
      +__m128 Sleef_exp2f4_u10sse4(__m128 a);
      +__m128 Sleef_exp2f4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_exp2f8_u10(__m256 a);
      +__m256 Sleef_exp2f8_u10avx(__m256 a);
      +__m256 Sleef_exp2f8_u10fma4(__m256 a);
      +__m256 Sleef_exp2f8_u10avx2(__m256 a);
      +
      +__m512 Sleef_exp2f16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_exp2f_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision base-10 exponential functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_exp10d2_u10(__m128d a);
      +__m128d Sleef_exp10d2_u10sse2(__m128d a);
      +__m128d Sleef_exp10d2_u10sse4(__m128d a);
      +__m128d Sleef_exp10d2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_exp10d4_u10(__m256d a);
      +__m256d Sleef_exp10d4_u10avx(__m256d a);
      +__m256d Sleef_exp10d4_u10fma4(__m256d a);
      +__m256d Sleef_exp10d4_u10avx2(__m256d a);
      +
      +__m512d Sleef_exp10d8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_exp10_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision base-10 exponential functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_exp10f4_u10(__m128 a);
      +__m128 Sleef_exp10f4_u10sse2(__m128 a);
      +__m128 Sleef_exp10f4_u10sse4(__m128 a);
      +__m128 Sleef_exp10f4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_exp10f8_u10(__m256 a);
      +__m256 Sleef_exp10f8_u10avx(__m256 a);
      +__m256 Sleef_exp10f8_u10fma4(__m256 a);
      +__m256 Sleef_exp10f8_u10avx2(__m256 a);
      +
      +__m512 Sleef_exp10f16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_exp10f_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision base-e exponential functions minus 1 with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_expm1d2_u10(__m128d a);
      +__m128d Sleef_expm1d2_u10sse2(__m128d a);
      +__m128d Sleef_expm1d2_u10sse4(__m128d a);
      +__m128d Sleef_expm1d2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_expm1d4_u10(__m256d a);
      +__m256d Sleef_expm1d4_u10avx(__m256d a);
      +__m256d Sleef_expm1d4_u10fma4(__m256d a);
      +__m256d Sleef_expm1d4_u10avx2(__m256d a);
      +
      +__m512d Sleef_expm1d8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_expm1_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision base-e exponential functions minus 1 with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_expm1f4_u10(__m128 a);
      +__m128 Sleef_expm1f4_u10sse2(__m128 a);
      +__m128 Sleef_expm1f4_u10sse4(__m128 a);
      +__m128 Sleef_expm1f4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_expm1f8_u10(__m256 a);
      +__m256 Sleef_expm1f8_u10avx(__m256 a);
      +__m256 Sleef_expm1f8_u10fma4(__m256 a);
      +__m256 Sleef_expm1f8_u10avx2(__m256 a);
      +
      +__m512 Sleef_expm1f16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_expm1f_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision square root functions with 0.5001 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_sqrtd2_u05(__m128d a);
      +__m128d Sleef_sqrtd2_u05sse2(__m128d a);
      +__m128d Sleef_sqrtd2_u05sse4(__m128d a);
      +__m128d Sleef_sqrtd2_u05avx2128(__m128d a);
      +
      +__m256d Sleef_sqrtd4_u05(__m256d a);
      +__m256d Sleef_sqrtd4_u05avx(__m256d a);
      +__m256d Sleef_sqrtd4_u05fma4(__m256d a);
      +__m256d Sleef_sqrtd4_u05avx2(__m256d a);
      +
      +__m512d Sleef_sqrtd8_u05avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sqrt_u05 with the same accuracy specification. +

      + +
      +

      Vectorized single precision square root functions with 0.5001 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_sqrtf4_u05(__m128 a);
      +__m128 Sleef_sqrtf4_u05sse2(__m128 a);
      +__m128 Sleef_sqrtf4_u05sse4(__m128 a);
      +__m128 Sleef_sqrtf4_u05avx2128(__m128 a);
      +
      +__m256 Sleef_sqrtf8_u05(__m256 a);
      +__m256 Sleef_sqrtf8_u05avx(__m256 a);
      +__m256 Sleef_sqrtf8_u05fma4(__m256 a);
      +__m256 Sleef_sqrtf8_u05avx2(__m256 a);
      +
      +__m512 Sleef_sqrtf16_u05avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sqrtf_u05 with the same accuracy specification. +

      + +
      +

      Vectorized double precision square root functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_sqrtd2_u35(__m128d a);
      +__m128d Sleef_sqrtd2_u35sse2(__m128d a);
      +__m128d Sleef_sqrtd2_u35sse4(__m128d a);
      +__m128d Sleef_sqrtd2_u35avx2128(__m128d a);
      +
      +__m256d Sleef_sqrtd4_u35(__m256d a);
      +__m256d Sleef_sqrtd4_u35avx(__m256d a);
      +__m256d Sleef_sqrtd4_u35fma4(__m256d a);
      +__m256d Sleef_sqrtd4_u35avx2(__m256d a);
      +
      +__m512d Sleef_sqrtd8_u35avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sqrt_u35 with the same accuracy specification. +

      + +
      +

      Vectorized single precision square root functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_sqrtf4_u35(__m128 a);
      +__m128 Sleef_sqrtf4_u35sse2(__m128 a);
      +__m128 Sleef_sqrtf4_u35sse4(__m128 a);
      +__m128 Sleef_sqrtf4_u35avx2128(__m128 a);
      +
      +__m256 Sleef_sqrtf8_u35(__m256 a);
      +__m256 Sleef_sqrtf8_u35avx(__m256 a);
      +__m256 Sleef_sqrtf8_u35fma4(__m256 a);
      +__m256 Sleef_sqrtf8_u35avx2(__m256 a);
      +
      +__m512 Sleef_sqrtf16_u35avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sqrtf_u35 with the same accuracy specification. +

      + +
      +

      Vectorized double precision cubic root functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_cbrtd2_u10(__m128d a);
      +__m128d Sleef_cbrtd2_u10sse2(__m128d a);
      +__m128d Sleef_cbrtd2_u10sse4(__m128d a);
      +__m128d Sleef_cbrtd2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_cbrtd4_u10(__m256d a);
      +__m256d Sleef_cbrtd4_u10avx(__m256d a);
      +__m256d Sleef_cbrtd4_u10fma4(__m256d a);
      +__m256d Sleef_cbrtd4_u10avx2(__m256d a);
      +
      +__m512d Sleef_cbrtd8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_cbrt_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision cubic root functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_cbrtf4_u10(__m128 a);
      +__m128 Sleef_cbrtf4_u10sse2(__m128 a);
      +__m128 Sleef_cbrtf4_u10sse4(__m128 a);
      +__m128 Sleef_cbrtf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_cbrtf8_u10(__m256 a);
      +__m256 Sleef_cbrtf8_u10avx(__m256 a);
      +__m256 Sleef_cbrtf8_u10fma4(__m256 a);
      +__m256 Sleef_cbrtf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_cbrtf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_cbrtf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision cubic root functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_cbrtd2_u35(__m128d a);
      +__m128d Sleef_cbrtd2_u35sse2(__m128d a);
      +__m128d Sleef_cbrtd2_u35sse4(__m128d a);
      +__m128d Sleef_cbrtd2_u35avx2128(__m128d a);
      +
      +__m256d Sleef_cbrtd4_u35(__m256d a);
      +__m256d Sleef_cbrtd4_u35avx(__m256d a);
      +__m256d Sleef_cbrtd4_u35fma4(__m256d a);
      +__m256d Sleef_cbrtd4_u35avx2(__m256d a);
      +
      +__m512d Sleef_cbrtd8_u35avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_cbrt_u35 with the same accuracy specification. +

      + +
      +

      Vectorized single precision cubic root functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_cbrtf4_u35(__m128 a);
      +__m128 Sleef_cbrtf4_u35sse2(__m128 a);
      +__m128 Sleef_cbrtf4_u35sse4(__m128 a);
      +__m128 Sleef_cbrtf4_u35avx2128(__m128 a);
      +
      +__m256 Sleef_cbrtf8_u35(__m256 a);
      +__m256 Sleef_cbrtf8_u35avx(__m256 a);
      +__m256 Sleef_cbrtf8_u35fma4(__m256 a);
      +__m256 Sleef_cbrtf8_u35avx2(__m256 a);
      +
      +__m512 Sleef_cbrtf16_u35avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_cbrtf_u35 with the same accuracy specification. +

      + +
      +

      Vectorized double precision 2D Euclidean distance functions with 0.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_hypotd2_u05(__m128d a, __m128d b);
      +__m128d Sleef_hypotd2_u05sse2(__m128d a, __m128d b);
      +__m128d Sleef_hypotd2_u05sse4(__m128d a, __m128d b);
      +__m128d Sleef_hypotd2_u05avx2128(__m128d a, __m128d b);
      +
      +__m256d Sleef_hypotd4_u05(__m256d a, __m256d b);
      +__m256d Sleef_hypotd4_u05avx(__m256d a, __m256d b);
      +__m256d Sleef_hypotd4_u05fma4(__m256d a, __m256d b);
      +__m256d Sleef_hypotd4_u05avx2(__m256d a, __m256d b);
      +
      +__m512d Sleef_hypotd8_u05avx512f(__m512d a, __m512d b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_hypot_u05 with the same accuracy specification. +

      + +
      +

      Vectorized single precision 2D Euclidean distance functions with 0.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_hypotf4_u05(__m128 a, __m128 b);
      +__m128 Sleef_hypotf4_u05sse2(__m128 a, __m128 b);
      +__m128 Sleef_hypotf4_u05sse4(__m128 a, __m128 b);
      +__m128 Sleef_hypotf4_u05avx2128(__m128 a, __m128 b);
      +
      +__m256 Sleef_hypotf8_u05(__m256 a, __m256 b);
      +__m256 Sleef_hypotf8_u05avx(__m256 a, __m256 b);
      +__m256 Sleef_hypotf8_u05fma4(__m256 a, __m256 b);
      +__m256 Sleef_hypotf8_u05avx2(__m256 a, __m256 b);
      +
      +__m512 Sleef_hypotf16_u05avx512f(__m512 a, __m512 b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_hypotf_u05 with the same accuracy specification. +

      + +
      +

      Vectorized double precision 2D Euclidean distance functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_hypotd2_u35(__m128d a, __m128d b);
      +__m128d Sleef_hypotd2_u35sse2(__m128d a, __m128d b);
      +__m128d Sleef_hypotd2_u35sse4(__m128d a, __m128d b);
      +__m128d Sleef_hypotd2_u35avx2128(__m128d a, __m128d b);
      +
      +__m256d Sleef_hypotd4_u35(__m256d a, __m256d b);
      +__m256d Sleef_hypotd4_u35avx(__m256d a, __m256d b);
      +__m256d Sleef_hypotd4_u35fma4(__m256d a, __m256d b);
      +__m256d Sleef_hypotd4_u35avx2(__m256d a, __m256d b);
      +
      +__m512d Sleef_hypotd8_u35avx512f(__m512d a, __m512d b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_hypot_u35 with the same accuracy specification. +

      + +
      +

      Vectorized single precision 2D Euclidean distance functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_hypotf4_u35(__m128 a, __m128 b);
      +__m128 Sleef_hypotf4_u35sse2(__m128 a, __m128 b);
      +__m128 Sleef_hypotf4_u35sse4(__m128 a, __m128 b);
      +__m128 Sleef_hypotf4_u35avx2128(__m128 a, __m128 b);
      +
      +__m256 Sleef_hypotf8_u35(__m256 a, __m256 b);
      +__m256 Sleef_hypotf8_u35avx(__m256 a, __m256 b);
      +__m256 Sleef_hypotf8_u35fma4(__m256 a, __m256 b);
      +__m256 Sleef_hypotf8_u35avx2(__m256 a, __m256 b);
      +
      +__m512 Sleef_hypotf16_u35avx512f(__m512 a, __m512 b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_hypotf_u35 with the same accuracy specification. +

      + + +

      Inverse Trigonometric Functions

      + +

      Vectorized double precision arc sine functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_asind2_u10(__m128d a);
      +__m128d Sleef_asind2_u10sse2(__m128d a);
      +__m128d Sleef_asind2_u10sse4(__m128d a);
      +__m128d Sleef_asind2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_asind4_u10(__m256d a);
      +__m256d Sleef_asind4_u10avx(__m256d a);
      +__m256d Sleef_asind4_u10fma4(__m256d a);
      +__m256d Sleef_asind4_u10avx2(__m256d a);
      +
      +__m512d Sleef_asind8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_asin_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision arc sine functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_asinf4_u10(__m128 a);
      +__m128 Sleef_asinf4_u10sse2(__m128 a);
      +__m128 Sleef_asinf4_u10sse4(__m128 a);
      +__m128 Sleef_asinf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_asinf8_u10(__m256 a);
      +__m256 Sleef_asinf8_u10avx(__m256 a);
      +__m256 Sleef_asinf8_u10fma4(__m256 a);
      +__m256 Sleef_asinf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_asinf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_asinf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision arc sine functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_asind2_u35(__m128d a);
      +__m128d Sleef_asind2_u35sse2(__m128d a);
      +__m128d Sleef_asind2_u35sse4(__m128d a);
      +__m128d Sleef_asind2_u35avx2128(__m128d a);
      +
      +__m256d Sleef_asind4_u35(__m256d a);
      +__m256d Sleef_asind4_u35avx(__m256d a);
      +__m256d Sleef_asind4_u35fma4(__m256d a);
      +__m256d Sleef_asind4_u35avx2(__m256d a);
      +
      +__m512d Sleef_asind8_u35avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_asin_u35 with the same accuracy specification. +

      + +
      +

      Vectorized single precision arc sine functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_asinf4_u35(__m128 a);
      +__m128 Sleef_asinf4_u35sse2(__m128 a);
      +__m128 Sleef_asinf4_u35sse4(__m128 a);
      +__m128 Sleef_asinf4_u35avx2128(__m128 a);
      +
      +__m256 Sleef_asinf8_u35(__m256 a);
      +__m256 Sleef_asinf8_u35avx(__m256 a);
      +__m256 Sleef_asinf8_u35fma4(__m256 a);
      +__m256 Sleef_asinf8_u35avx2(__m256 a);
      +
      +__m512 Sleef_asinf16_u35avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_asinf_u35 with the same accuracy specification. +

      + +
      +

      Vectorized double precision arc cosine functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_acosd2_u10(__m128d a);
      +__m128d Sleef_acosd2_u10sse2(__m128d a);
      +__m128d Sleef_acosd2_u10sse4(__m128d a);
      +__m128d Sleef_acosd2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_acosd4_u10(__m256d a);
      +__m256d Sleef_acosd4_u10avx(__m256d a);
      +__m256d Sleef_acosd4_u10fma4(__m256d a);
      +__m256d Sleef_acosd4_u10avx2(__m256d a);
      +
      +__m512d Sleef_acosd8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_acos_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision arc cosine functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_acosf4_u10(__m128 a);
      +__m128 Sleef_acosf4_u10sse2(__m128 a);
      +__m128 Sleef_acosf4_u10sse4(__m128 a);
      +__m128 Sleef_acosf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_acosf8_u10(__m256 a);
      +__m256 Sleef_acosf8_u10avx(__m256 a);
      +__m256 Sleef_acosf8_u10fma4(__m256 a);
      +__m256 Sleef_acosf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_acosf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_acosf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision arc cosine functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_acosd2_u35(__m128d a);
      +__m128d Sleef_acosd2_u35sse2(__m128d a);
      +__m128d Sleef_acosd2_u35sse4(__m128d a);
      +__m128d Sleef_acosd2_u35avx2128(__m128d a);
      +
      +__m256d Sleef_acosd4_u35(__m256d a);
      +__m256d Sleef_acosd4_u35avx(__m256d a);
      +__m256d Sleef_acosd4_u35fma4(__m256d a);
      +__m256d Sleef_acosd4_u35avx2(__m256d a);
      +
      +__m512d Sleef_acosd8_u35avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_acos_u35 with the same accuracy specification. +

      + +
      +

      Vectorized single precision arc cosine functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_acosf4_u35(__m128 a);
      +__m128 Sleef_acosf4_u35sse2(__m128 a);
      +__m128 Sleef_acosf4_u35sse4(__m128 a);
      +__m128 Sleef_acosf4_u35avx2128(__m128 a);
      +
      +__m256 Sleef_acosf8_u35(__m256 a);
      +__m256 Sleef_acosf8_u35avx(__m256 a);
      +__m256 Sleef_acosf8_u35fma4(__m256 a);
      +__m256 Sleef_acosf8_u35avx2(__m256 a);
      +
      +__m512 Sleef_acosf16_u35avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_acosf_u35 with the same accuracy specification. +

      + +
      +

      Vectorized double precision arc tangent functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_atand2_u10(__m128d a);
      +__m128d Sleef_atand2_u10sse2(__m128d a);
      +__m128d Sleef_atand2_u10sse4(__m128d a);
      +__m128d Sleef_atand2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_atand4_u10(__m256d a);
      +__m256d Sleef_atand4_u10avx(__m256d a);
      +__m256d Sleef_atand4_u10fma4(__m256d a);
      +__m256d Sleef_atand4_u10avx2(__m256d a);
      +
      +__m512d Sleef_atand8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_atan_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision arc tangent functions with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_atanf4_u10(__m128 a);
      +__m128 Sleef_atanf4_u10sse2(__m128 a);
      +__m128 Sleef_atanf4_u10sse4(__m128 a);
      +__m128 Sleef_atanf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_atanf8_u10(__m256 a);
      +__m256 Sleef_atanf8_u10avx(__m256 a);
      +__m256 Sleef_atanf8_u10fma4(__m256 a);
      +__m256 Sleef_atanf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_atanf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_atanf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision arc tangent functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_atand2_u35(__m128d a);
      +__m128d Sleef_atand2_u35sse2(__m128d a);
      +__m128d Sleef_atand2_u35sse4(__m128d a);
      +__m128d Sleef_atand2_u35avx2128(__m128d a);
      +
      +__m256d Sleef_atand4_u35(__m256d a);
      +__m256d Sleef_atand4_u35avx(__m256d a);
      +__m256d Sleef_atand4_u35fma4(__m256d a);
      +__m256d Sleef_atand4_u35avx2(__m256d a);
      +
      +__m512d Sleef_atand8_u35avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_atan_u35 with the same accuracy specification. +

      + +
      +

      Vectorized single precision arc tangent functions with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_atanf4_u35(__m128 a);
      +__m128 Sleef_atanf4_u35sse2(__m128 a);
      +__m128 Sleef_atanf4_u35sse4(__m128 a);
      +__m128 Sleef_atanf4_u35avx2128(__m128 a);
      +
      +__m256 Sleef_atanf8_u35(__m256 a);
      +__m256 Sleef_atanf8_u35avx(__m256 a);
      +__m256 Sleef_atanf8_u35fma4(__m256 a);
      +__m256 Sleef_atanf8_u35avx2(__m256 a);
      +
      +__m512 Sleef_atanf16_u35avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_atanf_u35 with the same accuracy specification. +

      + +
      +

      Vectorized double precision arc tangent functions of two variables with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_atan2d2_u10(__m128d a, __m128d b);
      +__m128d Sleef_atan2d2_u10sse2(__m128d a, __m128d b);
      +__m128d Sleef_atan2d2_u10sse4(__m128d a, __m128d b);
      +__m128d Sleef_atan2d2_u10avx2128(__m128d a, __m128d b);
      +
      +__m256d Sleef_atan2d4_u10(__m256d a, __m256d b);
      +__m256d Sleef_atan2d4_u10avx(__m256d a, __m256d b);
      +__m256d Sleef_atan2d4_u10fma4(__m256d a, __m256d b);
      +__m256d Sleef_atan2d4_u10avx2(__m256d a, __m256d b);
      +
      +__m512d Sleef_atan2d8_u10avx512f(__m512d a, __m512d b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_atan2_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision arc tangent functions of two variables with 1.0 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_atan2f4_u10(__m128 a, __m128 b);
      +__m128 Sleef_atan2f4_u10sse2(__m128 a, __m128 b);
      +__m128 Sleef_atan2f4_u10sse4(__m128 a, __m128 b);
      +__m128 Sleef_atan2f4_u10avx2128(__m128 a, __m128 b);
      +
      +__m256 Sleef_atan2f8_u10(__m256 a, __m256 b);
      +__m256 Sleef_atan2f8_u10avx(__m256 a, __m256 b);
      +__m256 Sleef_atan2f8_u10fma4(__m256 a, __m256 b);
      +__m256 Sleef_atan2f8_u10avx2(__m256 a, __m256 b);
      +
      +__m512 Sleef_atan2f16_u10avx512f(__m512 a, __m512 b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_atan2f_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision arc tangent functions of two variables with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_atan2d2_u35(__m128d a, __m128d b);
      +__m128d Sleef_atan2d2_u35sse2(__m128d a, __m128d b);
      +__m128d Sleef_atan2d2_u35sse4(__m128d a, __m128d b);
      +__m128d Sleef_atan2d2_u35avx2128(__m128d a, __m128d b);
      +
      +__m256d Sleef_atan2d4_u35(__m256d a, __m256d b);
      +__m256d Sleef_atan2d4_u35avx(__m256d a, __m256d b);
      +__m256d Sleef_atan2d4_u35fma4(__m256d a, __m256d b);
      +__m256d Sleef_atan2d4_u35avx2(__m256d a, __m256d b);
      +
      +__m512d Sleef_atan2d8_u35avx512f(__m512d a, __m512d b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_atan2_u35 with the same accuracy specification. +

      + +
      +

      Vectorized single precision arc tangent functions of two variables with 3.5 ULP error bound

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_atan2f4_u35(__m128 a, __m128 b);
      +__m128 Sleef_atan2f4_u35sse2(__m128 a, __m128 b);
      +__m128 Sleef_atan2f4_u35sse4(__m128 a, __m128 b);
      +__m128 Sleef_atan2f4_u35avx2128(__m128 a, __m128 b);
      +
      +__m256 Sleef_atan2f8_u35(__m256 a, __m256 b);
      +__m256 Sleef_atan2f8_u35avx(__m256 a, __m256 b);
      +__m256 Sleef_atan2f8_u35fma4(__m256 a, __m256 b);
      +__m256 Sleef_atan2f8_u35avx2(__m256 a, __m256 b);
      +
      +__m512 Sleef_atan2f16_u35avx512f(__m512 a, __m512 b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_atan2f_u35 with the same accuracy specification. +

      + + + +

      Hyperbolic functions and inverse hyperbolic functions

      + +

      Vectorized double precision hyperbolic sine functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_sinhd2_u10(__m128d a);
      +__m128d Sleef_sinhd2_u10sse2(__m128d a);
      +__m128d Sleef_sinhd2_u10sse4(__m128d a);
      +__m128d Sleef_sinhd2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_sinhd4_u10(__m256d a);
      +__m256d Sleef_sinhd4_u10avx(__m256d a);
      +__m256d Sleef_sinhd4_u10fma4(__m256d a);
      +__m256d Sleef_sinhd4_u10avx2(__m256d a);
      +
      +__m512d Sleef_sinhd8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sinh_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision hyperbolic sine functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_sinhf4_u10(__m128 a);
      +__m128 Sleef_sinhf4_u10sse2(__m128 a);
      +__m128 Sleef_sinhf4_u10sse4(__m128 a);
      +__m128 Sleef_sinhf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_sinhf8_u10(__m256 a);
      +__m256 Sleef_sinhf8_u10avx(__m256 a);
      +__m256 Sleef_sinhf8_u10fma4(__m256 a);
      +__m256 Sleef_sinhf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_sinhf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_sinhf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision hyperbolic cosine functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_coshd2_u10(__m128d a);
      +__m128d Sleef_coshd2_u10sse2(__m128d a);
      +__m128d Sleef_coshd2_u10sse4(__m128d a);
      +__m128d Sleef_coshd2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_coshd4_u10(__m256d a);
      +__m256d Sleef_coshd4_u10avx(__m256d a);
      +__m256d Sleef_coshd4_u10fma4(__m256d a);
      +__m256d Sleef_coshd4_u10avx2(__m256d a);
      +
      +__m512d Sleef_coshd8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_cosh_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision hyperbolic cosine functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_coshf4_u10(__m128 a);
      +__m128 Sleef_coshf4_u10sse2(__m128 a);
      +__m128 Sleef_coshf4_u10sse4(__m128 a);
      +__m128 Sleef_coshf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_coshf8_u10(__m256 a);
      +__m256 Sleef_coshf8_u10avx(__m256 a);
      +__m256 Sleef_coshf8_u10fma4(__m256 a);
      +__m256 Sleef_coshf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_coshf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_coshf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision hyperbolic tangent functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_tanhd2_u10(__m128d a);
      +__m128d Sleef_tanhd2_u10sse2(__m128d a);
      +__m128d Sleef_tanhd2_u10sse4(__m128d a);
      +__m128d Sleef_tanhd2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_tanhd4_u10(__m256d a);
      +__m256d Sleef_tanhd4_u10avx(__m256d a);
      +__m256d Sleef_tanhd4_u10fma4(__m256d a);
      +__m256d Sleef_tanhd4_u10avx2(__m256d a);
      +
      +__m512d Sleef_tanhd8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_tanh_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision hyperbolic tangent functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_tanhf4_u10(__m128 a);
      +__m128 Sleef_tanhf4_u10sse2(__m128 a);
      +__m128 Sleef_tanhf4_u10sse4(__m128 a);
      +__m128 Sleef_tanhf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_tanhf8_u10(__m256 a);
      +__m256 Sleef_tanhf8_u10avx(__m256 a);
      +__m256 Sleef_tanhf8_u10fma4(__m256 a);
      +__m256 Sleef_tanhf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_tanhf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_tanhf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision inverse hyperbolic sine functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_asinhd2_u10(__m128d a);
      +__m128d Sleef_asinhd2_u10sse2(__m128d a);
      +__m128d Sleef_asinhd2_u10sse4(__m128d a);
      +__m128d Sleef_asinhd2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_asinhd4_u10(__m256d a);
      +__m256d Sleef_asinhd4_u10avx(__m256d a);
      +__m256d Sleef_asinhd4_u10fma4(__m256d a);
      +__m256d Sleef_asinhd4_u10avx2(__m256d a);
      +
      +__m512d Sleef_asinhd8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_asinh_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision inverse hyperbolic sine functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_asinhf4_u10(__m128 a);
      +__m128 Sleef_asinhf4_u10sse2(__m128 a);
      +__m128 Sleef_asinhf4_u10sse4(__m128 a);
      +__m128 Sleef_asinhf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_asinhf8_u10(__m256 a);
      +__m256 Sleef_asinhf8_u10avx(__m256 a);
      +__m256 Sleef_asinhf8_u10fma4(__m256 a);
      +__m256 Sleef_asinhf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_asinhf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_asinhf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision inverse hyperbolic cosine functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_acoshd2_u10(__m128d a);
      +__m128d Sleef_acoshd2_u10sse2(__m128d a);
      +__m128d Sleef_acoshd2_u10sse4(__m128d a);
      +__m128d Sleef_acoshd2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_acoshd4_u10(__m256d a);
      +__m256d Sleef_acoshd4_u10avx(__m256d a);
      +__m256d Sleef_acoshd4_u10fma4(__m256d a);
      +__m256d Sleef_acoshd4_u10avx2(__m256d a);
      +
      +__m512d Sleef_acoshd8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_acosh_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision inverse hyperbolic cosine functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_acoshf4_u10(__m128 a);
      +__m128 Sleef_acoshf4_u10sse2(__m128 a);
      +__m128 Sleef_acoshf4_u10sse4(__m128 a);
      +__m128 Sleef_acoshf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_acoshf8_u10(__m256 a);
      +__m256 Sleef_acoshf8_u10avx(__m256 a);
      +__m256 Sleef_acoshf8_u10fma4(__m256 a);
      +__m256 Sleef_acoshf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_acoshf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_acoshf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision inverse hyperbolic tangent functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_atanhd2_u10(__m128d a);
      +__m128d Sleef_atanhd2_u10sse2(__m128d a);
      +__m128d Sleef_atanhd2_u10sse4(__m128d a);
      +__m128d Sleef_atanhd2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_atanhd4_u10(__m256d a);
      +__m256d Sleef_atanhd4_u10avx(__m256d a);
      +__m256d Sleef_atanhd4_u10fma4(__m256d a);
      +__m256d Sleef_atanhd4_u10avx2(__m256d a);
      +
      +__m512d Sleef_atanhd8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_atanh_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision inverse hyperbolic tangent functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_atanhf4_u10(__m128 a);
      +__m128 Sleef_atanhf4_u10sse2(__m128 a);
      +__m128 Sleef_atanhf4_u10sse4(__m128 a);
      +__m128 Sleef_atanhf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_atanhf8_u10(__m256 a);
      +__m256 Sleef_atanhf8_u10avx(__m256 a);
      +__m256 Sleef_atanhf8_u10fma4(__m256 a);
      +__m256 Sleef_atanhf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_atanhf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_atanhf_u10 with the same accuracy specification. +

      + + +

      Error and gamma functions

      + +

      Vectorized double precision error functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_erfd2_u10(__m128d a);
      +__m128d Sleef_erfd2_u10sse2(__m128d a);
      +__m128d Sleef_erfd2_u10sse4(__m128d a);
      +__m128d Sleef_erfd2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_erfd4_u10(__m256d a);
      +__m256d Sleef_erfd4_u10avx(__m256d a);
      +__m256d Sleef_erfd4_u10fma4(__m256d a);
      +__m256d Sleef_erfd4_u10avx2(__m256d a);
      +
      +__m512d Sleef_erfd8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_erf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision error functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_erff4_u10(__m128 a);
      +__m128 Sleef_erff4_u10sse2(__m128 a);
      +__m128 Sleef_erff4_u10sse4(__m128 a);
      +__m128 Sleef_erff4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_erff8_u10(__m256 a);
      +__m256 Sleef_erff8_u10avx(__m256 a);
      +__m256 Sleef_erff8_u10fma4(__m256 a);
      +__m256 Sleef_erff8_u10avx2(__m256 a);
      +
      +__m512 Sleef_erff16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_erff_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision complementary error functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_erfcd2_u15(__m128d a);
      +__m128d Sleef_erfcd2_u15sse2(__m128d a);
      +__m128d Sleef_erfcd2_u15sse4(__m128d a);
      +__m128d Sleef_erfcd2_u15avx2128(__m128d a);
      +
      +__m256d Sleef_erfcd4_u15(__m256d a);
      +__m256d Sleef_erfcd4_u15avx(__m256d a);
      +__m256d Sleef_erfcd4_u15fma4(__m256d a);
      +__m256d Sleef_erfcd4_u15avx2(__m256d a);
      +
      +__m512d Sleef_erfcd8_u15avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_erfc_u15 with the same accuracy specification. +

      + +
      +

      Vectorized single precision complementary error functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_erfcf4_u15(__m128 a);
      +__m128 Sleef_erfcf4_u15sse2(__m128 a);
      +__m128 Sleef_erfcf4_u15sse4(__m128 a);
      +__m128 Sleef_erfcf4_u15avx2128(__m128 a);
      +
      +__m256 Sleef_erfcf8_u15(__m256 a);
      +__m256 Sleef_erfcf8_u15avx(__m256 a);
      +__m256 Sleef_erfcf8_u15fma4(__m256 a);
      +__m256 Sleef_erfcf8_u15avx2(__m256 a);
      +
      +__m512 Sleef_erfcf16_u15avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_erfcf_u15 with the same accuracy specification. +

      + +
      +

      Vectorized double precision gamma functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_tgammad2_u10(__m128d a);
      +__m128d Sleef_tgammad2_u10sse2(__m128d a);
      +__m128d Sleef_tgammad2_u10sse4(__m128d a);
      +__m128d Sleef_tgammad2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_tgammad4_u10(__m256d a);
      +__m256d Sleef_tgammad4_u10avx(__m256d a);
      +__m256d Sleef_tgammad4_u10fma4(__m256d a);
      +__m256d Sleef_tgammad4_u10avx2(__m256d a);
      +
      +__m512d Sleef_tgammad8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_tgamma_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision gamma functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_tgammaf4_u10(__m128 a);
      +__m128 Sleef_tgammaf4_u10sse2(__m128 a);
      +__m128 Sleef_tgammaf4_u10sse4(__m128 a);
      +__m128 Sleef_tgammaf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_tgammaf8_u10(__m256 a);
      +__m256 Sleef_tgammaf8_u10avx(__m256 a);
      +__m256 Sleef_tgammaf8_u10fma4(__m256 a);
      +__m256 Sleef_tgammaf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_tgammaf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_tgammaf_u10 with the same accuracy specification. +

      + +
      +

      Vectorized double precision log gamma functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_lgammad2_u10(__m128d a);
      +__m128d Sleef_lgammad2_u10sse2(__m128d a);
      +__m128d Sleef_lgammad2_u10sse4(__m128d a);
      +__m128d Sleef_lgammad2_u10avx2128(__m128d a);
      +
      +__m256d Sleef_lgammad4_u10(__m256d a);
      +__m256d Sleef_lgammad4_u10avx(__m256d a);
      +__m256d Sleef_lgammad4_u10fma4(__m256d a);
      +__m256d Sleef_lgammad4_u10avx2(__m256d a);
      +
      +__m512d Sleef_lgammad8_u10avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_lgamma_u10 with the same accuracy specification. +

      + +
      +

      Vectorized single precision log gamma functions

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_lgammaf4_u10(__m128 a);
      +__m128 Sleef_lgammaf4_u10sse2(__m128 a);
      +__m128 Sleef_lgammaf4_u10sse4(__m128 a);
      +__m128 Sleef_lgammaf4_u10avx2128(__m128 a);
      +
      +__m256 Sleef_lgammaf8_u10(__m256 a);
      +__m256 Sleef_lgammaf8_u10avx(__m256 a);
      +__m256 Sleef_lgammaf8_u10fma4(__m256 a);
      +__m256 Sleef_lgammaf8_u10avx2(__m256 a);
      +
      +__m512 Sleef_lgammaf16_u10avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_lgammaf_u10 with the same accuracy specification. +

      + + +

      Nearest integer functions

      + +

      Vectorized double precision functions for rounding to integer towards zero

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_truncd2(__m128d a);
      +__m128d Sleef_truncd2_sse2(__m128d a);
      +__m128d Sleef_truncd2_sse4(__m128d a);
      +__m128d Sleef_truncd2_avx2128(__m128d a);
      +
      +__m256d Sleef_truncd4(__m256d a);
      +__m256d Sleef_truncd4_avx(__m256d a);
      +__m256d Sleef_truncd4_fma4(__m256d a);
      +__m256d Sleef_truncd4_avx2(__m256d a);
      +
      +__m512d Sleef_truncd8_avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_trunc with the same accuracy specification. +

      + +
      +

      Vectorized single precision functions for rounding to integer towards zero

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_truncf4(__m128 a);
      +__m128 Sleef_truncf4_sse2(__m128 a);
      +__m128 Sleef_truncf4_sse4(__m128 a);
      +__m128 Sleef_truncf4_avx2128(__m128 a);
      +
      +__m256 Sleef_truncf8(__m256 a);
      +__m256 Sleef_truncf8_avx(__m256 a);
      +__m256 Sleef_truncf8_fma4(__m256 a);
      +__m256 Sleef_truncf8_avx2(__m256 a);
      +
      +__m512 Sleef_truncf16_avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_truncf with the same accuracy specification. +

      + +
      +

      Vectorized double precision functions for rounding to integer towards negative infinity

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_floord2(__m128d a);
      +__m128d Sleef_floord2_sse2(__m128d a);
      +__m128d Sleef_floord2_sse4(__m128d a);
      +__m128d Sleef_floord2_avx2128(__m128d a);
      +
      +__m256d Sleef_floord4(__m256d a);
      +__m256d Sleef_floord4_avx(__m256d a);
      +__m256d Sleef_floord4_fma4(__m256d a);
      +__m256d Sleef_floord4_avx2(__m256d a);
      +
      +__m512d Sleef_floord8_avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_floor with the same accuracy specification. +

      + +
      +

      Vectorized single precision functions for rounding to integer towards negative infinity

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_floorf4(__m128 a);
      +__m128 Sleef_floorf4_sse2(__m128 a);
      +__m128 Sleef_floorf4_sse4(__m128 a);
      +__m128 Sleef_floorf4_avx2128(__m128 a);
      +
      +__m256 Sleef_floorf8(__m256 a);
      +__m256 Sleef_floorf8_avx(__m256 a);
      +__m256 Sleef_floorf8_fma4(__m256 a);
      +__m256 Sleef_floorf8_avx2(__m256 a);
      +
      +__m512 Sleef_floorf16_avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_floorf with the same accuracy specification. +

      + +
      +

      Vectorized double precision functions for rounding to integer towards positive infinity

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_ceild2(__m128d a);
      +__m128d Sleef_ceild2_sse2(__m128d a);
      +__m128d Sleef_ceild2_sse4(__m128d a);
      +__m128d Sleef_ceild2_avx2128(__m128d a);
      +
      +__m256d Sleef_ceild4(__m256d a);
      +__m256d Sleef_ceild4_avx(__m256d a);
      +__m256d Sleef_ceild4_fma4(__m256d a);
      +__m256d Sleef_ceild4_avx2(__m256d a);
      +
      +__m512d Sleef_ceild8_avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_ceil with the same accuracy specification. +

      + +
      +

      Vectorized single precision functions for rounding to integer towards positive infinity

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_ceilf4(__m128 a);
      +__m128 Sleef_ceilf4_sse2(__m128 a);
      +__m128 Sleef_ceilf4_sse4(__m128 a);
      +__m128 Sleef_ceilf4_avx2128(__m128 a);
      +
      +__m256 Sleef_ceilf8(__m256 a);
      +__m256 Sleef_ceilf8_avx(__m256 a);
      +__m256 Sleef_ceilf8_fma4(__m256 a);
      +__m256 Sleef_ceilf8_avx2(__m256 a);
      +
      +__m512 Sleef_ceilf16_avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_ceilf with the same accuracy specification. +

      + +
      +

      Vectorized double precision functions for rounding to nearest integer (round: halfway cases round away from zero)

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_roundd2(__m128d a);
      +__m128d Sleef_roundd2_sse2(__m128d a);
      +__m128d Sleef_roundd2_sse4(__m128d a);
      +__m128d Sleef_roundd2_avx2128(__m128d a);
      +
      +__m256d Sleef_roundd4(__m256d a);
      +__m256d Sleef_roundd4_avx(__m256d a);
      +__m256d Sleef_roundd4_fma4(__m256d a);
      +__m256d Sleef_roundd4_avx2(__m256d a);
      +
      +__m512d Sleef_roundd8_avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_round with the same accuracy specification. +

      + +
      +

      Vectorized single precision functions for rounding to nearest integer (round: halfway cases round away from zero)

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_roundf4(__m128 a);
      +__m128 Sleef_roundf4_sse2(__m128 a);
      +__m128 Sleef_roundf4_sse4(__m128 a);
      +__m128 Sleef_roundf4_avx2128(__m128 a);
      +
      +__m256 Sleef_roundf8(__m256 a);
      +__m256 Sleef_roundf8_avx(__m256 a);
      +__m256 Sleef_roundf8_fma4(__m256 a);
      +__m256 Sleef_roundf8_avx2(__m256 a);
      +
      +__m512 Sleef_roundf16_avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_roundf with the same accuracy specification. +

      + +
      +

      Vectorized double precision functions for rounding to nearest integer (rint: halfway cases round to even)

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_rintd2(__m128d a);
      +__m128d Sleef_rintd2_sse2(__m128d a);
      +__m128d Sleef_rintd2_sse4(__m128d a);
      +__m128d Sleef_rintd2_avx2128(__m128d a);
      +
      +__m256d Sleef_rintd4(__m256d a);
      +__m256d Sleef_rintd4_avx(__m256d a);
      +__m256d Sleef_rintd4_fma4(__m256d a);
      +__m256d Sleef_rintd4_avx2(__m256d a);
      +
      +__m512d Sleef_rintd8_avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_rint with the same accuracy specification. +

      + +
      +

      Vectorized single precision functions for rounding to nearest integer (rint: halfway cases round to even)

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_rintf4(__m128 a);
      +__m128 Sleef_rintf4_sse2(__m128 a);
      +__m128 Sleef_rintf4_sse4(__m128 a);
      +__m128 Sleef_rintf4_avx2128(__m128 a);
      +
      +__m256 Sleef_rintf8(__m256 a);
      +__m256 Sleef_rintf8_avx(__m256 a);
      +__m256 Sleef_rintf8_fma4(__m256 a);
      +__m256 Sleef_rintf8_avx2(__m256 a);
      +
      +__m512 Sleef_rintf16_avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_rintf with the same accuracy specification. +

      + + +

      Other functions

      + +

      Vectorized double precision functions for fused multiply-accumulation

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_fmad2(__m128d a, __m128d b, __m128d c);
      +__m128d Sleef_fmad2_sse2(__m128d a, __m128d b, __m128d c);
      +__m128d Sleef_fmad2_sse4(__m128d a, __m128d b, __m128d c);
      +__m128d Sleef_fmad2_avx2128(__m128d a, __m128d b, __m128d c);
      +
      +__m256d Sleef_fmad4(__m256d a, __m256d b, __m256d c);
      +__m256d Sleef_fmad4_avx(__m256d a, __m256d b, __m256d c);
      +__m256d Sleef_fmad4_fma4(__m256d a, __m256d b, __m256d c);
      +__m256d Sleef_fmad4_avx2(__m256d a, __m256d b, __m256d c);
      +
      +__m512d Sleef_fmad8_avx512f(__m512d a, __m512d b, __m512d c);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_fma with the same accuracy specification. +

      + +
      +

      Vectorized single precision functions for fused multiply-accumulation

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_fmaf4(__m128 a, __m128 b, __m128 c);
      +__m128 Sleef_fmaf4_sse2(__m128 a, __m128 b, __m128 c);
      +__m128 Sleef_fmaf4_sse4(__m128 a, __m128 b, __m128 c);
      +__m128 Sleef_fmaf4_avx2128(__m128 a, __m128 b, __m128 c);
      +
      +__m256 Sleef_fmaf8(__m256 a, __m256 b, __m256 c);
      +__m256 Sleef_fmaf8_avx(__m256 a, __m256 b, __m256 c);
      +__m256 Sleef_fmaf8_fma4(__m256 a, __m256 b, __m256 c);
      +__m256 Sleef_fmaf8_avx2(__m256 a, __m256 b, __m256 c);
      +
      +__m512 Sleef_fmaf16_avx512f(__m512 a, __m512 b, __m512 c);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_fmaf with the same accuracy specification. +

      + +
      + +

      Vectorized double precision functions for computing FP remainder

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_fmodd2(__m128d a, __m128d b);
      +__m128d Sleef_fmodd2_sse2(__m128d a, __m128d b);
      +__m128d Sleef_fmodd2_sse4(__m128d a, __m128d b);
      +__m128d Sleef_fmodd2_avx2128(__m128d a, __m128d b);
      +
      +__m256d Sleef_fmodd4(__m256d a, __m256d b);
      +__m256d Sleef_fmodd4_avx(__m256d a, __m256d b);
      +__m256d Sleef_fmodd4_fma4(__m256d a, __m256d b);
      +__m256d Sleef_fmodd4_avx2(__m256d a, __m256d b);
      +
      +__m512d Sleef_fmodd8_avx512f(__m512d a, __m512d b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_fmod with the same accuracy specification. +

      + +
      +

      Vectorized single precision functions for computing FP remainder

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_fmodf4(__m128 a, __m128 b);
      +__m128 Sleef_fmodf4_sse2(__m128 a, __m128 b);
      +__m128 Sleef_fmodf4_sse4(__m128 a, __m128 b);
      +__m128 Sleef_fmodf4_avx2128(__m128 a, __m128 b);
      +
      +__m256 Sleef_fmodf8(__m256 a, __m256 b);
      +__m256 Sleef_fmodf8_avx(__m256 a, __m256 b);
      +__m256 Sleef_fmodf8_fma4(__m256 a, __m256 b);
      +__m256 Sleef_fmodf8_avx2(__m256 a, __m256 b);
      +
      +__m512 Sleef_fmodf16_avx512f(__m512 a, __m512 b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_fmodf with the same accuracy specification. +

      + +
      +

      Vectorized double precision functions for multiplying by integral power of 2

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_ldexpd2(__m128d a, __m128i b);
      +__m128d Sleef_ldexpd2_sse2(__m128d a, __m128i b);
      +__m128d Sleef_ldexpd2_sse4(__m128d a, __m128i b);
      +__m128d Sleef_ldexpd2_avx2128(__m128d a, __m128i b);
      +
      +__m256d Sleef_ldexpd4(__m256d a, __m128i b);
      +__m256d Sleef_ldexpd4_avx(__m256d a, __m128i b);
      +__m256d Sleef_ldexpd4_fma4(__m256d a, __m128i b);
      +__m256d Sleef_ldexpd4_avx2(__m256d a, __m128i b);
      +
      +__m512d Sleef_ldexpd8_avx512f(__m512d a, __m256i b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_ldexp with the same accuracy specification. +

      + +
      +

      Vectorized double precision functions for obtaining fractional component of an FP number

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_frfrexpd2(__m128d a);
      +__m128d Sleef_frfrexpd2_sse2(__m128d a);
      +__m128d Sleef_frfrexpd2_sse4(__m128d a);
      +__m128d Sleef_frfrexpd2_avx2128(__m128d a);
      +
      +__m256d Sleef_frfrexpd4(__m256d a);
      +__m256d Sleef_frfrexpd4_avx(__m256d a);
      +__m256d Sleef_frfrexpd4_fma4(__m256d a);
      +__m256d Sleef_frfrexpd4_avx2(__m256d a);
      +
      +__m512d Sleef_frfrexpd8_avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_frfrexp with the same accuracy specification. +

      + +
      +

      Vectorized single precision functions for obtaining fractional component of an FP number

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_frfrexpf4(__m128 a);
      +__m128 Sleef_frfrexpf4_sse2(__m128 a);
      +__m128 Sleef_frfrexpf4_sse4(__m128 a);
      +__m128 Sleef_frfrexpf4_avx2128(__m128 a);
      +
      +__m256 Sleef_frfrexpf8(__m256 a);
      +__m256 Sleef_frfrexpf8_avx(__m256 a);
      +__m256 Sleef_frfrexpf8_fma4(__m256 a);
      +__m256 Sleef_frfrexpf8_avx2(__m256 a);
      +
      +__m512 Sleef_frfrexpf16_avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_frfrexpf with the same accuracy specification. +

      + +
      +

      Vectorized double precision functions for obtaining integral component of an FP number

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128i Sleef_expfrexpd2(__m128d a);
      +__m128i Sleef_expfrexpd2_sse2(__m128d a);
      +__m128i Sleef_expfrexpd2_sse4(__m128d a);
      +__m128i Sleef_expfrexpd2_avx2128(__m128d a);
      +
      +__m128i Sleef_expfrexpd4(__m256d a);
      +__m128i Sleef_expfrexpd4_avx(__m256d a);
      +__m128i Sleef_expfrexpd4_fma4(__m256d a);
      +__m128i Sleef_expfrexpd4_avx2(__m256d a);
      +
      +__m256i Sleef_expfrexpd8_avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_expfrexp with the same accuracy specification. +

      + +
      + +

      Vectorized double precision functions for getting integer exponent

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128i Sleef_ilogbd2(__m128d a);
      +__m128i Sleef_ilogbd2_sse2(__m128d a);
      +__m128i Sleef_ilogbd2_sse4(__m128d a);
      +__m128i Sleef_ilogbd2_avx2128(__m128d a);
      +
      +__m128i Sleef_ilogbd4(__m256d a);
      +__m128i Sleef_ilogbd4_avx(__m256d a);
      +__m128i Sleef_ilogbd4_fma4(__m256d a);
      +__m128i Sleef_ilogbd4_avx2(__m256d a);
      +
      +__m256i Sleef_ilogbd8_avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_ilogb with the same accuracy specification. +

      + +
      +

      Vectorized double precision functions for obtaining signed integral and fractional values

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +Sleef___m128d_2 Sleef_modfd2(__m128d a);
      +Sleef___m128d_2 Sleef_modfd2_sse2(__m128d a);
      +Sleef___m128d_2 Sleef_modfd2_sse4(__m128d a);
      +Sleef___m128d_2 Sleef_modfd2_avx2128(__m128d a);
      +
      +Sleef___m256d_2 Sleef_modfd4(__m256d a);
      +Sleef___m256d_2 Sleef_modfd4_avx(__m256d a);
      +Sleef___m256d_2 Sleef_modfd4_fma4(__m256d a);
      +Sleef___m256d_2 Sleef_modfd4_avx2(__m256d a);
      +
      +Sleef___m512d_2 Sleef_modfd8_avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_modf with the same accuracy specification. +

      + +
      +

      Vectorized single precision functions for obtaining signed integral and fractional values

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +Sleef___m128_2 Sleef_modff4(__m128 a);
      +Sleef___m128_2 Sleef_modff4_sse2(__m128 a);
      +Sleef___m128_2 Sleef_modff4_sse4(__m128 a);
      +Sleef___m128_2 Sleef_modff4_avx2128(__m128 a);
      +
      +Sleef___m256_2 Sleef_modff8(__m256 a);
      +Sleef___m256_2 Sleef_modff8_avx(__m256 a);
      +Sleef___m256_2 Sleef_modff8_fma4(__m256 a);
      +Sleef___m256_2 Sleef_modff8_avx2(__m256 a);
      +
      +Sleef___m512_2 Sleef_modff16_avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_modff with the same accuracy specification. +

      + +
      +

      Vectorized double precision functions for calculating the absolute value

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_fabsd2(__m128d a);
      +__m128d Sleef_fabsd2_sse2(__m128d a);
      +__m128d Sleef_fabsd2_sse4(__m128d a);
      +__m128d Sleef_fabsd2_avx2128(__m128d a);
      +
      +__m256d Sleef_fabsd4(__m256d a);
      +__m256d Sleef_fabsd4_avx(__m256d a);
      +__m256d Sleef_fabsd4_fma4(__m256d a);
      +__m256d Sleef_fabsd4_avx2(__m256d a);
      +
      +__m512d Sleef_fabsd8_avx512f(__m512d a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_fabs with the same accuracy specification. +

      + +
      +

      Vectorized single precision functions for calculating the absolute value

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_fabsf4(__m128 a);
      +__m128 Sleef_fabsf4_sse2(__m128 a);
      +__m128 Sleef_fabsf4_sse4(__m128 a);
      +__m128 Sleef_fabsf4_avx2128(__m128 a);
      +
      +__m256 Sleef_fabsf8(__m256 a);
      +__m256 Sleef_fabsf8_avx(__m256 a);
      +__m256 Sleef_fabsf8_fma4(__m256 a);
      +__m256 Sleef_fabsf8_avx2(__m256 a);
      +
      +__m512 Sleef_fabsf16_avx512f(__m512 a);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_fabsf with the same accuracy specification. +

      + +
      +

      Vectorized double precision functions for copying signs

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_copysignd2(__m128d a, __m128d b);
      +__m128d Sleef_copysignd2_sse2(__m128d a, __m128d b);
      +__m128d Sleef_copysignd2_sse4(__m128d a, __m128d b);
      +__m128d Sleef_copysignd2_avx2128(__m128d a, __m128d b);
      +
      +__m256d Sleef_copysignd4(__m256d a, __m256d b);
      +__m256d Sleef_copysignd4_avx(__m256d a, __m256d b);
      +__m256d Sleef_copysignd4_fma4(__m256d a, __m256d b);
      +__m256d Sleef_copysignd4_avx2(__m256d a, __m256d b);
      +
      +__m512d Sleef_copysignd8_avx512f(__m512d a, __m512d b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_copysign with the same accuracy specification. +

      + +
      +

      Vectorized single precision functions for copying signs

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_copysignf4(__m128 a, __m128 b);
      +__m128 Sleef_copysignf4_sse2(__m128 a, __m128 b);
      +__m128 Sleef_copysignf4_sse4(__m128 a, __m128 b);
      +__m128 Sleef_copysignf4_avx2128(__m128 a, __m128 b);
      +
      +__m256 Sleef_copysignf8(__m256 a, __m256 b);
      +__m256 Sleef_copysignf8_avx(__m256 a, __m256 b);
      +__m256 Sleef_copysignf8_fma4(__m256 a, __m256 b);
      +__m256 Sleef_copysignf8_avx2(__m256 a, __m256 b);
      +
      +__m512 Sleef_copysignf16_avx512f(__m512 a, __m512 b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_copysignf with the same accuracy specification. +

      + +
      +

      Vectorized double precision functions for determining maximum of two values

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_fmaxd2(__m128d a, __m128d b);
      +__m128d Sleef_fmaxd2_sse2(__m128d a, __m128d b);
      +__m128d Sleef_fmaxd2_sse4(__m128d a, __m128d b);
      +__m128d Sleef_fmaxd2_avx2128(__m128d a, __m128d b);
      +
      +__m256d Sleef_fmaxd4(__m256d a, __m256d b);
      +__m256d Sleef_fmaxd4_avx(__m256d a, __m256d b);
      +__m256d Sleef_fmaxd4_fma4(__m256d a, __m256d b);
      +__m256d Sleef_fmaxd4_avx2(__m256d a, __m256d b);
      +
      +__m512d Sleef_fmaxd8_avx512f(__m512d a, __m512d b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_fmax with the same accuracy specification. +

      + +
      +

      Vectorized single precision functions for determining maximum of two values

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_fmaxf4(__m128 a, __m128 b);
      +__m128 Sleef_fmaxf4_sse2(__m128 a, __m128 b);
      +__m128 Sleef_fmaxf4_sse4(__m128 a, __m128 b);
      +__m128 Sleef_fmaxf4_avx2128(__m128 a, __m128 b);
      +
      +__m256 Sleef_fmaxf8(__m256 a, __m256 b);
      +__m256 Sleef_fmaxf8_avx(__m256 a, __m256 b);
      +__m256 Sleef_fmaxf8_fma4(__m256 a, __m256 b);
      +__m256 Sleef_fmaxf8_avx2(__m256 a, __m256 b);
      +
      +__m512 Sleef_fmaxf16_avx512f(__m512 a, __m512 b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_fmaxf with the same accuracy specification. +

      + +
      +

      Vectorized double precision functions for determining minimum of two values

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_fmind2(__m128d a, __m128d b);
      +__m128d Sleef_fmind2_sse2(__m128d a, __m128d b);
      +__m128d Sleef_fmind2_sse4(__m128d a, __m128d b);
      +__m128d Sleef_fmind2_avx2128(__m128d a, __m128d b);
      +
      +__m256d Sleef_fmind4(__m256d a, __m256d b);
      +__m256d Sleef_fmind4_avx(__m256d a, __m256d b);
      +__m256d Sleef_fmind4_fma4(__m256d a, __m256d b);
      +__m256d Sleef_fmind4_avx2(__m256d a, __m256d b);
      +
      +__m512d Sleef_fmind8_avx512f(__m512d a, __m512d b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_fmin with the same accuracy specification. +

      + +
      +

      Vectorized single precision functions for determining minimum of two values

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_fminf4(__m128 a, __m128 b);
      +__m128 Sleef_fminf4_sse2(__m128 a, __m128 b);
      +__m128 Sleef_fminf4_sse4(__m128 a, __m128 b);
      +__m128 Sleef_fminf4_avx2128(__m128 a, __m128 b);
      +
      +__m256 Sleef_fminf8(__m256 a, __m256 b);
      +__m256 Sleef_fminf8_avx(__m256 a, __m256 b);
      +__m256 Sleef_fminf8_fma4(__m256 a, __m256 b);
      +__m256 Sleef_fminf8_avx2(__m256 a, __m256 b);
      +
      +__m512 Sleef_fminf16_avx512f(__m512 a, __m512 b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_fminf with the same accuracy specification. +

      + +
      +

      Vectorized double precision functions to calculate positive difference of two values

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_fdimd2(__m128d a, __m128d b);
      +__m128d Sleef_fdimd2_sse2(__m128d a, __m128d b);
      +__m128d Sleef_fdimd2_sse4(__m128d a, __m128d b);
      +__m128d Sleef_fdimd2_avx2128(__m128d a, __m128d b);
      +
      +__m256d Sleef_fdimd4(__m256d a, __m256d b);
      +__m256d Sleef_fdimd4_avx(__m256d a, __m256d b);
      +__m256d Sleef_fdimd4_fma4(__m256d a, __m256d b);
      +__m256d Sleef_fdimd4_avx2(__m256d a, __m256d b);
      +
      +__m512d Sleef_fdimd8_avx512f(__m512d a, __m512d b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_fdim with the same accuracy specification. +

      + +
      +

      Vectorized single precision functions to calculate positive difference of two values

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_fdimf4(__m128 a, __m128 b);
      +__m128 Sleef_fdimf4_sse2(__m128 a, __m128 b);
      +__m128 Sleef_fdimf4_sse4(__m128 a, __m128 b);
      +__m128 Sleef_fdimf4_avx2128(__m128 a, __m128 b);
      +
      +__m256 Sleef_fdimf8(__m256 a, __m256 b);
      +__m256 Sleef_fdimf8_avx(__m256 a, __m256 b);
      +__m256 Sleef_fdimf8_fma4(__m256 a, __m256 b);
      +__m256 Sleef_fdimf8_avx2(__m256 a, __m256 b);
      +
      +__m512 Sleef_fdimf16_avx512f(__m512 a, __m512 b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_fdimf with the same accuracy specification. +

      + +
      +

      Vectorized double precision functions for obtaining the next representable FP value

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128d Sleef_nextafterd2(__m128d a, __m128d b);
      +__m128d Sleef_nextafterd2_sse2(__m128d a, __m128d b);
      +__m128d Sleef_nextafterd2_sse4(__m128d a, __m128d b);
      +__m128d Sleef_nextafterd2_avx2128(__m128d a, __m128d b);
      +
      +__m256d Sleef_nextafterd4(__m256d a, __m256d b);
      +__m256d Sleef_nextafterd4_avx(__m256d a, __m256d b);
      +__m256d Sleef_nextafterd4_fma4(__m256d a, __m256d b);
      +__m256d Sleef_nextafterd4_avx2(__m256d a, __m256d b);
      +
      +__m512d Sleef_nextafterd8_avx512f(__m512d a, __m512d b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_nextafter with the same accuracy specification. +

      + +
      +

      Vectorized single precision functions for obtaining the next representable FP value

      + +

      Synopsis

      + +

      +#include <sleef.h>
      +
      +__m128 Sleef_nextafterf4(__m128 a, __m128 b);
      +__m128 Sleef_nextafterf4_sse2(__m128 a, __m128 b);
      +__m128 Sleef_nextafterf4_sse4(__m128 a, __m128 b);
      +__m128 Sleef_nextafterf4_avx2128(__m128 a, __m128 b);
      +
      +__m256 Sleef_nextafterf8(__m256 a, __m256 b);
      +__m256 Sleef_nextafterf8_avx(__m256 a, __m256 b);
      +__m256 Sleef_nextafterf8_fma4(__m256 a, __m256 b);
      +__m256 Sleef_nextafterf8_avx2(__m256 a, __m256 b);
      +
      +__m512 Sleef_nextafterf16_avx512f(__m512 a, __m512 b);
      +
      +Link with -lsleef. +

      + +

      Description

      + +

      +These are the vectorized functions of Sleef_nextafterf with the same accuracy specification. +

      + diff --git a/doc/sleeflogo2.png b/doc/sleeflogo2.png deleted file mode 100644 index c5aa0981..00000000 Binary files a/doc/sleeflogo2.png and /dev/null differ diff --git a/lib/Makefile b/lib/Makefile index 04143403..da7e1fc6 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -90,7 +90,7 @@ libsleefgnuabi.dylib : ../src/libm/libm.a ../src/libm/OBJ2.txt ../src/common/com $(CC) -dynamiclib -current_version 3.1 -compatibility_version 3.1 -fvisibility=hidden `cat ../src/libm/OBJ2.txt` ../src/common/common.o -lm -o libsleefgnuabi.dylib libsleefdft.dylib : ../src/dft/dft.a libsleef.dylib ../src/common/arraymap.a - $(CC) $(OPENMPFLAG) -dynamiclib -current_version 3.0 -compatibility_version 3.0 -fvisibility=hidden ../src/dft/*.o ../src/common/arraymap.o libsleef.dylib -lm -o libsleefdft.dylib + $(CC) $(OPENMPFLAG) -dynamiclib -current_version 3.1 -compatibility_version 3.1 -fvisibility=hidden ../src/dft/*.o ../src/common/arraymap.o libsleef.dylib -lm -o libsleefdft.dylib endif #