Skip to content

Commit

Permalink
Release of version 2.100
Browse files Browse the repository at this point in the history
  • Loading branch information
shibatch committed Dec 3, 2016
1 parent 20b4818 commit f9d377c
Show file tree
Hide file tree
Showing 22 changed files with 2,567 additions and 1,562 deletions.
2 changes: 2 additions & 0 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ Single precision trigonometric functions : |arg| <= 10000

History

2.100 Added support for AVX-512F and Clang Extended Vectors.

2.90 Added ilogbf. All the reported bugs (listed below) are fixed.
* Log function returned incorrect values when the argument is very small.
* Signs of returned values were incorrect when the argument is signed zero.
Expand Down
54 changes: 27 additions & 27 deletions purec/sleefdp.c
Original file line number Diff line number Diff line change
Expand Up @@ -838,23 +838,23 @@ double xlog(double d) {
double x, x2, t, m;
int e;

e = ilogbk(d * 1.4142);
e = ilogbk(d * (1.0/0.75));
m = ldexpk(d, -e);

x = (m-1) / (m+1);
x2 = x * x;

t = 0.148197055177935105296783;
t = mla(t, x2, 0.153108178020442575739679);
t = mla(t, x2, 0.181837339521549679055568);
t = mla(t, x2, 0.22222194152736701733275);
t = mla(t, x2, 0.285714288030134544449368);
t = mla(t, x2, 0.399999999989941956712869);
t = mla(t, x2, 0.666666666666685503450651);
t = 0.153487338491425068243146;
t = mla(t, x2, 0.152519917006351951593857);
t = mla(t, x2, 0.181863266251982985677316);
t = mla(t, x2, 0.222221366518767365905163);
t = mla(t, x2, 0.285714294746548025383248);
t = mla(t, x2, 0.399999999950799600689777);
t = mla(t, x2, 0.6666666666667778740063);
t = mla(t, x2, 2);

x = x * t + 0.693147180559945286226764 * e;

if (xisinf(d)) x = INFINITY;
if (d < 0) x = NAN;
if (d == 0) x = -INFINITY;
Expand Down Expand Up @@ -894,20 +894,20 @@ static inline double2 logk(double d) {
double m, t;
int e;

e = ilogbk(d * 1.4142);
e = ilogbk(d * (1.0/0.75));
m = ldexpk(d, -e);

x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m));
x2 = ddsqu_d2_d2(x);

t = 0.134601987501262130076155;
t = mla(t, x2.x, 0.132248509032032670243288);
t = mla(t, x2.x, 0.153883458318096079652524);
t = mla(t, x2.x, 0.181817427573705403298686);
t = mla(t, x2.x, 0.222222231326187414840781);
t = mla(t, x2.x, 0.285714285651261412873718);
t = mla(t, x2.x, 0.400000000000222439910458);
t = mla(t, x2.x, 0.666666666666666371239645);
t = 0.13860436390467167910856;
t = mla(t, x2.x, 0.131699838841615374240845);
t = mla(t, x2.x, 0.153914168346271945653214);
t = mla(t, x2.x, 0.181816523941564611721589);
t = mla(t, x2.x, 0.22222224632662035403996);
t = mla(t, x2.x, 0.285714285511134091777308);
t = mla(t, x2.x, 0.400000000000914013309483);
t = mla(t, x2.x, 0.666666666666664853302393);

return ddadd2_d2_d2_d2(ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e),
ddadd2_d2_d2_d2(ddscale_d2_d2_d(x, 2), ddmul_d2_d2_d(ddmul_d2_d2_d2(x2, x), t)));
Expand Down Expand Up @@ -1041,20 +1041,20 @@ static inline double2 logk2(double2 d) {
double t;
int e;

e = ilogbk(d.x * 1.4142);
e = ilogbk(d.x * (1.0/0.75));
m = ddscale_d2_d2_d(d, pow2i(-e));

x = dddiv_d2_d2_d2(ddadd2_d2_d2_d(m, -1), ddadd2_d2_d2_d(m, 1));
x2 = ddsqu_d2_d2(x);

t = 0.134601987501262130076155;
t = mla(t, x2.x, 0.132248509032032670243288);
t = mla(t, x2.x, 0.153883458318096079652524);
t = mla(t, x2.x, 0.181817427573705403298686);
t = mla(t, x2.x, 0.222222231326187414840781);
t = mla(t, x2.x, 0.285714285651261412873718);
t = mla(t, x2.x, 0.400000000000222439910458);
t = mla(t, x2.x, 0.666666666666666371239645);
t = 0.13860436390467167910856;
t = mla(t, x2.x, 0.131699838841615374240845);
t = mla(t, x2.x, 0.153914168346271945653214);
t = mla(t, x2.x, 0.181816523941564611721589);
t = mla(t, x2.x, 0.22222224632662035403996);
t = mla(t, x2.x, 0.285714285511134091777308);
t = mla(t, x2.x, 0.400000000000914013309483);
t = mla(t, x2.x, 0.666666666666664853302393);

return ddadd2_d2_d2_d2(ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e),
ddadd2_d2_d2_d2(ddscale_d2_d2_d(x, 2), ddmul_d2_d2_d(ddmul_d2_d2_d2(x2, x), t)));
Expand Down
36 changes: 18 additions & 18 deletions purec/sleefsp.c
Original file line number Diff line number Diff line change
Expand Up @@ -757,20 +757,20 @@ float xlogf(float d) {
float x, x2, t, m;
int e;

e = ilogbkf(d * 1.4142f);
e = ilogbkf(d * (1.0f/0.75f));
m = ldexpkf(d, -e);

x = (m-1.0f) / (m+1.0f);
x2 = x * x;

t = 0.2371599674224853515625f;
t = mlaf(t, x2, 0.285279005765914916992188f);
t = mlaf(t, x2, 0.400005519390106201171875f);
t = mlaf(t, x2, 0.666666567325592041015625f);
t = 0.2392828464508056640625f;
t = mlaf(t, x2, 0.28518211841583251953125f);
t = mlaf(t, x2, 0.400005877017974853515625f);
t = mlaf(t, x2, 0.666666686534881591796875f);
t = mlaf(t, x2, 2.0f);

x = x * t + 0.693147180559945286226764f * e;

if (xisinff(d)) x = INFINITYf;
if (d < 0) x = NANf;
if (d == 0) x = -INFINITYf;
Expand Down Expand Up @@ -834,16 +834,16 @@ static inline float2 logkf(float d) {
float m, t;
int e;

e = ilogbkf(d * 1.4142f);
e = ilogbkf(d * (1.0f/0.75f));
m = ldexpkf(d, -e);

x = dfdiv_f2_f2_f2(dfadd2_f2_f_f(-1, m), dfadd2_f2_f_f(1, m));
x2 = dfsqu_f2_f2(x);

t = 0.2371599674224853515625f;
t = mlaf(t, x2.x, 0.285279005765914916992188f);
t = mlaf(t, x2.x, 0.400005519390106201171875f);
t = mlaf(t, x2.x, 0.666666567325592041015625f);
t = 0.2392828464508056640625f;
t = mlaf(t, x2.x, 0.28518211841583251953125f);
t = mlaf(t, x2.x, 0.400005877017974853515625f);
t = mlaf(t, x2.x, 0.666666686534881591796875f);

return dfadd2_f2_f2_f2(dfmul_f2_f2_f(df(0.69314718246459960938f, -1.904654323148236017e-09f), e),
dfadd2_f2_f2_f2(dfscale_f2_f2_f(x, 2), dfmul_f2_f2_f(dfmul_f2_f2_f2(x2, x), t)));
Expand Down Expand Up @@ -945,16 +945,16 @@ static inline float2 logk2f(float2 d) {
float t;
int e;

e = ilogbkf(d.x * 1.4142f);
e = ilogbkf(d.x * (1.0f/0.75f));
m = dfscale_f2_f2_f(d, pow2if(-e));

x = dfdiv_f2_f2_f2(dfadd2_f2_f2_f(m, -1), dfadd2_f2_f2_f(m, 1));
x2 = dfsqu_f2_f2(x);

t = 0.2371599674224853515625f;
t = mlaf(t, x2.x, 0.285279005765914916992188f);
t = mlaf(t, x2.x, 0.400005519390106201171875f);
t = mlaf(t, x2.x, 0.666666567325592041015625f);
t = 0.2392828464508056640625f;
t = mlaf(t, x2.x, 0.28518211841583251953125f);
t = mlaf(t, x2.x, 0.400005877017974853515625f);
t = mlaf(t, x2.x, 0.666666686534881591796875f);

return dfadd2_f2_f2_f2(dfmul_f2_f2_f(df(0.69314718246459960938f, -1.904654323148236017e-09f), e),
dfadd2_f2_f2_f2(dfscale_f2_f2_f(x, 2), dfmul_f2_f2_f(dfmul_f2_f2_f2(x2, x), t)));
Expand Down
30 changes: 17 additions & 13 deletions simd/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
CC=gcc
OPT=-O -Wall -Wno-unused -Wno-attributes -ffp-contract=off
SDE=/opt/sde-bdw-external-5.38.0-2013-01-03-lin/sde
CC=gcc-5
OPT=-O -Wall -Wno-unused -Wno-attributes -ffp-contract=off -fmax-errors=3
SDE=sde

all : testsse2 testavx testavx2

Expand All @@ -13,6 +13,9 @@ iutavx : sleefsimddp.c sleefsimdsp.c helperavx.h iut.c
iutavx2 : sleefsimddp.c sleefsimdsp.c helperavx2.h iut.c
$(CC) $(OPT) -DENABLE_AVX2 -mavx2 -mfma iut.c sleefsimddp.c sleefsimdsp.c -o iutavx2 -lm

iutavx512f : sleefsimddp.c sleefsimdsp.c helperavx512f.h iut.c
$(CC) $(OPT) -DENABLE_AVX512F -mavx512f iut.c sleefsimddp.c sleefsimdsp.c -o iutavx512f -lm

iutfma4 : sleefsimddp.c sleefsimdsp.c helperfma4.h iut.c
$(CC) $(OPT) -DENABLE_FMA4 -mavx -mfma4 iut.c sleefsimddp.c sleefsimdsp.c -o iutfma4 -lm

Expand All @@ -30,32 +33,33 @@ iutfma4 : sleefsimddp.c sleefsimdsp.c helperfma4.h iut.c

testsse2 : iutsse2 ../tester/tester ../tester/testeru1 ../tester/testersp ../tester/testerspu1
../tester/tester ./iutsse2
../tester/testeru1 ./iutsse2
../tester/testersp ./iutsse2
../tester/testeru1 ./iutsse2
../tester/testerspu1 ./iutsse2

testavx : iutavx ../tester/tester ../tester/testeru1 ../tester/testersp ../tester/testerspu1
../tester/tester ./iutavx
../tester/testeru1 ./iutavx
../tester/testersp ./iutavx
../tester/testeru1 ./iutavx
../tester/testerspu1 ./iutavx

testavx2 : iutavx2 ../tester/tester ../tester/testeru1 ../tester/testersp ../tester/testerspu1
../tester/tester ./iutavx2
../tester/testeru1 ./iutavx2
../tester/testersp ./iutavx2
../tester/testeru1 ./iutavx2
../tester/testerspu1 ./iutavx2

testavx512f : iutavx512f ../tester/tester ../tester/testeru1 ../tester/testersp ../tester/testerspu1
../tester/tester $(SDE) -- ./iutavx512f
../tester/testersp $(SDE) -- ./iutavx512f
../tester/testeru1 $(SDE) -- ./iutavx512f
../tester/testerspu1 $(SDE) -- ./iutavx512f

testfma4 : iutfma4 ../tester/tester ../tester/testeru1 ../tester/testersp ../tester/testerspu1
../tester/tester ./iutfma4
../tester/testeru1 ./iutfma4
../tester/testersp ./iutfma4
../tester/testeru1 ./iutfma4
../tester/testerspu1 ./iutfma4

clean :
rm -f *~ *.o *.s iutsse2 iutavx iutavx2 iutfma4 iutneon

# ../tester/tester $(SDE) -- ./iutavx2
# ../tester/testeru1 $(SDE) -- ./iutavx2
# ../tester/testersp $(SDE) -- ./iutavx2
# ../tester/testerspu1 $(SDE) -- ./iutavx2
rm -f *~ *.o *.s iutsse2 iutavx iutavx2 iutavx512f iutfma4 iutneon iutclangvec
74 changes: 74 additions & 0 deletions simd/Makefile.clang
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Build settings for compiling and testing the SIMD sources with clang.
CC=clang
# Common compiler flags: light optimization, warnings on (unused/attribute
# warnings silenced), floating-point contraction disabled, and compilation
# stopped after three errors.
OPT=-O -Wall -Wno-unused -Wno-attributes -ffp-contract=off -ferror-limit=3
# Command that the testavx512f target places in front of "-- ./iutavx512f".
# NOTE(review): presumably the Intel SDE emulator, expected on PATH -- confirm.
SDE=sde

# Default goal: build and run only the Clang Extended Vectors test.
all : testclangvec

# Implementation-under-test (iut) executables.
# Every rule compiles the same three sources (iut.c, sleefsimddp.c,
# sleefsimdsp.c) into one binary and links libm; the rules differ only in the
# -DENABLE_* macro and the -m* instruction-set flags. The matching helper*.h
# header is listed as a prerequisite so that editing it triggers a rebuild.

# SSE2 build.
iutsse2 : sleefsimddp.c sleefsimdsp.c helpersse2.h iut.c
$(CC) $(OPT) -DENABLE_SSE2 -msse2 iut.c sleefsimddp.c sleefsimdsp.c -o iutsse2 -lm

# AVX build.
iutavx : sleefsimddp.c sleefsimdsp.c helperavx.h iut.c
$(CC) $(OPT) -DENABLE_AVX -mavx iut.c sleefsimddp.c sleefsimdsp.c -o iutavx -lm

# AVX2 build; FMA code generation is enabled as well (-mfma).
iutavx2 : sleefsimddp.c sleefsimdsp.c helperavx2.h iut.c
$(CC) $(OPT) -DENABLE_AVX2 -mavx2 -mfma iut.c sleefsimddp.c sleefsimdsp.c -o iutavx2 -lm

# AVX-512F build.
iutavx512f : sleefsimddp.c sleefsimdsp.c helperavx512f.h iut.c
$(CC) $(OPT) -DENABLE_AVX512F -mavx512f iut.c sleefsimddp.c sleefsimdsp.c -o iutavx512f -lm

# FMA4 build (compiled with both -mavx and -mfma4).
iutfma4 : sleefsimddp.c sleefsimdsp.c helperfma4.h iut.c
$(CC) $(OPT) -DENABLE_FMA4 -mavx -mfma4 iut.c sleefsimddp.c sleefsimdsp.c -o iutfma4 -lm

# Clang Extended Vectors build; no instruction-set flag is passed here.
iutclangvec : sleefsimddp.c sleefsimdsp.c helperclangvec.h iut.c
$(CC) $(OPT) -DENABLE_CLANGVEC iut.c sleefsimddp.c sleefsimdsp.c -o iutclangvec -lm

# Tester programs live in ../tester and are built by that directory's own
# makefile. These rules have no prerequisites, so each one runs only when the
# corresponding tester file does not exist yet.

# Double-precision tester.
../tester/tester :
cd ../tester; make tester

# NOTE(review): the "u1" suffix presumably denotes the 1-ULP-accuracy variant -- confirm in ../tester.
../tester/testeru1 :
cd ../tester; make testeru1

# NOTE(review): the "sp" suffix presumably denotes the single-precision tester -- confirm in ../tester.
../tester/testersp :
cd ../tester; make testersp

../tester/testerspu1 :
cd ../tester; make testerspu1

# Test targets.
# Each target depends on one iut binary plus the four tester programs, and
# runs the testers in the order tester, testersp, testeru1, testerspu1,
# passing the command that starts the iut binary as the tester's arguments.

# Run all testers against the SSE2 build.
testsse2 : iutsse2 ../tester/tester ../tester/testeru1 ../tester/testersp ../tester/testerspu1
../tester/tester ./iutsse2
../tester/testersp ./iutsse2
../tester/testeru1 ./iutsse2
../tester/testerspu1 ./iutsse2

# Run all testers against the AVX build.
testavx : iutavx ../tester/tester ../tester/testeru1 ../tester/testersp ../tester/testerspu1
../tester/tester ./iutavx
../tester/testersp ./iutavx
../tester/testeru1 ./iutavx
../tester/testerspu1 ./iutavx

# Run all testers against the AVX2 build.
testavx2 : iutavx2 ../tester/tester ../tester/testeru1 ../tester/testersp ../tester/testerspu1
../tester/tester ./iutavx2
../tester/testersp ./iutavx2
../tester/testeru1 ./iutavx2
../tester/testerspu1 ./iutavx2

# Run all testers against the AVX-512F build. Unlike the other targets, the
# binary is launched through $(SDE) ("$(SDE) -- ./iutavx512f") rather than
# executed directly.
testavx512f : iutavx512f ../tester/tester ../tester/testeru1 ../tester/testersp ../tester/testerspu1
../tester/tester $(SDE) -- ./iutavx512f
../tester/testersp $(SDE) -- ./iutavx512f
../tester/testeru1 $(SDE) -- ./iutavx512f
../tester/testerspu1 $(SDE) -- ./iutavx512f

# Run all testers against the FMA4 build.
testfma4 : iutfma4 ../tester/tester ../tester/testeru1 ../tester/testersp ../tester/testerspu1
../tester/tester ./iutfma4
../tester/testersp ./iutfma4
../tester/testeru1 ./iutfma4
../tester/testerspu1 ./iutfma4

# Run all testers against the Clang Extended Vectors build (the default goal).
testclangvec : iutclangvec ../tester/tester ../tester/testeru1 ../tester/testersp ../tester/testerspu1
../tester/tester ./iutclangvec
../tester/testersp ./iutclangvec
../tester/testeru1 ./iutclangvec
../tester/testerspu1 ./iutclangvec

# Remove editor backups (*~), object files, assembly listings and every iut
# binary. iutneon is listed although no rule in this makefile builds it.
clean :
rm -f *~ *.o *.s iutsse2 iutavx iutavx2 iutavx512f iutfma4 iutneon iutclangvec
27 changes: 9 additions & 18 deletions simd/dd.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ typedef struct {
vdouble x, y;
} vdouble2;

// Returns d with its bit pattern ANDed against the mask produced by
// vcast_vm_i_i(0xffffffff, 0xf8000000); the value is reinterpreted as a
// mask for the AND and cast back to vdouble afterwards.
// NOTE(review): presumably this keeps the high 32 bits and clears the low
// 27 bits of every double lane, yielding the "upper" part of d -- confirm
// against the argument order of vcast_vm_i_i.
static INLINE vdouble vupper_vd_vd(vdouble d) {
return (vdouble)vand_vm_vm_vm((vmask)d, vcast_vm_i_i(0xffffffff, 0xf8000000));
}

static INLINE vdouble2 vcast_vd2_vd_vd(vdouble h, vdouble l) {
vdouble2 ret = {h, l};
return ret;
Expand All @@ -12,10 +16,10 @@ static INLINE vdouble2 vcast_vd2_d_d(double h, double l) {
return ret;
}

static INLINE vdouble2 vsel_vd2_vm_vd2_vd2(vmask m, vdouble2 x, vdouble2 y) {
static INLINE vdouble2 vsel_vd2_vo_vd2_vd2(vopmask m, vdouble2 x, vdouble2 y) {
vdouble2 r;
r.x = vsel_vd_vm_vd_vd(m, x.x, y.x);
r.y = vsel_vd_vm_vd_vd(m, x.y, y.y);
r.x = vsel_vd_vo_vd_vd(m, x.x, y.x);
r.y = vsel_vd_vo_vd_vd(m, x.y, y.y);
return r;
}

Expand Down Expand Up @@ -145,7 +149,7 @@ static INLINE vdouble2 ddadd2_vd2_vd2_vd2(vdouble2 x, vdouble2 y) {
return r;
}

static inline vdouble2 ddsub_vd2_vd_vd(vdouble x, vdouble y) {
static INLINE vdouble2 ddsub_vd2_vd_vd(vdouble x, vdouble y) {
// |x| >= |y|

vdouble2 r;
Expand All @@ -170,19 +174,6 @@ static INLINE vdouble2 ddsub_vd2_vd2_vd2(vdouble2 x, vdouble2 y) {
return r;
}

#if 0
static inline vdouble2 ddsub_vd2_vd2_vd2(vdouble2 x, vdouble2 y) {
// |x| >= |y|

vdouble2 r;

r.x = vsub_vd_vd_vd(x.x, y.x);
r.y = vsub_vd_vd_vd(vadd_vd_vd_vd(vsub_vd_vd_vd(vsub_vd_vd_vd(x.x, r.x), y.x), x.y), y.y);

return r;
}
#endif

#ifdef ENABLE_FMA_DP
static INLINE vdouble2 dddiv_vd2_vd2_vd2(vdouble2 n, vdouble2 d) {
vdouble2 q;
Expand Down Expand Up @@ -232,7 +223,7 @@ static INLINE vdouble2 ddmul_vd2_vd2_vd(vdouble2 x, vdouble y) {
return r;
}

static inline vdouble2 ddrec_vd2_vd(vdouble d) {
static INLINE vdouble2 ddrec_vd2_vd(vdouble d) {
vdouble2 q;

q.x = vrec_vd_vd(d);
Expand Down
Loading

0 comments on commit f9d377c

Please sign in to comment.