-
Notifications
You must be signed in to change notification settings - Fork 131
Examples of auto vectorizable codes
Naoki Shibata edited this page Feb 7, 2018
·
37 revisions
All source codes in this page are in public domain. These codes are provided for checking how compiler vecotorizes codes with various compiler options. Try compiling them at Compiler Explorer.
- Automatically vectorized by clang or gcc
- Generates calls to a vectorized math library
Do not forget to specify -fno-math-errno and -fno-trapping-math options.
#include <math.h>
#define N 256
__attribute__ ((__aligned__(64))) float in[N][N], out[N][N];
static float srgb2linear_pix(float c) {
float r = pow((c + 0.055) / (1 + 0.055), 2.4);
return c < 0.04045 ? (c * (1.0 / 12.92)) : r;
}
void srgb2linear(void) {
for (int y = 0; y < N; y++) {
for (int x = 0; x < N; x++) {
out[y][x] = srgb2linear_pix(in[y][x]);
}
}
}
#include <math.h>
#define N 256
__attribute__ ((__aligned__(64))) double in[4][N], out[3][N];
typedef struct { double x, y, z; } double3;
static double3 execute(double a, double b, double c, double d) {
double s1 = b*(1/a), s2 = c*(1/a), s3 = d*(1/a);
double p = s2 - 1/3.0*s1*s1;
double q = s3 - 1/3.0*s1*s2 + 2/27.0*s1*s1*s1;
double z = q*q + 4/27.0*p*p*p;
double w = cbrt((-q + sqrt(z)) * 0.5) + cbrt((-q - sqrt(z)) * 0.5) - 1/3.0*s1;
double th = acos(0.5*(3.0/p)*q*sqrt(-(3.0/p)));
double w0 = 2 * sqrt(-1.0/3*p) * cos(1.0/3.0*th - 2*M_PI*0/3)-1/3.0*s1;
double w1 = 2 * sqrt(-1.0/3*p) * cos(1.0/3.0*th - 2*M_PI*1/3)-1/3.0*s1;
double w2 = 2 * sqrt(-1.0/3*p) * cos(1.0/3.0*th - 2*M_PI*2/3)-1/3.0*s1;
double3 ret = { NAN, NAN, NAN };
if (z >= 0) {
ret.x = w;
} else {
ret.x = w0; ret.y = w1; ret.z = w2;
}
return ret;
}
void cardanoN(void) {
for (int i = 0; i < N; i++) {
double3 r = execute(in[0][i], in[1][i], in[2][i], in[3][i]);
out[0][i] = r.x;
out[1][i] = r.y;
out[2][i] = r.z;
}
}
#include <math.h>
typedef struct { double x, y, z; } double3;
#define N 256
__attribute__ ((__aligned__(64))) double3 out[N][N];
static double3 dini(double a, double b, double u, double v) {
double3 ret;
ret.x = a * cos(u) * sin(v);
ret.y = a * sin(u) * sin(v);
ret.z = a * (cos(v) + log(tan(v * 0.5))) + b * u;
return ret;
}
void diniSurface(double a, double b) {
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
double u = 4.0 * M_PI * i / N;
double v = 2.0 * j / N;
out[i][j] = dini(a, b, u, v);
}
}
}
#include <math.h>
#define N 256
__attribute__ ((__aligned__(64))) double in[N], out[N];
// Factorial approximation formula by Peter Luschny
#define c0 (1.0 / 24.0)
#define c1 (3.0 / 80.0)
#define c2 (18029.0 / 45360.0)
#define c3 (6272051.0 / 14869008.0)
static double lus(double x) {
x += 0.5;
double p = (pow(x, 5)+(c3+c2+c1)*pow(x, 3)+c1*c3*x) /
(pow(x,4)+(c3+c2+c1+c0)*pow(x,2)+(c1+c0)*c3+c0*c2);
return 0.5*log(2*M_PI) + x * (log(p)-1);
}
void factorialN() {
for (int i = 0; i < N; i++) {
out[i] = lus(in[i]);
}
}