Skip to content

Examples of auto vectorizable codes

Naoki Shibata edited this page Feb 7, 2018 · 37 revisions

All source code on this page is in the public domain. These examples are provided for checking how compilers vectorize code with various compiler options. Try compiling them at Compiler Explorer.

  • Automatically vectorized by clang or gcc
  • Generates calls to a vectorized math library

Do not forget to specify the -fno-math-errno and -fno-trapping-math options.

sRGB to linear image conversion

#include <math.h>

// Edge length of the square image buffers below (N x N elements each).
#define N 256
// in:  source pixel values read by srgb2linear().
// out: converted pixel values written by srgb2linear().
// Both arrays are requested to be 64-byte aligned
// (presumably to permit aligned vector loads/stores -- confirm for the target ISA).
__attribute__ ((__aligned__(64))) float in[N][N], out[N][N];

static float srgb2linear_pix(float c) {
  float r = pow((c + 0.055) / (1 + 0.055), 2.4);
  return c < 0.04045 ? (c * (1.0 / 12.92)) : r;
}

// Apply srgb2linear_pix() to every element of the global `in` image and
// store the result at the same position in the global `out` image.
// Rows are walked in order, and each row is traversed left to right.
void srgb2linear(void) {
  for (int row = 0; row < N; row++) {
    const float *src = in[row];
    float *dst = out[row];
    for (int col = 0; col < N; col++) {
      dst[col] = srgb2linear_pix(src[col]);
    }
  }
}

Finding roots of cubic equations

#include <math.h>

// Number of cubic equations processed by cardanoN().
#define N 256
// in[0..3][i]: the four values passed to execute() for equation i, in the
//              order (a, b, c, d).
// out[0..2][i]: the x, y and z members of the result for equation i.
// Both arrays are requested to be 64-byte aligned
// (presumably to permit aligned vector loads/stores -- confirm for the target ISA).
__attribute__ ((__aligned__(64))) double in[4][N], out[3][N];

typedef struct { double x, y, z; } double3;

static double3 execute(double a, double b, double c, double d) {
  double s1 = b*(1/a), s2 = c*(1/a), s3 = d*(1/a);
  double p = s2 - 1/3.0*s1*s1;
  double q = s3 - 1/3.0*s1*s2 + 2/27.0*s1*s1*s1;
  double z = q*q + 4/27.0*p*p*p;

  double w = cbrt((-q + sqrt(z)) * 0.5) + cbrt((-q - sqrt(z)) * 0.5) - 1/3.0*s1;

  double th = acos(0.5*(3.0/p)*q*sqrt(-(3.0/p)));
  double w0 = 2 * sqrt(-1.0/3*p) * cos(1.0/3.0*th - 2*M_PI*0/3)-1/3.0*s1;
  double w1 = 2 * sqrt(-1.0/3*p) * cos(1.0/3.0*th - 2*M_PI*1/3)-1/3.0*s1;
  double w2 = 2 * sqrt(-1.0/3*p) * cos(1.0/3.0*th - 2*M_PI*2/3)-1/3.0*s1;

  double3 ret = { NAN, NAN, NAN };
  if (z >= 0) {
    ret.x = w;
  } else {
    ret.x = w0; ret.y = w1; ret.z = w2;
  }

  return ret;
}

void cardanoN(void) {
  for (int i = 0; i < N; i++) {
    double3 r = execute(in[0][i], in[1][i], in[2][i], in[3][i]);
    out[0][i] = r.x;
    out[1][i] = r.y;
    out[2][i] = r.z;
  }
}

Generating Dini's surface

#include <math.h>

typedef struct { double x, y, z; } double3;

#define N 256
__attribute__ ((__aligned__(64))) double3 out[N][N];

static double3 dini(double a, double b, double u, double v) {
  double3 ret;
  ret.x = a * cos(u) * sin(v);
  ret.y = a * sin(u) * sin(v);
  ret.z = a * (cos(v) + log(tan(v * 0.5))) + b * u;
  return ret;
}

void diniSurface(double a, double b) {
  for (int i = 0; i < N; i++) {
    for (int j = 0; j < N; j++) {
      double u = 4.0 * M_PI * i / N;
      double v = 2.0 * j / N;
      out[i][j] = dini(a, b, u, v);
    }
  }
}

Factorial approximation formula by Peter Luschny

#include <math.h>

// Number of elements processed by factorialN().
#define N 256
// in[i]:  argument passed to lus() for element i.
// out[i]: value returned by lus() for element i.
// Both arrays are requested to be 64-byte aligned
// (presumably to permit aligned vector loads/stores -- confirm for the target ISA).
__attribute__ ((__aligned__(64))) double in[N], out[N];

// Factorial approximation formula by Peter Luschny
#define c0 (1.0 / 24.0)
#define c1 (3.0 / 80.0)
#define c2 (18029.0 / 45360.0)
#define c3 (6272051.0 / 14869008.0)

static double lus(double x) {
  x += 0.5;
  double p = (pow(x, 5)+(c3+c2+c1)*pow(x, 3)+c1*c3*x) /
    (pow(x,4)+(c3+c2+c1+c0)*pow(x,2)+(c1+c0)*c3+c0*c2);
  return 0.5*log(2*M_PI) + x * (log(p)-1);
}

// Apply lus() to each of the N elements of the global `in` array and store
// the results at the same index in the global `out` array.
//
// Declared with (void): an empty parameter list in C11 declares a function
// without a prototype (an obsolescent form), which disables argument
// checking at call sites.
void factorialN(void) {
  for (int i = 0; i < N; i++) {
    out[i] = lus(in[i]);
  }
}
Clone this wiki locally