-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.cpp
77 lines (62 loc) · 1.95 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
// Static dispatch SSE2
#include <hwy/highway.h>
#include <hwy/timer.h> // Now()
#include <iostream>
#include <numeric>
#include <vector>
#include "dynamic_dispatch.h"
namespace hn = hwy::HWY_NAMESPACE;
using T = float;
// Element-wise vector addition using Highway with the statically selected
// target: x_array[i] = mul_array[i] + add_array[i] for every i in [0, size).
//
// NOTE(review): despite the "MulAdd" name, no multiplication is performed;
// the existing behavior (a plain add) is kept unchanged here.
//
// Parameters:
//   mul_array - first input operand, at least `size` elements.
//   add_array - second input operand, at least `size` elements.
//   size      - number of elements to process; any value (including 0) is
//               accepted, it does not have to be a multiple of the lane count.
//   x_array   - output, at least `size` elements; previous contents are
//               overwritten without being read.
//
// The pointers are HWY_RESTRICT-qualified, so the three arrays must not
// overlap. No particular alignment is required.
void MulAddLoop(const T* HWY_RESTRICT mul_array,
                const T* HWY_RESTRICT add_array, const size_t size,
                T* HWY_RESTRICT x_array) {
  const hn::ScalableTag<T> d;
  const size_t lanes = hn::Lanes(d);

  size_t i = 0;
  // Full vectors only. `i <= size` always holds, so `size - i` cannot wrap,
  // and no iteration touches memory at or beyond index `size`.
  for (; size - i >= lanes; i += lanes) {
    // Unaligned accesses: callers pass std::vector storage, which is not
    // guaranteed to be aligned to the vector width as Load/Store would need.
    const auto mul = hn::LoadU(d, mul_array + i);
    const auto add = hn::LoadU(d, add_array + i);
    hn::StoreU(hn::Add(mul, add), d, x_array + i);
  }

  // Scalar tail for the remaining (size % lanes) elements.
  for (; i < size; ++i) {
    x_array[i] = mul_array[i] + add_array[i];
  }
}
// Times the same element-wise addition (x = v_mult + v_add) over large float
// arrays in three ways: a plain scalar loop, the statically dispatched
// MulAddLoop, and the dynamically dispatched EXAMPLE_DYNAMIC::CallMulAddLoop.
// Each elapsed time is printed to stdout as reported by hwy::platform::Now().
int main() {
  // Single source of truth for the length of all three buffers.
  constexpr std::size_t kNumElements = 100000000;

  std::vector<float> v_mult(kNumElements, 2.0f);
  std::vector<float> v_add(kNumElements, 1.0f);
  // The seed is intentionally an int: std::iota increments the seed's type,
  // and a float counter would stop advancing once it reaches 2^24.
  std::iota(v_add.begin(), v_add.end(), 1);
  std::vector<float> x_arr(kNumElements, 3.0f);
  // for(auto i : v_mult) cout << i << ' ';

  {
    // Scalar baseline. The index is size_t so the comparison against size()
    // is unsigned-to-unsigned and cannot overflow for large vectors.
    const double t0 = hwy::platform::Now();
    for (std::size_t i = 0; i < v_mult.size(); ++i) {
      x_arr[i] = v_mult[i] + v_add[i];
    }
    const double elapsed = hwy::platform::Now() - t0;
    // std::endl is kept so each timing is flushed as soon as it is measured.
    std::cout << "elapsed single: " << elapsed << std::endl;
  }

  {
    // Static dispatch.
    // NOTE(review): the "SSE2" label assumes the compile-time target is SSE2;
    // the actual target depends on the build flags - confirm.
    const double t0 = hwy::platform::Now();
    MulAddLoop(v_mult.data(), v_add.data(), x_arr.size(), x_arr.data());
    const double elapsed = hwy::platform::Now() - t0;
    std::cout << "elapsed SSE2: " << elapsed << std::endl;
    // for(auto i : x_arr) cout << i << ' ';
  }

  {
    // Dynamic dispatch.
    // NOTE(review): the "AVX" label assumes the runtime-selected target is an
    // AVX variant; that depends on the CPU running the program - confirm.
    const double t0 = hwy::platform::Now();
    EXAMPLE_DYNAMIC::CallMulAddLoop(v_mult.data(), v_add.data(), x_arr.size(),
                                    x_arr.data());
    const double elapsed = hwy::platform::Now() - t0;
    std::cout << "elapsed AVX: " << elapsed << std::endl;
    // for(auto i : x_arr) cout << i << ' ';
  }

  /*
  Approx:
  elapsed single: 0.109093
  elapsed SSE2: 0.0911126
  elapsed AVX: 0.0868143
  */
  std::cout << "DONE" << std::endl;
}