forked from jemalloc/jemalloc-experiments
-
Notifications
You must be signed in to change notification settings - Fork 0
/
memsetVsMadvise.cpp
145 lines (126 loc) · 4.28 KB
/
memsetVsMadvise.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#include <stdlib.h>
#include <sys/mman.h>

#include <cerrno>
#include <cstdint>
#include <cstring>
#include <future>
#include <iostream>
#include <string>
#include <thread>
#include <vector>

#include <gflags/gflags.h>

#include "util.h"
// Command-line flags (gflags). Each DEFINE_* below creates a FLAGS_<name>
// variable that the rest of this file reads.

// How many times main() repeats the experiment for each page count.
DEFINE_int32(num_runs, 100,
"Number of times to zero the pages (per page count)");
// Inclusive lower bound of the page counts main() iterates over.
DEFINE_int32(num_pages_min, 1, "Minimum number of pages to zero");
// Inclusive upper bound of the page counts main() iterates over.
DEFINE_int32(num_pages_max, 50, "Maximum number of pages to zero");
// How many concurrent runTest() tasks main() launches per run.
DEFINE_int32(num_threads, 1,
"Number of threads on which to try the experiment at once.");
// When true, maybeTouchPages() writes to every page after it has been zeroed,
// so the measured time includes touching the pages again.
DEFINE_bool(touch_after_zero, false,
"Whether to actually try touching the pages we zero.");
// Cycle counts collected for the three zeroing strategies being compared.
// A freshly constructed Result holds zero for every counter, so it can be
// used directly as the starting value of a running total.
struct Result {
  // Cycles spent zeroing with memset.
  std::uint64_t memsetCycles = 0;
  // Cycles spent zeroing with madvise(MADV_DONTNEED).
  std::uint64_t madviseDontneedCycles = 0;
  // Cycles spent zeroing with madvise(MADV_DONTNEED) + madvise(MADV_WILLNEED).
  std::uint64_t madviseDontneedWillneedCycles = 0;

  // All counters start at zero (see the member initializers above).
  Result() {}

  // Adds every counter of `other` onto the matching counter of this object.
  void accum(const Result& other) {
    memsetCycles = memsetCycles + other.memsetCycles;
    madviseDontneedCycles = madviseDontneedCycles + other.madviseDontneedCycles;
    madviseDontneedWillneedCycles =
        madviseDontneedWillneedCycles + other.madviseDontneedWillneedCycles;
  }
};
// Writes one byte into each 4096-byte page of [beginv, beginv + length) when
// --touch_after_zero is set; does nothing otherwise.
//
// beginv: start of the region to touch.
// length: size of the region in bytes. It does not have to be a multiple of
//         the page size; a trailing partial page is touched once at its start.
void maybeTouchPages(void* beginv, std::size_t length) {
  if (!FLAGS_touch_after_zero) {
    return;
  }
  constexpr std::size_t kPageSize = 4096;
  char* const begin = static_cast<char*>(beginv);
  // Iterate by offset with a `<` bound: comparing pointers with `!=` never
  // terminates when length is not an exact multiple of the stride, and would
  // walk off the end of the buffer.
  for (std::size_t offset = 0; offset < length; offset += kPageSize) {
    begin[offset] = 0;
  }
}
// Zeroes `size` bytes starting at `ptr` by overwriting them with memset.
// This is the baseline the madvise-based strategies are compared against.
void zeroMemset(void* ptr, std::size_t size) {
  constexpr int kZeroByte = 0;
  std::memset(ptr, kZeroByte, size);
}
// "Zeroes" [ptr, ptr + size) by calling madvise(MADV_DONTNEED) on it instead
// of writing to it. (On Linux, later reads of private anonymous pages dropped
// this way observe zero-filled pages.)
//
// ptr:  start of the region; madvise requires it to be page-aligned.
// size: length of the region in bytes.
//
// On failure, prints the errno value and its description to stderr and
// terminates the process with exit status 1.
void zeroMadviseDontneed(void* ptr, std::size_t size) {
  if (madvise(ptr, size, MADV_DONTNEED) != 0) {
    // madvise() only ever returns -1 on failure; the actual cause is in
    // errno. Save it before any other call can overwrite it.
    const int savedErrno = errno;
    std::cerr << "Couldn't madvise(... MADV_DONTNEED); error was "
              << savedErrno << " (" << std::strerror(savedErrno) << ")"
              << std::endl;
    exit(1);
  }
}
// "Zeroes" [ptr, ptr + size) with madvise(MADV_DONTNEED) and then immediately
// issues madvise(MADV_WILLNEED) on the same range, hinting that the range is
// about to be accessed.
//
// ptr:  start of the region; madvise requires it to be page-aligned.
// size: length of the region in bytes.
//
// If either call fails, prints which call failed together with the errno
// value and its description to stderr, then terminates the process with exit
// status 1.
void zeroMadviseDontneedWillneed(void* ptr, std::size_t size) {
  if (madvise(ptr, size, MADV_DONTNEED) != 0) {
    // madvise() only ever returns -1 on failure; the actual cause is in errno.
    const int savedErrno = errno;
    std::cerr << "Couldn't madvise(..., MADV_DONTNEED); error was "
              << savedErrno << " (" << std::strerror(savedErrno) << ")"
              << std::endl;
    exit(1);
  }
  if (madvise(ptr, size, MADV_WILLNEED) != 0) {
    const int savedErrno = errno;
    // The advice passed above is MADV_WILLNEED, so name that in the message
    // (MAP_POPULATE is an mmap flag, not what was requested here).
    std::cerr << "Couldn't madvise(..., MADV_WILLNEED); error was "
              << savedErrno << " (" << std::strerror(savedErrno) << ")"
              << std::endl;
    exit(1);
  }
}
// Runs one round of the experiment on a freshly allocated, page-aligned
// buffer of `size` bytes and returns the value util::runTimed() reports for
// each of the three zeroing strategies.
//
// size: buffer size in bytes (main() passes a multiple of 4096).
//
// Terminates the process with exit status 1 if the allocation fails (or if
// one of the madvise-based strategies fails).
Result runTest(std::size_t size) {
  Result result;

  void* ptr = nullptr;
  // posix_memalign reports failure through its return value, not errno.
  const int err = posix_memalign(&ptr, 4096, size);
  if (err != 0) {
    std::cerr << "Couldn't allocate; error was " << err << std::endl;
    exit(1);
  }

  // Touch all the pages from this thread.
  std::memset(ptr, 0, size);
  // Touch all the pages from another thread. A lambda is used rather than
  // passing std::memset itself: taking the address of a standard-library
  // function is not something the standard guarantees to work.
  std::async(std::launch::async, [ptr, size]() {
    std::memset(ptr, 0, size);
  }).get();

  // We'll probably be dealing with uncached memory here; we care about this
  // difference when pulling memory out of an inactive state.
  util::flushCache(ptr, size);
  result.memsetCycles = util::runTimed([&]() {
    zeroMemset(ptr, size);
    maybeTouchPages(ptr, size);
  });

  util::flushCache(ptr, size);
  result.madviseDontneedCycles = util::runTimed([&]() {
    zeroMadviseDontneed(ptr, size);
    maybeTouchPages(ptr, size);
  });

  // NOTE(review): unless --touch_after_zero is set, the pages were already
  // dropped by the previous measurement when this one starts -- confirm that
  // this is the intended starting state.
  util::flushCache(ptr, size);
  result.madviseDontneedWillneedCycles = util::runTimed([&]() {
    zeroMadviseDontneedWillneed(ptr, size);
    maybeTouchPages(ptr, size);
  });

  // Release the buffer; without this every call leaks `size` bytes, and
  // main() calls this num_runs * num_threads times per page count.
  free(ptr);
  return result;
}
int main(int argc, char** argv) {
std::string usage =
"This program benchmarks memset vs madvise for zeroing memory.\n"
"Sample usage:\n";
usage += argv[0];
usage += " --num_pages_min=20 --num_pagse_max=50 --num_runs=30 ";
usage += "--num_threads=4 --touch_after_zero=true";
gflags::SetUsageMessage(usage);
gflags::ParseCommandLineFlags(&argc, &argv, true);
for (int i = FLAGS_num_pages_min; i <= FLAGS_num_pages_max; ++i) {
Result sum;
for (int j = 0; j < FLAGS_num_runs; ++j) {
std::vector<std::future<Result>> results;
for (int k = 0; k < FLAGS_num_threads; ++k) {
results.push_back(std::async(std::launch::async, runTest, 4096 * i));
}
for (int k = 0; k < FLAGS_num_threads; ++k) {
sum.accum(results[k].get());
}
}
std::cout << "When zeroing " << i << " pages (averaging across "
<< FLAGS_num_runs << " runs of " << FLAGS_num_threads << " threads:\n"
<< " memset: " << sum.memsetCycles / FLAGS_num_runs << " cycles\n"
<< " madvise(..., MADV_DONTNEED): "
<< sum.madviseDontneedCycles / FLAGS_num_runs << " cycles\n"
<< " madvise(..., MADV_DONTNEED); madvise(..., MADV_WILLNEED): "
<< sum.madviseDontneedWillneedCycles / FLAGS_num_runs << " cycles\n";
}
return 0;
}