-
Notifications
You must be signed in to change notification settings - Fork 14
/
ps_smr.h
195 lines (167 loc) · 6.27 KB
/
ps_smr.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
/***
* Copyright 2014-2015 by Gabriel Parmer. All rights reserved.
* Redistribution of this file is permitted under the BSD 2 clause license.
*
* Authors: Qi Wang, [email protected], Gabriel Parmer, [email protected], 2015
*
* History:
* - Started as parsec.c and parsec.h by Qi.
*/
/***
* A Scalable Memory Reclamation (SMR) technique built off of the slab
* allocator for parsec (parallel sections). Maintains a freelist per
* slab with memory items ordered in terms of the Time Stamp Counter
* (tsc) taken when the node was freed. Removal from these queues is
* governed by quiescence of parallel threads at the time the memory
* was freed (which might be some time in the past). This code
* specifies the policy for when memory flows between the quiescing
* queues, and the slab memory. Moving memory back to the slabs is
* important to enable us to reclaim and migrate memory between cores
* (each slab is owned by a core), thus there is some balancing to be
* done here.
*/
#ifndef PS_SMR_H
#define PS_SMR_H
#include <ps_config.h>
#include <ps_global.h>
#include <ps_slab.h>
#include <ps_quiesce.h>
struct parsec {
int refcnt;
struct ps_smr_percore timing_info[PS_NUMCORES] PS_ALIGNED;
} PS_ALIGNED;
void ps_init(struct parsec *ps);
struct parsec *ps_alloc(void);
void __ps_smr_reclaim(coreid_t curr, struct ps_qsc_list *ql, struct ps_smr_info *si, struct ps_mem *mem, ps_free_fn_t ffn);
void __ps_invoke_smr(coreid_t curr, struct ps_qsc_list *ql, struct ps_smr_info *si, struct ps_mem *mem, ps_free_fn_t ffn);
void __ps_memptr_init(struct ps_mem *m, struct parsec *ps);
int __ps_memptr_delete(struct ps_mem *m);
static inline void
__ps_smr_free(void *buf, struct ps_mem *mem, ps_free_fn_t ffn)
{
struct ps_mheader *m = __ps_mhead_get(buf);
struct ps_smr_info *si;
struct ps_qsc_list *ql;
coreid_t curr_core, curr_numa;
ps_tsc_t tsc;
/* this is 85% of the cost of the function... */
tsc = ps_tsc_locality(&curr_core, &curr_numa);
si = &mem->percore[curr_core].smr_info;
ql = &si->qsc_list;
/*
* Note: we currently enqueue remotely freed memory into the
* qlist of the core the memory is freed on, later to be moved
* to its native core by the remote free logic within the slab
* allocator. This might cause some cache coherency traffic
* that we wouldn't otherwise have due to qlist operations
* (i.e. writing to the ->next field within the header), but
* has the large benefit that we don't have to complicate the
* free-time ordering of memory chunks in the quiescence list.
*/
__ps_mhead_setfree(m, tsc);
__ps_qsc_enqueue(ql, m);
si->account.qmemcnt++;
__ps_invoke_smr(curr_core, ql, si, mem, ffn);
}
static inline void
__ps_quiesce(struct ps_mem *mem, ps_free_fn_t ffn)
{
struct ps_smr_info *si;
struct ps_qsc_list *ql;
coreid_t curr;
curr = ps_coreid();
si = &mem->percore[curr].smr_info;
ql = &si->qsc_list;
__ps_smr_reclaim(curr, ql, si, mem, ffn);
return;
}
static inline void
ps_enter(struct parsec *parsec)
{
coreid_t curr_cpu, curr_numa;
ps_tsc_t curr_time;
struct ps_quiescence_timing *timing;
curr_time = ps_tsc_locality(&curr_cpu, &curr_numa);
timing = &(parsec->timing_info[curr_cpu].timing);
timing->time_in = curr_time;
/*
* The following is needed when we have coarse granularity
* time-stamps (i.e. non-cycle granularity, which means we
* could have same time-stamp for different events).
*/
/* timing->time_out = curr_time - 1; */
ps_mem_fence();
return;
}
static inline int
__ps_in_lib(struct ps_quiescence_timing *timing)
{ return timing->time_out <= timing->time_in; }
static inline void
ps_exit(struct parsec *parsec)
{
int curr_cpu = ps_coreid();
struct ps_quiescence_timing *timing;
timing = &(parsec->timing_info[curr_cpu].timing);
/*
* Here we don't require a full memory barrier on x86 -- only
* a compiler barrier is enough.
*/
ps_cc_barrier();
timing->time_out = timing->time_in + 1;
return;
}
#define __PS_PARSLAB_CREATE_AFNS(name, objsz, allocsz, headoff, allocfn, freefn) \
PS_SLAB_CREATE_AFNS(name, objsz, allocsz, headoff, allocfn, freefn) \
static inline void \
__ps_parslab_free_tramp_##name(struct ps_mem *m, struct ps_slab *s, size_t sz, coreid_t c) \
{ (void)sz; ps_slabptr_free_coreid_##name(m, s, c); } \
static inline void * \
ps_memptr_alloc_##name(struct ps_mem *m) \
{ return ps_slabptr_alloc_##name(m); } \
static inline void * \
ps_mem_alloc_##name(void) \
{ return ps_slab_alloc_##name(); } \
static inline void \
ps_memptr_free_##name(struct ps_mem *m, void *buf) \
{ __ps_smr_free(buf, m, __ps_parslab_free_tramp_##name); } \
static inline void \
ps_mem_free_##name(void *buf) \
{ ps_memptr_free_##name(&__ps_mem_##name, buf); } \
static void \
ps_memptr_init_##name(struct ps_mem *m, struct parsec *ps) \
{ __ps_memptr_init(m, ps); } \
static inline void \
ps_mem_init_##name(struct parsec *ps) \
{ \
ps_slabptr_init_##name(&__ps_mem_##name); \
ps_memptr_init_##name(&__ps_mem_##name, ps); \
} \
static inline struct ps_mem * \
ps_memptr_create_##name(struct parsec *ps) \
{ \
struct ps_mem *m = ps_slabptr_create_##name(); \
if (!m) return NULL; \
ps_memptr_init_##name(m, ps); \
return m; \
} \
static inline int \
ps_memptr_delete_##name(struct ps_mem *m) \
{ \
if (__ps_memptr_delete(m)) return -1; \
ps_slabptr_delete_##name(m); \
return 0; \
} \
static inline int \
ps_mem_delete_##name(void) \
{ return ps_memptr_delete_##name(&__ps_mem_##name); } \
static inline void \
ps_memptr_quiesce_##name(struct ps_mem *m) \
{ __ps_quiesce(m, __ps_parslab_free_tramp_##name); } \
static inline void \
ps_quiesce_##name(void) \
{ __ps_quiesce(&__ps_mem_##name, __ps_parslab_free_tramp_##name); }
#define PS_PARSLAB_CREATE_AFNS(name, objsz, allocsz, allocfn, freefn) \
__PS_PARSLAB_CREATE_AFNS(name, objsz, allocsz, sizeof(struct ps_slab), allocfn, freefn)
#define PS_PARSLAB_CREATE(name, objsz, allocsz) \
PS_PARSLAB_CREATE_AFNS(name, objsz, allocsz, ps_slab_defalloc, ps_slab_deffree)
#endif /* PS_SMR_H */