/***
* Copyright 2011-2015 by Gabriel Parmer. All rights reserved.
* Redistribution of this file is permitted under the BSD 2 clause license.
*
* Author: Gabriel Parmer, [email protected], 2011
*
* History:
* - Initial slab allocator, 2011
* - Adapted for parsec, 2015
*/
#ifndef PS_SLAB_H
#define PS_SLAB_H
#include <ps_config.h>
#include <ps_list.h>
#include <ps_plat.h>
#include <ps_global.h>
/* #define PS_SLAB_DEBUG 1 */
/* The header for a slab. */
struct ps_slab {
	/*
	 * Read-only data.  coreid is read by _other_ cores, so we
	 * want it on a separate cache-line from the frequently
	 * modified fields below.
	 */
	void      *memory;	/* != NULL iff slab is separately allocated */
	ps_desc_t  start, end;	/* a slab used as a namespace: min and max descriptor ids */
	size_t     memsz;	/* size of the backing memory */
	coreid_t   coreid;	/* which is the home core for this slab? */
	char       pad[PS_CACHE_LINE-(sizeof(void *)+sizeof(size_t)+sizeof(u16_t)+sizeof(ps_desc_t)*2)];

	/* Frequently modified data on the owning core... */
	struct ps_mheader *freelist;	/* free objs in this slab */
	struct ps_list     list;	/* node in the per-core freelist of slabs */
	size_t             nfree;	/* # of free objects in this slab */
} PS_PACKED;
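
/*
 * The layout of a slab's backing memory, as implied by the offset
 * arithmetic below (a sketch; here the slab header is internal, i.e.
 * headoff == sizeof(struct ps_slab), as PS_SLAB_CREATE sets it up):
 *
 *   s->memory
 *   v
 *   [struct ps_slab][mheader|obj 0][mheader|obj 1]...[mheader|obj N-1]
 *    <-- headoff --><-- objmemsz --><-- objmemsz -->
 *
 * Each object is preceded by its struct ps_mheader, and consecutive
 * objects are __ps_slab_objmemsz(obj_sz) bytes apart.
 */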
/*** Operations on the freelist of slabs ***/
/*
 * These functions should really be statically computed for
 * efficiency (see the macros below)...
 */
static inline unsigned long
__ps_slab_objmemsz(size_t obj_sz)
{ return PS_RNDUP(obj_sz + sizeof(struct ps_mheader), PS_WORD); }

static inline unsigned long
__ps_slab_max_nobjs(size_t obj_sz, size_t allocsz, size_t headoff)
{ return (allocsz - headoff) / __ps_slab_objmemsz(obj_sz); }
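
/*
 * A worked example (illustrative numbers only: assume PS_WORD == 8,
 * sizeof(struct ps_mheader) == 16, and sizeof(struct ps_slab) == 96;
 * all three are platform- and configuration-dependent).  For
 * obj_sz == 40, objmemsz = PS_RNDUP(40 + 16, 8) = 56.  With
 * allocsz == 4096 and headoff == sizeof(struct ps_slab) == 96, a
 * slab then holds (4096 - 96) / 56 = 71 objects.
 */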
/* The offset of the given object in its slab */
static inline unsigned long
__ps_slab_objsoff(struct ps_slab *s, struct ps_mheader *h, size_t obj_sz, size_t headoff)
{ return ((unsigned long)h - ((unsigned long)s->memory + headoff)) / __ps_slab_objmemsz(obj_sz); }
#ifdef PS_SLAB_DEBUG
static inline void
__ps_slab_check_consistency(struct ps_slab *s)
{
	struct ps_mheader *h;
	unsigned int i;

	assert(s);
	h = s->freelist;
	for (i = 0 ; h ; i++) {
		assert(h->slab == s);
		assert(h->tsc_free != 0);
		h = h->next;
	}
	assert(i == s->nfree);
}

static inline void
__ps_slab_freelist_check(struct ps_slab_freelist *fl)
{
	struct ps_slab *s = fl->list;

	if (!s) return;
	do {
		assert(s->memory && s->freelist);
		assert(ps_list_prev(ps_list_next(s, list), list) == s);
		assert(ps_list_next(ps_list_prev(s, list), list) == s);
		__ps_slab_check_consistency(s);
	} while ((s = ps_list_next(s, list)) != fl->list);
}
#else /* PS_SLAB_DEBUG */
static inline void __ps_slab_check_consistency(struct ps_slab *s) { (void)s; }
static inline void __ps_slab_freelist_check(struct ps_slab_freelist *fl) { (void)fl; }
#endif /* PS_SLAB_DEBUG */
static void
__slab_freelist_rem(struct ps_slab_freelist *fl, struct ps_slab *s)
{
	assert(s && fl);
	if (fl->list == s) {
		if (ps_list_singleton(s, list)) fl->list = NULL;
		else                            fl->list = ps_list_next(s, list);
	}
	ps_list_rem(s, list);
}

static void
__slab_freelist_add(struct ps_slab_freelist *fl, struct ps_slab *s)
{
	assert(s && fl);
	assert(ps_list_singleton(s, list));
	assert(s != fl->list);
	if (fl->list) ps_list_add(fl->list, s, list);
	fl->list = s;
	/* TODO: sort based on emptiness...just use N bins */
}
/*** Alloc and free ***/
#define PS_SLAB_PARAMS coreid_t coreid, size_t obj_sz, size_t allocsz, size_t headoff, ps_alloc_fn_t afn, ps_free_fn_t ffn
#define PS_SLAB_ARGS coreid, obj_sz, allocsz, headoff, afn, ffn
#define PS_SLAB_DEWARN (void)coreid; (void)obj_sz; (void)allocsz; (void)headoff; (void)afn; (void)ffn
/* Create function prototypes for cross-object usage */
#define PS_SLAB_CREATE_PROTOS(name) \
inline void *ps_slab_alloc_##name(void); \
inline void ps_slab_free_##name(void *buf); \
inline size_t ps_slab_objmem_##name(void); \
inline size_t ps_slab_nobjs_##name(void);
void __ps_slab_mem_remote_free(struct ps_mem *mem, struct ps_mheader *h, coreid_t core_target);
void __ps_slab_mem_remote_process(struct ps_mem *mem, struct ps_slab_info *si, PS_SLAB_PARAMS);
void __ps_slab_init(struct ps_slab *s, struct ps_slab_info *si, PS_SLAB_PARAMS);
void ps_slab_deffree(struct ps_mem *m, struct ps_slab *x, size_t sz, coreid_t coreid);
struct ps_slab *ps_slab_defalloc(struct ps_mem *m, size_t sz, coreid_t coreid);
void ps_slabptr_init(struct ps_mem *m);
int ps_slabptr_isempty(struct ps_mem *m);
struct ps_slab_stats {
	struct {
		size_t nslabs, npartslabs, nfree, nremote;
	} percore[PS_NUMCORES];
};
void ps_slabptr_stats(struct ps_mem *m, struct ps_slab_stats *stats);
static inline void
__ps_slab_mem_free(void *buf, struct ps_mem *mem, PS_SLAB_PARAMS)
{
	struct ps_slab *s;
	struct ps_mheader *h, *next;
	unsigned int max_nobjs = __ps_slab_max_nobjs(obj_sz, allocsz, headoff);
	struct ps_slab_freelist *fl;
	coreid_t target;

	assert(__ps_slab_objmemsz(obj_sz) + headoff <= allocsz);
	PS_SLAB_DEWARN;

	h = __ps_mhead_get(buf);
	assert(!__ps_mhead_isfree(h)); /* freeing already-freed memory? */
	s = h->slab;
	assert(s);

	/* remote frees are batched up and processed later by the home core */
	target = s->coreid;
	if (unlikely(target != coreid)) {
		__ps_slab_mem_remote_free(mem, h, target);
		return;
	}

	__ps_mhead_setfree(h, 1);
	next        = s->freelist;
	s->freelist = h;	/* TODO: should be atomic/locked */
	h->next     = next;
	s->nfree++;		/* TODO: ditto */

	if (s->nfree == max_nobjs) {
		struct ps_slab_info *si = &mem->percore[coreid].slab_info;

		/* the slab is now entirely free: remove it and release its memory */
		fl = &si->fl;
		si->nslabs--;
		__slab_freelist_rem(fl, s);
		ffn(mem, s, s->memsz, coreid);
	} else if (s->nfree == 1) {
		fl = &mem->percore[coreid].slab_info.fl;

		/* the slab just gained a free object: add it back onto the freelist */
		assert(ps_list_singleton(s, list));
		assert(s->memory && s->freelist);
		__slab_freelist_add(fl, s);
	}
	__ps_slab_freelist_check(&mem->percore[coreid].slab_info.fl);

	return;
}
static inline void *
__ps_slab_mem_alloc(struct ps_mem *mem, PS_SLAB_PARAMS)
{
	struct ps_slab *s;
	struct ps_mheader *h;
	struct ps_slab_info *si = &mem->percore[coreid].slab_info;

	assert(obj_sz + headoff <= allocsz);
	PS_SLAB_DEWARN;

	si->salloccnt++;
	/* periodically drain frees that remote cores have batched up for us */
	if (unlikely((si->salloccnt % PS_REMOTE_BATCH) == 0)) {
		__ps_slab_mem_remote_process(mem, si, PS_SLAB_ARGS);
	}

	s = si->fl.list;
	if (unlikely(!s)) {
		/* no slab with free objects; the allocation function must initialize s->memory */
		s = afn(mem, allocsz, coreid);
		if (unlikely(!s)) return NULL;
		__ps_slab_init(s, si, PS_SLAB_ARGS);
		si->nslabs++;
		assert(s->memory && s->freelist);
	}
	assert(s && s->freelist);

	/* TODO: atomic modification to the freelist */
	h           = s->freelist;
	s->freelist = h->next;
	h->next     = NULL;
	s->nfree--;
	__ps_mhead_reset(h);

	/* if the slab is now full, remove it from the freelist */
	if (s->nfree == 0) {
		__slab_freelist_rem(&si->fl, s);
		assert(ps_list_singleton(s, list));
	}

	assert(!__ps_mhead_isfree(h));
	__ps_slab_freelist_check(&si->fl);

	return __ps_mhead_mem(h);
}
/***
 * This macro is very important for high performance.  It creates the
 * functions for allocation and deallocation, passing in the freelist
 * directly along with the size information for these objects, thus
 * enabling the compiler to do partial evaluation.  This avoids
 * freelist lookups, and relies on the compiler's optimizations to
 * generate specialized code for the given sizes -- requiring function
 * inlining, constant propagation, and dead-code elimination.  To me,
 * relying on these optimizations is better than putting all of the
 * code for allocation and deallocation in the macro itself, for
 * maintenance and readability reasons.
 */
#define __PS_SLAB_CREATE_FNS(name, obj_sz, allocsz, headoff, afn, ffn) \
static inline void * \
ps_slabptr_alloc_##name(struct ps_mem *m) \
{ return __ps_slab_mem_alloc(m, ps_coreid(), obj_sz, allocsz, headoff, afn, ffn); } \
static inline void \
ps_slabptr_free_coreid_##name(struct ps_mem *m, void *buf, coreid_t coreid) \
{ __ps_slab_mem_free(buf, m, coreid, obj_sz, allocsz, headoff, afn, ffn); } \
static inline void \
ps_slabptr_free_##name(struct ps_mem *m, void *buf) \
{ ps_slabptr_free_coreid_##name(m, buf, ps_coreid()); } \
static inline void * \
ps_slab_alloc_##name(void) \
{ return ps_slabptr_alloc_##name(&__ps_mem_##name); } \
static inline void \
ps_slab_free_##name(void *buf) \
{ ps_slabptr_free_##name(&__ps_mem_##name, buf); } \
static inline void \
ps_slab_free_coreid_##name(void *buf, coreid_t curr) \
{ ps_slabptr_free_coreid_##name(&__ps_mem_##name, buf, curr); } \
static inline void \
ps_slabptr_init_##name(struct ps_mem *m) \
{ ps_slabptr_init(m); } \
static inline void \
ps_slab_init_##name(void) \
{ ps_slabptr_init_##name(&__ps_mem_##name); } \
static inline struct ps_mem * \
ps_slabptr_create_##name(void) \
{ \
	struct ps_mem *m = ps_plat_alloc(sizeof(struct ps_mem), ps_coreid()); \
	if (m) ps_slabptr_init_##name(m); \
	return m; \
} \
static inline void \
ps_slabptr_delete_##name(struct ps_mem *m) \
{ ps_plat_free(m, sizeof(struct ps_mem), ps_coreid()); } \
static inline size_t \
ps_slab_objmem_##name(void) \
{ return __ps_slab_objmemsz(obj_sz); } \
static inline size_t \
ps_slab_nobjs_##name(void) \
{ return __ps_slab_max_nobjs(obj_sz, allocsz, headoff); } \
static inline unsigned int \
ps_slab_objoff_##name(void *obj) \
{ \
	struct ps_mheader *h = __ps_mhead_get(obj); \
	return __ps_slab_objsoff(h->slab, h, obj_sz, headoff); \
}
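
/*
 * As an illustration of the specialization described above (the name
 * "foo" is hypothetical, and the exact constants are platform-dependent):
 * after PS_SLAB_CREATE_DEF(foo, 64), a call to ps_slab_objmem_foo()
 * reduces to the compile-time constant
 * PS_RNDUP(64 + sizeof(struct ps_mheader), PS_WORD), and the size
 * arithmetic in the alloc/free paths constant-folds away entirely.
 */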
/*
 * allocsz is the size of the backing memory allocation, and headoff
 * encodes whether the ps_slab header is internally allocated from
 * that slab of memory (headoff == sizeof(struct ps_slab)) or from
 * elsewhere (headoff == 0).
 *
 * Note: if the header is allocated from elsewhere, then you must
 * manually create
 * PS_SLAB_CREATE_DEF(meta, sizeof(struct ps_slab));
 */
#define PS_SLAB_CREATE_AFNS(name, size, allocsz, headoff, allocfn, freefn) \
__PS_MEM_CREATE_DATA(name) \
__PS_SLAB_CREATE_FNS(name, size, allocsz, headoff, allocfn, freefn)
#define PS_SLAB_CREATE(name, size, allocsz) \
PS_SLAB_CREATE_AFNS(name, size, allocsz, sizeof(struct ps_slab), ps_slab_defalloc, ps_slab_deffree)
#define PS_SLAB_CREATE_DEF(name, size) \
PS_SLAB_CREATE(name, size, PS_PAGE_SIZE)
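
/*
 * Example usage (a minimal sketch; "foo" and struct foo are
 * hypothetical, and ps_slab_init_foo() is assumed to run once before
 * any allocation):
 *
 *   struct foo { int x, y; };
 *   PS_SLAB_CREATE_DEF(foo, sizeof(struct foo))
 *
 *   void
 *   example(void)
 *   {
 *           struct foo *f;
 *
 *           ps_slab_init_foo();          // one-time initialization
 *           f = ps_slab_alloc_foo();     // slab-allocate a foo on this core
 *           if (f) ps_slab_free_foo(f);  // return it to its slab
 *   }
 */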
#endif /* PS_SLAB_H */