/* tmppool.c (from a fork of Villemoes/rvutils) */

#include <stdlib.h>
#include <stddef.h>
#include <assert.h>
#include <sched.h>
#include <pthread.h>
#include <sys/queue.h>

#include "tmppool.h"

#ifndef container_of
#define container_of(ptr, type, member) ({			\
	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
	(type *)( (char *)__mptr - offsetof(type, member) ); })
#endif

#ifndef SLIST_FOREACH_SAFE
#define SLIST_FOREACH_SAFE(var, head, field, tvar)		\
	for ((var) = SLIST_FIRST((head));			\
	     (var) && ((tvar) = SLIST_NEXT((var), field), 1);	\
	     (var) = (tvar))
#endif

#ifndef SLIST_SWAP
#define SLIST_SWAP(head1, head2, type) do {			\
	struct type *swap_first = SLIST_FIRST(head1);		\
	SLIST_FIRST(head1) = SLIST_FIRST(head2);		\
	SLIST_FIRST(head2) = swap_first;			\
} while (0)
#endif
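
/*
 * Only the struct tmppool declaration from tmppool.h is visible here.
 * Judging from how the fields are used below, the header is assumed
 * to declare something roughly like the following (a sketch for the
 * reader, not the actual header):
 *
 *	struct tmppool_list {
 *		pthread_mutex_t mtx;
 *		SLIST_HEAD(, tmppool_obj) head;
 *	};
 *
 *	struct tmppool {
 *		size_t obj_size;	(size of each tmp object)
 *		unsigned mask;		(number of lists minus 1; the
 *					 list count is a power of two)
 *		int (*init_obj)(void *obj, size_t size);
 *					(optional; nonzero = failure)
 *		void (*destroy_obj)(void *obj, size_t size);
 *					(optional)
 *		struct tmppool_list list[];
 *	};
 */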

/*
 * The allocated and initialized tmp objects are kept in an array of
 * linked lists; each list is protected by its own mutex. When a tmp
 * object is requested, we look for an available object in all lists,
 * starting with the one corresponding to the current cpu number
 * (modulo the number of lists). When an object is found, or if a new
 * one needs to be malloc'ed and initialized, the cpu number is
 * recorded in the tmp obj header.
 *
 * When a tmp obj is returned to the pool, we use that cpu number to
 * decide which list to return it to.
 *
 * The theory is that, provided threads are not moved between cpus
 * (too much), a thread which frequently uses tmp objects is likely to
 * find one available in the first list it looks in, even if the
 * objects are put back by another thread (e.g., in a producer-consumer
 * model). Also, multiple threads should be able to concurrently get
 * and put tmp objects, provided they don't run on cpus "hashing" to
 * the same index. Of course, there isn't really any such thing as
 * "the current cpu number", since we can be preempted at any time,
 * but the kernel only rarely moves threads between cpus, and the
 * critical sections are extremely short.
 *
 * Objects are stored in singly-linked lists. These have the least
 * memory overhead, and each get/put requires only a few pointer
 * operations. Also, LIFO order should have the most cache/TLB-friendly
 * behaviour.
 */
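
/*
 * For example, with four lists (pool->mask == 3), a thread running on
 * cpu 6 starts at list 6 & 3 == 2; if that list is empty, tmppool_get()
 * below scans lists 3, 0 and 1 in turn before falling back to malloc.
 */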

/*
 * This is our wrapper for a temporary object. The actual object is
 * located immediately following the struct. Since the list
 * bookkeeping field is only needed when the tmp obj is free and the
 * cpu field is only needed when the obj is in use, we can use a
 * union. Thus the memory overhead for a tmp obj is only sizeof(void*)
 * (+ malloc overhead).
 */
struct tmppool_obj {
	union {
		SLIST_ENTRY(tmppool_obj) list;
		unsigned cpu;
	};
	long obj[];
};

static void *
tmppool_alloc(const struct tmppool *pool, unsigned cpu)
{
	struct tmppool_obj *t = malloc(sizeof(*t) + pool->obj_size);

	if (!t)
		return NULL;
	if (pool->init_obj && pool->init_obj(t->obj, pool->obj_size)) {
		free(t);
		return NULL;
	}
	t->cpu = cpu;
	return t->obj;
}

static void
tmppool_free(const struct tmppool *pool, struct tmppool_obj *t)
{
	if (pool->destroy_obj)
		pool->destroy_obj(t->obj, pool->obj_size);
	free(t);
}

static inline unsigned
get_current_cpu(void)
{
	/*
	 * sched_getcpu() returns -1 on error; converted to unsigned
	 * and masked by the callers, that still yields a valid list
	 * index, so no separate error handling is needed here.
	 */
	return sched_getcpu();
	/* unsigned cpu; */
	/* asm ("rdtscp\n" : "=c"(cpu) : : "eax", "edx"); */
	/* return cpu; */
}

void *
tmppool_get(struct tmppool *pool)
{
	unsigned i, cpu;
	struct tmppool_obj *t;

	cpu = get_current_cpu();
	i = cpu & pool->mask;
	/* Scan every list once, starting with this cpu's own list. */
	do {
		pthread_mutex_lock(&pool->list[i].mtx);
		t = SLIST_FIRST(&pool->list[i].head);
		if (t) {
			SLIST_REMOVE_HEAD(&pool->list[i].head, list);
			pthread_mutex_unlock(&pool->list[i].mtx);
			t->cpu = cpu;
			return t->obj;
		}
		pthread_mutex_unlock(&pool->list[i].mtx);
		++i;
		i &= pool->mask;
	} while (i != (cpu & pool->mask));
	/* All lists were empty; allocate a fresh object. */
	return tmppool_alloc(pool, cpu);
}
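
/*
 * Typical get/put usage (a sketch; how the pool object itself is set
 * up is left to tmppool.h and is not shown in this file):
 *
 *	void *buf = tmppool_get(pool);
 *	if (buf) {
 *		... use the pool->obj_size bytes at buf ...
 *		tmppool_put(pool, buf);
 *	}
 */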

void
tmppool_put(struct tmppool *pool, void *obj)
{
	unsigned idx;
	struct tmppool_obj *t;

	if (!obj)
		return;
	t = container_of(obj, struct tmppool_obj, obj);
	idx = t->cpu & pool->mask;
	pthread_mutex_lock(&pool->list[idx].mtx);
	SLIST_INSERT_HEAD(&pool->list[idx].head, t, list);
	pthread_mutex_unlock(&pool->list[idx].mtx);
}

void
tmppool_release(struct tmppool *pool)
{
	unsigned idx;
	struct tmppool_obj *t, *t2;
	SLIST_HEAD(, tmppool_obj) head;

	for (idx = 0; idx <= pool->mask; ++idx) {
		SLIST_INIT(&head);
		/* Detach the whole list under the lock; free outside it. */
		pthread_mutex_lock(&pool->list[idx].mtx);
		SLIST_SWAP(&head, &pool->list[idx].head, tmppool_obj);
		pthread_mutex_unlock(&pool->list[idx].mtx);
		SLIST_FOREACH_SAFE(t, &head, list, t2) {
			tmppool_free(pool, t);
		}
	}
}
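
/*
 * Note that tmppool_release() only frees objects currently sitting on
 * the lists. An object handed out by tmppool_get() is on no list at
 * all, so callers must tmppool_put() every outstanding object before
 * releasing the pool, or those objects are leaked.
 */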