Skip to content

Commit 6c460ad

Browse files
committed
Optimize rtree_get().
Specialize fast path to avoid code that cannot execute for dependent loads. Manually unroll.
1 parent 18903c5 commit 6c460ad

File tree

3 files changed

+134
-35
lines changed

3 files changed

+134
-35
lines changed

include/jemalloc/internal/rtree.h

Lines changed: 131 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,34 @@ typedef struct rtree_s rtree_t;
1616
*/
1717
#define LG_RTREE_BITS_PER_LEVEL 4
1818
#define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL)
19-
#define RTREE_HEIGHT_MAX \
20-
((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
19+
/*
20+
* Avoid math in RTREE_HEIGHT_MAX definition so that it can be used in cpp
21+
* conditionals. The following definitions are precomputed equivalents to:
22+
*
23+
* #define RTREE_HEIGHT_MAX \
24+
* ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
25+
*/
26+
#if LG_RTREE_BITS_PER_LEVEL == 2
27+
# if LG_SIZEOF_PTR == 3
28+
# define RTREE_HEIGHT_MAX 16
29+
# elif LG_SIZEOF_PTR == 2
30+
# define RTREE_HEIGHT_MAX 8
31+
# endif
32+
#elif LG_RTREE_BITS_PER_LEVEL == 3
33+
# if LG_SIZEOF_PTR == 3
34+
# define RTREE_HEIGHT_MAX 8
35+
# elif LG_SIZEOF_PTR == 2
36+
# define RTREE_HEIGHT_MAX 4
37+
# endif
38+
#elif LG_RTREE_BITS_PER_LEVEL == 4
39+
# if LG_SIZEOF_PTR == 3
40+
# define RTREE_HEIGHT_MAX 4
41+
# elif LG_SIZEOF_PTR == 2
42+
# define RTREE_HEIGHT_MAX 2
43+
# endif
44+
#else
45+
# error Unsupported LG_RTREE_BITS_PER_LEVEL
46+
#endif
2147

2248
/* Used for two-stage lock-free node initialization. */
2349
#define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1)
@@ -111,15 +137,18 @@ unsigned rtree_start_level(rtree_t *rtree, uintptr_t key);
111137
uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level);
112138

113139
bool rtree_node_valid(rtree_node_elm_t *node);
114-
rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm);
140+
rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm,
141+
bool dependent);
115142
rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm,
116-
unsigned level);
143+
unsigned level, bool dependent);
117144
extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm,
118145
bool dependent);
119146
void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm,
120147
const extent_node_t *val);
121-
rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level);
122-
rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level);
148+
rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level,
149+
bool dependent);
150+
rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level,
151+
bool dependent);
123152

124153
extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent);
125154
bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val);
@@ -157,25 +186,28 @@ rtree_node_valid(rtree_node_elm_t *node)
157186
}
158187

159188
JEMALLOC_INLINE rtree_node_elm_t *
160-
rtree_child_tryread(rtree_node_elm_t *elm)
189+
rtree_child_tryread(rtree_node_elm_t *elm, bool dependent)
161190
{
162191
rtree_node_elm_t *child;
163192

164193
/* Double-checked read (first read may be stale). */
165194
child = elm->child;
166-
if (!rtree_node_valid(child))
195+
if (!dependent && !rtree_node_valid(child))
167196
child = atomic_read_p(&elm->pun);
197+
assert(!dependent || child != NULL);
168198
return (child);
169199
}
170200

171201
JEMALLOC_INLINE rtree_node_elm_t *
172-
rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level)
202+
rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level,
203+
bool dependent)
173204
{
174205
rtree_node_elm_t *child;
175206

176-
child = rtree_child_tryread(elm);
177-
if (unlikely(!rtree_node_valid(child)))
207+
child = rtree_child_tryread(elm, dependent);
208+
if (!dependent && unlikely(!rtree_node_valid(child)))
178209
child = rtree_child_read_hard(rtree, elm, level);
210+
assert(!dependent || child != NULL);
179211
return (child);
180212
}
181213

@@ -209,52 +241,116 @@ rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val)
209241
}
210242

211243
JEMALLOC_INLINE rtree_node_elm_t *
212-
rtree_subtree_tryread(rtree_t *rtree, unsigned level)
244+
rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent)
213245
{
214246
rtree_node_elm_t *subtree;
215247

216248
/* Double-checked read (first read may be stale). */
217249
subtree = rtree->levels[level].subtree;
218-
if (!rtree_node_valid(subtree))
250+
if (!dependent && unlikely(!rtree_node_valid(subtree)))
219251
subtree = atomic_read_p(&rtree->levels[level].subtree_pun);
252+
assert(!dependent || subtree != NULL);
220253
return (subtree);
221254
}
222255

223256
JEMALLOC_INLINE rtree_node_elm_t *
224-
rtree_subtree_read(rtree_t *rtree, unsigned level)
257+
rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent)
225258
{
226259
rtree_node_elm_t *subtree;
227260

228-
subtree = rtree_subtree_tryread(rtree, level);
229-
if (unlikely(!rtree_node_valid(subtree)))
261+
subtree = rtree_subtree_tryread(rtree, level, dependent);
262+
if (!dependent && unlikely(!rtree_node_valid(subtree)))
230263
subtree = rtree_subtree_read_hard(rtree, level);
264+
assert(!dependent || subtree != NULL);
231265
return (subtree);
232266
}
233267

234268
JEMALLOC_INLINE extent_node_t *
235269
rtree_get(rtree_t *rtree, uintptr_t key, bool dependent)
236270
{
237271
uintptr_t subkey;
238-
unsigned i, start_level;
239-
rtree_node_elm_t *node, *child;
272+
unsigned start_level;
273+
rtree_node_elm_t *node;
240274

241275
start_level = rtree_start_level(rtree, key);
242276

243-
for (i = start_level, node = rtree_subtree_tryread(rtree, start_level);
244-
/**/; i++, node = child) {
245-
if (!dependent && unlikely(!rtree_node_valid(node)))
246-
return (NULL);
247-
subkey = rtree_subkey(rtree, key, i);
248-
if (i == rtree->height - 1) {
249-
/*
250-
* node is a leaf, so it contains values rather than
251-
* child pointers.
252-
*/
253-
return (rtree_val_read(rtree, &node[subkey],
254-
dependent));
255-
}
256-
assert(i < rtree->height - 1);
257-
child = rtree_child_tryread(&node[subkey]);
277+
node = rtree_subtree_tryread(rtree, start_level, dependent);
278+
#define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height)
279+
switch (start_level + RTREE_GET_BIAS) {
280+
#define RTREE_GET_SUBTREE(level) \
281+
case level: \
282+
assert(level < (RTREE_HEIGHT_MAX-1)); \
283+
if (!dependent && unlikely(!rtree_node_valid(node))) \
284+
return (NULL); \
285+
subkey = rtree_subkey(rtree, key, level - \
286+
RTREE_GET_BIAS); \
287+
node = rtree_child_tryread(&node[subkey], dependent); \
288+
/* Fall through. */
289+
#define RTREE_GET_LEAF(level) \
290+
case level: \
291+
assert(level == (RTREE_HEIGHT_MAX-1)); \
292+
if (!dependent && unlikely(!rtree_node_valid(node))) \
293+
return (NULL); \
294+
subkey = rtree_subkey(rtree, key, level - \
295+
RTREE_GET_BIAS); \
296+
/* \
297+
* node is a leaf, so it contains values rather than \
298+
* child pointers. \
299+
*/ \
300+
return (rtree_val_read(rtree, &node[subkey], \
301+
dependent));
302+
#if RTREE_HEIGHT_MAX > 1
303+
RTREE_GET_SUBTREE(0)
304+
#endif
305+
#if RTREE_HEIGHT_MAX > 2
306+
RTREE_GET_SUBTREE(1)
307+
#endif
308+
#if RTREE_HEIGHT_MAX > 3
309+
RTREE_GET_SUBTREE(2)
310+
#endif
311+
#if RTREE_HEIGHT_MAX > 4
312+
RTREE_GET_SUBTREE(3)
313+
#endif
314+
#if RTREE_HEIGHT_MAX > 5
315+
RTREE_GET_SUBTREE(4)
316+
#endif
317+
#if RTREE_HEIGHT_MAX > 6
318+
RTREE_GET_SUBTREE(5)
319+
#endif
320+
#if RTREE_HEIGHT_MAX > 7
321+
RTREE_GET_SUBTREE(6)
322+
#endif
323+
#if RTREE_HEIGHT_MAX > 8
324+
RTREE_GET_SUBTREE(7)
325+
#endif
326+
#if RTREE_HEIGHT_MAX > 9
327+
RTREE_GET_SUBTREE(8)
328+
#endif
329+
#if RTREE_HEIGHT_MAX > 10
330+
RTREE_GET_SUBTREE(9)
331+
#endif
332+
#if RTREE_HEIGHT_MAX > 11
333+
RTREE_GET_SUBTREE(10)
334+
#endif
335+
#if RTREE_HEIGHT_MAX > 12
336+
RTREE_GET_SUBTREE(11)
337+
#endif
338+
#if RTREE_HEIGHT_MAX > 13
339+
RTREE_GET_SUBTREE(12)
340+
#endif
341+
#if RTREE_HEIGHT_MAX > 14
342+
RTREE_GET_SUBTREE(13)
343+
#endif
344+
#if RTREE_HEIGHT_MAX > 15
345+
RTREE_GET_SUBTREE(14)
346+
#endif
347+
#if RTREE_HEIGHT_MAX > 16
348+
# error Unsupported RTREE_HEIGHT_MAX
349+
#endif
350+
RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1)
351+
#undef RTREE_GET_SUBTREE
352+
#undef RTREE_GET_LEAF
353+
default: not_reached();
258354
}
259355
not_reached();
260356
}
@@ -268,7 +364,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val)
268364

269365
start_level = rtree_start_level(rtree, key);
270366

271-
node = rtree_subtree_read(rtree, start_level);
367+
node = rtree_subtree_read(rtree, start_level, false);
272368
if (node == NULL)
273369
return (true);
274370
for (i = start_level; /**/; i++, node = child) {
@@ -282,7 +378,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val)
282378
return (false);
283379
}
284380
assert(i + 1 < rtree->height);
285-
child = rtree_child_read(rtree, &node[subkey], i);
381+
child = rtree_child_read(rtree, &node[subkey], i, false);
286382
if (child == NULL)
287383
return (true);
288384
}

src/rtree.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc,
1515
{
1616
unsigned bits_in_leaf, height, i;
1717

18+
assert(RTREE_HEIGHT_MAX == ((ZU(1) << (LG_SIZEOF_PTR+3)) /
19+
RTREE_BITS_PER_LEVEL));
1820
assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3));
1921

2022
bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? RTREE_BITS_PER_LEVEL

src/util.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
malloc_write("<jemalloc>: Unreachable code reached\n"); \
1515
abort(); \
1616
} \
17+
unreachable(); \
1718
} while (0)
1819

1920
#define not_implemented() do { \

0 commit comments

Comments
 (0)