
Commit f547dfd

[SEC] Make SEC owned by hpa_shard, simplify the code, add stats, lock per bin

1 parent f5f0f06

36 files changed: +1257 / -1269 lines

Makefile.in (1 addition & 1 deletion)

@@ -133,7 +133,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \
 $(srcroot)src/nstime.c \
 $(srcroot)src/pa.c \
 $(srcroot)src/pa_extra.c \
-$(srcroot)src/pai.c \
 $(srcroot)src/pac.c \
 $(srcroot)src/pages.c \
 $(srcroot)src/peak_event.c \
@@ -228,6 +227,7 @@ TESTS_UNIT := \
 $(srcroot)test/unit/hash.c \
 $(srcroot)test/unit/hook.c \
 $(srcroot)test/unit/hpa.c \
+$(srcroot)test/unit/hpa_sec_integration.c \
 $(srcroot)test/unit/hpa_thp_always.c \
 $(srcroot)test/unit/hpa_vectorized_madvise.c \
 $(srcroot)test/unit/hpa_vectorized_madvise_large_batch.c \

include/jemalloc/internal/arena_externs.h (1 addition & 1 deletion)

@@ -46,7 +46,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
 size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
 bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats,
-hpa_shard_stats_t *hpastats, sec_stats_t *secstats);
+hpa_shard_stats_t *hpastats);
 void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena);
 edata_t *arena_extent_alloc_large(
 tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero);

include/jemalloc/internal/ctl.h (0 additions & 1 deletion)

@@ -51,7 +51,6 @@ typedef struct ctl_arena_stats_s {
 arena_stats_large_t lstats[SC_NSIZES - SC_NBINS];
 pac_estats_t estats[SC_NPSIZES];
 hpa_shard_stats_t hpastats;
-sec_stats_t secstats;
 } ctl_arena_stats_t;
 
 typedef struct ctl_stats_s {

include/jemalloc/internal/edata.h (1 addition & 1 deletion)

@@ -222,7 +222,7 @@ struct edata_s {
 * ssssssss [...] ssssssss ssssnnnn nnnnnnnn
 */
 size_t e_size_esn;
-#define EDATA_SIZE_MASK ((size_t) ~(PAGE - 1))
+#define EDATA_SIZE_MASK ((size_t)~(PAGE - 1))
 #define EDATA_ESN_MASK ((size_t)PAGE - 1)
 /* Base extent size, which may not be a multiple of PAGE. */
 size_t e_bsize;

include/jemalloc/internal/hpa.h (13 additions & 4 deletions)

@@ -11,6 +11,7 @@
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/pai.h"
 #include "jemalloc/internal/psset.h"
+#include "jemalloc/internal/sec.h"
 
 typedef struct hpa_central_s hpa_central_t;
 struct hpa_central_s {
@@ -80,6 +81,7 @@ typedef struct hpa_shard_stats_s hpa_shard_stats_t;
 struct hpa_shard_stats_s {
 psset_stats_t psset_stats;
 hpa_shard_nonderived_stats_t nonderived_stats;
+sec_stats_t secstats;
 };
 
 typedef struct hpa_shard_s hpa_shard_t;
@@ -92,6 +94,10 @@ struct hpa_shard_s {
 
 /* The central allocator we get our hugepages from. */
 hpa_central_t *central;
+
+/* Small extent cache */
+sec_t sec;
+
 /* Protects most of this shard's state. */
 malloc_mutex_t mtx;
 /*
@@ -167,9 +173,9 @@ bool hpa_hugepage_size_exceeds_limit(void);
 bool hpa_supported(void);
 bool hpa_central_init(
 hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks);
-bool hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap,
-base_t *base, edata_cache_t *edata_cache, unsigned ind,
-const hpa_shard_opts_t *opts);
+bool hpa_shard_init(tsdn_t *tsdn, hpa_shard_t *shard, hpa_central_t *central,
+emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind,
+const hpa_shard_opts_t *opts, const sec_opts_t *sec_opts);
 
 void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src);
 void hpa_shard_stats_merge(
@@ -182,15 +188,18 @@ void hpa_shard_stats_merge(
 */
 void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard);
 void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard);
+/* Flush caches that shard may be using */
+void hpa_shard_flush(tsdn_t *tsdn, hpa_shard_t *shard);
 
 void hpa_shard_set_deferral_allowed(
 tsdn_t *tsdn, hpa_shard_t *shard, bool deferral_allowed);
 void hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard);
 
 /*
 * We share the fork ordering with the PA and arena prefork handling; that's why
-* these are 3 and 4 rather than 0 and 1.
+* these are 2, 3 and 4 rather than 0 and 1.
 */
+void hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard);
 void hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard);
 void hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard);
 void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard);
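
As a reading aid, a minimal sketch of the updated initialization call, assuming a hypothetical wrapper and placeholder variable names (only the hpa_shard_init() signature above is taken from this commit): the shard now sets up its embedded SEC inside hpa_shard_init(), so the caller passes sec_opts along with the HPA options instead of initializing a separate sec_t.

#include "jemalloc/internal/hpa.h"

/*
 * Sketch only: hypothetical call site showing the new signature; the real
 * caller lives in pa.c and may differ.
 */
static bool
hpa_shard_init_sketch(tsdn_t *tsdn, hpa_shard_t *shard, hpa_central_t *central,
    emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind,
    const hpa_shard_opts_t *opts, const sec_opts_t *sec_opts) {
    /*
     * The tsdn and sec_opts arguments are new; the embedded SEC is
     * initialized as part of shard init.  Returns true on error, following
     * the usual jemalloc convention.
     */
    return hpa_shard_init(tsdn, shard, central, emap, base, edata_cache,
        ind, opts, sec_opts);
}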

include/jemalloc/internal/pa.h (4 additions & 8 deletions)

@@ -96,12 +96,6 @@ struct pa_shard_s {
 /* Allocates from a PAC. */
 pac_t pac;
 
-/*
-* We place a small extent cache in front of the HPA, since we intend
-* these configurations to use many fewer arenas, and therefore have a
-* higher risk of hot locks.
-*/
-sec_t hpa_sec;
 hpa_shard_t hpa_shard;
 
 /* The source of edata_t objects. */
@@ -166,6 +160,9 @@ void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard);
 */
 void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard);
 
+/* Flush any caches used by shard */
+void pa_shard_flush(tsdn_t *tsdn, pa_shard_t *shard);
+
 /* Gets an edata for the given allocation. */
 edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size,
 size_t alignment, bool slab, szind_t szind, bool zero, bool guarded,
@@ -233,8 +230,7 @@ void pa_shard_basic_stats_merge(
 
 void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
 pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out,
-hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out,
-size_t *resident);
+hpa_shard_stats_t *hpa_stats_out, size_t *resident);
 
 /*
 * Reads the PA-owned mutex stats into the output stats array, at the
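
The header only declares pa_shard_flush(). As a hedged sketch of the layering (assumed, not this commit's actual pa.c, and relying on the pre-existing pa_shard_uses_hpa() helper), flushing would simply delegate to the HPA shard, which now owns the SEC:

#include "jemalloc/internal/pa.h"

/* Sketch only: the PA shard no longer has its own sec_t to flush. */
static void
pa_shard_flush_sketch(tsdn_t *tsdn, pa_shard_t *shard) {
    if (pa_shard_uses_hpa(shard)) {
        /* The embedded SEC lives inside the HPA shard now. */
        hpa_shard_flush(tsdn, &shard->hpa_shard);
    }
}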

include/jemalloc/internal/pai.h (0 additions & 37 deletions)

@@ -13,25 +13,13 @@ struct pai_s {
 edata_t *(*alloc)(tsdn_t *tsdn, pai_t *self, size_t size,
 size_t alignment, bool zero, bool guarded, bool frequent_reuse,
 bool *deferred_work_generated);
-/*
-* Returns the number of extents added to the list (which may be fewer
-* than requested, in case of OOM). The list should already be
-* initialized. The only alignment guarantee is page-alignment, and
-* the results are not necessarily zeroed.
-*/
-size_t (*alloc_batch)(tsdn_t *tsdn, pai_t *self, size_t size,
-size_t nallocs, edata_list_active_t *results, bool frequent_reuse,
-bool *deferred_work_generated);
 bool (*expand)(tsdn_t *tsdn, pai_t *self, edata_t *edata,
 size_t old_size, size_t new_size, bool zero,
 bool *deferred_work_generated);
 bool (*shrink)(tsdn_t *tsdn, pai_t *self, edata_t *edata,
 size_t old_size, size_t new_size, bool *deferred_work_generated);
 void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata,
 bool *deferred_work_generated);
-/* This function empties out list as a side-effect of being called. */
-void (*dalloc_batch)(tsdn_t *tsdn, pai_t *self,
-edata_list_active_t *list, bool *deferred_work_generated);
 uint64_t (*time_until_deferred_work)(tsdn_t *tsdn, pai_t *self);
 };
 
@@ -47,14 +35,6 @@ pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
 frequent_reuse, deferred_work_generated);
 }
 
-static inline size_t
-pai_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
-edata_list_active_t *results, bool frequent_reuse,
-bool *deferred_work_generated) {
-return self->alloc_batch(tsdn, self, size, nallocs, results,
-frequent_reuse, deferred_work_generated);
-}
-
 static inline bool
 pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
 size_t new_size, bool zero, bool *deferred_work_generated) {
@@ -75,26 +55,9 @@ pai_dalloc(
 self->dalloc(tsdn, self, edata, deferred_work_generated);
 }
 
-static inline void
-pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list,
-bool *deferred_work_generated) {
-self->dalloc_batch(tsdn, self, list, deferred_work_generated);
-}
-
 static inline uint64_t
 pai_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
 return self->time_until_deferred_work(tsdn, self);
 }
 
-/*
-* An implementation of batch allocation that simply calls alloc once for
-* each item in the list.
-*/
-size_t pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size,
-size_t nallocs, edata_list_active_t *results, bool frequent_reuse,
-bool *deferred_work_generated);
-/* Ditto, for dalloc. */
-void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self,
-edata_list_active_t *list, bool *deferred_work_generated);
-
 #endif /* JEMALLOC_INTERNAL_PAI_H */
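
With alloc_batch/dalloc_batch gone from the pai_t vtable, walking a list of extents is now the caller's job. A hedged sketch of that pattern, mirroring what the removed pai_dalloc_batch_default() used to do; the function name is a placeholder, and edata_list_active_first/remove are the existing jemalloc list helpers:

#include "jemalloc/internal/edata.h"
#include "jemalloc/internal/pai.h"

/* Sketch only: free every extent on a list through the remaining pai_dalloc(). */
static void
dalloc_list_sketch(tsdn_t *tsdn, pai_t *pai, edata_list_active_t *list,
    bool *deferred_work_generated) {
    edata_t *edata;
    while ((edata = edata_list_active_first(list)) != NULL) {
        edata_list_active_remove(list, edata);
        pai_dalloc(tsdn, pai, edata, deferred_work_generated);
    }
}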

include/jemalloc/internal/sec.h (62 additions & 57 deletions)

@@ -17,91 +17,96 @@
 * knowledge of the underlying PAI implementation).
 */
 
-/*
-* For now, this is just one field; eventually, we'll probably want to get more
-* fine-grained data out (like per-size class statistics).
-*/
+typedef struct sec_bin_stats_s sec_bin_stats_t;
+struct sec_bin_stats_s {
+/* Number of alloc requests that did not find extent in this bin */
+size_t nmisses;
+/* Number of successful alloc requests. */
+size_t nhits;
+/* Number of dallocs causing the flush */
+size_t ndalloc_flush;
+/* Number of dallocs not causing the flush */
+size_t ndalloc_noflush;
+};
 typedef struct sec_stats_s sec_stats_t;
 struct sec_stats_s {
 /* Sum of bytes_cur across all shards. */
 size_t bytes;
+
+/* Totals of bin_stats. */
+sec_bin_stats_t total;
 };
 
+static inline void
+sec_bin_stats_init(sec_bin_stats_t *stats) {
+stats->ndalloc_flush = 0;
+stats->nmisses = 0;
+stats->nhits = 0;
+stats->ndalloc_noflush = 0;
+}
+
+static inline void
+sec_bin_stats_accum(sec_bin_stats_t *dst, sec_bin_stats_t *src) {
+dst->nmisses += src->nmisses;
+dst->nhits += src->nhits;
+dst->ndalloc_flush += src->ndalloc_flush;
+dst->ndalloc_noflush += src->ndalloc_noflush;
+}
+
 static inline void
 sec_stats_accum(sec_stats_t *dst, sec_stats_t *src) {
 dst->bytes += src->bytes;
+sec_bin_stats_accum(&dst->total, &src->total);
 }
 
 /* A collections of free extents, all of the same size. */
 typedef struct sec_bin_s sec_bin_t;
 struct sec_bin_s {
 /*
-* When we fail to fulfill an allocation, we do a batch-alloc on the
-* underlying allocator to fill extra items, as well. We drop the SEC
-* lock while doing so, to allow operations on other bins to succeed.
-* That introduces the possibility of other threads also trying to
-* allocate out of this bin, failing, and also going to the backing
-* allocator. To avoid a thundering herd problem in which lots of
-* threads do batch allocs and overfill this bin as a result, we only
-* allow one batch allocation at a time for a bin. This bool tracks
-* whether or not some thread is already batch allocating.
-*
-* Eventually, the right answer may be a smarter sharding policy for the
-* bins (e.g. a mutex per bin, which would also be more scalable
-* generally; the batch-allocating thread could hold it while
-* batch-allocating).
+* Protects the data members of the bin.
 */
-bool being_batch_filled;
+malloc_mutex_t mtx;
 
 /*
-* Number of bytes in this particular bin (as opposed to the
-* sec_shard_t's bytes_cur. This isn't user visible or reported in
-* stats; rather, it allows us to quickly determine the change in the
-* centralized counter when flushing.
+* Number of bytes in this particular bin.
 */
 size_t bytes_cur;
 edata_list_active_t freelist;
+sec_bin_stats_t stats;
 };
 
-typedef struct sec_shard_s sec_shard_t;
-struct sec_shard_s {
-/*
-* We don't keep per-bin mutexes, even though that would allow more
-* sharding; this allows global cache-eviction, which in turn allows for
-* better balancing across free lists.
-*/
-malloc_mutex_t mtx;
-/*
-* A SEC may need to be shut down (i.e. flushed of its contents and
-* prevented from further caching). To avoid tricky synchronization
-* issues, we just track enabled-status in each shard, guarded by a
-* mutex. In practice, this is only ever checked during brief races,
-* since the arena-level atomic boolean tracking HPA enabled-ness means
-* that we won't go down these pathways very often after custom extent
-* hooks are installed.
-*/
-bool enabled;
+typedef struct sec_s sec_t;
+struct sec_s {
+sec_opts_t opts;
 sec_bin_t *bins;
-/* Number of bytes in all bins in the shard. */
-size_t bytes_cur;
-/* The next pszind to flush in the flush-some pathways. */
-pszind_t to_flush_next;
+pszind_t npsizes;
 };
 
-typedef struct sec_s sec_t;
-struct sec_s {
-pai_t pai;
-pai_t *fallback;
+static inline bool
+sec_is_used(sec_t *sec) {
+return sec->opts.nshards != 0;
+}
 
-sec_opts_t opts;
-sec_shard_t *shards;
-pszind_t npsizes;
-};
+static inline bool
+sec_size_supported(sec_t *sec, size_t size) {
+return sec_is_used(sec) && size <= sec->opts.max_alloc;
+}
+
+/* If sec does not have extent available, it will return NULL. */
+edata_t *sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size);
+void sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size,
+edata_list_active_t *result, size_t nallocs);
+
+/*
+* Upon return dalloc_list may be empty if edata is consumed by sec or
+* non-empty if there are extents that need to be flushed from cache.
+*/
+void sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list);
+
+bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts);
 
-bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback,
-const sec_opts_t *opts);
-void sec_flush(tsdn_t *tsdn, sec_t *sec);
-void sec_disable(tsdn_t *tsdn, sec_t *sec);
+/* Fills to_flush with extents that need to be deallocated */
+void sec_flush(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *to_flush);
 
 /*
 * Morally, these two stats methods probably ought to be a single one (and the
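
To make the reworked, fallback-free interface concrete, a hedged usage sketch follows. The owner_* function names and fallback_* helpers are placeholders (in this commit the real caller is the hpa_shard, whose sources are not shown here); the sketch relies only on the behavior documented above: sec_alloc() returns NULL on a miss, and sec_dalloc() leaves on the caller's list any extents that still need a real deallocation.

#include "jemalloc/internal/edata.h"
#include "jemalloc/internal/sec.h"

/* Placeholders for the owner's backing-allocator calls; not part of sec.h. */
static edata_t *fallback_alloc(tsdn_t *tsdn, size_t size);
static void fallback_dalloc(tsdn_t *tsdn, edata_t *edata);

/* Sketch only: allocation path through the cache. */
static edata_t *
owner_alloc_sketch(tsdn_t *tsdn, sec_t *sec, size_t size) {
    if (!sec_size_supported(sec, size)) {
        /* Too large (or SEC disabled); go straight to the backing allocator. */
        return fallback_alloc(tsdn, size);
    }
    /* Fast path: reuse a cached extent of this size, if one is available. */
    edata_t *edata = sec_alloc(tsdn, sec, size);
    if (edata == NULL) {
        edata = fallback_alloc(tsdn, size);
    }
    return edata;
}

/* Sketch only: deallocation path; the SEC may hand back extents to flush. */
static void
owner_dalloc_sketch(tsdn_t *tsdn, sec_t *sec, edata_t *edata) {
    edata_list_active_t list;
    edata_list_active_init(&list);
    edata_list_active_append(&list, edata);
    /*
     * The SEC keeps whatever fits under its limits; anything left on the
     * list afterwards (the passed extent and/or flushed ones) must be
     * deallocated for real by the caller.
     */
    sec_dalloc(tsdn, sec, &list);
    edata_t *e;
    while ((e = edata_list_active_first(&list)) != NULL) {
        edata_list_active_remove(&list, e);
        fallback_dalloc(tsdn, e);
    }
}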
