|
20 | 20 | //! Enable per-thread cache |
21 | 21 | #define ENABLE_THREAD_CACHE 1 |
22 | 22 | #endif |
| 23 | +#ifndef ENABLE_ADAPTIVE_THREAD_CACHE |
| 24 | +//! Enable adaptive size of per-thread cache (still bounded by THREAD_CACHE_MULTIPLIER hard limit) |
| 25 | +#define ENABLE_ADAPTIVE_THREAD_CACHE 1 |
| 26 | +#endif |
23 | 27 | #ifndef ENABLE_GLOBAL_CACHE |
24 | 28 | //! Enable global cache shared between all threads, requires thread cache |
25 | 29 | #define ENABLE_GLOBAL_CACHE 1 |
|
51 | 55 |
|
52 | 56 | #if ENABLE_THREAD_CACHE |
53 | 57 | #ifndef ENABLE_UNLIMITED_CACHE |
54 | | -//! Unlimited thread and global cache unified control |
| 58 | +//! Unlimited thread and global cache |
55 | 59 | #define ENABLE_UNLIMITED_CACHE 0 |
56 | 60 | #endif |
57 | 61 | #ifndef ENABLE_UNLIMITED_THREAD_CACHE |
|
78 | 82 | # define ENABLE_GLOBAL_CACHE 0 |
79 | 83 | #endif |
80 | 84 |
|
| 85 | +#if !ENABLE_THREAD_CACHE || ENABLE_UNLIMITED_THREAD_CACHE |
| 86 | +# undef ENABLE_ADAPTIVE_THREAD_CACHE |
| 87 | +# define ENABLE_ADAPTIVE_THREAD_CACHE 0 |
| 88 | +#endif |
| 89 | + |
81 | 90 | #if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE |
82 | 91 | # error Must use global cache if unmap is disabled |
83 | 92 | #endif |
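
Note on the ENABLE_ADAPTIVE_THREAD_CACHE guard in the hunk above: whenever the thread cache is disabled or declared unlimited, the flag is silently forced back to 0, since adaptive sizing only makes sense for a bounded per-thread cache. A stricter alternative, shown here purely as a sketch and not part of this change, would be to reject the contradictory configuration at compile time:

/* Sketch only -- the commit silently disables the flag instead of erroring out. */
#if ENABLE_ADAPTIVE_THREAD_CACHE && (!ENABLE_THREAD_CACHE || ENABLE_UNLIMITED_THREAD_CACHE)
#  error ENABLE_ADAPTIVE_THREAD_CACHE requires a bounded per-thread cache
#endif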
@@ -266,6 +275,16 @@ union span_data_t { |
266 | 275 | uint64_t compound; |
267 | 276 | }; |
268 | 277 |
|
| 278 | +#if ENABLE_ADAPTIVE_THREAD_CACHE |
| 279 | +struct span_use_t { |
| 280 | + //! Current number of spans used (actually used, not in cache) |
| 281 | + unsigned int current; |
| 282 | + //! High water mark of spans used |
| 283 | + unsigned int high; |
| 284 | +}; |
| 285 | +typedef struct span_use_t span_use_t; |
| 286 | +#endif |
| 287 | + |
269 | 288 | //A span can either represent a single span of memory pages with size declared by span_map_count configuration variable, |
270 | 289 | //or a set of spans in a continuous region, a super span. Any reference to the term "span" usually refers to both a single |
271 | 290 | //span or a super span. A super span can further be divided into multiple spans (or this, super spans), where the first |
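
The new span_use_t is a plain high-water-mark counter kept per span count class: current counts spans handed out by the heap and not yet returned, and high remembers the peak. The allocation and deallocation hunks further down update the fields inline; a minimal sketch of that pattern, with hypothetical helper names (the real code touches the fields directly), looks like this:

/* Hypothetical helpers mirroring the inline updates in the hunks below. */
static void
span_use_inc(span_use_t* use) {
	++use->current;
	if (use->current > use->high)
		use->high = use->current; /* record peak concurrent span usage */
}

static void
span_use_dec(span_use_t* use) {
	if (use->current) /* guarded to avoid underflow */
		--use->current;
}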
@@ -310,6 +329,10 @@ struct heap_t { |
310 | 329 | #if ENABLE_THREAD_CACHE |
311 | 330 | //! List of free spans (single linked list) |
312 | 331 | span_t* span_cache[LARGE_CLASS_COUNT]; |
| 332 | +#endif |
| 333 | +#if ENABLE_ADAPTIVE_THREAD_CACHE |
| 334 | + //! Current and high water mark of spans used per span count |
| 335 | + span_use_t span_use[LARGE_CLASS_COUNT]; |
313 | 336 | #endif |
314 | 337 | //! Mapped but unused spans |
315 | 338 | span_t* span_reserve; |
@@ -806,8 +829,21 @@ _memory_heap_cache_insert(heap_t* heap, span_t* span) { |
806 | 829 | _memory_span_list_push(&heap->span_cache[idx], span); |
807 | 830 | #else |
808 | 831 | const size_t release_count = (!idx ? _memory_span_release_count : _memory_span_release_count_large); |
809 | | - if (_memory_span_list_push(&heap->span_cache[idx], span) <= (release_count * THREAD_CACHE_MULTIPLIER)) |
| 832 | + size_t current_cache_size = _memory_span_list_push(&heap->span_cache[idx], span); |
| 833 | + if (current_cache_size <= release_count) |
| 834 | + return; |
| 835 | + const size_t hard_limit = release_count * THREAD_CACHE_MULTIPLIER; |
| 836 | + if (current_cache_size <= hard_limit) { |
| 837 | +#if ENABLE_ADAPTIVE_THREAD_CACHE |
| 838 | + //Require 25% of high water mark to remain in cache (and at least 1, if use is 0) |
| 839 | + size_t high_mark = heap->span_use[idx].high; |
| 840 | + const size_t min_limit = (high_mark >> 2) + release_count + 1; |
| 841 | + if (current_cache_size < min_limit) |
| 842 | + return; |
| 843 | +#else |
810 | 844 | return; |
| 845 | +#endif |
| 846 | + } |
811 | 847 | heap->span_cache[idx] = _memory_span_list_split(span, release_count); |
812 | 848 | assert(span->data.list.size == release_count); |
813 | 849 | #if ENABLE_STATISTICS |
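
The insert path now works in three tiers: a cache of at most release_count spans is always kept, a cache above the hard limit of release_count * THREAD_CACHE_MULTIPLIER is always trimmed, and in between the adaptive minimum of (high water mark / 4) + release_count + 1 decides. A standalone sketch of that decision, using purely illustrative numbers (release_count of 64, a high water mark of 40, and 16 as a stand-in for THREAD_CACHE_MULTIPLIER):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Stand-in for THREAD_CACHE_MULTIPLIER, chosen only for the example. */
#define EXAMPLE_THREAD_CACHE_MULTIPLIER 16

static bool
should_release_batch(size_t cache_size, size_t release_count, size_t high_water) {
	if (cache_size <= release_count)
		return false;                 /* tier 1: always keep a minimal cache */
	const size_t hard_limit = release_count * EXAMPLE_THREAD_CACHE_MULTIPLIER;
	if (cache_size > hard_limit)
		return true;                  /* tier 3: hard cap exceeded, trim */
	/* tier 2: adaptive band -- keep 25% of the high water mark plus release_count + 1 */
	const size_t min_limit = (high_water >> 2) + release_count + 1;
	return cache_size >= min_limit;
}

int
main(void) {
	/* With release_count = 64 and a high water mark of 40 spans, trimming
	   starts once the cache grows to (40 >> 2) + 64 + 1 = 75 spans. */
	for (size_t cache_size = 60; cache_size <= 80; cache_size += 5)
		printf("cache %zu -> %s\n", cache_size,
		       should_release_batch(cache_size, 64, 40) ? "release batch" : "keep");
	return 0;
}

When a batch is released, _memory_span_list_split leaves release_count spans in the batch headed by the just-freed span (as the assert confirms) while the remainder becomes the new thread cache list.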
@@ -954,6 +990,12 @@ _memory_allocate_from_heap(heap_t* heap, size_t size) { |
954 | 990 | return span; |
955 | 991 | } |
956 | 992 |
|
| 993 | +#if ENABLE_ADAPTIVE_THREAD_CACHE |
| 994 | + ++heap->span_use[0].current; |
| 995 | + if (heap->span_use[0].current > heap->span_use[0].high) |
| 996 | + heap->span_use[0].high = heap->span_use[0].current; |
| 997 | +#endif |
| 998 | + |
957 | 999 | //Mark span as owned by this heap and set base data |
958 | 1000 | assert(span->span_count == 1); |
959 | 1001 | span->size_class = (uint16_t)class_idx; |
@@ -989,6 +1031,11 @@ _memory_allocate_large_from_heap(heap_t* heap, size_t size) { |
989 | 1031 | if (size & (_memory_span_size - 1)) |
990 | 1032 | ++span_count; |
991 | 1033 | size_t idx = span_count - 1; |
| 1034 | +#if ENABLE_ADAPTIVE_THREAD_CACHE |
| 1035 | + ++heap->span_use[idx].current; |
| 1036 | + if (heap->span_use[idx].current > heap->span_use[idx].high) |
| 1037 | + heap->span_use[idx].high = heap->span_use[idx].current; |
| 1038 | +#endif |
992 | 1039 |
|
993 | 1040 | //Step 1: Find span in one of the cache levels |
994 | 1041 | span_t* span = _memory_heap_cache_extract(heap, span_count); |
@@ -1084,6 +1131,10 @@ _memory_deallocate_to_heap(heap_t* heap, span_t* span, void* p) { |
1084 | 1131 | //block (guard for classes with only 1 block) and add to heap cache |
1085 | 1132 | if (block_data->free_count > 0) |
1086 | 1133 | _memory_span_list_doublelink_remove(&heap->size_cache[class_idx], span); |
| 1134 | +#if ENABLE_ADAPTIVE_THREAD_CACHE |
| 1135 | + if (heap->span_use[0].current) |
| 1136 | + --heap->span_use[0].current; |
| 1137 | +#endif |
1087 | 1138 | _memory_heap_cache_insert(heap, span); |
1088 | 1139 | } |
1089 | 1140 | return; |
@@ -1116,6 +1167,11 @@ _memory_deallocate_large_to_heap(heap_t* heap, span_t* span) { |
1116 | 1167 | assert(span->size_class - SIZE_CLASS_COUNT < LARGE_CLASS_COUNT); |
1117 | 1168 | assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN)); |
1118 | 1169 | assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN)); |
| 1170 | +#if ENABLE_ADAPTIVE_THREAD_CACHE |
| 1171 | + size_t idx = span->span_count - 1; |
| 1172 | + if (heap->span_use[idx].current) |
| 1173 | + --heap->span_use[idx].current; |
| 1174 | +#endif |
1119 | 1175 | if ((span->span_count > 1) && !heap->spans_reserved) { |
1120 | 1176 | heap->span_reserve = span; |
1121 | 1177 | heap->spans_reserved = span->span_count; |
@@ -1481,7 +1537,7 @@ rpmalloc_initialize_config(const rpmalloc_config_t* config) { |
1481 | 1537 | _memory_config.enable_huge_pages = _memory_huge_pages; |
1482 | 1538 |
|
1483 | 1539 | _memory_span_release_count = (_memory_span_map_count > 4 ? ((_memory_span_map_count < 64) ? _memory_span_map_count : 64) : 4); |
1484 | | - _memory_span_release_count_large = (_memory_span_release_count > 4 ? (_memory_span_release_count / 2) : 2); |
| 1540 | + _memory_span_release_count_large = (_memory_span_release_count > 8 ? (_memory_span_release_count / 4) : 2); |
1485 | 1541 |
|
1486 | 1542 | #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD |
1487 | 1543 | if (pthread_key_create(&_memory_thread_heap, 0)) |
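
The derivation of the large-span release batch changes from half of the base release count to a quarter of it. A quick standalone comparison of the old and new formulas over a few hypothetical span map counts (values chosen only for illustration):

#include <stddef.h>
#include <stdio.h>

/* Mirrors the _memory_span_release_count derivation shown above. */
static size_t
release_count_for(size_t map_count) {
	return (map_count > 4 ? ((map_count < 64) ? map_count : 64) : 4);
}

int
main(void) {
	const size_t map_counts[] = { 4, 16, 32, 64, 128 };
	for (size_t i = 0; i < sizeof(map_counts) / sizeof(map_counts[0]); ++i) {
		size_t rc = release_count_for(map_counts[i]);
		size_t old_large = (rc > 4 ? (rc / 2) : 2);
		size_t new_large = (rc > 8 ? (rc / 4) : 2);
		printf("map %3zu: release %2zu, large batch old %2zu -> new %2zu\n",
		       map_counts[i], rc, old_large, new_large);
	}
	return 0;
}

For a span map count of 64, for example, the large batch drops from 32 to 16 spans, so large spans are released from the thread cache in smaller batches.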
|