From e8c0da5d1b8ac9592c75f94dd9eae0aee7d028a6 Mon Sep 17 00:00:00 2001
From: Liviu Chircu
Date: Tue, 20 Jun 2023 11:00:41 +0300
Subject: [PATCH] F_MALLOC: Optimize the free() operation

Commit bdaaf60b2c introduced a side-effect of gradually moving the
fragment "action" towards the exponential, non-optimized hash table
buckets (i.e. buckets 2049 ... 2100).  Here, the fragments were inserted
in a sorted fashion, with the sorted-insert algorithm costing an O(N)
iteration on each free operation instead of a simple O(1).

Consequently, the user-visible effect was that "dr_reload" operations
stalled for 12 minutes (up from 24 seconds!) when working with large
rule sets (millions of rules).  Interestingly enough, the stalling was
not caused by the caching phase -- malloc() -- but rather by the cleanup
phase, when clearing the old rules -- free()!

To address this issue:

* we drop the sorted insertion completely for buckets 2049 ... 2100, and
  simply do a list prepend operation: O(1), as with the other buckets

* we make all allocation requests from these buckets return the next
  bucket (!!), thus guaranteeing the requested fragment size.  Examples:
      malloc(18K) -> now you always get a 32K+ frag, but instantly!
      malloc(37K) -> now you always get a 64K+ frag, but instantly!

* this does not make F_MALLOC more wasteful, since the extra frag space
  gets split anyway into a new fragment, with the two eventually
  coalescing back together thanks to commit bdaaf60b2c

(cherry picked from commit e6b4de51298eb78aef097cbfd1c34ada17b9b78f)
---
 mem/f_malloc.c     | 29 +++++++++++++++++++----------
 mem/f_malloc_dyn.h | 11 ++++++-----
 2 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/mem/f_malloc.c b/mem/f_malloc.c
index 873a0723941..470f27e6272 100644
--- a/mem/f_malloc.c
+++ b/mem/f_malloc.c
@@ -52,11 +52,27 @@
 #define ROUNDUP(s)	(((s)+(ROUNDTO-1))&ROUNDTO_MASK)
 #define ROUNDDOWN(s)	((s)&ROUNDTO_MASK)
 
-/* finds the hash value for s, s=ROUNDTO multiple*/
-#define GET_HASH(s)	( ((unsigned long)(s)<=F_MALLOC_OPTIMIZE)?\
+/**
+ * The @inc is tied to the exponential, non-optimized buckets
+ * (e.g. indexes 2049 ... 2100 with FACTOR = 14UL), where it allows us to fully
+ * drop the slow fragment sorted insertion algorithm for *huge* speed gains, by
+ * always returning a frag of anywhere up to 4x (previously 2x) the requested
+ * size...
+ * For example:
+ *   - malloc(18K) -> now you always get a frag of 32K+ size, but instantly!
+ *   - malloc(37K) -> now you always get a frag of 64K+ size, but instantly!
+ *
+ * Finally, the extra fragment size is *not* wasted, thanks to splitting!
+ *
+ * A possible disadvantage of this approach is that it will make allocating
+ * more than 50% of the remaining free memory pool in a single allocation even
+ * harder than before (borderline impossible now)...
+ */
+#define _GET_HASH(s, inc) ( ((unsigned long)(s)<=F_MALLOC_OPTIMIZE)?\
 		(unsigned long)(s)/ROUNDTO: \
 		F_MALLOC_OPTIMIZE/ROUNDTO+big_hash_idx((s))- \
-			F_MALLOC_OPTIMIZE_FACTOR+1 )
+			F_MALLOC_OPTIMIZE_FACTOR + 1 + (inc))
+#define GET_HASH(s)	_GET_HASH(s, 0)
 
 #define UN_HASH(h)	( ((unsigned long)(h)<=(F_MALLOC_OPTIMIZE/ROUNDTO))?\
 		(unsigned long)(h)*ROUNDTO: \
@@ -123,13 +139,6 @@ static inline void fm_insert_free(struct fm_block *fm, struct fm_frag *frag)
 
 	hash=GET_HASH(frag->size);
 	f=&(fm->free_hash[hash].first);
-	if (frag->size > F_MALLOC_OPTIMIZE){ /* because of '<=' in GET_HASH,
-	                                        (different from 0.8.1[24] on
-	                                        purpose --andrei ) */
-		for(; *f; f=&((*f)->u.nxt_free)){
-			if (frag->size <= (*f)->size) break;
-		}
-	}
 
 	/*insert it here*/
 	frag->prev = f;
diff --git a/mem/f_malloc_dyn.h b/mem/f_malloc_dyn.h
index f07882a3fbc..6423129f78a 100644
--- a/mem/f_malloc_dyn.h
+++ b/mem/f_malloc_dyn.h
@@ -111,11 +111,12 @@ void *fm_malloc(struct fm_block *fm, unsigned long size,
 
 	/*search for a suitable free frag*/
 
-	for(hash=GET_HASH(size);hash<F_HASH_SIZE;hash++){
-		frag=fm->free_hash[hash].first;
-		for( ; frag; frag = frag->u.nxt_free )
-			if ( frag->size >= size ) goto found;
-		/* try in a bigger bucket */
+	for (hash = _GET_HASH(size, +1); hash < F_HASH_SIZE; ++hash) {
+		if (!fm->free_hash[hash].first)
+			continue; /* try in a bigger bucket */
+
+		frag = fm->free_hash[hash].first;
+		goto found;
 	}
 
 	/* not found, bad! */
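
Not part of the patch itself: below is a minimal, standalone C sketch of the bucket
arithmetic behind the "_GET_HASH(size, +1)" trick used in fm_malloc() above. It assumes
ROUNDTO = 8, F_MALLOC_OPTIMIZE_FACTOR = 14UL (as in the comment added by the patch) and
that big_hash_idx() returns the index of the most significant set bit of its argument;
get_hash() mirrors the patched macro, while bucket_min_size() is a hypothetical helper
added only for illustration.

/* Illustration only, not OpenSIPS source: shows why bumping the exponential
 * bucket index by one always yields a large-enough fragment without scanning.
 * Assumptions: ROUNDTO = 8, F_MALLOC_OPTIMIZE_FACTOR = 14, and big_hash_idx()
 * returning the index of the most significant set bit (floor(log2)). */
#include <stdio.h>

#define ROUNDTO                  8UL
#define F_MALLOC_OPTIMIZE_FACTOR 14UL
#define F_MALLOC_OPTIMIZE        (1UL << F_MALLOC_OPTIMIZE_FACTOR) /* 16K */

static unsigned long big_hash_idx(unsigned long s)
{
	unsigned long idx = 0;

	for (; s > 1; s >>= 1)
		idx++;
	return idx; /* floor(log2(s)) */
}

/* mirrors the patched _GET_HASH(s, inc) macro */
static unsigned long get_hash(unsigned long s, int inc)
{
	if (s <= F_MALLOC_OPTIMIZE)
		return s / ROUNDTO;
	return F_MALLOC_OPTIMIZE / ROUNDTO + big_hash_idx(s)
	       - F_MALLOC_OPTIMIZE_FACTOR + 1 + inc;
}

/* hypothetical helper: smallest frag size an exponential bucket can hold */
static unsigned long bucket_min_size(unsigned long hash)
{
	return 1UL << (hash - F_MALLOC_OPTIMIZE / ROUNDTO
	               + F_MALLOC_OPTIMIZE_FACTOR - 1);
}

int main(void)
{
	unsigned long sizes[] = { 18 * 1024, 37 * 1024 };
	unsigned long i, s, h;

	for (i = 0; i < 2; i++) {
		s = sizes[i];
		h = get_hash(s, +1); /* the "+1" bump from fm_malloc() */

		/* every frag in bucket h is >= bucket_min_size(h) >= s, so the
		 * first frag in the list can be taken without any iteration */
		printf("malloc(%luK) -> bucket %lu, frags of %luK+\n",
		       s / 1024, h, bucket_min_size(h) / 1024);
	}
	return 0;
}

Compiled and run, the sketch should print "malloc(18K) -> bucket 2050, frags of 32K+"
and "malloc(37K) -> bucket 2051, frags of 64K+", matching the examples in the commit
message; the price of the guarantee is the up-to-4x overshoot noted in the macro
comment, which the fragment splitting (and later coalescing) reclaims.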