diff --git a/include/jemalloc/internal/bitmap_inlines.h b/include/jemalloc/internal/bitmap_inlines.h index fc4bad4c..c2362018 100644 --- a/include/jemalloc/internal/bitmap_inlines.h +++ b/include/jemalloc/internal/bitmap_inlines.h @@ -16,6 +16,12 @@ void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) JEMALLOC_INLINE bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { +#ifdef BITMAP_USE_TREE + size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; + bitmap_t rg = bitmap[rgoff]; + /* The bitmap is full iff the root group is 0. */ + return (rg == 0); +#else size_t i; for (i = 0; i < binfo->ngroups; i++) { @@ -24,6 +30,7 @@ bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { } } return true; +#endif } JEMALLOC_INLINE bool @@ -52,6 +59,24 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(bitmap_get(bitmap, binfo, bit)); +#ifdef BITMAP_USE_TREE + /* Propagate group state transitions up the tree. */ + if (g == 0) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (g != 0) { + break; + } + } + } +#endif } /* ffu: find first unset >= bit. */ @@ -59,6 +84,44 @@ JEMALLOC_INLINE size_t bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { assert(min_bit < binfo->nbits); +#ifdef BITMAP_USE_TREE + size_t bit = 0; + for (unsigned level = binfo->nlevels; level--;) { + size_t lg_bits_per_group = (LG_BITMAP_GROUP_NBITS * (level + + 1)); + bitmap_t group = bitmap[binfo->levels[level].group_offset + (bit + >> lg_bits_per_group)]; + unsigned group_nmask = ((min_bit > bit) ? (min_bit - bit) : 0) + >> (lg_bits_per_group - LG_BITMAP_GROUP_NBITS); + assert(group_nmask <= BITMAP_GROUP_NBITS); + bitmap_t group_mask = ~((1LU << group_nmask) - 1); + bitmap_t group_masked = group & group_mask; + if (group_masked == 0LU) { + if (group == 0LU) { + return binfo->nbits; + } + /* + * min_bit was preceded by one or more unset bits in + * this group, but there are no other unset bits in this + * group. Try again starting at the first bit of the + * next sibling. This will recurse at most once per + * non-root level. + */ + size_t sib_base = bit + (1U << lg_bits_per_group); + assert(sib_base > min_bit); + assert(sib_base > bit); + if (sib_base >= binfo->nbits) { + return binfo->nbits; + } + return bitmap_ffu(bitmap, binfo, sib_base); + } + bit += (ffs_lu(group_masked) - 1) << (lg_bits_per_group - + LG_BITMAP_GROUP_NBITS); + } + assert(bit >= min_bit); + assert(bit < binfo->nbits); + return bit; +#else size_t i = min_bit >> LG_BITMAP_GROUP_NBITS; bitmap_t g = bitmap[i] & ~((1LU << (min_bit & BITMAP_GROUP_NBITS_MASK)) - 1); @@ -72,6 +135,7 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { g = bitmap[i]; } while (i < binfo->ngroups); return binfo->nbits; +#endif } /* sfu: set first unset. */ @@ -83,6 +147,16 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { assert(!bitmap_full(bitmap, binfo)); +#ifdef BITMAP_USE_TREE + i = binfo->nlevels - 1; + g = bitmap[binfo->levels[i].group_offset]; + bit = ffs_lu(g) - 1; + while (i > 0) { + i--; + g = bitmap[binfo->levels[i].group_offset + bit]; + bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); + } +#else i = 0; g = bitmap[0]; while ((bit = ffs_lu(g)) == 0) { @@ -90,6 +164,7 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { g = bitmap[i]; } bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); +#endif bitmap_set(bitmap, binfo, bit); return bit; } @@ -111,6 +186,26 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(!bitmap_get(bitmap, binfo, bit)); +#ifdef BITMAP_USE_TREE + /* Propagate group state transitions up the tree. */ + if (propagate) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + propagate = (g == 0); + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) + == 0); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (!propagate) { + break; + } + } + } +#endif /* BITMAP_USE_TREE */ } #endif diff --git a/include/jemalloc/internal/bitmap_structs.h b/include/jemalloc/internal/bitmap_structs.h index dde15328..297ae669 100644 --- a/include/jemalloc/internal/bitmap_structs.h +++ b/include/jemalloc/internal/bitmap_structs.h @@ -10,8 +10,19 @@ struct bitmap_info_s { /* Logical number of bits in bitmap (stored at bottom level). */ size_t nbits; +#ifdef BITMAP_USE_TREE + /* Number of levels necessary for nbits. */ + unsigned nlevels; + + /* + * Only the first (nlevels+1) elements are used, and levels are ordered + * bottom to top (e.g. the bottom level is stored in levels[0]). + */ + bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; +#else /* BITMAP_USE_TREE */ /* Number of groups necessary for nbits. */ size_t ngroups; +#endif /* BITMAP_USE_TREE */ }; #endif /* JEMALLOC_INTERNAL_BITMAP_STRUCTS_H */ diff --git a/include/jemalloc/internal/bitmap_types.h b/include/jemalloc/internal/bitmap_types.h index 091ccead..b334769f 100644 --- a/include/jemalloc/internal/bitmap_types.h +++ b/include/jemalloc/internal/bitmap_types.h @@ -21,10 +21,115 @@ typedef unsigned long bitmap_t; #define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) #define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) +/* + * Do some analysis on how big the bitmap is before we use a tree. For a brute + * force linear search, if we would have to call ffs_lu() more than 2^3 times, + * use a tree instead. + */ +#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 +# define BITMAP_USE_TREE +#endif + /* Number of groups required to store a given number of bits. */ #define BITMAP_BITS2GROUPS(nbits) \ (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) +/* + * Number of groups required at a particular level for a given number of bits. + */ +#define BITMAP_GROUPS_L0(nbits) \ + BITMAP_BITS2GROUPS(nbits) +#define BITMAP_GROUPS_L1(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) +#define BITMAP_GROUPS_L2(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) +#define BITMAP_GROUPS_L3(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS((nbits))))) +#define BITMAP_GROUPS_L4(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))) + +/* + * Assuming the number of levels, number of groups required for a given number + * of bits. + */ +#define BITMAP_GROUPS_1_LEVEL(nbits) \ + BITMAP_GROUPS_L0(nbits) +#define BITMAP_GROUPS_2_LEVEL(nbits) \ + (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) +#define BITMAP_GROUPS_3_LEVEL(nbits) \ + (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) +#define BITMAP_GROUPS_4_LEVEL(nbits) \ + (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) +#define BITMAP_GROUPS_5_LEVEL(nbits) \ + (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits)) + +/* + * Maximum number of groups required to support LG_BITMAP_MAXBITS. + */ +#ifdef BITMAP_USE_TREE + +#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_1_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_2_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_3_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_4_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_5_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS) +#else +# error "Unsupported bitmap size" +#endif + +/* + * Maximum number of levels possible. This could be statically computed based + * on LG_BITMAP_MAXBITS: + * + * #define BITMAP_MAX_LEVELS \ + * (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + * + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) + * + * However, that would not allow the generic BITMAP_INFO_INITIALIZER() macro, so + * instead hardcode BITMAP_MAX_LEVELS to the largest number supported by the + * various cascading macros. The only additional cost this incurs is some + * unused trailing entries in bitmap_info_t structures; the bitmaps themselves + * are not impacted. + */ +#define BITMAP_MAX_LEVELS 5 + +#define BITMAP_INFO_INITIALIZER(nbits) { \ + /* nbits. */ \ + nbits, \ + /* nlevels. */ \ + (BITMAP_GROUPS_L0(nbits) > BITMAP_GROUPS_L1(nbits)) + \ + (BITMAP_GROUPS_L1(nbits) > BITMAP_GROUPS_L2(nbits)) + \ + (BITMAP_GROUPS_L2(nbits) > BITMAP_GROUPS_L3(nbits)) + \ + (BITMAP_GROUPS_L3(nbits) > BITMAP_GROUPS_L4(nbits)) + 1, \ + /* levels. */ \ + { \ + {0}, \ + {BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) + \ + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L3(nbits) + BITMAP_GROUPS_L2(nbits) + \ + BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L4(nbits) + BITMAP_GROUPS_L3(nbits) + \ + BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) \ + + BITMAP_GROUPS_L0(nbits)} \ + } \ +} + +#else /* BITMAP_USE_TREE */ + #define BITMAP_GROUPS(nbits) BITMAP_BITS2GROUPS(nbits) #define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) @@ -35,4 +140,6 @@ typedef unsigned long bitmap_t; BITMAP_BITS2GROUPS(nbits) \ } +#endif /* BITMAP_USE_TREE */ + #endif /* JEMALLOC_INTERNAL_BITMAP_TYPES_H */ diff --git a/src/bitmap.c b/src/bitmap.c index 275636b9..468b3178 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -6,6 +6,82 @@ /******************************************************************************/ +#ifdef BITMAP_USE_TREE + +void +bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { + unsigned i; + size_t group_count; + + assert(nbits > 0); + assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); + + /* + * Compute the number of groups necessary to store nbits bits, and + * progressively work upward through the levels until reaching a level + * that requires only one group. + */ + binfo->levels[0].group_offset = 0; + group_count = BITMAP_BITS2GROUPS(nbits); + for (i = 1; group_count > 1; i++) { + assert(i < BITMAP_MAX_LEVELS); + binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + + group_count; + group_count = BITMAP_BITS2GROUPS(group_count); + } + binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + + group_count; + assert(binfo->levels[i].group_offset <= BITMAP_GROUPS_MAX); + binfo->nlevels = i; + binfo->nbits = nbits; +} + +static size_t +bitmap_info_ngroups(const bitmap_info_t *binfo) { + return binfo->levels[binfo->nlevels].group_offset; +} + +void +bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) { + size_t extra; + unsigned i; + + /* + * Bits are actually inverted with regard to the external bitmap + * interface. + */ + + if (fill) { + /* The "filled" bitmap starts out with all 0 bits. */ + memset(bitmap, 0, bitmap_size(binfo)); + return; + } + + /* + * The "empty" bitmap starts out with all 1 bits, except for trailing + * unused bits (if any). Note that each group uses bit 0 to correspond + * to the first logical bit in the group, so extra bits are the most + * significant bits of the last group. + */ + memset(bitmap, 0xffU, bitmap_size(binfo)); + extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) + & BITMAP_GROUP_NBITS_MASK; + if (extra != 0) { + bitmap[binfo->levels[1].group_offset - 1] >>= extra; + } + for (i = 1; i < binfo->nlevels; i++) { + size_t group_count = binfo->levels[i].group_offset - + binfo->levels[i-1].group_offset; + extra = (BITMAP_GROUP_NBITS - (group_count & + BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; + if (extra != 0) { + bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; + } + } +} + +#else /* BITMAP_USE_TREE */ + void bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { assert(nbits > 0); @@ -37,6 +113,8 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) { } } +#endif /* BITMAP_USE_TREE */ + size_t bitmap_size(const bitmap_info_t *binfo) { return (bitmap_info_ngroups(binfo) << LG_SIZEOF_BITMAP); diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index f65ed53e..cafb2039 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -103,8 +103,24 @@ test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) { assert_zu_eq(binfo->nbits, binfo_dyn.nbits, "Unexpected difference between static and dynamic initialization, " "nbits=%zu", nbits); +#ifdef BITMAP_USE_TREE + assert_u_eq(binfo->nlevels, binfo_dyn.nlevels, + "Unexpected difference between static and dynamic initialization, " + "nbits=%zu", nbits); + { + unsigned i; + + for (i = 0; i < binfo->nlevels; i++) { + assert_zu_eq(binfo->levels[i].group_offset, + binfo_dyn.levels[i].group_offset, + "Unexpected difference between static and dynamic " + "initialization, nbits=%zu, level=%u", nbits, i); + } + } +#else assert_zu_eq(binfo->ngroups, binfo_dyn.ngroups, "Unexpected difference between static and dynamic initialization"); +#endif } TEST_BEGIN(test_bitmap_initializer) {