Bug 1669392 - Add more jemalloc size classes r=glandium

Differential Revision: https://phabricator.services.mozilla.com/D92729
This commit is contained in:
Paul Bone 2021-10-13 06:53:20 +00:00
Parent 16f869402f
Commit cc06afa32e
5 changed files: 198 additions and 90 deletions

Просмотреть файл

@ -20,6 +20,20 @@ struct Log2 : mozilla::tl::CeilingLog2<N> {
};
#define LOG2(N) Log2<N>::value
// Like Log2, but ignores 0: LOG2_OR_0(0) yields 0 instead of failing to
// instantiate. For any nonzero N, N must be an exact power of two.
template <size_t N>
struct Log2Or0 : mozilla::tl::CeilingLog2<N> {
using mozilla::tl::CeilingLog2<N>::value;
// CeilingLog2 equals the exact log2 only when N is a power of two, so this
// rejects every other N at compile time.
static_assert(1ULL << value == N, "Number is not a power of 2");
};
template <>
struct Log2Or0<0> {
// This makes no sense but neither does any other value. It's just enough
// that this can be used on the unused side of a conditional expression.
static const size_t value = 0;
};
#define LOG2_OR_0(N) Log2Or0<N>::value
enum class Order {
eLess = -1,
eEqual = 0,

Просмотреть файл

@ -52,40 +52,57 @@
//
// Allocation requests are rounded up to the nearest size class, and no record
// of the original request size is maintained. Allocations are broken into
// categories according to size class. Assuming runtime defaults, 4 kB pages
// and a 16 byte quantum on a 32-bit system, the size classes in each category
// are as follows:
// categories according to size class. Assuming runtime defaults, the size
// classes in each category are as follows (for x86, x86_64 and Apple Silicon):
//
// |=====================================|
// | Category | Subcategory | Size |
// |=====================================|
// | Small | Tiny | 4 |
// | | | 8 |
// | |----------------+---------|
// | | Quantum-spaced | 16 |
// | | | 32 |
// | | | 48 |
// | | | ... |
// | | | 480 |
// | | | 496 |
// | | | 512 |
// | |----------------+---------|
// | | Sub-page | 1 kB |
// | | | 2 kB |
// |=====================================|
// | Large | 4 kB |
// | | 8 kB |
// | | 12 kB |
// | | ... |
// | | 1012 kB |
// | | 1016 kB |
// | | 1020 kB |
// |=====================================|
// | Huge | 1 MB |
// | | 2 MB |
// | | 3 MB |
// | | ... |
// |=====================================|
// |===============================================================|
// | Category | Subcategory | x86 | x86_64 | Apple Silicon |
// |---------------------------+---------+---------+---------------+
// | Word size | 32 bit | 64 bit | 64 bit |
// | Page size | 4 Kb | 4 Kb | 16 Kb |
// |===============================================================|
// | Small | Tiny | 4/- | - | - |
// | | | 8 | 8/- | 8 |
// | |----------------+---------|---------|---------------|
// | | Quantum-spaced | 16 | 16 | 16 |
// | | | 32 | 32 | 32 |
// | | | 48 | 48 | 48 |
// | | | ... | ... | ... |
// | | | 480 | 480 | 480 |
// | | | 496 | 496 | 496 |
// | |----------------+---------|---------|---------------|
// | | Quantum-wide- | 512 | 512 | 512 |
// | | spaced | 768 | 768 | 768 |
// | | | ... | ... | ... |
// | | | 3584 | 3584 | 3584 |
// | | | 3840 | 3840 | 3840 |
// | |----------------+---------|---------|---------------|
// | | Sub-page | - | - | 4096 |
// | | | - | - | 8 kB |
// |===============================================================|
// | Large | 4 kB | 4 kB | - |
// | | 8 kB | 8 kB | - |
// | | 12 kB | 12 kB | - |
// | | 16 kB | 16 kB | 16 kB |
// | | ... | ... | - |
// | | 32 kB | 32 kB | 32 kB |
// | | ... | ... | ... |
// | | 1008 kB | 1008 kB | 1008 kB |
// | | 1012 kB | 1012 kB | - |
// |          |                | 1016 kB | 1016 kB |       -       |
// | | 1020 kB | 1020 kB | - |
// |===============================================================|
// | Huge | 1 MB | 1 MB | 1 MB |
// | | 2 MB | 2 MB | 2 MB |
// | | 3 MB | 3 MB | 3 MB |
// | | ... | ... | ... |
// |===============================================================|
//
// Legend:
// n: Size class exists for this platform.
// n/-: This size class doesn't exist on Windows (see kMinTinyClass).
// -: This size class doesn't exist for this platform.
// ...: Size classes follow a pattern here.
//
// NOTE: Due to Mozilla bug 691003, we cannot reserve less than one word for an
// allocation on Linux or Mac. So on 32-bit *nix, the smallest bucket size is
@ -377,6 +394,10 @@ struct arena_chunk_t {
// negatively affect performance.
static const size_t kCacheLineSize = 64;
// Our size classes are inclusive ranges of memory sizes. By describing the
// minimums and how memory is allocated in each range the maximums can be
// calculated.
// Smallest size class to support. On Windows the smallest allocation size
// must be 8 bytes on 32-bit, 16 bytes on 64-bit. On Linux and Mac, even
// malloc(1) must reserve a word's worth of memory (see Mozilla bug 691003).
@ -389,28 +410,47 @@ static const size_t kMinTinyClass = sizeof(void*);
// Maximum tiny size class.
static const size_t kMaxTinyClass = 8;
// Amount (quantum) separating quantum-spaced size classes.
static const size_t kQuantum = 16;
static const size_t kQuantumMask = kQuantum - 1;
// Smallest quantum-spaced size classes. It could actually also be labelled a
// tiny allocation, and is spaced as such from the largest tiny size class.
// Tiny classes being powers of 2, this is twice as large as the largest of
// them.
static const size_t kMinQuantumClass = kMaxTinyClass * 2;
static const size_t kMinQuantumWideClass = 512;
static const size_t kMinSubPageClass = 4_KiB;
// Largest quantum-spaced size classes.
static const size_t kMaxQuantumClass = 512;
// Amount (quantum) separating quantum-spaced size classes.
static const size_t kQuantum = 16;
static const size_t kQuantumMask = kQuantum - 1;
static const size_t kQuantumWide = 256;
static const size_t kQuantumWideMask = kQuantumWide - 1;
static const size_t kMaxQuantumClass = kMinQuantumWideClass - kQuantum;
static const size_t kMaxQuantumWideClass = kMinSubPageClass - kQuantumWide;
// We can optimise some divisions to shifts if these are powers of two.
static_assert(mozilla::IsPowerOfTwo(kQuantum),
"kQuantum is not a power of two");
static_assert(mozilla::IsPowerOfTwo(kQuantumWide),
"kQuantumWide is not a power of two");
static_assert(kMaxQuantumClass % kQuantum == 0,
"kMaxQuantumClass is not a multiple of kQuantum");
static_assert(kMaxQuantumWideClass % kQuantumWide == 0,
"kMaxQuantumWideClass is not a multiple of kQuantumWide");
static_assert(kQuantum < kQuantumWide,
"kQuantum must be smaller than kQuantumWide");
static_assert(mozilla::IsPowerOfTwo(kMinSubPageClass),
"kMinSubPageClass is not a power of two");
// Number of (2^n)-spaced tiny classes.
static const size_t kNumTinyClasses =
LOG2(kMinQuantumClass) - LOG2(kMinTinyClass);
LOG2(kMaxTinyClass) - LOG2(kMinTinyClass) + 1;
// Number of quantum-spaced classes.
static const size_t kNumQuantumClasses = kMaxQuantumClass / kQuantum;
static const size_t kNumQuantumClasses =
(kMaxQuantumClass - kMinQuantumClass) / kQuantum + 1;
static const size_t kNumQuantumWideClasses =
(kMaxQuantumWideClass - kMinQuantumWideClass) / kQuantumWide + 1;
// Size and alignment of memory chunks that are allocated by the OS's virtual
// memory system.
@ -443,6 +483,7 @@ static size_t gPageSize;
# define END_GLOBALS
# define DEFINE_GLOBAL(type) static const type
# define GLOBAL_LOG2 LOG2
# define GLOBAL_LOG2_OR_0 LOG2_OR_0
# define GLOBAL_ASSERT_HELPER1(x) static_assert(x, # x)
# define GLOBAL_ASSERT_HELPER2(x, y) static_assert(x, y)
# define GLOBAL_ASSERT(...) \
@ -455,6 +496,7 @@ static size_t gPageSize;
# define END_GLOBALS }
# define DEFINE_GLOBAL(type)
# define GLOBAL_LOG2 FloorLog2
# define GLOBAL_LOG2_OR_0 FloorLog2
# define GLOBAL_ASSERT MOZ_RELEASE_ASSERT
#endif
@ -467,15 +509,21 @@ DECLARE_GLOBAL(size_t, gChunkHeaderNumPages)
DECLARE_GLOBAL(size_t, gMaxLargeClass)
DEFINE_GLOBALS
// Largest sub-page size class.
DEFINE_GLOBAL(size_t) gMaxSubPageClass = gPageSize / 2;
// Largest sub-page size class, or zero if there are none.
DEFINE_GLOBAL(size_t)
gMaxSubPageClass = gPageSize / 2 >= kMinSubPageClass ? gPageSize / 2 : 0;
// Max size class for bins.
#define gMaxBinClass gMaxSubPageClass
#define gMaxBinClass \
(gMaxSubPageClass ? gMaxSubPageClass : kMaxQuantumWideClass)
// Number of (2^n)-spaced sub-page bins.
// Number of sub-page bins.
DEFINE_GLOBAL(uint8_t)
gNumSubPageClasses = GLOBAL_LOG2(gMaxSubPageClass) - LOG2(kMaxQuantumClass);
gNumSubPageClasses =
static_cast<uint8_t>(gMaxSubPageClass ? GLOBAL_LOG2_OR_0(gMaxSubPageClass) -
LOG2(kMinSubPageClass) + 1
: 0);
DEFINE_GLOBAL(uint8_t) gPageSize2Pow = GLOBAL_LOG2(gPageSize);
DEFINE_GLOBAL(size_t) gPageSizeMask = gPageSize - 1;
@ -500,9 +548,16 @@ gMaxLargeClass =
GLOBAL_ASSERT(1ULL << gPageSize2Pow == gPageSize,
"Page size is not a power of two");
GLOBAL_ASSERT(kQuantum >= sizeof(void*));
GLOBAL_ASSERT(kQuantum <= gPageSize);
GLOBAL_ASSERT(kQuantum <= kQuantumWide);
GLOBAL_ASSERT(kQuantumWide <= (kMinSubPageClass - kMaxQuantumClass));
GLOBAL_ASSERT(kQuantumWide <= kMaxQuantumClass);
GLOBAL_ASSERT(gMaxSubPageClass >= kMinSubPageClass || gMaxSubPageClass == 0);
GLOBAL_ASSERT(gMaxLargeClass >= gMaxSubPageClass);
GLOBAL_ASSERT(kChunkSize >= gPageSize);
GLOBAL_ASSERT(kQuantum * 4 <= kChunkSize);
END_GLOBALS
// Recycle at most 128 MiB of chunks. This means we retain at most
@ -526,13 +581,19 @@ static size_t opt_dirty_max = DIRTY_MAX_DEFAULT;
// Return the smallest quantum multiple that is >= a.
#define QUANTUM_CEILING(a) (((a) + (kQuantumMask)) & ~(kQuantumMask))
#define QUANTUM_WIDE_CEILING(a) \
(((a) + (kQuantumWideMask)) & ~(kQuantumWideMask))
// Return the smallest sub page-size that is >= a.
#define SUBPAGE_CEILING(a) (RoundUpPow2(a))
// Return the smallest pagesize multiple that is >= s.
#define PAGE_CEILING(s) (((s) + gPageSizeMask) & ~gPageSizeMask)
// Number of all the small-allocated classes
#define NUM_SMALL_CLASSES \
(kNumTinyClasses + kNumQuantumClasses + gNumSubPageClasses)
#define NUM_SMALL_CLASSES \
(kNumTinyClasses + kNumQuantumClasses + kNumQuantumWideClasses + \
gNumSubPageClasses)
// ***************************************************************************
// MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are mutually exclusive.
@ -658,6 +719,7 @@ class SizeClass {
enum ClassType {
Tiny,
Quantum,
QuantumWide,
SubPage,
Large,
};
@ -669,9 +731,12 @@ class SizeClass {
} else if (aSize <= kMaxQuantumClass) {
mType = Quantum;
mSize = QUANTUM_CEILING(aSize);
} else if (aSize <= kMaxQuantumWideClass) {
mType = QuantumWide;
mSize = QUANTUM_WIDE_CEILING(aSize);
} else if (aSize <= gMaxSubPageClass) {
mType = SubPage;
mSize = RoundUpPow2(aSize);
mSize = SUBPAGE_CEILING(aSize);
} else if (aSize <= gMaxLargeClass) {
mType = Large;
mSize = PAGE_CEILING(aSize);
@ -878,7 +943,10 @@ struct arena_bin_t {
// 304 12 KiB 320 12 KiB 336 4 KiB 352 8 KiB
// 368 4 KiB 384 8 KiB 400 20 KiB 416 16 KiB
// 432 12 KiB 448 4 KiB 464 16 KiB 480 8 KiB
// 496 20 KiB 512 32 KiB 1024 64 KiB 2048 128 KiB
// 496 20 KiB 512 32 KiB 768 16 KiB 1024 64 KiB
// 1280 24 KiB 1536 32 KiB 1792 16 KiB 2048 128 KiB
// 2304 16 KiB 2560 48 KiB 2816 36 KiB 3072 64 KiB
// 3328 36 KiB 3584 32 KiB 3840 64 KiB
inline void Init(SizeClass aSizeClass);
};
@ -972,8 +1040,12 @@ struct arena_t {
// 33 | 496 |
// 34 | 512 |
// --------+------+
// 35 | 1024 |
// 36 | 2048 |
// 35 | 768 |
// 36 | 1024 |
// : :
// : :
// 46 | 3584 |
// 47 | 3840 |
// --------+------+
arena_bin_t mBins[1]; // Dynamically sized.
@ -2821,11 +2893,21 @@ void* arena_t::MallocSmall(size_t aSize, bool aZero) {
bin = &mBins[FloorLog2(aSize / kMinTinyClass)];
break;
case SizeClass::Quantum:
bin = &mBins[kNumTinyClasses + (aSize / kQuantum) - 1];
// Although we divide 2 things by kQuantum, the compiler will
// reduce `kMinQuantumClass / kQuantum` and `kNumTinyClasses` to a
// single constant.
bin = &mBins[kNumTinyClasses + (aSize / kQuantum) -
(kMinQuantumClass / kQuantum)];
break;
case SizeClass::QuantumWide:
bin =
&mBins[kNumTinyClasses + kNumQuantumClasses + (aSize / kQuantumWide) -
(kMinQuantumWideClass / kQuantumWide)];
break;
case SizeClass::SubPage:
bin = &mBins[kNumTinyClasses + kNumQuantumClasses +
(FloorLog2(aSize / kMaxQuantumClass) - 1)];
bin =
&mBins[kNumTinyClasses + kNumQuantumClasses + kNumQuantumWideClasses +
(FloorLog2(aSize) - LOG2(kMinSubPageClass))];
break;
default:
MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("Unexpected size class type");
@ -3558,8 +3640,8 @@ arena_t::arena_t(arena_params_t* aParams, bool aIsPrivate) {
arena_bin_t& bin = mBins[i];
bin.Init(sizeClass);
// SizeClass doesn't want sizes larger than gMaxSubPageClass for now.
if (sizeClass.Size() == gMaxSubPageClass) {
// SizeClass doesn't want sizes larger than gMaxBinClass for now.
if (sizeClass.Size() == gMaxBinClass) {
break;
}
sizeClass = sizeClass.Next();
@ -4253,6 +4335,9 @@ inline void MozJemalloc::jemalloc_stats_internal(
aStats->opt_zero = opt_zero;
aStats->quantum = kQuantum;
aStats->quantum_max = kMaxQuantumClass;
aStats->quantum_wide = kQuantumWide;
aStats->quantum_wide_max = kMaxQuantumWideClass;
aStats->subpage_max = gMaxSubPageClass;
aStats->large_max = gMaxLargeClass;
aStats->chunksize = kChunkSize;
aStats->page_size = gPageSize;

Просмотреть файл

@ -77,16 +77,18 @@ typedef struct arena_params_s {
// file.
typedef struct {
// Run-time configuration settings.
bool opt_junk; // Fill allocated memory with kAllocJunk?
bool opt_zero; // Fill allocated memory with 0x0?
size_t narenas; // Number of arenas.
size_t quantum; // Allocation quantum.
size_t quantum_max; // Max quantum-spaced allocation size.
// The next size class, sub-pagesize's max is always page_size/2.
size_t large_max; // Max sub-chunksize allocation size.
size_t chunksize; // Size of each virtual memory mapping.
size_t page_size; // Size of pages.
size_t dirty_max; // Max dirty pages per arena.
bool opt_junk; // Fill allocated memory with kAllocJunk?
bool opt_zero; // Fill allocated memory with 0x0?
size_t narenas; // Number of arenas.
size_t quantum; // Allocation quantum.
size_t quantum_max; // Max quantum-spaced allocation size.
size_t quantum_wide;      // Allocation quantum (QuantumWide).
size_t quantum_wide_max; // Max quantum-wide-spaced allocation size.
size_t subpage_max; // Max subpage allocation size.
size_t large_max; // Max sub-chunksize allocation size.
size_t chunksize; // Size of each virtual memory mapping.
size_t page_size; // Size of pages.
size_t dirty_max; // Max dirty pages per arena.
// Current memory usage statistics.
size_t mapped; // Bytes mapped (not necessarily committed).
@ -111,7 +113,8 @@ typedef struct {
size_t bytes_per_run; // The number of bytes per run, including headers.
} jemalloc_bin_stats_t;
#define JEMALLOC_MAX_STATS_BINS 40
// This is the total number of bins.
#define JEMALLOC_MAX_STATS_BINS 51
enum PtrInfoTag {
// The pointer is not currently known to the allocator.

Просмотреть файл

@ -136,9 +136,10 @@ TEST(Jemalloc, PtrInfo)
jemalloc_ptr_info_t info;
Vector<char*> small, large, huge;
// For small (<= 2KiB) allocations, test every position within many possible
// sizes.
size_t small_max = stats.page_size / 2;
// For small (less than half the page size) allocations, test every position
// within many possible sizes.
size_t small_max =
stats.subpage_max ? stats.subpage_max : stats.quantum_wide_max;
for (size_t n = 0; n <= small_max; n += 8) {
auto p = (char*)moz_arena_malloc(arenaId, n);
size_t usable = moz_malloc_size_of(p);
@ -149,7 +150,7 @@ TEST(Jemalloc, PtrInfo)
}
}
// Similar for large (2KiB + 1 KiB .. 1MiB - 8KiB) allocations.
// Similar for large (small_max + 1 KiB .. 1MiB - 8KiB) allocations.
for (size_t n = small_max + 1_KiB; n <= stats.large_max; n += 1_KiB) {
auto p = (char*)moz_arena_malloc(arenaId, n);
size_t usable = moz_malloc_size_of(p);
@ -199,7 +200,7 @@ TEST(Jemalloc, PtrInfo)
// the former.
ASSERT_TRUE(isFreedAlloc != 0);
ASSERT_TRUE(isFreedPage != 0);
ASSERT_TRUE(isFreedAlloc / isFreedPage > 10);
ASSERT_TRUE(isFreedAlloc / isFreedPage > 8);
// Free the large allocations and recheck them.
len = large.length();
@ -277,7 +278,7 @@ TEST(Jemalloc, PtrInfo)
moz_dispose_arena(arenaId);
}
size_t sSizes[] = {1, 42, 79, 918, 1.5_KiB,
size_t sSizes[] = {1, 42, 79, 918, 1.4_KiB,
73_KiB, 129_KiB, 1.1_MiB, 2.6_MiB, 5.1_MiB};
TEST(Jemalloc, Arenas)

Просмотреть файл

@ -802,7 +802,8 @@ class Replay {
num_sloppy_objects++;
}
if (used <= stats.page_size / 2) {
if (used <=
(stats.subpage_max ? stats.subpage_max : stats.quantum_wide_max)) {
// We know that this is an inefficient linear search, but there's a
// small number of bins and this is simple.
for (unsigned i = 0; i < JEMALLOC_MAX_STATS_BINS; i++) {
@ -828,24 +829,25 @@ class Replay {
stats.bookkeeping + stats.bin_unused;
FdPrintf(mStdErr, "\n");
FdPrintf(mStdErr, "Objects: %9zu\n", num_objects);
FdPrintf(mStdErr, "Slots: %9zu\n", mNumUsedSlots);
FdPrintf(mStdErr, "Ops: %9zu\n", mOps);
FdPrintf(mStdErr, "mapped: %9zu\n", stats.mapped);
FdPrintf(mStdErr, "committed: %9zu\n", committed);
FdPrintf(mStdErr, "Objects: %9zu\n", num_objects);
FdPrintf(mStdErr, "Slots: %9zu\n", mNumUsedSlots);
FdPrintf(mStdErr, "Ops: %9zu\n", mOps);
FdPrintf(mStdErr, "mapped: %9zu\n", stats.mapped);
FdPrintf(mStdErr, "committed: %9zu\n", committed);
#ifdef XP_LINUX
if (rss) {
FdPrintf(mStdErr, "rss: %9zu\n", rss);
FdPrintf(mStdErr, "rss: %9zu\n", rss);
}
#endif
FdPrintf(mStdErr, "allocated: %9zu\n", stats.allocated);
FdPrintf(mStdErr, "waste: %9zu\n", stats.waste);
FdPrintf(mStdErr, "dirty: %9zu\n", stats.page_cache);
FdPrintf(mStdErr, "bookkeep: %9zu\n", stats.bookkeeping);
FdPrintf(mStdErr, "bin-unused: %9zu\n", stats.bin_unused);
FdPrintf(mStdErr, "quantum-max: %9zu\n", stats.quantum_max);
FdPrintf(mStdErr, "subpage-max: %9zu\n", stats.page_size / 2);
FdPrintf(mStdErr, "large-max: %9zu\n", stats.large_max);
FdPrintf(mStdErr, "allocated: %9zu\n", stats.allocated);
FdPrintf(mStdErr, "waste: %9zu\n", stats.waste);
FdPrintf(mStdErr, "dirty: %9zu\n", stats.page_cache);
FdPrintf(mStdErr, "bookkeep: %9zu\n", stats.bookkeeping);
FdPrintf(mStdErr, "bin-unused: %9zu\n", stats.bin_unused);
FdPrintf(mStdErr, "quantum-max: %9zu\n", stats.quantum_max);
FdPrintf(mStdErr, "quantum-wide-max: %9zu\n", stats.quantum_wide_max);
FdPrintf(mStdErr, "subpage-max: %9zu\n", stats.subpage_max);
FdPrintf(mStdErr, "large-max: %9zu\n", stats.large_max);
if (mCalculateSlop) {
size_t slop = mTotalAllocatedSize - mTotalRequestedSize;
FdPrintf(mStdErr,
@ -916,6 +918,9 @@ class Replay {
} else if (bin.size <= stats.quantum_max) {
// 4 buckets, (4 bytes per bucket with a 16 byte quantum).
dist = Distribution(bin.size, last_size, stats.quantum / 4);
} else if (bin.size <= stats.quantum_wide_max) {
// 8 buckets, (32 bytes per bucket with a 256 byte quantum-wide).
dist = Distribution(bin.size, last_size, stats.quantum_wide / 8);
} else {
// 16 buckets.
dist = Distribution(bin.size, last_size, (bin.size - last_size) / 16);