зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1656155 - pt 6. Add slop information to Replay.cpp r=glandium
Differential Revision: https://phabricator.services.mozilla.com/D88403
This commit is contained in:
Родитель
6655819c99
Коммит
de7f32735e
|
@ -76,13 +76,17 @@ class MappedArray {
|
|||
/* Type for records of allocations. */
|
||||
struct MemSlot {
|
||||
void* mPtr;
|
||||
|
||||
// mRequest is only valid if mPtr is non-null. It doesn't need to be cleared
|
||||
// when memory is freed or realloc()ed.
|
||||
size_t mRequest;
|
||||
};
|
||||
|
||||
/* An almost infinite list of slots.
|
||||
* In essence, this is a linked list of arrays of groups of slots.
|
||||
* Each group is 1MB. On 64-bits, one group allows to store 128k allocations.
|
||||
* Each group is 1MB. On 64-bits, one group allows to store 64k allocations.
|
||||
* Each MemSlotList instance can store 1023 such groups, which means more
|
||||
* than 130M allocations. In case more would be needed, we chain to another
|
||||
* than 67M allocations. In case more would be needed, we chain to another
|
||||
* MemSlotList, and so on.
|
||||
* Using 1023 groups makes the MemSlotList itself page sized on 32-bits
|
||||
* and 2 pages-sized on 64-bits.
|
||||
|
@ -294,7 +298,12 @@ size_t parseNumber(Buffer aBuf) {
|
|||
/* Class to handle dispatching the replay function calls to replace-malloc. */
|
||||
class Replay {
|
||||
public:
|
||||
Replay() : mOps(0) {
|
||||
Replay()
|
||||
: mOps(0),
|
||||
mNumUsedSlots(0),
|
||||
mTotalRequestedSize(0),
|
||||
mTotalAllocatedSize(0),
|
||||
mCalculateSlop(false) {
|
||||
#ifdef _WIN32
|
||||
// See comment in FdPrintf.h as to why native win32 handles are used.
|
||||
mStdErr = reinterpret_cast<intptr_t>(GetStdHandle(STD_ERROR_HANDLE));
|
||||
|
@ -303,6 +312,8 @@ class Replay {
|
|||
#endif
|
||||
}
|
||||
|
||||
void enableSlopCalculation() { mCalculateSlop = true; }
|
||||
|
||||
MemSlot& operator[](size_t index) const { return mSlots[index]; }
|
||||
|
||||
void malloc(Buffer& aArgs, Buffer& aResult) {
|
||||
|
@ -310,6 +321,13 @@ class Replay {
|
|||
mOps++;
|
||||
size_t size = parseNumber(aArgs);
|
||||
aSlot.mPtr = ::malloc_impl(size);
|
||||
if (aSlot.mPtr) {
|
||||
aSlot.mRequest = size;
|
||||
if (mCalculateSlop) {
|
||||
mTotalRequestedSize += size;
|
||||
mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void posix_memalign(Buffer& aArgs, Buffer& aResult) {
|
||||
|
@ -320,6 +338,11 @@ class Replay {
|
|||
void* ptr;
|
||||
if (::posix_memalign_impl(&ptr, alignment, size) == 0) {
|
||||
aSlot.mPtr = ptr;
|
||||
aSlot.mRequest = size;
|
||||
if (mCalculateSlop) {
|
||||
mTotalRequestedSize += size;
|
||||
mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
|
||||
}
|
||||
} else {
|
||||
aSlot.mPtr = nullptr;
|
||||
}
|
||||
|
@ -331,6 +354,13 @@ class Replay {
|
|||
size_t alignment = parseNumber(aArgs.SplitChar(','));
|
||||
size_t size = parseNumber(aArgs);
|
||||
aSlot.mPtr = ::aligned_alloc_impl(alignment, size);
|
||||
if (aSlot.mPtr) {
|
||||
aSlot.mRequest = size;
|
||||
if (mCalculateSlop) {
|
||||
mTotalRequestedSize += size;
|
||||
mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void calloc(Buffer& aArgs, Buffer& aResult) {
|
||||
|
@ -339,6 +369,13 @@ class Replay {
|
|||
size_t num = parseNumber(aArgs.SplitChar(','));
|
||||
size_t size = parseNumber(aArgs);
|
||||
aSlot.mPtr = ::calloc_impl(num, size);
|
||||
if (aSlot.mPtr) {
|
||||
aSlot.mRequest = num * size;
|
||||
if (mCalculateSlop) {
|
||||
mTotalRequestedSize += num * size;
|
||||
mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void realloc(Buffer& aArgs, Buffer& aResult) {
|
||||
|
@ -354,6 +391,13 @@ class Replay {
|
|||
void* old_ptr = old_slot.mPtr;
|
||||
old_slot.mPtr = nullptr;
|
||||
aSlot.mPtr = ::realloc_impl(old_ptr, size);
|
||||
if (aSlot.mPtr) {
|
||||
aSlot.mRequest = size;
|
||||
if (mCalculateSlop) {
|
||||
mTotalRequestedSize += size;
|
||||
mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void free(Buffer& aArgs, Buffer& aResult) {
|
||||
|
@ -377,6 +421,13 @@ class Replay {
|
|||
size_t alignment = parseNumber(aArgs.SplitChar(','));
|
||||
size_t size = parseNumber(aArgs);
|
||||
aSlot.mPtr = ::memalign_impl(alignment, size);
|
||||
if (aSlot.mPtr) {
|
||||
aSlot.mRequest = size;
|
||||
if (mCalculateSlop) {
|
||||
mTotalRequestedSize += size;
|
||||
mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void valloc(Buffer& aArgs, Buffer& aResult) {
|
||||
|
@ -384,6 +435,13 @@ class Replay {
|
|||
mOps++;
|
||||
size_t size = parseNumber(aArgs);
|
||||
aSlot.mPtr = ::valloc_impl(size);
|
||||
if (aSlot.mPtr) {
|
||||
aSlot.mRequest = size;
|
||||
if (mCalculateSlop) {
|
||||
mTotalRequestedSize += size;
|
||||
mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void jemalloc_stats(Buffer& aArgs, Buffer& aResult) {
|
||||
|
@ -394,7 +452,52 @@ class Replay {
|
|||
jemalloc_stats_t stats;
|
||||
jemalloc_bin_stats_t bin_stats[JEMALLOC_MAX_STATS_BINS];
|
||||
::jemalloc_stats_internal(&stats, bin_stats);
|
||||
|
||||
size_t num_objects = 0;
|
||||
size_t num_sloppy_objects = 0;
|
||||
size_t total_allocated = 0;
|
||||
size_t total_slop = 0;
|
||||
size_t large_slop = 0;
|
||||
size_t large_used = 0;
|
||||
size_t huge_slop = 0;
|
||||
size_t huge_used = 0;
|
||||
size_t bin_slop[JEMALLOC_MAX_STATS_BINS] = {0};
|
||||
|
||||
for (size_t slot_id = 0; slot_id < mNumUsedSlots; slot_id++) {
|
||||
MemSlot& slot = mSlots[slot_id];
|
||||
if (slot.mPtr) {
|
||||
size_t used = ::malloc_usable_size_impl(slot.mPtr);
|
||||
size_t slop = used - slot.mRequest;
|
||||
total_allocated += used;
|
||||
total_slop += slop;
|
||||
num_objects++;
|
||||
if (slop) {
|
||||
num_sloppy_objects++;
|
||||
}
|
||||
|
||||
if (used <= stats.page_size / 2) {
|
||||
// We know that this is an inefficient linear search, but there's a
|
||||
// small number of bins and this is simple.
|
||||
for (unsigned i = 0; i < JEMALLOC_MAX_STATS_BINS; i++) {
|
||||
auto& bin = bin_stats[i];
|
||||
if (used == bin.size) {
|
||||
bin_slop[i] += slop;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (used <= stats.large_max) {
|
||||
large_slop += slop;
|
||||
large_used += used;
|
||||
} else {
|
||||
huge_slop += slop;
|
||||
huge_used += used;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FdPrintf(mStdErr, "\n");
|
||||
FdPrintf(mStdErr, "Objects: %9zu\n", num_objects);
|
||||
FdPrintf(mStdErr, "Slots: %9zu\n", mNumUsedSlots);
|
||||
FdPrintf(mStdErr, "Ops: %9zu\n", mOps);
|
||||
FdPrintf(mStdErr, "mapped: %9zu\n", stats.mapped);
|
||||
FdPrintf(mStdErr, "allocated: %9zu\n", stats.allocated);
|
||||
|
@ -405,6 +508,19 @@ class Replay {
|
|||
FdPrintf(mStdErr, "quantum-max: %9zu\n", stats.quantum_max);
|
||||
FdPrintf(mStdErr, "subpage-max: %9zu\n", stats.page_size / 2);
|
||||
FdPrintf(mStdErr, "large-max: %9zu\n", stats.large_max);
|
||||
if (mCalculateSlop) {
|
||||
size_t slop = mTotalAllocatedSize - mTotalRequestedSize;
|
||||
FdPrintf(mStdErr,
|
||||
"Total slop for all allocations: %zuKiB/%zuKiB (%zu%%)\n",
|
||||
slop / 1024, mTotalAllocatedSize / 1024,
|
||||
percent(slop, mTotalAllocatedSize));
|
||||
}
|
||||
FdPrintf(mStdErr, "Live sloppy objects: %zu/%zu (%zu%%)\n",
|
||||
num_sloppy_objects, num_objects,
|
||||
percent(num_sloppy_objects, num_objects));
|
||||
FdPrintf(mStdErr, "Live sloppy bytes: %zuKiB/%zuKiB (%zu%%)\n",
|
||||
total_slop / 1024, total_allocated / 1024,
|
||||
percent(total_slop, total_allocated));
|
||||
|
||||
FdPrintf(mStdErr, "\n%8s %11s %10s %8s %9s %9s %8s\n", "bin-size",
|
||||
"unused (c)", "total (c)", "used (c)", "non-full (r)", "total (r)",
|
||||
|
@ -419,6 +535,20 @@ class Replay {
|
|||
}
|
||||
}
|
||||
|
||||
FdPrintf(mStdErr, "\n%5s %8s %9s %7s\n", "bin", "slop", "used", "percent");
|
||||
for (unsigned i = 0; i < JEMALLOC_MAX_STATS_BINS; i++) {
|
||||
auto& bin = bin_stats[i];
|
||||
if (bin.size) {
|
||||
size_t used = bin.bytes_total - bin.bytes_unused;
|
||||
FdPrintf(mStdErr, "%5zu %8zu %9zu %6zu%%\n", bin.size, bin_slop[i],
|
||||
used, percent(bin_slop[i], used));
|
||||
}
|
||||
}
|
||||
FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "large", large_slop, large_used,
|
||||
percent(large_slop, large_used));
|
||||
FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "huge", huge_slop, huge_used,
|
||||
percent(huge_slop, huge_used));
|
||||
|
||||
/* TODO: Add more data, like actual RSS as measured by OS, but compensated
|
||||
* for the replay internal data. */
|
||||
}
|
||||
|
@ -440,19 +570,41 @@ class Replay {
|
|||
}
|
||||
|
||||
size_t slot_id = parseNumber(aResult);
|
||||
mNumUsedSlots = std::max(mNumUsedSlots, slot_id + 1);
|
||||
|
||||
return mSlots[slot_id];
|
||||
}
|
||||
|
||||
intptr_t mStdErr;
|
||||
size_t mOps;
|
||||
|
||||
// The number of slots that have been used. It is used to iterate over slots
|
||||
// without accessing those we haven't initialised.
|
||||
size_t mNumUsedSlots;
|
||||
|
||||
MemSlotList mSlots;
|
||||
size_t mTotalRequestedSize;
|
||||
size_t mTotalAllocatedSize;
|
||||
// Whether to calculate slop for all allocations over the runtime of a
|
||||
// process.
|
||||
bool mCalculateSlop;
|
||||
};
|
||||
|
||||
int main() {
|
||||
int main(int argc, const char* argv[]) {
|
||||
size_t first_pid = 0;
|
||||
FdReader reader(0);
|
||||
Replay replay;
|
||||
|
||||
for (int i = 1; i < argc; i++) {
|
||||
const char *option = argv[i];
|
||||
if (strcmp(option, "-s") == 0) {
|
||||
replay.enableSlopCalculation();
|
||||
} else {
|
||||
fprintf(stderr, "Unknown command line option: %s\n", option);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Read log from stdin and dispatch function calls to the Replay instance.
|
||||
* The log format is essentially:
|
||||
* <pid> <tid> <function>([<args>])[=<result>]
|
||||
|
|
Загрузка…
Ссылка в новой задаче