bug 424040. add valgrind hooks to jemalloc. patch from Jason Evans <jasone@canonware.com> r=me

Author: pavlov@pavlov.net
Date:   2008-04-08 00:19:40 -07:00
Parent b533dd5542
Commit a82c2e36c8
2 changed files with 73 additions and 194 deletions

View file

@@ -6144,7 +6144,7 @@ dnl ========================================================
dnl = Enable jemalloc
dnl ========================================================
MOZ_ARG_ENABLE_BOOL(jemalloc,
[ --enable-jemalloc Replace memory allocator with jemalloc],
[ --enable-jemalloc Replace memory allocator with jemalloc],
MOZ_MEMORY=1,
MOZ_MEMORY=)
@@ -6242,6 +6242,14 @@ if test "$MOZ_MEMORY"; then
AC_MSG_ERROR([--enable-jemalloc not supported on ${target}])
;;
esac
AC_ARG_WITH([valgrind],
[ --with-valgrind Enable valgrind integration hooks],
[enable_valgrind="yes"], [enable_valgrind="no"])
AC_CHECK_HEADER([valgrind/valgrind.h], [], [enable_valgrind="no"])
if test "x$enable_valgrind" = "xyes" ; then
AC_DEFINE(MOZ_VALGRIND)
fi
fi
AC_SUBST(MOZ_MEMORY)
AC_SUBST(WIN32_CRT_SRC_DIR)

View file

@@ -126,12 +126,16 @@
# define MALLOC_SYSV
#endif
/*
* MALLOC_LAZY_FREE enables the use of a per-thread vector of slots that free()
* can atomically stuff object pointers into. This can reduce arena lock
* contention.
*/
/* #define MALLOC_LAZY_FREE */
/* Embed no-op macros that support memory allocation tracking via valgrind. */
#ifdef MOZ_VALGRIND
# define MALLOC_VALGRIND
#endif
#ifdef MALLOC_VALGRIND
# include <valgrind/valgrind.h>
#else
# define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed)
# define VALGRIND_FREELIKE_BLOCK(addr, rzB)
#endif
/*
* MALLOC_BALANCE enables monitoring of arena lock contention and dynamically
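The block above is the heart of the patch: configure defines MOZ_VALGRIND, jemalloc.c maps it onto MALLOC_VALGRIND and pulls in valgrind/valgrind.h, and non-valgrind builds get no-op versions of the two client-request macros so every call site compiles away. A minimal sketch of the same pattern applied to a toy allocator (the pool_* names are hypothetical, not part of this patch):

#include <stddef.h>

#ifdef MOZ_VALGRIND
#  include <valgrind/valgrind.h>
#else
#  define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed)
#  define VALGRIND_FREELIKE_BLOCK(addr, rzB)
#endif

/* Hypothetical bump allocator carving objects out of one static arena. */
static char pool[1 << 20];
static size_t pool_used;

static void *
pool_alloc(size_t size)
{
	void *ret = &pool[pool_used];

	pool_used += (size + 15) & ~(size_t)15;
	/* Tell Memcheck this range is now a live, undefined heap block. */
	VALGRIND_MALLOCLIKE_BLOCK(ret, size, /* rzB */ 0, /* is_zeroed */ 0);
	return ret;
}

static void
pool_free(void *ptr)
{
	/* Tell Memcheck the block is dead; later accesses get reported. */
	VALGRIND_FREELIKE_BLOCK(ptr, /* rzB */ 0);
	(void)ptr;	/* Unused when the macros compile away. */
}

Because the fallback macros expand to nothing, the annotations can sit directly in hot paths without a per-call #ifdef, which is how the rest of this patch is structured.
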
@@ -409,10 +413,6 @@ static const bool __isthreaded = true;
/* MALLOC_BALANCE requires TLS. */
# ifdef MALLOC_BALANCE
# undef MALLOC_BALANCE
# endif
/* MALLOC_LAZY_FREE requires TLS. */
# ifdef MALLOC_LAZY_FREE
# undef MALLOC_LAZY_FREE
# endif
#endif
@@ -473,19 +473,6 @@ static const bool __isthreaded = true;
#define RUN_MAX_SMALL_2POW 15
#define RUN_MAX_SMALL (1U << RUN_MAX_SMALL_2POW)
#ifdef MALLOC_LAZY_FREE
/* Default size of each arena's lazy free cache. */
# define LAZY_FREE_2POW_DEFAULT 8
/*
* Number of pseudo-random probes to conduct before considering the cache to
* be overly full. It takes on average n probes to detect fullness of
* (n-1)/n. However, we are effectively doing multiple non-independent
* trials (each deallocation is a trial), so the actual average threshold
* for clearing the cache is somewhat lower.
*/
# define LAZY_FREE_NPROBES 5
#endif
/*
* Hyper-threaded CPUs may need a special instruction inside spin loops in
* order to yield to another virtual CPU. If no such instruction is defined
@@ -863,16 +850,6 @@ struct arena_s {
uint32_t contention;
#endif
#ifdef MALLOC_LAZY_FREE
/*
* Deallocation of small objects can be lazy, in which case free_cache
* stores pointers to those objects that have not yet been deallocated.
* In order to avoid lock contention, slots are chosen randomly. Empty
* slots contain NULL.
*/
void **free_cache;
#endif
/*
* bins is used to store rings of free regions of the following sizes,
* assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
@@ -1063,9 +1040,6 @@ static bool opt_dss = true;
static bool opt_mmap = true;
#endif
static size_t opt_dirty_max = DIRTY_MAX_DEFAULT;
#ifdef MALLOC_LAZY_FREE
static int opt_lazy_free_2pow = LAZY_FREE_2POW_DEFAULT;
#endif
#ifdef MALLOC_BALANCE
static uint64_t opt_balance_threshold = BALANCE_THRESHOLD_DEFAULT;
#endif
@@ -1178,10 +1152,6 @@ static void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
static void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
size_t alloc_size);
static size_t arena_salloc(const void *ptr);
#ifdef MALLOC_LAZY_FREE
static void arena_dalloc_lazy_hard(arena_t *arena, arena_chunk_t *chunk,
void *ptr, size_t pageind, arena_chunk_map_t *mapelm);
#endif
static void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk,
void *ptr);
static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
@@ -1472,7 +1442,7 @@ pow2_ceil(size_t x)
return (x);
}
#if (defined(MALLOC_LAZY_FREE) || defined(MALLOC_BALANCE))
#ifdef MALLOC_BALANCE
/*
* Use a simple linear congruential pseudo-random number generator:
*
@@ -1521,12 +1491,6 @@ prn_##suffix(uint32_t lg_range) \
* problems.
*/
#ifdef MALLOC_LAZY_FREE
/* Define the per-thread PRNG used for lazy deallocation. */
static __thread uint32_t lazy_free_x;
PRN_DEFINE(lazy_free, lazy_free_x, 12345, 12347)
#endif
#ifdef MALLOC_BALANCE
/* Define the PRNG used for arena assignment. */
static __thread uint32_t balance_x;
@@ -1785,6 +1749,7 @@ base_alloc(size_t size)
}
#endif
malloc_mutex_unlock(&base_mtx);
VALGRIND_MALLOCLIKE_BLOCK(ret, size, 0, false);
return (ret);
}
@@ -1795,6 +1760,12 @@ base_calloc(size_t number, size_t size)
void *ret;
ret = base_alloc(number * size);
#ifdef MALLOC_VALGRIND
if (ret != NULL) {
VALGRIND_FREELIKE_BLOCK(ret, 0);
VALGRIND_MALLOCLIKE_BLOCK(ret, size, 0, true);
}
#endif
memset(ret, 0, number * size);
return (ret);
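
base_alloc() registers its result as an undefined block, so base_calloc() above drops that registration and re-registers the same address with is_zeroed set before zeroing it, letting Memcheck treat the contents as defined. (Note that the re-registration passes size rather than number * size, which appears to cover only part of the block when number > 1.) A compressed sketch of the re-annotation step, reusing the fallback macros from the sketch above (zero_and_redefine is a hypothetical helper):

#include <string.h>

/* Hypothetical helper mirroring base_calloc(): re-register an already
 * annotated block as a fresh, zero-filled allocation, then zero it. */
static void *
zero_and_redefine(void *ptr, size_t total)
{
	VALGRIND_FREELIKE_BLOCK(ptr, 0);
	VALGRIND_MALLOCLIKE_BLOCK(ptr, total, 0, /* is_zeroed */ 1);
	memset(ptr, 0, total);
	return ptr;
}
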
@@ -1809,6 +1780,8 @@ base_node_alloc(void)
if (base_nodes != NULL) {
ret = base_nodes;
base_nodes = *(extent_node_t **)ret;
VALGRIND_FREELIKE_BLOCK(ret, 0);
VALGRIND_MALLOCLIKE_BLOCK(ret, sizeof(extent_node_t), 0, false);
malloc_mutex_unlock(&base_mtx);
} else {
malloc_mutex_unlock(&base_mtx);
@@ -1823,6 +1796,8 @@ base_node_dealloc(extent_node_t *node)
{
malloc_mutex_lock(&base_mtx);
VALGRIND_FREELIKE_BLOCK(node, 0);
VALGRIND_MALLOCLIKE_BLOCK(node, sizeof(extent_node_t *), 0, false);
*(extent_node_t **)node = base_nodes;
base_nodes = node;
malloc_mutex_unlock(&base_mtx);
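
These two hunks annotate the recycled extent-node freelist. When a node is retired in base_node_dealloc(), only the embedded next pointer stays in use, so the full block is freed and a pointer-sized block is registered in its place; when base_node_alloc() pops a node back off the list, that pointer-sized block is freed and the full sizeof(extent_node_t) is registered again. A rough sketch of the same idea for a generic intrusive freelist, assuming the fallback macros above (freelist_push/freelist_pop are hypothetical):

#include <stddef.h>

/* Intrusive freelist: a dead node's first pointer-sized bytes hold the
 * link to the next dead node, and only that region stays "allocated"
 * from Memcheck's point of view. */
static void *freelist_head;

static void
freelist_push(void *node)
{
	VALGRIND_FREELIKE_BLOCK(node, 0);
	VALGRIND_MALLOCLIKE_BLOCK(node, sizeof(void *), 0, 0);
	*(void **)node = freelist_head;
	freelist_head = node;
}

static void *
freelist_pop(size_t node_size)
{
	void *node = freelist_head;

	if (node == NULL)
		return NULL;
	freelist_head = *(void **)node;	/* Read the link while it is live. */
	VALGRIND_FREELIKE_BLOCK(node, 0);
	VALGRIND_MALLOCLIKE_BLOCK(node, node_size, 0, 0);
	return node;
}
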
@@ -2628,20 +2603,6 @@ choose_arena_hard(void)
assert(__isthreaded);
#ifdef MALLOC_LAZY_FREE
/*
* Seed the PRNG used for lazy deallocation. Since seeding only occurs
* on the first allocation by a thread, it is possible for a thread to
* deallocate before seeding. This is not a critical issue though,
* since it is extremely unusual for an application to use threads
* that deallocate but *never* allocate, and because even if seeding
* never occurs for multiple threads, they will tend to drift apart
* unless some aspect of the application forces deallocation
* synchronization.
*/
SPRN(lazy_free, (uint32_t)(uintptr_t)(_pthread_self()));
#endif
#ifdef MALLOC_BALANCE
/*
* Seed the PRNG used for arena load balancing. We can get away with
@@ -3017,6 +2978,8 @@ arena_chunk_alloc(arena_t *arena)
chunk = (arena_chunk_t *)chunk_alloc(chunksize, true);
if (chunk == NULL)
return (NULL);
VALGRIND_MALLOCLIKE_BLOCK(chunk, (arena_chunk_header_npages <<
pagesize_2pow), 0, false);
#ifdef MALLOC_STATS
arena->stats.mapped += chunksize;
#endif
@@ -3089,6 +3052,7 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks,
arena->spare);
arena->ndirty -= arena->spare->ndirty;
VALGRIND_FREELIKE_BLOCK(arena->spare, 0);
chunk_dealloc((void *)arena->spare, chunksize);
#ifdef MALLOC_STATS
arena->stats.mapped -= chunksize;
@@ -3402,6 +3366,9 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
if (run == NULL)
return (NULL);
VALGRIND_MALLOCLIKE_BLOCK(run, sizeof(arena_run_t) + (sizeof(unsigned) *
bin->regs_mask_nelms - 1), 0, false);
/* Initialize run internals. */
run->bin = bin;
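
The size registered for a run header covers more than sizeof(arena_run_t): in jemalloc of this vintage the struct ends in a one-element regs_mask[] array that is over-allocated to regs_mask_nelms entries, hence the extra (regs_mask_nelms - 1) unsigneds. The sizing idiom in isolation, with a hypothetical header type rather than jemalloc's:

#include <stddef.h>

/* Old-style "flexible" trailing array: declared with one element,
 * allocated and annotated with however many are actually needed. */
typedef struct {
	unsigned nfree;
	unsigned regs_mask[1];	/* Dynamically sized. */
} run_header_t;

static size_t
run_header_size(unsigned regs_mask_nelms)
{
	return sizeof(run_header_t) +
	    sizeof(unsigned) * (regs_mask_nelms - 1);
}
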
@@ -3656,6 +3623,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
#endif
malloc_spin_unlock(&arena->lock);
VALGRIND_MALLOCLIKE_BLOCK(ret, size, 0, zero);
if (zero == false) {
#ifdef MALLOC_FILL
if (opt_junk)
@@ -3692,6 +3660,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero)
#endif
malloc_spin_unlock(&arena->lock);
VALGRIND_MALLOCLIKE_BLOCK(ret, size, 0, zero);
if (zero == false) {
#ifdef MALLOC_FILL
if (opt_junk)
@@ -3813,6 +3782,7 @@ arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
#endif
malloc_spin_unlock(&arena->lock);
VALGRIND_MALLOCLIKE_BLOCK(ret, size, 0, false);
#ifdef MALLOC_FILL
if (opt_junk)
memset(ret, 0xa5, size);
@@ -4031,6 +4001,7 @@ arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
#ifdef MALLOC_DEBUG
run->magic = 0;
#endif
VALGRIND_FREELIKE_BLOCK(run, 0);
arena_run_dalloc(arena, run, true);
#ifdef MALLOC_STATS
bin->stats.curruns--;
@@ -4059,90 +4030,6 @@ arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
#endif
}
#ifdef MALLOC_LAZY_FREE
static inline void
arena_dalloc_lazy(arena_t *arena, arena_chunk_t *chunk, void *ptr,
size_t pageind, arena_chunk_map_t *mapelm)
{
void **free_cache = arena->free_cache;
unsigned i, slot;
if (__isthreaded == false || opt_lazy_free_2pow < 0) {
malloc_spin_lock(&arena->lock);
arena_dalloc_small(arena, chunk, ptr, pageind, *mapelm);
malloc_spin_unlock(&arena->lock);
return;
}
for (i = 0; i < LAZY_FREE_NPROBES; i++) {
slot = PRN(lazy_free, opt_lazy_free_2pow);
if (atomic_cmpset_ptr((uintptr_t *)&free_cache[slot],
(uintptr_t)NULL, (uintptr_t)ptr)) {
return;
}
}
arena_dalloc_lazy_hard(arena, chunk, ptr, pageind, mapelm);
}
static void
arena_dalloc_lazy_hard(arena_t *arena, arena_chunk_t *chunk, void *ptr,
size_t pageind, arena_chunk_map_t *mapelm)
{
void **free_cache = arena->free_cache;
unsigned i, slot;
malloc_spin_lock(&arena->lock);
arena_dalloc_small(arena, chunk, ptr, pageind, *mapelm);
/*
* Check whether another thread already cleared the cache. It is
* possible that another thread cleared the cache *and* this slot was
* already refilled, which could result in a mostly fruitless cache
* sweep, but such a sequence of events causes no correctness issues.
*/
if ((ptr = (void *)atomic_readandclear_ptr(
(uintptr_t *)&free_cache[slot]))
!= NULL) {
unsigned lazy_free_mask;
/*
* Clear the cache, since we failed to find a slot. It is
* possible that other threads will continue to insert objects
* into the cache while this one sweeps, but that is okay,
* since on average the cache is still swept with the same
* frequency.
*/
/* Handle pointer at current slot. */
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >>
pagesize_2pow);
mapelm = &chunk->map[pageind];
arena_dalloc_small(arena, chunk, ptr, pageind, *mapelm);
/* Sweep remainder of slots. */
lazy_free_mask = (1U << opt_lazy_free_2pow) - 1;
for (i = (slot + 1) & lazy_free_mask;
i != slot;
i = (i + 1) & lazy_free_mask) {
ptr = (void *)atomic_readandclear_ptr(
(uintptr_t *)&free_cache[i]);
if (ptr != NULL) {
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
pageind = (((uintptr_t)ptr - (uintptr_t)chunk)
>> pagesize_2pow);
mapelm = &chunk->map[pageind];
arena_dalloc_small(arena, chunk, ptr, pageind,
*mapelm);
}
}
}
malloc_spin_unlock(&arena->lock);
}
#endif
static void
arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
{
@@ -4197,17 +4084,14 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
mapelm = &chunk->map[pageind];
if ((*mapelm & CHUNK_MAP_LARGE) == 0) {
/* Small allocation. */
#ifdef MALLOC_LAZY_FREE
arena_dalloc_lazy(arena, chunk, ptr, pageind, mapelm);
#else
malloc_spin_lock(&arena->lock);
arena_dalloc_small(arena, chunk, ptr, pageind, *mapelm);
malloc_spin_unlock(&arena->lock);
#endif
} else {
assert((*mapelm & CHUNK_MAP_POS_MASK) == 0);
arena_dalloc_large(arena, chunk, ptr);
}
VALGRIND_FREELIKE_BLOCK(ptr, 0);
}
static inline void
@@ -4424,10 +4308,28 @@ iralloc(void *ptr, size_t size)
oldsize = isalloc(ptr);
#ifndef MALLOC_VALGRIND
if (size <= arena_maxclass)
return (arena_ralloc(ptr, size, oldsize));
else
return (huge_ralloc(ptr, size, oldsize));
#else
/*
* Valgrind does not provide a public interface for modifying an
* existing allocation, so use malloc/memcpy/free instead.
*/
{
void *ret = imalloc(size);
if (ret != NULL) {
if (oldsize < size)
memcpy(ret, ptr, oldsize);
else
memcpy(ret, ptr, size);
idalloc(ptr);
}
return (ret);
}
#endif
}
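
The #ifndef/#else split above exists because the valgrind client-request interface used here has no call for resizing an existing MALLOCLIKE block, so the valgrind build gives up jemalloc's in-place arena_ralloc()/huge_ralloc() paths and reallocates by copy. The same fallback as a standalone helper (the function name and the alloc/dealloc callbacks are hypothetical):

#include <stddef.h>
#include <string.h>

/* realloc() fallback for allocators whose blocks are individually
 * registered with valgrind: get a fresh block, copy the overlapping
 * prefix, release the old block. */
static void *
realloc_by_copy(void *ptr, size_t old_size, size_t new_size,
    void *(*alloc_fn)(size_t), void (*dealloc_fn)(void *))
{
	void *ret = alloc_fn(new_size);

	if (ret != NULL) {
		memcpy(ret, ptr, old_size < new_size ? old_size : new_size);
		dealloc_fn(ptr);
	}
	return ret;
}

The extra copy on every reallocation is a cost only valgrind builds pay.
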
static bool
@@ -4457,15 +4359,6 @@ arena_new(arena_t *arena)
#ifdef MALLOC_BALANCE
arena->contention = 0;
#endif
#ifdef MALLOC_LAZY_FREE
if (opt_lazy_free_2pow >= 0) {
arena->free_cache = (void **) base_calloc(1, sizeof(void *)
* (1U << opt_lazy_free_2pow));
if (arena->free_cache == NULL)
return (true);
} else
arena->free_cache = NULL;
#endif
/* Initialize bins. */
prev_run_size = pagesize;
@@ -4615,6 +4508,12 @@ huge_malloc(size_t size, bool zero)
pages_decommit((void *)((uintptr_t)ret + psize), csize - psize);
#endif
#ifdef MALLOC_DECOMMIT
VALGRIND_MALLOCLIKE_BLOCK(ret, psize, 0, zero);
#else
VALGRIND_MALLOCLIKE_BLOCK(ret, csize, 0, zero);
#endif
#ifdef MALLOC_FILL
if (zero == false) {
if (opt_junk)
@@ -4754,6 +4653,12 @@ huge_palloc(size_t alignment, size_t size)
}
#endif
#ifdef MALLOC_DECOMMIT
VALGRIND_MALLOCLIKE_BLOCK(ret, psize, 0, false);
#else
VALGRIND_MALLOCLIKE_BLOCK(ret, chunk_size, 0, false);
#endif
#ifdef MALLOC_FILL
if (opt_junk)
# ifdef MALLOC_DECOMMIT
@@ -4890,6 +4795,7 @@ huge_dalloc(void *ptr)
#else
chunk_dealloc(node->addr, node->size);
#endif
VALGRIND_FREELIKE_BLOCK(node->addr, 0);
base_node_dealloc(node);
}
@@ -5085,13 +4991,6 @@ malloc_print_stats(void)
_malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
_malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
#ifdef MALLOC_LAZY_FREE
if (opt_lazy_free_2pow >= 0) {
_malloc_message("Lazy free slots: ",
umax2s(1U << opt_lazy_free_2pow, s), "\n", "");
} else
_malloc_message("Lazy free slots: 0\n", "", "", "");
#endif
#ifdef MALLOC_BALANCE
_malloc_message("Arena balance threshold: ",
umax2s(opt_balance_threshold, s), "\n", "");
@@ -5281,11 +5180,6 @@ malloc_init_hard(void)
pagesize_mask = result - 1;
pagesize_2pow = ffs((int)result) - 1;
#ifdef MALLOC_LAZY_FREE
if (ncpus == 1)
opt_lazy_free_2pow = -1;
#endif
for (i = 0; i < 3; i++) {
unsigned j;
@@ -5427,18 +5321,6 @@ MALLOC_OUT:
(sizeof(size_t) << 3))
opt_chunk_2pow++;
break;
case 'l':
#ifdef MALLOC_LAZY_FREE
if (opt_lazy_free_2pow >= 0)
opt_lazy_free_2pow--;
#endif
break;
case 'L':
#ifdef MALLOC_LAZY_FREE
if (ncpus > 1)
opt_lazy_free_2pow++;
#endif
break;
case 'm':
#ifdef MALLOC_DSS
opt_mmap = false;
@@ -5585,14 +5467,6 @@ MALLOC_OUT:
}
arena_maxclass = chunksize - (arena_chunk_header_npages <<
pagesize_2pow);
#ifdef MALLOC_LAZY_FREE
/*
* Make sure that allocating the free_cache does not exceed the limits
* of what base_alloc() can handle.
*/
while ((sizeof(void *) << opt_lazy_free_2pow) > chunksize)
opt_lazy_free_2pow--;
#endif
UTRACE(0, 0, 0);
@@ -5747,9 +5621,6 @@ MALLOC_OUT:
* Seed here for the initial thread, since choose_arena_hard() is only
* called for other threads. The seed values don't really matter.
*/
#ifdef MALLOC_LAZY_FREE
SPRN(lazy_free, 42);
#endif
#ifdef MALLOC_BALANCE
SPRN(balance, 42);
#endif