Mirror of https://github.com/github/ruby.git
rb_shape_transition_shape_capa: use optimal sizes transitions
Previously the growth was 3(embed), 6, 12, 24, ... With this change it's now 3(embed), 8, 16, 32, 64, ... by default. However, since a power of two isn't the best size for all allocators, if `malloc_usable_size` is available, we use it to discover the best offset. On Linux/glibc 2.35 for instance, the growth will be 3(embed), 7, 15, 31, ... to avoid wasting 8B per object.

Test program (the includes and the `VALUE_SIZE` constant are filled in here so it compiles standalone; they were not part of the original listing):

```c
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>

#define VALUE_SIZE 8 /* assumed: sizeof(VALUE) on 64-bit */

size_t test(size_t slots)
{
    size_t allocated = slots * VALUE_SIZE;
    void *test_ptr = malloc(allocated);
    size_t wasted = malloc_usable_size(test_ptr) - allocated;
    free(test_ptr);
    fprintf(stderr, "slots = %lu, wasted_bytes = %lu\n", slots, wasted);
    return wasted;
}

int main(int argc, char *argv[])
{
    size_t best_padding = 0;
    size_t padding = 0;
    for (padding = 0; padding <= 2; padding++) {
        size_t wasted = test(8 - padding);
        if (wasted == 0) {
            best_padding = padding;
            break;
        }
    }

    size_t index = 0;
    fprintf(stderr, "=============== naive ================\n");
    size_t list_size = 4;
    for (index = 0; index < 10; index++) {
        test(list_size);
        list_size *= 2;
    }

    fprintf(stderr, "=============== auto-padded (-%lu) ================\n", best_padding);
    list_size = 4;
    for (index = 0; index < 10; index++) {
        test(list_size - best_padding);
        list_size *= 2;
    }

    fprintf(stderr, "\n\n");
    return 0;
}
```

```
===== glibc ======
slots = 8, wasted_bytes = 8
slots = 7, wasted_bytes = 0
=============== naive ================
slots = 4, wasted_bytes = 8
slots = 8, wasted_bytes = 8
slots = 16, wasted_bytes = 8
slots = 32, wasted_bytes = 8
slots = 64, wasted_bytes = 8
slots = 128, wasted_bytes = 8
slots = 256, wasted_bytes = 8
slots = 512, wasted_bytes = 8
slots = 1024, wasted_bytes = 8
slots = 2048, wasted_bytes = 8
=============== auto-padded (-1) ================
slots = 3, wasted_bytes = 0
slots = 7, wasted_bytes = 0
slots = 15, wasted_bytes = 0
slots = 31, wasted_bytes = 0
slots = 63, wasted_bytes = 0
slots = 127, wasted_bytes = 0
slots = 255, wasted_bytes = 0
slots = 511, wasted_bytes = 0
slots = 1023, wasted_bytes = 0
slots = 2047, wasted_bytes = 0
```

```
========== jemalloc =======
slots = 8, wasted_bytes = 0
=============== naive ================
slots = 4, wasted_bytes = 0
slots = 8, wasted_bytes = 0
slots = 16, wasted_bytes = 0
slots = 32, wasted_bytes = 0
slots = 64, wasted_bytes = 0
slots = 128, wasted_bytes = 0
slots = 256, wasted_bytes = 0
slots = 512, wasted_bytes = 0
slots = 1024, wasted_bytes = 0
slots = 2048, wasted_bytes = 0
=============== auto-padded (-0) ================
slots = 4, wasted_bytes = 0
slots = 8, wasted_bytes = 0
slots = 16, wasted_bytes = 0
slots = 32, wasted_bytes = 0
slots = 64, wasted_bytes = 0
slots = 128, wasted_bytes = 0
slots = 256, wasted_bytes = 0
slots = 512, wasted_bytes = 0
slots = 1024, wasted_bytes = 0
slots = 2048, wasted_bytes = 0
```
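To make those sequences concrete, here is a minimal standalone model of the new growth logic (a sketch, not the committed code: it assumes 8-byte `VALUE`s and substitutes the GCC/Clang builtins `__builtin_popcountll`/`__builtin_clzll` for Ruby's `rb_popcount64`/`nlz_int64`):

```c
#include <stdio.h>

// Model of rb_malloc_grow_capa: double the allocation size in bytes, round
// up to the next power of two, then subtract the discovered malloc offset.
static size_t
grow_capa(size_t current, size_t type_size, size_t malloc_offset)
{
    size_t bytes = (current < 4 ? 4 : current) * type_size;
    size_t new_bytes = bytes * 2;
    if (__builtin_popcountll(new_bytes) != 1) {
        // Round up to the next power of two.
        new_bytes = (size_t)1 << (64 - __builtin_clzll(new_bytes));
    }
    return (new_bytes - malloc_offset) / type_size;
}

int
main(void)
{
    // offset 0 (jemalloc):   8 16 32 64 128
    // offset 8 (glibc 2.35): 7 15 31 63 127
    for (size_t offset = 0; offset <= 8; offset += 8) {
        size_t capa = 3; // embedded capacity
        printf("offset %zu:", offset);
        for (int i = 0; i < 5; i++) {
            capa = grow_capa(capa, 8, offset);
            printf(" %zu", capa);
        }
        printf("\n");
    }
    return 0;
}
```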
Parent: e7d845b1d0
Commit: e5364ea496
gc.c | 64

```diff
@@ -157,6 +157,68 @@
 #define MAP_ANONYMOUS MAP_ANON
 #endif
 
+static size_t malloc_offset = 0;
+#if defined(HAVE_MALLOC_USABLE_SIZE)
+static size_t
+gc_compute_malloc_offset(void)
+{
+    // Different allocators use different metadata storage strategies which result in different
+    // ideal sizes.
+    // For instance malloc(64) will waste 8B with glibc, but waste 0B with jemalloc.
+    // But malloc(56) will waste 0B with glibc, but waste 8B with jemalloc.
+    // So we try allocating 64, 56 and 48 bytes and select the first offset that doesn't
+    // waste memory.
+    // This was tested on Linux with glibc 2.35 and jemalloc 5, and for both it results in
+    // no wasted memory.
+    size_t offset = 0;
+    for (offset = 0; offset <= 16; offset += 8) {
+        size_t allocated = (64 - offset);
+        void *test_ptr = malloc(allocated);
+        size_t wasted = malloc_usable_size(test_ptr) - allocated;
+        free(test_ptr);
+
+        if (wasted == 0) {
+            return offset;
+        }
+    }
+
+    return 0;
+}
+#else
+static size_t
+gc_compute_malloc_offset(void)
+{
+    // If we don't have malloc_usable_size, we use powers of 2.
+    return 0;
+}
+#endif
+
+size_t
+rb_malloc_grow_capa(size_t current, size_t type_size)
+{
+    size_t current_capacity = current;
+    if (current_capacity < 4) {
+        current_capacity = 4;
+    }
+    current_capacity *= type_size;
+
+    // We double the current capacity.
+    size_t new_capacity = (current_capacity * 2);
+
+    // And round up to the next power of 2 if it's not already one.
+    if (rb_popcount64(new_capacity) != 1) {
+        new_capacity = (size_t)1 << (64 - nlz_int64(new_capacity));
+    }
+
+    new_capacity -= malloc_offset;
+    new_capacity /= type_size;
+    if (current > new_capacity) {
+        rb_bug("rb_malloc_grow_capa: current_capacity=%zu, new_capacity=%zu, malloc_offset=%zu", current, new_capacity, malloc_offset);
+    }
+    RUBY_ASSERT(new_capacity > current);
+    return new_capacity;
+}
+
 static inline struct rbimpl_size_mul_overflow_tag
 size_add_overflow(size_t x, size_t y)
 {
@@ -13979,6 +14041,8 @@ void
 Init_GC(void)
 {
 #undef rb_intern
+    malloc_offset = gc_compute_malloc_offset();
+
     VALUE rb_mObjSpace;
     VALUE rb_mProfiler;
     VALUE gc_constants;
```
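For reference, the probe added above can be compiled on its own (a sketch assuming glibc, where `malloc_usable_size` is declared in `<malloc.h>`; running it with jemalloc preloaded via `LD_PRELOAD` should print 0):

```c
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>

// Same strategy as gc_compute_malloc_offset: try 64, 56 and 48 bytes and
// return the first offset whose allocation wastes no usable space.
static size_t
compute_malloc_offset(void)
{
    for (size_t offset = 0; offset <= 16; offset += 8) {
        size_t allocated = 64 - offset;
        void *test_ptr = malloc(allocated);
        size_t wasted = malloc_usable_size(test_ptr) - allocated;
        free(test_ptr);
        if (wasted == 0) {
            return offset;
        }
    }
    return 0;
}

int
main(void)
{
    // Prints 8 with glibc 2.35, 0 with jemalloc.
    printf("malloc_offset = %zu\n", compute_malloc_offset());
    return 0;
}
```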
internal/gc.h | 1

```diff
@@ -227,6 +227,7 @@ __attribute__((__alloc_align__(1)))
 RUBY_ATTR_MALLOC void *rb_aligned_malloc(size_t, size_t) RUBY_ATTR_ALLOC_SIZE((2));
 size_t rb_size_mul_or_raise(size_t, size_t, VALUE); /* used in compile.c */
 size_t rb_size_mul_add_or_raise(size_t, size_t, size_t, VALUE); /* used in iseq.h */
+size_t rb_malloc_grow_capa(size_t current_capacity, size_t type_size);
 RUBY_ATTR_MALLOC void *rb_xmalloc_mul_add(size_t, size_t, size_t);
 RUBY_ATTR_MALLOC void *rb_xcalloc_mul_add(size_t, size_t, size_t);
 void *rb_xrealloc_mul_add(const void *, size_t, size_t, size_t);
```
shape.c | 10

```diff
@@ -418,19 +418,21 @@ rb_shape_get_next(rb_shape_t* shape, VALUE obj, ID id)
 }
 
 static inline rb_shape_t *
-rb_shape_transition_shape_capa_create(rb_shape_t* shape, uint32_t new_capacity)
+rb_shape_transition_shape_capa_create(rb_shape_t* shape, size_t new_capacity)
 {
+    RUBY_ASSERT(new_capacity < (size_t)MAX_IVARS);
+
     ID edge_name = rb_make_temporary_id(new_capacity);
     bool dont_care;
     rb_shape_t * new_shape = get_next_shape_internal(shape, edge_name, SHAPE_CAPACITY_CHANGE, &dont_care, true, false);
-    new_shape->capacity = new_capacity;
+    new_shape->capacity = (uint32_t)new_capacity;
     return new_shape;
 }
 
 rb_shape_t *
 rb_shape_transition_shape_capa(rb_shape_t* shape)
 {
-    return rb_shape_transition_shape_capa_create(shape, shape->capacity * 2);
+    return rb_shape_transition_shape_capa_create(shape, rb_malloc_grow_capa(shape->capacity, sizeof(VALUE)));
 }
 
 bool
@@ -833,7 +835,7 @@ Init_default_shapes(void)
 
     // Shapes by size pool
     for (int i = 1; i < SIZE_POOL_COUNT; i++) {
-        uint32_t capa = (uint32_t)((rb_size_pool_slot_size(i) - offsetof(struct RObject, as.ary)) / sizeof(VALUE));
+        size_t capa = ((rb_size_pool_slot_size(i) - offsetof(struct RObject, as.ary)) / sizeof(VALUE));
         rb_shape_t * new_shape = rb_shape_transition_shape_capa_create(root, capa);
         new_shape->type = SHAPE_INITIAL_CAPACITY;
         new_shape->size_pool_index = i;
```
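For context on the `Init_default_shapes` loop, a sketch of where those initial capacities land. The constants here are assumptions standing in for `rb_size_pool_slot_size(i)` and `offsetof(struct RObject, as.ary)`: a 40-byte base slot doubling per pool and a 16-byte `RBasic` header, which is the usual 64-bit layout.

```c
#include <stdio.h>

int
main(void)
{
    // Assumed values; the real code derives them from rb_size_pool_slot_size
    // and offsetof(struct RObject, as.ary).
    size_t header = 16;                // embedded ivars start after RBasic
    for (int i = 1; i < 5; i++) {      // SIZE_POOL_COUNT == 5 assumed
        size_t slot_size = 40u << i;   // 80, 160, 320, 640
        size_t capa = (slot_size - header) / 8;
        printf("size pool %d: initial capacity = %zu VALUEs\n", i, capa);
    }
    return 0;
}
```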