vm_getivar: assume the cached shape_id likely has a common ancestor

When an inline cache misses, it is very likely that the stale shape_id
and the current instance shape_id have a close common ancestor.

For example, if the instance is sometimes frozen and sometimes
not, one of the two shapes will be the direct parent of the other.

Another pattern that commonly causes IC misses is "memoization":
in such cases the object will have a "base common shape" and then
a number of close descendants.

In addition, when we find a common ancestor, we store it in the
inline cache instead of the current shape. This helps prevent the
cache from flip-flopping, ensuring the next lookup will be marginally
faster and, more generally, avoiding writing to memory too much.

However, now that shapes have an ancestors index, we only check
for a few ancestors before falling back to use the index.

So overall this change speeds up what is assumed to be the more common
case, but makes what is assumed to be the less common case a bit slower.

```
compare-ruby: ruby 3.3.0dev (2023-10-26T05:30:17Z master 701ca070b4) [arm64-darwin22]
built-ruby: ruby 3.3.0dev (2023-10-26T09:25:09Z shapes_double_sear.. a723a85235) [arm64-darwin22]
warming up......

|                                     |compare-ruby|built-ruby|
|:------------------------------------|-----------:|---------:|
|vm_ivar_stable_shape                 |     11.672M|   11.679M|
|                                     |           -|     1.00x|
|vm_ivar_memoize_unstable_shape       |      7.551M|   10.506M|
|                                     |           -|     1.39x|
|vm_ivar_memoize_unstable_shape_miss  |     11.591M|   11.624M|
|                                     |           -|     1.00x|
|vm_ivar_unstable_undef               |      9.037M|    7.981M|
|                                     |       1.13x|         -|
|vm_ivar_divergent_shape              |      8.034M|    6.657M|
|                                     |       1.21x|         -|
|vm_ivar_divergent_shape_imbalanced   |     10.471M|    9.231M|
|                                     |       1.13x|         -|
```

Co-Authored-By: John Hawthorn <john@hawthorn.email>
This commit is contained in:
Jean Boussier 2023-10-26 11:08:05 +02:00 коммит произвёл Jean Boussier
Родитель 0ae6a2a1cf
Коммит b92b9e1e9e
4 изменённых файлов: 156 добавлений и 7 удалений

Просмотреть файл

@ -0,0 +1,85 @@
# Benchmark for instance-variable reads where one call site sees objects
# whose instance-variable layouts differ.
#
# Fixtures built by the prelude:
#   BASE      - Record.new: @first followed by IVARS (60) @ivar_N variables.
#   LAZY      - built like BASE, then lazy_set assigns @lazy_set afterwards.
#   DIVERGENT - Record.new(true): @offset is assigned before @first.
#   MISS      - an instance of the Miss subclass of Record.
#
# NOTE(review): the `@first = 0` and `IVARS.times` statements in `class Miss`
# sit directly in the class body rather than in a method, so they assign
# instance variables on the Miss class object itself; Miss.new still runs
# Record#initialize. Confirm this is the intended setup for the "miss" case.
#
# Each benchmark entry performs six reader calls (`first` or `undef`):
# vm_ivar_stable_shape uses BASE only, vm_ivar_divergent_shape_imbalanced
# calls BASE once and DIVERGENT five times, and the remaining entries
# alternate between BASE and one other fixture.
prelude: |
IVARS = 60
class Record
def initialize(offset = false)
@offset = 1 if offset
@first = 0
IVARS.times do |i|
instance_variable_set("@ivar_#{i}", i)
end
end
def first
@first
end
def lazy_set
@lazy_set ||= 123
end
def undef
@undef
end
end
Record.new # Need one alloc to right size
BASE = Record.new
LAZY = Record.new
LAZY.lazy_set
class Miss < Record
@first = 0
IVARS.times do |i|
instance_variable_set("@i_#{i}", i)
end
end
Miss.new # Need one alloc to right size
MISS = Miss.new
DIVERGENT = Record.new(true)
benchmark:
vm_ivar_stable_shape: |
BASE.first
BASE.first
BASE.first
BASE.first
BASE.first
BASE.first
vm_ivar_memoize_unstable_shape: |
BASE.first
LAZY.first
BASE.first
LAZY.first
BASE.first
LAZY.first
vm_ivar_memoize_unstable_shape_miss: |
BASE.first
MISS.first
BASE.first
MISS.first
BASE.first
MISS.first
vm_ivar_unstable_undef: |
BASE.undef
LAZY.undef
BASE.undef
LAZY.undef
BASE.undef
LAZY.undef
vm_ivar_divergent_shape: |
BASE.first
DIVERGENT.first
BASE.first
DIVERGENT.first
BASE.first
DIVERGENT.first
vm_ivar_divergent_shape_imbalanced: |
BASE.first
DIVERGENT.first
DIVERGENT.first
DIVERGENT.first
DIVERGENT.first
DIVERGENT.first

57
shape.c
Просмотреть файл

@ -38,6 +38,7 @@
// Keeps only the bits of x selected by SINGLE_CHILD_MASK and yields the
// result as an rb_shape_t pointer. Both the argument and the whole expansion
// are parenthesized so the macro stays correct when x is a compound
// expression or when the result is used with a postfix operator such as `->`.
#define SINGLE_CHILD(x) ((rb_shape_t *)((uintptr_t)(x) & SINGLE_CHILD_MASK))
// Compared against a shape's next_iv_index by
// rb_shape_get_iv_index_with_hint when deciding whether to bound its
// ancestor walk.
#define ANCESTOR_CACHE_THRESHOLD 10
// One less than SHAPE_BUFFER_SIZE.
#define MAX_SHAPE_ID (SHAPE_BUFFER_SIZE - 1)
// How many ancestors rb_shape_get_iv_index_with_hint inspects before falling
// back to rb_shape_get_iv_index when the shape has an ancestor index.
#define ANCESTOR_SEARCH_MAX_DEPTH 2
static ID id_frozen;
static ID id_t_object;
@ -723,6 +724,62 @@ rb_shape_transition_shape_capa(rb_shape_t* shape)
return rb_shape_transition_shape_capa_create(shape, rb_malloc_grow_capa(shape->capacity, sizeof(VALUE)));
}
// Variant of rb_shape_get_iv_index that takes a previously cached
// (shape id, index) pair as a hint, so that lookups resolve quickly when the
// cached shape and the current shape are close relatives in the shape tree.
//
// shape_id       id of the shape to search.
// id             instance variable name to look up.
// value          in: index cached alongside *shape_id_hint;
//                out: index written by whichever path resolves the lookup.
// shape_id_hint  in: cached shape id, or INVALID_SHAPE_ID when there is none;
//                out: shape id the caller should cache for the next lookup.
//
// Returns true when one of the shortcuts below resolves the lookup, otherwise
// whatever rb_shape_get_iv_index returns.
bool
rb_shape_get_iv_index_with_hint(shape_id_t shape_id, ID id, attr_index_t *value, shape_id_t *shape_id_hint)
{
    const attr_index_t cached_index = *value;
    rb_shape_t *const start = rb_shape_get_shape_by_id(shape_id);

    // Nothing cached yet: do a plain lookup and remember the current shape.
    if (*shape_id_hint == INVALID_SHAPE_ID) {
        *shape_id_hint = shape_id;
        return rb_shape_get_iv_index(start, id, value);
    }

    rb_shape_t *hint_cursor = rb_shape_get_shape_by_id(*shape_id_hint);

    // The cached shape and the current one probably share a nearby ancestor,
    // and hitting it is cheaper than consulting the ancestor index. So when an
    // index is available only ANCESTOR_SEARCH_MAX_DEPTH ancestors are tried
    // before giving up; without an index the walk is unbounded.
    const bool has_usable_index =
        start->ancestor_index && start->next_iv_index >= ANCESTOR_CACHE_THRESHOLD;
    int budget = has_usable_index ? ANCESTOR_SEARCH_MAX_DEPTH : INT_MAX;

    rb_shape_t *cursor = start;
    for (; budget > 0 && cursor->next_iv_index > cached_index; budget--) {
        // Bring the hint side up until it is no deeper than the cursor.
        while (hint_cursor->next_iv_index > cursor->next_iv_index) {
            hint_cursor = rb_shape_get_parent(hint_cursor);
        }

        if (hint_cursor == cursor) {
            // Common ancestor reached: the cached index is still valid.
            *value = cached_index;
            *shape_id_hint = rb_shape_id(cursor);
            return true;
        }

        if (cursor->edge_name == id) {
            // The variable's own transition shows up before any common ancestor.
            *value = cursor->next_iv_index - 1;
            *shape_id_hint = rb_shape_id(cursor);
            return true;
        }

        cursor = rb_shape_get_parent(cursor);
    }

    // Prefer the starting shape for the fallback lookup when it has an
    // ancestor index and the shape the walk stopped at does not.
    rb_shape_t *lookup_from =
        (!cursor->ancestor_index && start->ancestor_index) ? start : cursor;

    *shape_id_hint = shape_id;
    return rb_shape_get_iv_index(lookup_from, id, value);
}
bool
rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t *value)
{

Просмотреть файл

@ -155,6 +155,7 @@ rb_shape_t* rb_shape_get_shape_by_id(shape_id_t shape_id);
shape_id_t rb_shape_get_shape_id(VALUE obj);
rb_shape_t * rb_shape_get_next_iv_shape(rb_shape_t * shape, ID id);
bool rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t * value);
// Like rb_shape_get_iv_index, but takes the shape by id together with a
// cached hint: on entry *value and *shape_id_hint hold a previously cached
// index and shape id (INVALID_SHAPE_ID when there is no cached shape); on
// return *shape_id_hint holds the shape id the caller should cache next.
bool rb_shape_get_iv_index_with_hint(shape_id_t shape_id, ID id, attr_index_t * value, shape_id_t *shape_id_hint);
bool rb_shape_obj_too_complex(VALUE obj);
void rb_shape_set_shape(VALUE obj, rb_shape_t* shape);

Просмотреть файл

@ -1286,8 +1286,6 @@ vm_getivar(VALUE obj, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_call
}
#endif
rb_shape_t *shape = rb_shape_get_shape_by_id(shape_id);
if (shape_id == OBJ_TOO_COMPLEX_SHAPE_ID) {
st_table *table = NULL;
switch (BUILTIN_TYPE(obj)) {
@ -1314,15 +1312,23 @@ vm_getivar(VALUE obj, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_call
}
}
else {
if (rb_shape_get_iv_index(shape, id, &index)) {
shape_id_t previous_cached_id = cached_id;
if (rb_shape_get_iv_index_with_hint(shape_id, id, &index, &cached_id)) {
// This fills in the cache with the shared cache object.
// "ent" is the shared cache object
fill_ivar_cache(iseq, ic, cc, is_attr, index, shape_id);
if (cached_id != previous_cached_id) {
fill_ivar_cache(iseq, ic, cc, is_attr, index, cached_id);
}
if (index == ATTR_INDEX_NOT_SET) {
val = default_value;
}
else {
// We fetched the ivar list above
val = ivar_list[index];
RUBY_ASSERT(!UNDEF_P(val));
}
}
else {
if (is_attr) {
vm_cc_attr_index_initialize(cc, shape_id);