From ee9cc8e32e04597a4a86b391df2c19aed9fde3e5 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 24 Jul 2023 15:26:10 -0400 Subject: [PATCH] Implement WeakMap using weak references --- test/ruby/test_weakmap.rb | 14 +- weakmap.c | 529 ++++++++++++++++---------------------- 2 files changed, 238 insertions(+), 305 deletions(-) diff --git a/test/ruby/test_weakmap.rb b/test/ruby/test_weakmap.rb index 6b3dc678ad..b42787ad5e 100644 --- a/test/ruby/test_weakmap.rb +++ b/test/ruby/test_weakmap.rb @@ -193,9 +193,10 @@ class TestWeakMap < Test::Unit::TestCase end; end - def test_compaction_bug_19529 + def test_compaction omit "compaction is not supported on this platform" unless GC.respond_to?(:compact) + # [Bug #19529] obj = Object.new 100.times do |i| GC.compact @@ -211,6 +212,17 @@ class TestWeakMap < Test::Unit::TestCase end GC.compact end; + + assert_separately(%w(-robjspace), <<-'end;') + wm = ObjectSpace::WeakMap.new + key = Object.new + val = Object.new + wm[key] = val + + GC.verify_compaction_references(expand_heap: true, toward: :empty) + + assert_equal(val, wm[key]) + end; end def test_replaced_values_bug_19531 diff --git a/weakmap.c b/weakmap.c index ef859f89e8..bb65b1690b 100644 --- a/weakmap.c +++ b/weakmap.c @@ -6,43 +6,132 @@ #include "ruby/st.h" #include "ruby/st.h" +/* ===== WeakMap ===== + * + * WeakMap contains one ST table which contains a pointer to the object as the + * key and a pointer to the object as the value. This means that the key and + * value of the table are both of the type `VALUE *`. + * + * The objects are not directly stored as keys and values in the table because + * `rb_gc_mark_weak` requires a pointer to the memory location to overwrite + * when the object is reclaimed. Using a pointer into the ST table entry is not + * safe because the pointer can change when the ST table is resized. + * + * WeakMap hashes and compares using the pointer address of the object. + * + * For performance and memory efficiency reasons, the key and value + * are allocated at the same time and adjacent to each other. + * + * During GC and while iterating, reclaimed entries (i.e. either the key or + * value points to `Qundef`) are removed from the ST table. + */ + struct weakmap { - st_table *obj2wmap; /* obj -> [ref,...] */ - st_table *wmap2obj; /* ref -> obj */ - VALUE final; + st_table *table; }; -static int -wmap_replace_ref(st_data_t *key, st_data_t *value, st_data_t _argp, int existing) +static bool +wmap_live_p(VALUE obj) { - *key = rb_gc_location((VALUE)*key); + return !UNDEF_P(obj); +} - VALUE *values = (VALUE *)*value; - VALUE size = values[0]; +static void +wmap_free_entry(VALUE *key, VALUE *val) +{ + assert(key + 1 == val); - for (VALUE index = 1; index <= size; index++) { - values[index] = rb_gc_location(values[index]); - } - - return ST_CONTINUE; + /* We only need to free key because val is allocated beside key on in the + * same malloc call. 
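+     * (wmap_aset allocates key and val together in a single xmalloc of
+     * 2 * sizeof(VALUE), so freeing key releases the whole pair.)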
*/ + ruby_sized_xfree(key, sizeof(VALUE) * 2); } static int -wmap_foreach_replace(st_data_t key, st_data_t value, st_data_t _argp, int error) +wmap_mark_weak_table_i(st_data_t key, st_data_t val, st_data_t _) { - if (rb_gc_location((VALUE)key) != (VALUE)key) { - return ST_REPLACE; + VALUE key_obj = *(VALUE *)key; + VALUE val_obj = *(VALUE *)val; + + if (wmap_live_p(key_obj) && wmap_live_p(val_obj)) { + rb_gc_mark_weak((VALUE *)key); + rb_gc_mark_weak((VALUE *)val); + + return ST_CONTINUE; } + else { + wmap_free_entry((VALUE *)key, (VALUE *)val); - VALUE *values = (VALUE *)value; - VALUE size = values[0]; + return ST_DELETE; + } +} - for (VALUE index = 1; index <= size; index++) { - VALUE val = values[index]; - if (rb_gc_location(val) != val) { - return ST_REPLACE; +static void +wmap_mark(void *ptr) +{ + struct weakmap *w = ptr; + if (w->table) { + st_foreach(w->table, wmap_mark_weak_table_i, (st_data_t)0); + } +} + +static int +wmap_free_table_i(st_data_t key, st_data_t val, st_data_t arg) +{ + wmap_free_entry((VALUE *)key, (VALUE *)val); + return ST_CONTINUE; +} + +static void +wmap_free(void *ptr) +{ + struct weakmap *w = ptr; + + st_foreach(w->table, wmap_free_table_i, 0); + st_free_table(w->table); + xfree(w); +} + +static size_t +wmap_memsize(const void *ptr) +{ + const struct weakmap *w = ptr; + + size_t size = sizeof(*w); + size += st_memsize(w->table); + /* The key and value of the table each take sizeof(VALUE) in size. */ + size += st_table_size(w->table) * (2 * sizeof(VALUE)); + + return size; +} + +static int +wmap_compact_table_i(st_data_t key, st_data_t val, st_data_t data) +{ + st_table *table = (st_table *)data; + + VALUE key_obj = *(VALUE *)key; + VALUE val_obj = *(VALUE *)val; + + if (wmap_live_p(key_obj) && wmap_live_p(val_obj)) { + VALUE new_key_obj = rb_gc_location(key_obj); + + *(VALUE *)val = rb_gc_location(val_obj); + + /* If the key object moves, then we must reinsert because the hash is + * based on the pointer rather than the object itself. 
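+         * (st_insert re-adds the entry under the updated address; returning
+         * ST_DELETE then drops the now-stale slot for the old address.)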
*/ + if (key_obj != new_key_obj) { + *(VALUE *)key = new_key_obj; + + st_insert(table, key, val); + + return ST_DELETE; } } + else { + wmap_free_entry((VALUE *)key, (VALUE *)val); + + return ST_DELETE; + } return ST_CONTINUE; } @@ -51,54 +140,8 @@ static void wmap_compact(void *ptr) { struct weakmap *w = ptr; - if (w->wmap2obj) rb_gc_update_tbl_refs(w->wmap2obj); - if (w->obj2wmap) st_foreach_with_replace(w->obj2wmap, wmap_foreach_replace, wmap_replace_ref, (st_data_t)NULL); - w->final = rb_gc_location(w->final); -} -static void -wmap_mark(void *ptr) -{ - struct weakmap *w = ptr; - rb_gc_mark_movable(w->final); -} - -static int -wmap_free_map(st_data_t key, st_data_t val, st_data_t arg) -{ - VALUE *ptr = (VALUE *)val; - ruby_sized_xfree(ptr, (ptr[0] + 1) * sizeof(VALUE)); - return ST_CONTINUE; -} - -static void -wmap_free(void *ptr) -{ - struct weakmap *w = ptr; - st_foreach(w->obj2wmap, wmap_free_map, 0); - st_free_table(w->obj2wmap); - st_free_table(w->wmap2obj); - xfree(w); -} - -static int -wmap_memsize_map(st_data_t key, st_data_t val, st_data_t arg) -{ - VALUE *ptr = (VALUE *)val; - *(size_t *)arg += (ptr[0] + 1) * sizeof(VALUE); - return ST_CONTINUE; -} - -static size_t -wmap_memsize(const void *ptr) -{ - size_t size; - const struct weakmap *w = ptr; - size = sizeof(*w); - size += st_memsize(w->obj2wmap); - size += st_memsize(w->wmap2obj); - st_foreach(w->obj2wmap, wmap_memsize_map, (st_data_t)&size); - return size; + st_foreach(w->table, wmap_compact_table_i, (st_data_t)w->table); } static const rb_data_type_t weakmap_type = { @@ -112,111 +155,68 @@ static const rb_data_type_t weakmap_type = { 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED }; -static VALUE wmap_finalize(RB_BLOCK_CALL_FUNC_ARGLIST(objid, self)); +static int +wmap_cmp(st_data_t x, st_data_t y) +{ + return *(VALUE *)x != *(VALUE *)y; +} + +static st_index_t +wmap_hash(st_data_t n) +{ + return st_numhash(*(VALUE *)n); +} + +static const struct st_hash_type wmap_hash_type = { + wmap_cmp, + wmap_hash, +}; static VALUE wmap_allocate(VALUE klass) { struct weakmap *w; VALUE obj = TypedData_Make_Struct(klass, struct weakmap, &weakmap_type, w); - w->obj2wmap = rb_init_identtable(); - w->wmap2obj = rb_init_identtable(); - RB_OBJ_WRITE(obj, &w->final, rb_func_lambda_new(wmap_finalize, obj, 1, 1)); + w->table = st_init_table(&wmap_hash_type); return obj; } +struct wmap_foreach_data { + struct weakmap *w; + void (*func)(VALUE, VALUE, st_data_t); + st_data_t arg; +}; + static int -wmap_live_p(VALUE obj) +wmap_foreach_i(st_data_t key, st_data_t val, st_data_t arg) { - if (SPECIAL_CONST_P(obj)) return TRUE; - /* If rb_gc_is_ptr_to_obj returns false, the page could be in the tomb heap - * or have already been freed. 
*/ - if (!rb_gc_is_ptr_to_obj((void *)obj)) return FALSE; + struct wmap_foreach_data *data = (struct wmap_foreach_data *)arg; - void *poisoned = asan_poisoned_object_p(obj); - asan_unpoison_object(obj, false); + VALUE key_obj = *(VALUE *)key; + VALUE val_obj = *(VALUE *)val; - enum ruby_value_type t = BUILTIN_TYPE(obj); - int ret = (!(t == T_NONE || t >= T_FIXNUM || t == T_ICLASS) && - !rb_objspace_garbage_object_p(obj)); - - if (poisoned) { - asan_poison_object(obj); + if (wmap_live_p(key_obj) && wmap_live_p(val_obj)) { + data->func(key_obj, val_obj, data->arg); } + else { + wmap_free_entry((VALUE *)key, (VALUE *)val); - return ret; -} - -static int -wmap_remove_inverse_ref(st_data_t *key, st_data_t *val, st_data_t arg, int existing) -{ - if (!existing) return ST_STOP; - - VALUE old_ref = (VALUE)arg; - - VALUE *values = (VALUE *)*val; - VALUE size = values[0]; - - if (size == 1) { - // fast path, we only had one backref - RUBY_ASSERT(values[1] == old_ref); - ruby_sized_xfree(values, 2 * sizeof(VALUE)); return ST_DELETE; } - bool found = false; - VALUE index = 1; - for (; index <= size; index++) { - if (values[index] == old_ref) { - found = true; - break; - } - } - if (!found) return ST_STOP; - - if (size > index) { - MEMMOVE(&values[index], &values[index + 1], VALUE, size - index); - } - - size -= 1; - values[0] = size; - SIZED_REALLOC_N(values, VALUE, size + 1, size + 2); - *val = (st_data_t)values; return ST_CONTINUE; } -/* :nodoc: */ -static VALUE -wmap_finalize(RB_BLOCK_CALL_FUNC_ARGLIST(objid, self)) +static void +wmap_foreach(struct weakmap *w, void (*func)(VALUE, VALUE, st_data_t), st_data_t arg) { - st_data_t orig, wmap, data; - VALUE obj, *rids, i, size; - struct weakmap *w; + struct wmap_foreach_data foreach_data = { + .w = w, + .func = func, + .arg = arg, + }; - TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); - /* Get reference from object id. */ - if (UNDEF_P(obj = rb_gc_id2ref_obj_tbl(objid))) { - rb_bug("wmap_finalize: objid is not found."); - } - - /* obj is original referenced object and/or weak reference. 
*/ - orig = (st_data_t)obj; - if (st_delete(w->obj2wmap, &orig, &data)) { - rids = (VALUE *)data; - size = *rids++; - for (i = 0; i < size; ++i) { - wmap = (st_data_t)rids[i]; - st_delete(w->wmap2obj, &wmap, NULL); - } - ruby_sized_xfree((VALUE *)data, (size + 1) * sizeof(VALUE)); - } - - wmap = (st_data_t)obj; - if (st_delete(w->wmap2obj, &wmap, &orig)) { - wmap = (st_data_t)obj; - st_update(w->obj2wmap, orig, wmap_remove_inverse_ref, wmap); - } - return self; + st_foreach(w->table, wmap_foreach_i, (st_data_t)&foreach_data); } static VALUE @@ -225,19 +225,15 @@ wmap_inspect_append(VALUE str, VALUE obj) if (SPECIAL_CONST_P(obj)) { return rb_str_append(str, rb_inspect(obj)); } - else if (wmap_live_p(obj)) { - return rb_str_append(str, rb_any_to_s(obj)); - } else { - return rb_str_catf(str, "#", (void*)obj); + return rb_str_append(str, rb_any_to_s(obj)); } } -static int -wmap_inspect_i(st_data_t key, st_data_t val, st_data_t arg) +static void +wmap_inspect_i(VALUE key, VALUE val, st_data_t data) { - VALUE str = (VALUE)arg; - VALUE k = (VALUE)key, v = (VALUE)val; + VALUE str = (VALUE)data; if (RSTRING_PTR(str)[0] == '#') { rb_str_cat2(str, ", "); @@ -246,11 +242,10 @@ wmap_inspect_i(st_data_t key, st_data_t val, st_data_t arg) rb_str_cat2(str, ": "); RSTRING_PTR(str)[0] = '#'; } - wmap_inspect_append(str, k); - rb_str_cat2(str, " => "); - wmap_inspect_append(str, v); - return ST_CONTINUE; + wmap_inspect_append(str, key); + rb_str_cat2(str, " => "); + wmap_inspect_append(str, val); } static VALUE @@ -261,30 +256,19 @@ wmap_inspect(VALUE self) TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); VALUE str = rb_sprintf("-<%"PRIsVALUE":%p", c, (void *)self); - if (w->wmap2obj) { - st_foreach(w->wmap2obj, wmap_inspect_i, (st_data_t)str); - } + + wmap_foreach(w, wmap_inspect_i, (st_data_t)str); + RSTRING_PTR(str)[0] = '#'; rb_str_cat2(str, ">"); + return str; } -static inline bool -wmap_live_entry_p(st_data_t key, st_data_t val) +static void +wmap_each_i(VALUE key, VALUE val, st_data_t _) { - return wmap_live_p((VALUE)key) && wmap_live_p((VALUE)val); -} - -static int -wmap_each_i(st_data_t key, st_data_t val, st_data_t _) -{ - if (wmap_live_entry_p(key, val)) { - rb_yield_values(2, (VALUE)key, (VALUE)val); - return ST_CONTINUE; - } - else { - return ST_DELETE; - } + rb_yield_values(2, key, val); } /* Iterates over keys and objects in a weakly referenced object */ @@ -292,22 +276,17 @@ static VALUE wmap_each(VALUE self) { struct weakmap *w; - TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); - st_foreach(w->wmap2obj, wmap_each_i, (st_data_t)0); + + wmap_foreach(w, wmap_each_i, (st_data_t)0); + return self; } -static int -wmap_each_key_i(st_data_t key, st_data_t val, st_data_t arg) +static void +wmap_each_key_i(VALUE key, VALUE _val, st_data_t _data) { - if (wmap_live_entry_p(key, val)) { - rb_yield((VALUE)key); - return ST_CONTINUE; - } - else { - return ST_DELETE; - } + rb_yield(key); } /* Iterates over keys and objects in a weakly referenced object */ @@ -315,22 +294,17 @@ static VALUE wmap_each_key(VALUE self) { struct weakmap *w; - TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); - st_foreach(w->wmap2obj, wmap_each_key_i, (st_data_t)0); + + wmap_foreach(w, wmap_each_key_i, (st_data_t)0); + return self; } -static int -wmap_each_value_i(st_data_t key, st_data_t val, st_data_t arg) +static void +wmap_each_value_i(VALUE _key, VALUE val, st_data_t _data) { - if (wmap_live_entry_p(key, val)) { - rb_yield((VALUE)val); - return ST_CONTINUE; - } - else { - return 
ST_DELETE; - } + rb_yield(val); } /* Iterates over keys and objects in a weakly referenced object */ @@ -338,24 +312,19 @@ static VALUE wmap_each_value(VALUE self) { struct weakmap *w; - TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); - st_foreach(w->wmap2obj, wmap_each_value_i, (st_data_t)0); + + wmap_foreach(w, wmap_each_value_i, (st_data_t)0); + return self; } -static int -wmap_keys_i(st_data_t key, st_data_t val, st_data_t arg) +static void +wmap_keys_i(st_data_t key, st_data_t _, st_data_t arg) { VALUE ary = (VALUE)arg; - if (wmap_live_entry_p(key, val)) { - rb_ary_push(ary, (VALUE)key); - return ST_CONTINUE; - } - else { - return ST_DELETE; - } + rb_ary_push(ary, key); } /* Iterates over keys and objects in a weakly referenced object */ @@ -366,22 +335,17 @@ wmap_keys(VALUE self) TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); VALUE ary = rb_ary_new(); - st_foreach(w->wmap2obj, wmap_keys_i, (st_data_t)ary); + wmap_foreach(w, wmap_keys_i, (st_data_t)ary); + return ary; } -static int +static void wmap_values_i(st_data_t key, st_data_t val, st_data_t arg) { VALUE ary = (VALUE)arg; - if (wmap_live_entry_p(key, val)) { - rb_ary_push(ary, (VALUE)val); - return ST_CONTINUE; - } - else { - return ST_DELETE; - } + rb_ary_push(ary, (VALUE)val); } /* Iterates over values and objects in a weakly referenced object */ @@ -392,39 +356,11 @@ wmap_values(VALUE self) TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); VALUE ary = rb_ary_new(); - st_foreach(w->wmap2obj, wmap_values_i, (st_data_t)ary); + wmap_foreach(w, wmap_values_i, (st_data_t)ary); + return ary; } -static int -wmap_aset_update(st_data_t *key, st_data_t *val, st_data_t arg, int existing) -{ - VALUE size, *ptr, *optr; - if (existing) { - size = (ptr = optr = (VALUE *)*val)[0]; - - for (VALUE index = 1; index <= size; index++) { - if (ptr[index] == (VALUE)arg) { - // The reference was already registered. 
- return ST_STOP; - } - } - - ++size; - SIZED_REALLOC_N(ptr, VALUE, size + 1, size); - } - else { - optr = 0; - size = 1; - ptr = ruby_xmalloc(2 * sizeof(VALUE)); - } - ptr[0] = size; - ptr[size] = (VALUE)arg; - if (ptr == optr) return ST_STOP; - *val = (st_data_t)ptr; - return ST_CONTINUE; -} - static VALUE nonspecial_obj_id(VALUE obj) { @@ -437,57 +373,39 @@ nonspecial_obj_id(VALUE obj) #endif } -struct wmap_aset_replace_args { - VALUE new_value; - VALUE old_value; -}; - static int -wmap_aset_replace_value(st_data_t *key, st_data_t *val, st_data_t _args, int existing) +wmap_aset_replace(st_data_t *key, st_data_t *val, st_data_t new_key, int existing) { - struct wmap_aset_replace_args *args = (struct wmap_aset_replace_args *)_args; - if (existing) { - args->old_value = *val; + VALUE *orig_pair = ((VALUE *)*key); + assert(orig_pair[0] == *(VALUE *)new_key); + + wmap_free_entry(orig_pair, orig_pair + 1); } - *val = (st_data_t)args->new_value; + + *key = new_key; + *val = (st_data_t)(((VALUE *)new_key) + 1); + return ST_CONTINUE; } /* Creates a weak reference from the given key to the given value */ static VALUE -wmap_aset(VALUE self, VALUE key, VALUE value) +wmap_aset(VALUE self, VALUE key, VALUE val) { struct weakmap *w; - TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); - if (FL_ABLE(value)) { - rb_define_finalizer_no_check(value, w->final); - } - if (FL_ABLE(key)) { - rb_define_finalizer_no_check(key, w->final); - } - struct wmap_aset_replace_args aset_args = { - .new_value = value, - .old_value = Qundef, - }; - st_update(w->wmap2obj, (st_data_t)key, wmap_aset_replace_value, (st_data_t)&aset_args); + VALUE *pair = xmalloc(sizeof(VALUE) * 2); + pair[0] = key; + pair[1] = val; - // If the value is unchanged, we have nothing to do. - if (value != aset_args.old_value) { - if (!UNDEF_P(aset_args.old_value) && FL_ABLE(aset_args.old_value)) { - // That key existed and had an inverse reference, we need to clear the outdated inverse reference. - st_update(w->obj2wmap, (st_data_t)aset_args.old_value, wmap_remove_inverse_ref, key); - } + st_update(w->table, (st_data_t)pair, wmap_aset_replace, (st_data_t)pair); - if (FL_ABLE(value)) { - // If the value has no finalizer, we don't need to keep the inverse reference - st_update(w->obj2wmap, (st_data_t)value, wmap_aset_update, key); - } - } + RB_OBJ_WRITTEN(self, Qundef, key); + RB_OBJ_WRITTEN(self, Qundef, val); - return nonspecial_obj_id(value); + return nonspecial_obj_id(val); } /* Retrieves a weakly referenced object with the given key */ @@ -500,11 +418,11 @@ wmap_lookup(VALUE self, VALUE key) TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); st_data_t data; - if (!st_lookup(w->wmap2obj, (st_data_t)key, &data)) return Qundef; + if (!st_lookup(w->table, (st_data_t)&key, &data)) return Qundef; - VALUE obj = (VALUE)data; - if (!wmap_live_p(obj)) return Qundef; - return obj; + if (!wmap_live_p(*(VALUE *)data)) return Qundef; + + return *(VALUE *)data; } /* Retrieves a weakly referenced object with the given key */ @@ -519,20 +437,23 @@ wmap_aref(VALUE self, VALUE key) static VALUE wmap_delete(VALUE self, VALUE key) { - assert(wmap_live_p(key)); - struct weakmap *w; TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); - VALUE old_value = Qnil; - if (st_delete(w->wmap2obj, (st_data_t *)&key, (st_data_t *)&old_value)) { - if (FL_ABLE(old_value)) { - // That key existed and had an inverse reference, we need to clear the outdated inverse reference. 
- st_update(w->obj2wmap, (st_data_t)old_value, wmap_remove_inverse_ref, key); + VALUE orig_key = key; + st_data_t orig_key_data = (st_data_t)&orig_key; + st_data_t orig_val_data; + if (st_delete(w->table, &orig_key_data, &orig_val_data)) { + VALUE orig_val = *(VALUE *)orig_val_data; + + wmap_free_entry((VALUE *)orig_key_data, (VALUE *)orig_val_data); + + if (wmap_live_p(orig_val)) { + return orig_val; } - return old_value; } - else if (rb_block_given_p()) { + + if (rb_block_given_p()) { return rb_yield(key); } else { @@ -552,10 +473,10 @@ static VALUE wmap_size(VALUE self) { struct weakmap *w; - st_index_t n; - TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); - n = w->wmap2obj->num_entries; + + st_index_t n = st_table_size(w->table); + #if SIZEOF_ST_INDEX_T <= SIZEOF_LONG return ULONG2NUM(n); #else