Implement WeakMap using weak references

Peter Zhu 2023-07-24 15:26:10 -04:00
Parent 2091bf9493
Commit ee9cc8e32e
2 changed files with 238 additions and 305 deletions


@@ -193,9 +193,10 @@ class TestWeakMap < Test::Unit::TestCase
     end;
   end
 
-  def test_compaction_bug_19529
+  def test_compaction
     omit "compaction is not supported on this platform" unless GC.respond_to?(:compact)
 
+    # [Bug #19529]
     obj = Object.new
     100.times do |i|
       GC.compact
@@ -211,6 +212,17 @@ class TestWeakMap < Test::Unit::TestCase
       end
 
       GC.compact
     end;
+
+    assert_separately(%w(-robjspace), <<-'end;')
+      wm = ObjectSpace::WeakMap.new
+      key = Object.new
+      val = Object.new
+      wm[key] = val
+
+      GC.verify_compaction_references(expand_heap: true, toward: :empty)
+
+      assert_equal(val, wm[key])
+    end;
   end
 
   def test_replaced_values_bug_19531

weakmap.c: 529 lines changed

@@ -6,43 +6,132 @@
 #include "ruby/st.h"
 
+/* ===== WeakMap =====
+ *
+ * WeakMap contains one ST table which contains a pointer to the object as the
+ * key and a pointer to the object as the value. This means that the key and
+ * value of the table are both of the type `VALUE *`.
+ *
+ * The objects are not directly stored as keys and values in the table because
+ * `rb_gc_mark_weak` requires a pointer to the memory location to overwrite
+ * when the object is reclaimed. Using a pointer into the ST table entry is not
+ * safe because the pointer can change when the ST table is resized.
+ *
+ * WeakMap hashes and compares using the pointer address of the object.
+ *
+ * For performance and memory efficiency reasons, the key and value
+ * are allocated at the same time and adjacent to each other.
+ *
+ * During GC and while iterating, reclaimed entries (i.e. either the key or
+ * value points to `Qundef`) are removed from the ST table.
+ */
+
 struct weakmap {
-    st_table *obj2wmap;     /* obj -> [ref,...] */
-    st_table *wmap2obj;     /* ref -> obj */
-    VALUE final;
+    st_table *table;
 };
 
-static int
-wmap_replace_ref(st_data_t *key, st_data_t *value, st_data_t _argp, int existing)
-{
-    *key = rb_gc_location((VALUE)*key);
-
-    VALUE *values = (VALUE *)*value;
-    VALUE size = values[0];
-
-    for (VALUE index = 1; index <= size; index++) {
-        values[index] = rb_gc_location(values[index]);
-    }
-
-    return ST_CONTINUE;
-}
-
-static int
-wmap_foreach_replace(st_data_t key, st_data_t value, st_data_t _argp, int error)
-{
-    if (rb_gc_location((VALUE)key) != (VALUE)key) {
-        return ST_REPLACE;
-    }
-
-    VALUE *values = (VALUE *)value;
-    VALUE size = values[0];
-
-    for (VALUE index = 1; index <= size; index++) {
-        VALUE val = values[index];
-        if (rb_gc_location(val) != val) {
-            return ST_REPLACE;
-        }
-    }
-
-    return ST_CONTINUE;
-}
+static bool
+wmap_live_p(VALUE obj)
+{
+    return !UNDEF_P(obj);
+}
+
+static void
+wmap_free_entry(VALUE *key, VALUE *val)
+{
+    assert(key + 1 == val);
+
+    /* We only need to free key because val is allocated beside key in the
+     * same malloc call. */
+    ruby_sized_xfree(key, sizeof(VALUE) * 2);
+}
+
+static int
+wmap_mark_weak_table_i(st_data_t key, st_data_t val, st_data_t _)
+{
+    VALUE key_obj = *(VALUE *)key;
+    VALUE val_obj = *(VALUE *)val;
+
+    if (wmap_live_p(key_obj) && wmap_live_p(val_obj)) {
+        rb_gc_mark_weak((VALUE *)key);
+        rb_gc_mark_weak((VALUE *)val);
+
+        return ST_CONTINUE;
+    }
+    else {
+        wmap_free_entry((VALUE *)key, (VALUE *)val);
+
+        return ST_DELETE;
+    }
+}
+
+static void
+wmap_mark(void *ptr)
+{
+    struct weakmap *w = ptr;
+    if (w->table) {
+        st_foreach(w->table, wmap_mark_weak_table_i, (st_data_t)0);
+    }
+}
+
+static int
+wmap_free_table_i(st_data_t key, st_data_t val, st_data_t arg)
+{
+    wmap_free_entry((VALUE *)key, (VALUE *)val);
+    return ST_CONTINUE;
+}
+
+static void
+wmap_free(void *ptr)
+{
+    struct weakmap *w = ptr;
+
+    st_foreach(w->table, wmap_free_table_i, 0);
+    st_free_table(w->table);
+
+    xfree(w);
+}
+
+static size_t
+wmap_memsize(const void *ptr)
+{
+    const struct weakmap *w = ptr;
+
+    size_t size = sizeof(*w);
+    size += st_memsize(w->table);
+    /* The key and value of the table each take sizeof(VALUE) in size. */
+    size += st_table_size(w->table) * (2 * sizeof(VALUE));
+
+    return size;
+}
+
+static int
+wmap_compact_table_i(st_data_t key, st_data_t val, st_data_t data)
+{
+    st_table *table = (st_table *)data;
+
+    VALUE key_obj = *(VALUE *)key;
+    VALUE val_obj = *(VALUE *)val;
+
+    if (wmap_live_p(key_obj) && wmap_live_p(val_obj)) {
+        VALUE new_key_obj = rb_gc_location(key_obj);
+
+        *(VALUE *)val = rb_gc_location(val_obj);
+
+        /* If the key object moves, then we must reinsert because the hash is
+         * based on the pointer rather than the object itself. */
+        if (key_obj != new_key_obj) {
+            *(VALUE *)key = new_key_obj;
+
+            st_insert(table, key, val);
+
+            return ST_DELETE;
+        }
+    }
+    else {
+        wmap_free_entry((VALUE *)key, (VALUE *)val);
+
+        return ST_DELETE;
+    }
+
+    return ST_CONTINUE;
+}
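To make the layout described in the header comment concrete: each key/value pair lives in a single two-slot allocation, the table keys on a pointer to the key slot, and hashing and comparison dereference that pointer. Below is a minimal, self-contained sketch of that idea; it is not code from this commit, and the names wm_entry_alloc, wm_hash, and wm_eq are made up for illustration.

#include <stdlib.h>
#include <assert.h>

typedef unsigned long VALUE; /* stand-in for Ruby's VALUE */

/* One entry: key at pair[0], value at pair[1], allocated together. The table
 * stores &pair[0] as its key and &pair[1] as its value, so the GC can
 * overwrite either slot in place when an object is reclaimed. */
static VALUE *
wm_entry_alloc(VALUE key, VALUE val)
{
    VALUE *pair = malloc(2 * sizeof(VALUE));
    assert(pair != NULL);
    pair[0] = key;
    pair[1] = val;
    return pair;
}

/* Hash and compare through the pointer, mirroring wmap_hash/wmap_cmp: two
 * table keys are equal when the VALUEs they point at are equal, regardless
 * of which allocation they live in. */
static unsigned long
wm_hash(const VALUE *p)
{
    return (unsigned long)*p;
}

static int
wm_eq(const VALUE *x, const VALUE *y)
{
    return *x == *y;
}

int
main(void)
{
    VALUE *pair = wm_entry_alloc(0x1000, 0x2000);
    VALUE probe = 0x1000; /* stack copy of the key, as wmap_lookup passes &key */
    assert(wm_eq(pair, &probe));
    assert(wm_hash(pair) == wm_hash(&probe));
    free(pair); /* one free releases both slots, as in wmap_free_entry */
    return 0;
}

Because the hash derives from the object's address, moving the key during compaction changes its bucket; that is exactly why wmap_compact_table_i above must delete and reinsert an entry whose key object moved.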
@@ -51,54 +140,8 @@ static void
 wmap_compact(void *ptr)
 {
     struct weakmap *w = ptr;
-    if (w->wmap2obj) rb_gc_update_tbl_refs(w->wmap2obj);
-    if (w->obj2wmap) st_foreach_with_replace(w->obj2wmap, wmap_foreach_replace, wmap_replace_ref, (st_data_t)NULL);
-    w->final = rb_gc_location(w->final);
-}
-
-static void
-wmap_mark(void *ptr)
-{
-    struct weakmap *w = ptr;
-    rb_gc_mark_movable(w->final);
-}
-
-static int
-wmap_free_map(st_data_t key, st_data_t val, st_data_t arg)
-{
-    VALUE *ptr = (VALUE *)val;
-    ruby_sized_xfree(ptr, (ptr[0] + 1) * sizeof(VALUE));
-    return ST_CONTINUE;
-}
-
-static void
-wmap_free(void *ptr)
-{
-    struct weakmap *w = ptr;
-    st_foreach(w->obj2wmap, wmap_free_map, 0);
-    st_free_table(w->obj2wmap);
-    st_free_table(w->wmap2obj);
-    xfree(w);
-}
-
-static int
-wmap_memsize_map(st_data_t key, st_data_t val, st_data_t arg)
-{
-    VALUE *ptr = (VALUE *)val;
-    *(size_t *)arg += (ptr[0] + 1) * sizeof(VALUE);
-    return ST_CONTINUE;
-}
-
-static size_t
-wmap_memsize(const void *ptr)
-{
-    size_t size;
-    const struct weakmap *w = ptr;
-    size = sizeof(*w);
-    size += st_memsize(w->obj2wmap);
-    size += st_memsize(w->wmap2obj);
-    st_foreach(w->obj2wmap, wmap_memsize_map, (st_data_t)&size);
-    return size;
-}
+    st_foreach(w->table, wmap_compact_table_i, (st_data_t)w->table);
+}
 
 static const rb_data_type_t weakmap_type = {
@@ -112,111 +155,68 @@ static const rb_data_type_t weakmap_type = {
     0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
 };
 
-static VALUE wmap_finalize(RB_BLOCK_CALL_FUNC_ARGLIST(objid, self));
+static int
+wmap_cmp(st_data_t x, st_data_t y)
+{
+    return *(VALUE *)x != *(VALUE *)y;
+}
+
+static st_index_t
+wmap_hash(st_data_t n)
+{
+    return st_numhash(*(VALUE *)n);
+}
+
+static const struct st_hash_type wmap_hash_type = {
+    wmap_cmp,
+    wmap_hash,
+};
 
 static VALUE
 wmap_allocate(VALUE klass)
 {
     struct weakmap *w;
     VALUE obj = TypedData_Make_Struct(klass, struct weakmap, &weakmap_type, w);
-    w->obj2wmap = rb_init_identtable();
-    w->wmap2obj = rb_init_identtable();
-    RB_OBJ_WRITE(obj, &w->final, rb_func_lambda_new(wmap_finalize, obj, 1, 1));
+    w->table = st_init_table(&wmap_hash_type);
     return obj;
 }
 
-static int
-wmap_live_p(VALUE obj)
-{
-    if (SPECIAL_CONST_P(obj)) return TRUE;
-
-    /* If rb_gc_is_ptr_to_obj returns false, the page could be in the tomb heap
-     * or have already been freed. */
-    if (!rb_gc_is_ptr_to_obj((void *)obj)) return FALSE;
-
-    void *poisoned = asan_poisoned_object_p(obj);
-    asan_unpoison_object(obj, false);
-
-    enum ruby_value_type t = BUILTIN_TYPE(obj);
-    int ret = (!(t == T_NONE || t >= T_FIXNUM || t == T_ICLASS) &&
-               !rb_objspace_garbage_object_p(obj));
-
-    if (poisoned) {
-        asan_poison_object(obj);
-    }
-
-    return ret;
-}
-
-static int
-wmap_remove_inverse_ref(st_data_t *key, st_data_t *val, st_data_t arg, int existing)
-{
-    if (!existing) return ST_STOP;
-
-    VALUE old_ref = (VALUE)arg;
-
-    VALUE *values = (VALUE *)*val;
-    VALUE size = values[0];
-
-    if (size == 1) {
-        // fast path, we only had one backref
-        RUBY_ASSERT(values[1] == old_ref);
-        ruby_sized_xfree(values, 2 * sizeof(VALUE));
-        return ST_DELETE;
-    }
-
-    bool found = false;
-    VALUE index = 1;
-    for (; index <= size; index++) {
-        if (values[index] == old_ref) {
-            found = true;
-            break;
-        }
-    }
-    if (!found) return ST_STOP;
-
-    if (size > index) {
-        MEMMOVE(&values[index], &values[index + 1], VALUE, size - index);
-    }
-    size -= 1;
-    values[0] = size;
-    SIZED_REALLOC_N(values, VALUE, size + 1, size + 2);
-    *val = (st_data_t)values;
-
-    return ST_CONTINUE;
-}
-
-/* :nodoc: */
-static VALUE
-wmap_finalize(RB_BLOCK_CALL_FUNC_ARGLIST(objid, self))
-{
-    st_data_t orig, wmap, data;
-    VALUE obj, *rids, i, size;
-    struct weakmap *w;
-
-    TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
-    /* Get reference from object id. */
-    if (UNDEF_P(obj = rb_gc_id2ref_obj_tbl(objid))) {
-        rb_bug("wmap_finalize: objid is not found.");
-    }
-
-    /* obj is original referenced object and/or weak reference. */
-    orig = (st_data_t)obj;
-    if (st_delete(w->obj2wmap, &orig, &data)) {
-        rids = (VALUE *)data;
-        size = *rids++;
-        for (i = 0; i < size; ++i) {
-            wmap = (st_data_t)rids[i];
-            st_delete(w->wmap2obj, &wmap, NULL);
-        }
-        ruby_sized_xfree((VALUE *)data, (size + 1) * sizeof(VALUE));
-    }
-
-    wmap = (st_data_t)obj;
-    if (st_delete(w->wmap2obj, &wmap, &orig)) {
-        wmap = (st_data_t)obj;
-        st_update(w->obj2wmap, orig, wmap_remove_inverse_ref, wmap);
-    }
-    return self;
+struct wmap_foreach_data {
+    struct weakmap *w;
+    void (*func)(VALUE, VALUE, st_data_t);
+    st_data_t arg;
+};
+
+static int
+wmap_foreach_i(st_data_t key, st_data_t val, st_data_t arg)
+{
+    struct wmap_foreach_data *data = (struct wmap_foreach_data *)arg;
+
+    VALUE key_obj = *(VALUE *)key;
+    VALUE val_obj = *(VALUE *)val;
+
+    if (wmap_live_p(key_obj) && wmap_live_p(val_obj)) {
+        data->func(key_obj, val_obj, data->arg);
+    }
+    else {
+        wmap_free_entry((VALUE *)key, (VALUE *)val);
+
+        return ST_DELETE;
+    }
+
+    return ST_CONTINUE;
+}
+
+static void
+wmap_foreach(struct weakmap *w, void (*func)(VALUE, VALUE, st_data_t), st_data_t arg)
+{
+    struct wmap_foreach_data foreach_data = {
+        .w = w,
+        .func = func,
+        .arg = arg,
+    };
+
+    st_foreach(w->table, wmap_foreach_i, (st_data_t)&foreach_data);
 }
 
 static VALUE
@@ -225,19 +225,15 @@ wmap_inspect_append(VALUE str, VALUE obj)
     if (SPECIAL_CONST_P(obj)) {
         return rb_str_append(str, rb_inspect(obj));
     }
-    else if (wmap_live_p(obj)) {
-        return rb_str_append(str, rb_any_to_s(obj));
-    }
     else {
-        return rb_str_catf(str, "#<collected:%p>", (void*)obj);
+        return rb_str_append(str, rb_any_to_s(obj));
     }
 }
 
-static int
-wmap_inspect_i(st_data_t key, st_data_t val, st_data_t arg)
+static void
+wmap_inspect_i(VALUE key, VALUE val, st_data_t data)
 {
-    VALUE str = (VALUE)arg;
-    VALUE k = (VALUE)key, v = (VALUE)val;
+    VALUE str = (VALUE)data;
 
     if (RSTRING_PTR(str)[0] == '#') {
         rb_str_cat2(str, ", ");
@@ -246,11 +242,10 @@ wmap_inspect_i(st_data_t key, st_data_t val, st_data_t arg)
         rb_str_cat2(str, ": ");
         RSTRING_PTR(str)[0] = '#';
     }
-    wmap_inspect_append(str, k);
-    rb_str_cat2(str, " => ");
-    wmap_inspect_append(str, v);
-
-    return ST_CONTINUE;
+    wmap_inspect_append(str, key);
+    rb_str_cat2(str, " => ");
+    wmap_inspect_append(str, val);
 }
 
 static VALUE
@@ -261,30 +256,19 @@ wmap_inspect(VALUE self)
     TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
 
     VALUE str = rb_sprintf("-<%"PRIsVALUE":%p", c, (void *)self);
-    if (w->wmap2obj) {
-        st_foreach(w->wmap2obj, wmap_inspect_i, (st_data_t)str);
-    }
+
+    wmap_foreach(w, wmap_inspect_i, (st_data_t)str);
+
     RSTRING_PTR(str)[0] = '#';
     rb_str_cat2(str, ">");
 
     return str;
 }
 
-static inline bool
-wmap_live_entry_p(st_data_t key, st_data_t val)
-{
-    return wmap_live_p((VALUE)key) && wmap_live_p((VALUE)val);
-}
-
-static int
-wmap_each_i(st_data_t key, st_data_t val, st_data_t _)
-{
-    if (wmap_live_entry_p(key, val)) {
-        rb_yield_values(2, (VALUE)key, (VALUE)val);
-        return ST_CONTINUE;
-    }
-    else {
-        return ST_DELETE;
-    }
+static void
+wmap_each_i(VALUE key, VALUE val, st_data_t _)
+{
+    rb_yield_values(2, key, val);
 }
 
 /* Iterates over keys and objects in a weakly referenced object */
@@ -292,22 +276,17 @@ static VALUE
 wmap_each(VALUE self)
 {
     struct weakmap *w;
     TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
 
-    st_foreach(w->wmap2obj, wmap_each_i, (st_data_t)0);
+    wmap_foreach(w, wmap_each_i, (st_data_t)0);
+
     return self;
 }
 
-static int
-wmap_each_key_i(st_data_t key, st_data_t val, st_data_t arg)
-{
-    if (wmap_live_entry_p(key, val)) {
-        rb_yield((VALUE)key);
-        return ST_CONTINUE;
-    }
-    else {
-        return ST_DELETE;
-    }
+static void
+wmap_each_key_i(VALUE key, VALUE _val, st_data_t _data)
+{
+    rb_yield(key);
 }
 
 /* Iterates over keys and objects in a weakly referenced object */
@@ -315,22 +294,17 @@ static VALUE
 wmap_each_key(VALUE self)
 {
     struct weakmap *w;
     TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
 
-    st_foreach(w->wmap2obj, wmap_each_key_i, (st_data_t)0);
+    wmap_foreach(w, wmap_each_key_i, (st_data_t)0);
+
    return self;
 }
 
-static int
-wmap_each_value_i(st_data_t key, st_data_t val, st_data_t arg)
-{
-    if (wmap_live_entry_p(key, val)) {
-        rb_yield((VALUE)val);
-        return ST_CONTINUE;
-    }
-    else {
-        return ST_DELETE;
-    }
+static void
+wmap_each_value_i(VALUE _key, VALUE val, st_data_t _data)
+{
+    rb_yield(val);
 }
 
 /* Iterates over keys and objects in a weakly referenced object */
@@ -338,24 +312,19 @@ static VALUE
 wmap_each_value(VALUE self)
 {
     struct weakmap *w;
     TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
 
-    st_foreach(w->wmap2obj, wmap_each_value_i, (st_data_t)0);
+    wmap_foreach(w, wmap_each_value_i, (st_data_t)0);
+
     return self;
 }
 
-static int
-wmap_keys_i(st_data_t key, st_data_t val, st_data_t arg)
+static void
+wmap_keys_i(st_data_t key, st_data_t _, st_data_t arg)
 {
     VALUE ary = (VALUE)arg;
 
-    if (wmap_live_entry_p(key, val)) {
-        rb_ary_push(ary, (VALUE)key);
-        return ST_CONTINUE;
-    }
-    else {
-        return ST_DELETE;
-    }
+    rb_ary_push(ary, key);
 }
 
 /* Iterates over keys and objects in a weakly referenced object */
@@ -366,22 +335,17 @@ wmap_keys(VALUE self)
     TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
 
     VALUE ary = rb_ary_new();
-    st_foreach(w->wmap2obj, wmap_keys_i, (st_data_t)ary);
+    wmap_foreach(w, wmap_keys_i, (st_data_t)ary);
 
     return ary;
 }
 
-static int
+static void
 wmap_values_i(st_data_t key, st_data_t val, st_data_t arg)
 {
     VALUE ary = (VALUE)arg;
 
-    if (wmap_live_entry_p(key, val)) {
-        rb_ary_push(ary, (VALUE)val);
-        return ST_CONTINUE;
-    }
-    else {
-        return ST_DELETE;
-    }
+    rb_ary_push(ary, (VALUE)val);
 }
 
 /* Iterates over values and objects in a weakly referenced object */
@@ -392,39 +356,11 @@ wmap_values(VALUE self)
     TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
 
     VALUE ary = rb_ary_new();
-    st_foreach(w->wmap2obj, wmap_values_i, (st_data_t)ary);
+    wmap_foreach(w, wmap_values_i, (st_data_t)ary);
 
     return ary;
 }
 
-static int
-wmap_aset_update(st_data_t *key, st_data_t *val, st_data_t arg, int existing)
-{
-    VALUE size, *ptr, *optr;
-
-    if (existing) {
-        size = (ptr = optr = (VALUE *)*val)[0];
-        for (VALUE index = 1; index <= size; index++) {
-            if (ptr[index] == (VALUE)arg) {
-                // The reference was already registered.
-                return ST_STOP;
-            }
-        }
-        ++size;
-        SIZED_REALLOC_N(ptr, VALUE, size + 1, size);
-    }
-    else {
-        optr = 0;
-        size = 1;
-        ptr = ruby_xmalloc(2 * sizeof(VALUE));
-    }
-
-    ptr[0] = size;
-    ptr[size] = (VALUE)arg;
-    if (ptr == optr) return ST_STOP;
-    *val = (st_data_t)ptr;
-    return ST_CONTINUE;
-}
-
 static VALUE
 nonspecial_obj_id(VALUE obj)
 {
@@ -437,57 +373,39 @@ nonspecial_obj_id(VALUE obj)
 #endif
 }
 
-struct wmap_aset_replace_args {
-    VALUE new_value;
-    VALUE old_value;
-};
-
 static int
-wmap_aset_replace_value(st_data_t *key, st_data_t *val, st_data_t _args, int existing)
+wmap_aset_replace(st_data_t *key, st_data_t *val, st_data_t new_key, int existing)
 {
-    struct wmap_aset_replace_args *args = (struct wmap_aset_replace_args *)_args;
-
     if (existing) {
-        args->old_value = *val;
+        VALUE *orig_pair = ((VALUE *)*key);
+        assert(orig_pair[0] == *(VALUE *)new_key);
+        wmap_free_entry(orig_pair, orig_pair + 1);
     }
-    *val = (st_data_t)args->new_value;
+
+    *key = new_key;
+    *val = (st_data_t)(((VALUE *)new_key) + 1);
+
     return ST_CONTINUE;
 }
 
 /* Creates a weak reference from the given key to the given value */
 static VALUE
-wmap_aset(VALUE self, VALUE key, VALUE value)
+wmap_aset(VALUE self, VALUE key, VALUE val)
 {
     struct weakmap *w;
     TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
 
-    if (FL_ABLE(value)) {
-        rb_define_finalizer_no_check(value, w->final);
-    }
-    if (FL_ABLE(key)) {
-        rb_define_finalizer_no_check(key, w->final);
-    }
-
-    struct wmap_aset_replace_args aset_args = {
-        .new_value = value,
-        .old_value = Qundef,
-    };
-    st_update(w->wmap2obj, (st_data_t)key, wmap_aset_replace_value, (st_data_t)&aset_args);
-
-    // If the value is unchanged, we have nothing to do.
-    if (value != aset_args.old_value) {
-        if (!UNDEF_P(aset_args.old_value) && FL_ABLE(aset_args.old_value)) {
-            // That key existed and had an inverse reference, we need to clear the outdated inverse reference.
-            st_update(w->obj2wmap, (st_data_t)aset_args.old_value, wmap_remove_inverse_ref, key);
-        }
-
-        if (FL_ABLE(value)) {
-            // If the value has no finalizer, we don't need to keep the inverse reference
-            st_update(w->obj2wmap, (st_data_t)value, wmap_aset_update, key);
-        }
-    }
+    VALUE *pair = xmalloc(sizeof(VALUE) * 2);
+    pair[0] = key;
+    pair[1] = val;
+
+    st_update(w->table, (st_data_t)pair, wmap_aset_replace, (st_data_t)pair);
+
+    RB_OBJ_WRITTEN(self, Qundef, key);
+    RB_OBJ_WRITTEN(self, Qundef, val);
 
-    return nonspecial_obj_id(value);
+    return nonspecial_obj_id(val);
 }
 
 /* Retrieves a weakly referenced object with the given key */
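The hunk above shows that wmap_aset always allocates the new pair first and lets st_update swap it in: on an existing key, wmap_aset_replace frees the old pair and points both the table's key cell and value cell into the new allocation. Below is a rough, self-contained model of that replace step, not code from this commit; the wm_replace helper and the bare-pointer "table cells" are stand-ins for what st_update hands the callback.

#include <stdlib.h>
#include <assert.h>

typedef unsigned long VALUE;

/* Model of wmap_aset_replace: on a hit, free the previous pair (key and
 * value share one allocation), then repoint both table cells at the new
 * pair so the table's key never dangles. */
static void
wm_replace(VALUE **stored_key, VALUE **stored_val, VALUE *new_pair, int existing)
{
    if (existing) {
        assert((*stored_key)[0] == new_pair[0]); /* same key object */
        free(*stored_key);
    }
    *stored_key = new_pair;
    *stored_val = new_pair + 1;
}

int
main(void)
{
    VALUE *old_pair = malloc(2 * sizeof(VALUE));
    assert(old_pair != NULL);
    old_pair[0] = 0x1000;
    old_pair[1] = 0x2000;

    /* the "table cells" that st_update would hand back by reference */
    VALUE *table_key = old_pair;
    VALUE *table_val = old_pair + 1;

    /* as in wmap_aset: allocate the replacement pair up front */
    VALUE *new_pair = malloc(2 * sizeof(VALUE));
    assert(new_pair != NULL);
    new_pair[0] = 0x1000; /* same key */
    new_pair[1] = 0x3000; /* new value */

    wm_replace(&table_key, &table_val, new_pair, 1);
    assert(*table_val == 0x3000);

    free(new_pair);
    return 0;
}

Replacing the stored key pointer, and not just the value, is the crucial move: after the old pair is freed, the table's key must point into the new allocation to stay valid.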
@@ -500,11 +418,11 @@ wmap_lookup(VALUE self, VALUE key)
     TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
 
     st_data_t data;
-    if (!st_lookup(w->wmap2obj, (st_data_t)key, &data)) return Qundef;
+    if (!st_lookup(w->table, (st_data_t)&key, &data)) return Qundef;
 
-    VALUE obj = (VALUE)data;
-    if (!wmap_live_p(obj)) return Qundef;
-    return obj;
+    if (!wmap_live_p(*(VALUE *)data)) return Qundef;
+
+    return *(VALUE *)data;
 }
 
 /* Retrieves a weakly referenced object with the given key */
@@ -519,20 +437,23 @@ wmap_aref(VALUE self, VALUE key)
 static VALUE
 wmap_delete(VALUE self, VALUE key)
 {
+    assert(wmap_live_p(key));
+
     struct weakmap *w;
     TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
 
-    VALUE old_value = Qnil;
-    if (st_delete(w->wmap2obj, (st_data_t *)&key, (st_data_t *)&old_value)) {
-        if (FL_ABLE(old_value)) {
-            // That key existed and had an inverse reference, we need to clear the outdated inverse reference.
-            st_update(w->obj2wmap, (st_data_t)old_value, wmap_remove_inverse_ref, key);
+    VALUE orig_key = key;
+    st_data_t orig_key_data = (st_data_t)&orig_key;
+    st_data_t orig_val_data;
+    if (st_delete(w->table, &orig_key_data, &orig_val_data)) {
+        VALUE orig_val = *(VALUE *)orig_val_data;
+
+        wmap_free_entry((VALUE *)orig_key_data, (VALUE *)orig_val_data);
+
+        if (wmap_live_p(orig_val)) {
+            return orig_val;
         }
-        return old_value;
     }
-    else if (rb_block_given_p()) {
+
+    if (rb_block_given_p()) {
         return rb_yield(key);
     }
     else {
@@ -552,10 +473,10 @@ static VALUE
 wmap_size(VALUE self)
 {
     struct weakmap *w;
-    st_index_t n;
-
     TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
 
-    n = w->wmap2obj->num_entries;
+    st_index_t n = st_table_size(w->table);
+
 #if SIZEOF_ST_INDEX_T <= SIZEOF_LONG
     return ULONG2NUM(n);
 #else