Implement weak references in the GC

[Feature #19783]

This commit adds support for weak references in the GC through the
function `rb_gc_mark_weak`. Unlike strong references, weak references
do not mark the object, but rather let the GC know that an object
refers to another one. If the child object is freed, the pointer from
the parent object is overwritten with `Qundef`.

Co-Authored-By: Jean Boussier <byroot@ruby-lang.org>
This commit is contained in:
Peter Zhu 2023-07-24 14:21:50 -04:00
Родитель 633243958c
Коммит bfb395c620
4 изменённых файлов: 99 добавлений и 16 удалений

Просмотреть файл

@ -6980,6 +6980,7 @@ gc.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
gc.$(OBJEXT): {$(VPATH)}builtin.h
gc.$(OBJEXT): {$(VPATH)}config.h
gc.$(OBJEXT): {$(VPATH)}constant.h
gc.$(OBJEXT): {$(VPATH)}darray.h
gc.$(OBJEXT): {$(VPATH)}debug.h
gc.$(OBJEXT): {$(VPATH)}debug_counter.h
gc.$(OBJEXT): {$(VPATH)}defines.h
@ -8401,6 +8402,7 @@ load.$(OBJEXT): $(CCAN_DIR)/str/str.h
load.$(OBJEXT): $(hdrdir)/ruby/ruby.h
load.$(OBJEXT): $(top_srcdir)/internal/array.h
load.$(OBJEXT): $(top_srcdir)/internal/basic_operators.h
load.$(OBJEXT): $(top_srcdir)/internal/bits.h
load.$(OBJEXT): $(top_srcdir)/internal/compilers.h
load.$(OBJEXT): $(top_srcdir)/internal/dir.h
load.$(OBJEXT): $(top_srcdir)/internal/error.h

Просмотреть файл

@ -5,6 +5,7 @@
#include <stddef.h>
#include <stdlib.h>
#include "internal/bits.h"
#include "internal/gc.h"
// Type for a dynamic array. Use to declare a dynamic array.
@ -85,6 +86,15 @@
rb_darray_make_impl((ptr_to_ary), size, sizeof(**(ptr_to_ary)), \
sizeof((*(ptr_to_ary))->data[0]), rb_darray_calloc_mul_add_without_gc)
/* Resize the darray to a new capacity. The new capacity must be greater than
 * or equal to the size of the darray.
 *
 * capa is not applied as-is: it is first passed through
 * rb_darray_next_power_of_two(), so the resulting capacity may be larger than
 * the one requested. The reallocation itself is performed with
 * rb_darray_realloc_mul_add_without_gc().
 *
 * void rb_darray_resize_capa_without_gc(rb_darray(T) *ptr_to_ary, size_t capa);
 */
#define rb_darray_resize_capa_without_gc(ptr_to_ary, capa) \
rb_darray_resize_capa_impl((ptr_to_ary), rb_darray_next_power_of_two(capa), sizeof(**(ptr_to_ary)), \
sizeof((*(ptr_to_ary))->data[0]), rb_darray_realloc_mul_add_without_gc)
/* Evaluates to the `data` member of the darray, i.e. its element storage.
 * ary is dereferenced unconditionally. */
#define rb_darray_data_ptr(ary) ((ary)->data)
typedef struct rb_darray_meta {
@ -92,9 +102,8 @@ typedef struct rb_darray_meta {
size_t capa;
} rb_darray_meta_t;
// Set the size of the array to zero without freeing the backing memory.
// Allows reusing the same array.
//
/* Set the size of the array to zero without freeing the backing memory.
* Allows reusing the same array. */
static inline void
rb_darray_clear(void *ary)
{
@ -162,6 +171,40 @@ rb_darray_realloc_mul_add_without_gc(const void *orig_ptr, size_t x, size_t y, s
return ptr;
}
/* Internal function. Returns a power of two that is greater than or equal to
 * n, computed as 2**k with k = 64 - nlz_int64(n).
 *
 * NOTE(review): presumably nlz_int64() counts the leading zero bits of a
 * 64-bit value, which makes k the bit length of n -- confirm against
 * internal/bits.h. Under that reading the result is strictly greater than
 * every nonzero n, so an exact power of two is rounded up to the next one.
 *
 * n must not exceed half of the largest size_t value; a larger n has no
 * representable result. */
static inline size_t
rb_darray_next_power_of_two(size_t n)
{
    /* Keeps the shift count below the bit width of size_t. */
    assert(n <= (size_t)-1 / 2);

    /* Shift a size_t one, not the int literal `1`: shifting an int by 31 or
     * more is undefined behavior, so the previous form broke as soon as n
     * reached 2**30, long before size_t itself would overflow. */
    return (size_t)1 << (64 - nlz_int64(n));
}
/* Internal function. Reallocates the backing store of a darray so that it has
 * room for new_capa elements, then stores the resulting pointer back through
 * ptr_to_ary. new_capa must not be smaller than the current size of the
 * darray. */
static inline void
rb_darray_resize_capa_impl(void *ptr_to_ary, size_t new_capa, size_t header_size, size_t element_size,
                           void *(*realloc_mul_add_impl)(const void *, size_t, size_t, size_t))
{
    rb_darray_meta_t **slot = ptr_to_ary;
    rb_darray_meta_t *old_meta = *slot;
    int first_allocation = (old_meta == NULL);

    rb_darray_meta_t *resized = realloc_mul_add_impl(old_meta, new_capa, element_size, header_size);

    /* A brand-new buffer has no size yet. On subsequent allocations the
     * realloc carries the existing size over. */
    if (first_allocation) {
        resized->size = 0;
    }

    assert(resized->size <= new_capa);
    resized->capa = new_capa;

    /* The element type of the darray is unknown in this function, so the
     * pointer is written out with memcpy rather than through a typed lvalue
     * to stay clear of strict aliasing problems. */
    memcpy(ptr_to_ary, &resized, sizeof(resized));
}
// Internal function
// Ensure there is space for one more element.
// Note: header_size can be bigger than sizeof(rb_darray_meta_t) when T is __int128_t, for example.
@ -177,19 +220,7 @@ rb_darray_ensure_space(void *ptr_to_ary, size_t header_size, size_t element_size
// Double the capacity
size_t new_capa = current_capa == 0 ? 1 : current_capa * 2;
rb_darray_meta_t *doubled_ary = realloc_mul_add_impl(meta, new_capa, element_size, header_size);
if (meta == NULL) {
// First allocation. Initialize size. On subsequence allocations
// realloc takes care of carrying over the size.
doubled_ary->size = 0;
}
doubled_ary->capa = new_capa;
// We don't have access to the type of the dynamic array in function context.
// Write out result with memcpy to avoid strict aliasing issue.
memcpy(ptr_to_ary, &doubled_ary, sizeof(doubled_ary));
rb_darray_resize_capa_impl(ptr_to_ary, new_capa, header_size, element_size, realloc_mul_add_impl);
}
static inline void

48
gc.c
Просмотреть файл

@ -95,6 +95,7 @@
#undef LIST_HEAD /* ccan/list conflicts with BSD-origin sys/queue.h. */
#include "constant.h"
#include "darray.h"
#include "debug_counter.h"
#include "eval_intern.h"
#include "id_table.h"
@ -869,6 +870,8 @@ typedef struct rb_objspace {
#if GC_DEBUG_STRESS_TO_CLASS
VALUE stress_to_class;
#endif
rb_darray(VALUE *) weak_references;
} rb_objspace_t;
@ -1831,6 +1834,8 @@ rb_objspace_alloc(void)
ccan_list_head_init(&SIZE_POOL_TOMB_HEAP(size_pool)->pages);
}
rb_darray_make_without_gc(&objspace->weak_references, 0);
dont_gc_on();
return objspace;
@ -1879,6 +1884,8 @@ rb_objspace_free(rb_objspace_t *objspace)
free_stack_chunks(&objspace->mark_stack);
mark_stack_free_cache(&objspace->mark_stack);
rb_darray_free_without_gc(objspace->weak_references);
free(objspace);
}
@ -6878,6 +6885,23 @@ rb_gc_mark_and_move(VALUE *ptr)
}
}
/* Records ptr as a weak reference held by the object currently being marked.
 * The referenced object itself is not marked here; ptr is only appended to
 * the objspace's weak reference list. Nothing is recorded when no GC is
 * running or when *ptr holds a special constant. */
void
rb_gc_mark_weak(VALUE *ptr)
{
    rb_objspace_t *objspace = &rb_objspace;

    if (UNLIKELY(!during_gc)) {
        return;
    }

    VALUE referent = *ptr;

    if (!RB_SPECIAL_CONST_P(referent)) {
        GC_ASSERT(objspace->rgengc.parent_object == 0 || FL_TEST(objspace->rgengc.parent_object, FL_WB_PROTECTED));

        rgengc_check_relation(objspace, referent);

        rb_darray_append_without_gc(&objspace->weak_references, ptr);
    }
}
/* CAUTION: THIS FUNCTION ENABLE *ONLY BEFORE* SWEEPING.
* This function is only for GC_END_MARK timing.
*/
@ -8099,6 +8123,28 @@ gc_marks_wb_unprotected_objects(rb_objspace_t *objspace, rb_heap_t *heap)
gc_mark_stacked_objects_all(objspace);
}
/* Walks every weak reference recorded in the objspace. A slot whose target
 * was not marked is overwritten with Qundef; slots that already hold a
 * special constant are skipped. Afterwards the list is emptied and its
 * capacity is resized based on how many references had a marked target. */
static void
gc_update_weak_references(rb_objspace_t *objspace)
{
    size_t live_count = 0;
    VALUE **slot;

    rb_darray_foreach(objspace->weak_references, i, slot) {
        VALUE target = **slot;

        if (RB_SPECIAL_CONST_P(target)) continue;

        if (RVALUE_MARKED(target)) {
            live_count += 1;
        }
        else {
            /* The target did not survive marking: clear the reference. */
            **slot = Qundef;
        }
    }

    rb_darray_clear(objspace->weak_references);
    rb_darray_resize_capa_without_gc(&objspace->weak_references, live_count);
}
static void
gc_marks_finish(rb_objspace_t *objspace)
{
@ -8125,6 +8171,8 @@ gc_marks_finish(rb_objspace_t *objspace)
}
}
gc_update_weak_references(objspace);
#if RGENGC_CHECK_MODE >= 2
gc_verify_internal_consistency(objspace);
#endif

Просмотреть файл

@ -235,6 +235,8 @@ VALUE rb_define_finalizer_no_check(VALUE obj, VALUE block);
void rb_gc_mark_and_move(VALUE *ptr);
/* Registers *ptr as a weak reference: the referenced object is not marked,
 * and if it is still unmarked when marking finishes, *ptr is overwritten with
 * Qundef. Does nothing outside of GC or when *ptr is a special constant. */
void rb_gc_mark_weak(VALUE *ptr);
#define rb_gc_mark_and_move_ptr(ptr) do { \
VALUE _obj = (VALUE)*(ptr); \
rb_gc_mark_and_move(&_obj); \