Implement Hash AR tables on VWA

This commit is contained in:
Peter Zhu 2023-04-19 15:59:25 -04:00
Родитель 264ba0f89a
Коммит 5199f2aaf9
9 изменённых файлов: 60 добавлений и 163 удалений

Просмотреть файл

@ -16475,7 +16475,6 @@ transient_heap.$(OBJEXT): $(top_srcdir)/internal/array.h
transient_heap.$(OBJEXT): $(top_srcdir)/internal/basic_operators.h
transient_heap.$(OBJEXT): $(top_srcdir)/internal/compilers.h
transient_heap.$(OBJEXT): $(top_srcdir)/internal/gc.h
transient_heap.$(OBJEXT): $(top_srcdir)/internal/hash.h
transient_heap.$(OBJEXT): $(top_srcdir)/internal/imemo.h
transient_heap.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h
transient_heap.$(OBJEXT): $(top_srcdir)/internal/serial.h

33
gc.c
Просмотреть файл

@ -3568,19 +3568,7 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
RB_DEBUG_COUNTER_INC(obj_hash_st);
}
#endif
if (/* RHASH_AR_TABLE_P(obj) */ !FL_TEST_RAW(obj, RHASH_ST_TABLE_FLAG)) {
struct ar_table_struct *tab = RHASH(obj)->as.ar;
if (tab) {
if (RHASH_TRANSIENT_P(obj)) {
RB_DEBUG_COUNTER_INC(obj_hash_transient);
}
else {
ruby_xfree(tab);
}
}
}
else {
if (!RHASH_AR_TABLE_P(obj)) {
GC_ASSERT(RHASH_ST_TABLE_P(obj));
st_free_table(RHASH(obj)->as.st);
}
@ -4922,10 +4910,6 @@ obj_memsize_of(VALUE obj, int use_all_types)
break;
case T_HASH:
if (RHASH_AR_TABLE_P(obj)) {
if (RHASH_AR_TABLE(obj) != NULL) {
size_t rb_hash_ar_table_size(void);
size += rb_hash_ar_table_size();
}
}
else {
VM_ASSERT(RHASH_ST_TABLE(obj) != NULL);
@ -6608,14 +6592,6 @@ mark_hash(rb_objspace_t *objspace, VALUE hash)
rb_hash_stlike_foreach(hash, mark_keyvalue, (st_data_t)objspace);
}
if (RHASH_AR_TABLE_P(hash)) {
if (LIKELY(during_gc) && RHASH_TRANSIENT_P(hash)) {
rb_transient_heap_mark(hash, RHASH_AR_TABLE(hash));
}
}
else {
VM_ASSERT(!RHASH_TRANSIENT_P(hash));
}
gc_mark(objspace, RHASH(hash)->ifnone);
}
@ -8425,6 +8401,10 @@ gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, V
obj_size = rb_str_size_as_embedded(src);
break;
case T_HASH:
obj_size = rb_hash_size_as_embedded(src);
break;
default:
return src_pool;
}
@ -13583,9 +13563,8 @@ rb_raw_obj_info_buitin_type(char *const buff, const size_t buff_size, const VALU
break;
}
case T_HASH: {
APPEND_F("[%c%c] %"PRIdSIZE,
APPEND_F("[%c] %"PRIdSIZE,
RHASH_AR_TABLE_P(obj) ? 'A' : 'S',
RHASH_TRANSIENT_P(obj) ? 'T' : ' ',
RHASH_SIZE(obj));
break;
}

99
hash.c
Просмотреть файл

@ -388,10 +388,15 @@ typedef struct ar_table_struct {
ar_table_pair pairs[RHASH_AR_TABLE_MAX_SIZE];
} ar_table;
#define RHASH_EMBED_SIZE (offsetof(struct RHash, as.st) + sizeof(ar_table))
size_t
rb_hash_ar_table_size(void)
rb_hash_size_as_embedded(VALUE hash)
{
return sizeof(ar_table);
if (RHASH_AR_TABLE_P(hash)) {
return RHASH_EMBED_SIZE;
}
return sizeof(struct RHash);
}
static inline st_hash_t
@ -534,14 +539,6 @@ hash_verify_(VALUE hash, const char *file, int line)
HASH_ASSERT(RHASH_AR_TABLE_SIZE_RAW(hash) == 0);
HASH_ASSERT(RHASH_AR_TABLE_BOUND_RAW(hash) == 0);
}
#if USE_TRANSIENT_HEAP
if (RHASH_TRANSIENT_P(hash)) {
volatile st_data_t MAYBE_UNUSED(key) = RHASH_AR_TABLE_REF(hash, 0)->key; /* read */
HASH_ASSERT(RHASH_AR_TABLE(hash) != NULL);
HASH_ASSERT(rb_transient_heap_managed_ptr_p(RHASH_AR_TABLE(hash)));
}
#endif
return hash;
}
@ -552,7 +549,7 @@ hash_verify_(VALUE hash, const char *file, int line)
static inline int
RHASH_TABLE_NULL_P(VALUE hash)
{
if (RHASH(hash)->as.ar == NULL) {
if (RHASH_AR_TABLE(hash) == NULL) {
HASH_ASSERT(RHASH_AR_TABLE_P(hash));
return TRUE;
}
@ -579,8 +576,7 @@ static void
hash_ar_table_set(VALUE hash, ar_table *ar)
{
HASH_ASSERT(RHASH_AR_TABLE_P(hash));
HASH_ASSERT((RHASH_TRANSIENT_P(hash) && ar == NULL) ? FALSE : TRUE);
RHASH(hash)->as.ar = ar;
*(RHASH_AR_TABLE(hash)) = *ar;
hash_verify(hash);
}
@ -641,25 +637,20 @@ RHASH_AR_TABLE_CLEAR(VALUE h)
RBASIC(h)->flags &= ~RHASH_AR_TABLE_SIZE_MASK;
RBASIC(h)->flags &= ~RHASH_AR_TABLE_BOUND_MASK;
hash_ar_table_set(h, NULL);
memset(RHASH_AR_TABLE(h), 0, sizeof(ar_table));
}
static ar_table*
ar_alloc_table(VALUE hash)
{
ar_table *tab = (ar_table*)rb_transient_heap_alloc(hash, sizeof(ar_table));
ar_table *tab = RHASH_AR_TABLE(hash);
memset(tab, 0, sizeof(ar_table));
if (tab != NULL) {
RHASH_SET_TRANSIENT_FLAG(hash);
}
else {
RHASH_UNSET_TRANSIENT_FLAG(hash);
tab = (ar_table*)ruby_xmalloc(sizeof(ar_table));
}
// RHASH_UNSET_ST_FLAG(hash);
// hash_ar_table_set(hash, tab);
RHASH_AR_TABLE_SIZE_SET(hash, 0);
RHASH_AR_TABLE_BOUND_SET(hash, 0);
hash_ar_table_set(hash, tab);
return tab;
}
@ -722,23 +713,14 @@ ar_find_entry(VALUE hash, st_hash_t hash_value, st_data_t key)
return ar_find_entry_hint(hash, hint, key);
}
//old one
static inline void
ar_free_and_clear_table(VALUE hash)
{
ar_table *tab = RHASH_AR_TABLE(hash);
if (tab) {
if (RHASH_TRANSIENT_P(hash)) {
RHASH_UNSET_TRANSIENT_FLAG(hash);
}
else {
ruby_xfree(RHASH_AR_TABLE(hash));
}
RHASH_AR_TABLE_CLEAR(hash);
}
HASH_ASSERT(RHASH_AR_TABLE_SIZE(hash) == 0);
HASH_ASSERT(RHASH_AR_TABLE_BOUND(hash) == 0);
HASH_ASSERT(RHASH_TRANSIENT_P(hash) == 0);
}
static void
@ -1055,6 +1037,7 @@ ar_insert(VALUE hash, st_data_t key, st_data_t value)
return -1;
}
HASH_ASSERT(RHASH_AR_TABLE(hash));
hash_ar_table(hash); /* prepare ltbl */
bin = ar_find_entry(hash, hash_value, key);
@ -1064,6 +1047,7 @@ ar_insert(VALUE hash, st_data_t key, st_data_t value)
}
else if (bin >= RHASH_AR_TABLE_MAX_BOUND) {
bin = ar_compact_table(hash);
HASH_ASSERT(RHASH_AR_TABLE(hash));
hash_ar_table(hash);
}
HASH_ASSERT(bin < RHASH_AR_TABLE_MAX_BOUND);
@ -1202,17 +1186,10 @@ ar_copy(VALUE hash1, VALUE hash2)
ar_table *new_tab = RHASH_AR_TABLE(hash1);
if (new_tab == NULL) {
new_tab = (ar_table*) rb_transient_heap_alloc(hash1, sizeof(ar_table));
if (new_tab != NULL) {
RHASH_SET_TRANSIENT_FLAG(hash1);
}
else {
RHASH_UNSET_TRANSIENT_FLAG(hash1);
new_tab = (ar_table*)ruby_xmalloc(sizeof(ar_table));
}
new_tab = ar_alloc_table(hash1);
}
*new_tab = *old_tab;
memcpy(new_tab, old_tab, sizeof(ar_table));
RHASH(hash1)->ar_hint.word = RHASH(hash2)->ar_hint.word;
RHASH_AR_TABLE_BOUND_SET(hash1, RHASH_AR_TABLE_BOUND(hash2));
RHASH_AR_TABLE_SIZE_SET(hash1, RHASH_AR_TABLE_SIZE(hash2));
@ -1236,33 +1213,6 @@ ar_clear(VALUE hash)
}
}
#if USE_TRANSIENT_HEAP
void
rb_hash_transient_heap_evacuate(VALUE hash, int promote)
{
if (RHASH_TRANSIENT_P(hash)) {
ar_table *new_tab;
ar_table *old_tab = RHASH_AR_TABLE(hash);
if (UNLIKELY(old_tab == NULL)) {
return;
}
HASH_ASSERT(old_tab != NULL);
if (! promote) {
new_tab = rb_transient_heap_alloc(hash, sizeof(ar_table));
if (new_tab == NULL) promote = true;
}
if (promote) {
new_tab = ruby_xmalloc(sizeof(ar_table));
RHASH_UNSET_TRANSIENT_FLAG(hash);
}
*new_tab = *old_tab;
hash_ar_table_set(hash, new_tab);
}
hash_verify(hash);
}
#endif
typedef int st_foreach_func(st_data_t, st_data_t, st_data_t);
struct foreach_safe_arg {
@ -1504,7 +1454,7 @@ static VALUE
hash_alloc_flags(VALUE klass, VALUE flags, VALUE ifnone)
{
const VALUE wb = (RGENGC_WB_PROTECTED_HASH ? FL_WB_PROTECTED : 0);
NEWOBJ_OF(hash, struct RHash, klass, T_HASH | wb | flags, sizeof(struct RHash), 0);
NEWOBJ_OF(hash, struct RHash, klass, T_HASH | wb | flags, RHASH_EMBED_SIZE, 0);
RHASH_SET_IFNONE((VALUE)hash, ifnone);
@ -1547,10 +1497,7 @@ rb_hash_new_with_size(st_index_t size)
if (size == 0) {
/* do nothing */
}
else if (size <= RHASH_AR_TABLE_MAX_SIZE) {
ar_alloc_table(ret);
}
else {
else if (size > RHASH_AR_TABLE_MAX_SIZE) {
RHASH_ST_TABLE_SET(ret, st_init_table_with_size(&objhash, size));
}
return ret;
@ -2027,11 +1974,9 @@ rb_hash_rehash(VALUE hash)
rb_hash_modify_check(hash);
if (RHASH_AR_TABLE_P(hash)) {
tmp = hash_alloc(0);
ar_alloc_table(tmp);
rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp);
ar_free_and_clear_table(hash);
ar_copy(hash, tmp);
ar_free_and_clear_table(tmp);
}
else if (RHASH_ST_TABLE_P(hash)) {
st_table *old_tab = RHASH_ST_TABLE(hash);

Просмотреть файл

@ -42,15 +42,14 @@ enum ruby_rhash_flags {
struct RHash {
struct RBasic basic;
union {
st_table *st;
struct ar_table_struct *ar; /* possibly 0 */
} as;
const VALUE ifnone;
union {
ar_hint_t ary[RHASH_AR_TABLE_MAX_SIZE];
VALUE word;
} ar_hint;
union {
st_table *st;
} as;
};
#define RHASH(obj) ((struct RHash *)(obj))
@ -86,6 +85,8 @@ int rb_hash_stlike_foreach_with_replace(VALUE hash, st_foreach_check_callback_fu
int rb_hash_stlike_update(VALUE hash, st_data_t key, st_update_callback_func *func, st_data_t arg);
VALUE rb_ident_hash_new_with_size(st_index_t size);
size_t rb_hash_size_as_embedded(VALUE hash);
static inline unsigned RHASH_AR_TABLE_SIZE_RAW(VALUE h);
static inline VALUE RHASH_IFNONE(VALUE h);
static inline size_t RHASH_SIZE(VALUE h);
@ -96,9 +97,6 @@ static inline struct ar_table_struct *RHASH_AR_TABLE(VALUE h);
static inline st_table *RHASH_ST_TABLE(VALUE h);
static inline size_t RHASH_ST_SIZE(VALUE h);
static inline void RHASH_ST_CLEAR(VALUE h);
static inline bool RHASH_TRANSIENT_P(VALUE h);
static inline void RHASH_SET_TRANSIENT_FLAG(VALUE h);
static inline void RHASH_UNSET_TRANSIENT_FLAG(VALUE h);
RUBY_SYMBOL_EXPORT_BEGIN
/* hash.c (export) */
@ -128,7 +126,7 @@ RHASH_AR_TABLE_P(VALUE h)
static inline struct ar_table_struct *
RHASH_AR_TABLE(VALUE h)
{
return RHASH(h)->as.ar;
return (struct ar_table_struct *)((uintptr_t)h + offsetof(struct RHash, as.st));
}
static inline st_table *
@ -187,30 +185,4 @@ RHASH_AR_TABLE_SIZE_RAW(VALUE h)
return (unsigned)ret;
}
static inline bool
RHASH_TRANSIENT_P(VALUE h)
{
#if USE_TRANSIENT_HEAP
return FL_TEST_RAW(h, RHASH_TRANSIENT_FLAG);
#else
return false;
#endif
}
static inline void
RHASH_SET_TRANSIENT_FLAG(VALUE h)
{
#if USE_TRANSIENT_HEAP
FL_SET_RAW(h, RHASH_TRANSIENT_FLAG);
#endif
}
static inline void
RHASH_UNSET_TRANSIENT_FLAG(VALUE h)
{
#if USE_TRANSIENT_HEAP
FL_UNSET_RAW(h, RHASH_TRANSIENT_FLAG);
#endif
}
#endif /* INTERNAL_HASH_H */

Просмотреть файл

@ -3112,7 +3112,6 @@ obj_traverse_replace_rec(struct obj_traverse_replace_data *data)
#if USE_TRANSIENT_HEAP
void rb_ary_transient_heap_evacuate(VALUE ary, int promote);
void rb_obj_transient_heap_evacuate(VALUE obj, int promote);
void rb_hash_transient_heap_evacuate(VALUE hash, int promote);
void rb_struct_transient_heap_evacuate(VALUE st, int promote);
#endif
@ -3249,12 +3248,8 @@ obj_traverse_replace_i(VALUE obj, struct obj_traverse_replace_data *data)
RB_GC_GUARD(obj);
}
break;
case T_HASH:
{
#if USE_TRANSIENT_HEAP
if (data->move) rb_hash_transient_heap_evacuate(obj, TRUE);
#endif
struct obj_traverse_replace_callback_data d = {
.stop = false,
.data = data,

Просмотреть файл

@ -417,6 +417,26 @@ class TestGCCompact < Test::Unit::TestCase
end;
end
def test_moving_hashes_down_size_pools
omit if GC::INTERNAL_CONSTANTS[:SIZE_POOL_COUNT] == 1
assert_separately(%w[-robjspace], "#{<<~"begin;"}\n#{<<~"end;"}", timeout: 10, signal: :SEGV)
begin;
HASH_COUNT = 500
GC.verify_compaction_references(expand_heap: true, toward: :empty)
base_hash = { a: 1, b: 2, c: 3, d: 4, e: 5, f: 6, g: 7, h: 8 }
ary = HASH_COUNT.times.map { base_hash.dup }
ary.each { |h| h[:i] = 9 }
stats = GC.verify_compaction_references(expand_heap: true, toward: :empty)
assert_operator(stats[:moved_down][:T_HASH], :>=, HASH_COUNT)
assert_include(ObjectSpace.dump(ary[0]), '"slot_size":' + GC::INTERNAL_CONSTANTS[:BASE_SLOT_SIZE].to_s)
end;
end
def test_moving_objects_between_size_pools_keeps_shape_frozen_status
# [Bug #19536]
assert_separately([], "#{<<~"begin;"}\n#{<<~"end;"}")

Просмотреть файл

@ -372,8 +372,10 @@ class TestShapes < Test::Unit::TestCase
assert_shape_equal(RubyVM::Shape.root_shape, RubyVM::Shape.of([]))
end
def test_hash_has_root_shape
assert_shape_equal(RubyVM::Shape.root_shape, RubyVM::Shape.of({}))
def test_hash_has_correct_pool_shape
# All hashes are now allocated their own ar_table, so start in a
# larger pool, and have already transitioned once.
assert_shape_equal(RubyVM::Shape.root_shape, RubyVM::Shape.of({}).parent)
end
def test_true_has_special_const_shape_id

Просмотреть файл

@ -10,7 +10,6 @@
#include "internal.h"
#include "internal/array.h"
#include "internal/gc.h"
#include "internal/hash.h"
#include "internal/sanitizers.h"
#include "internal/static_assert.h"
#include "internal/struct.h"
@ -607,15 +606,6 @@ transient_heap_ptr(VALUE obj, int error)
ptr = rb_struct_const_heap_ptr(obj);
}
break;
case T_HASH:
if (RHASH_TRANSIENT_P(obj)) {
TH_ASSERT(RHASH_AR_TABLE_P(obj));
ptr = (VALUE *)(RHASH(obj)->as.ar);
}
else {
ptr = NULL;
}
break;
default:
if (error) {
rb_bug("transient_heap_ptr: unknown obj %s\n", rb_obj_info(obj));
@ -736,9 +726,6 @@ transient_heap_block_evacuate(struct transient_heap* theap, struct transient_hea
case T_STRUCT:
rb_struct_transient_heap_evacuate(obj, !TRANSIENT_HEAP_DEBUG_DONT_PROMOTE);
break;
case T_HASH:
rb_hash_transient_heap_evacuate(obj, !TRANSIENT_HEAP_DEBUG_DONT_PROMOTE);
break;
default:
rb_bug("unsupported: %s\n", rb_obj_info(obj));
}

Просмотреть файл

@ -42,7 +42,6 @@ int rb_transient_heap_managed_ptr_p(const void *ptr);
/* evacuate functions for each type */
void rb_ary_transient_heap_evacuate(VALUE ary, int promote);
void rb_obj_transient_heap_evacuate(VALUE obj, int promote);
void rb_hash_transient_heap_evacuate(VALUE hash, int promote);
void rb_struct_transient_heap_evacuate(VALUE st, int promote);
#else /* USE_TRANSIENT_HEAP */
@ -58,7 +57,6 @@ void rb_struct_transient_heap_evacuate(VALUE st, int promote);
#define rb_ary_transient_heap_evacuate(x, y) ((void)0)
#define rb_obj_transient_heap_evacuate(x, y) ((void)0)
#define rb_hash_transient_heap_evacuate(x, y) ((void)0)
#define rb_struct_transient_heap_evacuate(x, y) ((void)0)
#endif /* USE_TRANSIENT_HEAP */