From 147ca9585ede559fd68e162cbbbaba84f009c9a1 Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Wed, 17 Apr 2024 16:30:41 -0700 Subject: [PATCH] Implement equality for CI comparison when CC searching When we're searching for CCs, compare the argc and flags for CI rather than comparing pointers. This means we don't need to store a reference to the CI, and it also naturally "de-duplicates" CC objects. We can observe the effect with the following code: ```ruby require "objspace" hash = {} p ObjectSpace.memsize_of(Hash) eval ("a".."zzz").map { |key| "hash.merge(:#{key} => 1)" }.join("; ") p ObjectSpace.memsize_of(Hash) ``` On master: ``` $ ruby -v test.rb ruby 3.4.0dev (2024-04-15T16:21:41Z master d019b3baec) [arm64-darwin23] test.rb:3: warning: assigned but unused variable - hash 3424 527736 ``` On this branch: ``` $ make runruby compiling vm.c linking miniruby builtin_binary.inc updated compiling builtin.c linking static-library libruby.3.4-static.a ln -sf ../../rbconfig.rb .ext/arm64-darwin23/rbconfig.rb linking ruby ld: warning: ignoring duplicate libraries: '-ldl', '-lobjc', '-lpthread' RUBY_ON_BUG='gdb -x ./.gdbinit -p' ./miniruby -I./lib -I. -I.ext/common ./tool/runruby.rb --extout=.ext -- --disable-gems ./test.rb 2240 2368 ``` Co-authored-by: John Hawthorn --- gc.c | 3 --- imemo.c | 1 - vm_callinfo.h | 3 ++- vm_insnhelper.c | 24 ++++++++++++++++-------- vm_method.c | 1 - 5 files changed, 18 insertions(+), 14 deletions(-) diff --git a/gc.c b/gc.c index 377ee528ea..279841fd7f 100644 --- a/gc.c +++ b/gc.c @@ -9978,9 +9978,6 @@ update_cc_tbl_i(VALUE ccs_ptr, void *data) } for (int i=0; ilen; i++) { - if (gc_object_moved_p(objspace, (VALUE)ccs->entries[i].ci)) { - ccs->entries[i].ci = (struct rb_callinfo *)rb_gc_location((VALUE)ccs->entries[i].ci); - } if (gc_object_moved_p(objspace, (VALUE)ccs->entries[i].cc)) { ccs->entries[i].cc = (struct rb_callcache *)rb_gc_location((VALUE)ccs->entries[i].cc); } diff --git a/imemo.c b/imemo.c index 8403859146..3ecee05652 100644 --- a/imemo.c +++ b/imemo.c @@ -196,7 +196,6 @@ cc_table_mark_i(ID id, VALUE ccs_ptr, void *data) VM_ASSERT((VALUE)data == ccs->entries[i].cc->klass); VM_ASSERT(vm_cc_check_cme(ccs->entries[i].cc, ccs->cme)); - rb_gc_mark_movable((VALUE)ccs->entries[i].ci); rb_gc_mark_movable((VALUE)ccs->entries[i].cc); } return ID_TABLE_CONTINUE; diff --git a/vm_callinfo.h b/vm_callinfo.h index 21e0755aa8..1629d9f0a9 100644 --- a/vm_callinfo.h +++ b/vm_callinfo.h @@ -575,7 +575,8 @@ struct rb_class_cc_entries { int len; const struct rb_callable_method_entry_struct *cme; struct rb_class_cc_entries_entry { - const struct rb_callinfo *ci; + unsigned int argc; + unsigned int flag; const struct rb_callcache *cc; } *entries; }; diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 84ef212053..108201295b 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -2023,7 +2023,8 @@ vm_ccs_push(VALUE klass, struct rb_class_cc_entries *ccs, const struct rb_callin VM_ASSERT(ccs->len < ccs->capa); const int pos = ccs->len++; - RB_OBJ_WRITE(klass, &ccs->entries[pos].ci, ci); + ccs->entries[pos].argc = vm_ci_argc(ci); + ccs->entries[pos].flag = vm_ci_flag(ci); RB_OBJ_WRITE(klass, &ccs->entries[pos].cc, cc); if (RB_DEBUG_COUNTER_SETMAX(ccs_maxlen, ccs->len)) { @@ -2038,7 +2039,9 @@ rb_vm_ccs_dump(struct rb_class_cc_entries *ccs) { ruby_debug_printf("ccs:%p (%d,%d)\n", (void *)ccs, ccs->len, ccs->capa); for (int i=0; ilen; i++) { - vm_ci_dump(ccs->entries[i].ci); + ruby_debug_printf("CCS CI ID:flag:%x argc:%u\n", + ccs->entries[i].flag, + ccs->entries[i].argc); rp(ccs->entries[i].cc); } } @@ -2050,11 +2053,8 @@ vm_ccs_verify(struct rb_class_cc_entries *ccs, ID mid, VALUE klass) VM_ASSERT(ccs->len <= ccs->capa); for (int i=0; ilen; i++) { - const struct rb_callinfo *ci = ccs->entries[i].ci; const struct rb_callcache *cc = ccs->entries[i].cc; - VM_ASSERT(vm_ci_p(ci)); - VM_ASSERT(vm_ci_mid(ci) == mid); VM_ASSERT(IMEMO_TYPE_P(cc, imemo_callcache)); VM_ASSERT(vm_cc_class_check(cc, klass)); VM_ASSERT(vm_cc_check_cme(cc, ccs->cme)); @@ -2076,6 +2076,8 @@ vm_search_cc(const VALUE klass, const struct rb_callinfo * const ci) VALUE ccs_data; if (cc_tbl) { + // CCS data is keyed on method id, so we don't need the method id + // for doing comparisons in the `for` loop below. if (rb_id_table_lookup(cc_tbl, mid, &ccs_data)) { ccs = (struct rb_class_cc_entries *)ccs_data; const int ccs_len = ccs->len; @@ -2088,14 +2090,20 @@ vm_search_cc(const VALUE klass, const struct rb_callinfo * const ci) else { VM_ASSERT(vm_ccs_verify(ccs, mid, klass)); + // We already know the method id is correct because we had + // to look up the ccs_data by method id. All we need to + // compare is argc and flag + unsigned int argc = vm_ci_argc(ci); + unsigned int flag = vm_ci_flag(ci); + for (int i=0; ientries[i].ci; + unsigned int ccs_ci_argc = ccs->entries[i].argc; + unsigned int ccs_ci_flag = ccs->entries[i].flag; const struct rb_callcache *ccs_cc = ccs->entries[i].cc; - VM_ASSERT(vm_ci_p(ccs_ci)); VM_ASSERT(IMEMO_TYPE_P(ccs_cc, imemo_callcache)); - if (ccs_ci == ci) { // TODO: equality + if (ccs_ci_argc == argc && ccs_ci_flag == flag) { RB_DEBUG_COUNTER_INC(cc_found_in_ccs); VM_ASSERT(vm_cc_cme(ccs_cc)->called_id == mid); diff --git a/vm_method.c b/vm_method.c index 5ca302a86a..b58049bf7d 100644 --- a/vm_method.c +++ b/vm_method.c @@ -31,7 +31,6 @@ vm_ccs_dump_i(ID mid, VALUE val, void *data) rp(ccs->cme); for (int i=0; ilen; i++) { - fprintf(stderr, " | [%d]\t", i); vm_ci_dump(ccs->entries[i].ci); rp_m( " | \t", ccs->entries[i].cc); }