From c20e819e8b04e84a4103ca9a036022543459c213 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Tue, 30 Jan 2024 17:15:44 -0800 Subject: [PATCH] Fix crash when passing large keyword splat to method accepting keywords and keyword splat The following code previously caused a crash: ```ruby h = {} 1000000.times{|i| h[i.to_s.to_sym] = i} def f(kw: 1, **kws) end f(**h) ``` Inside a thread or fiber, the size of the keyword splat could be much smaller and still cause a crash. I found this issue while optimizing method calling by reducing implicit allocations. Given the following code: ```ruby def f(kw: , **kws) end kw = {kw: 1} f(**kw) ``` The `f(**kw)` call previously allocated two hashes callee side instead of a single hash. This is because `setup_parameters_complex` would extract the keywords from the keyword splat hash to the C stack, to attempt to mirror the case when literal keywords are passed without a keyword splat. Then, `make_rest_kw_hash` would build a new hash based on the extracted keywords that weren't used for literal keywords. Switch the implementation so that if a keyword splat is passed, literal keywords are deleted from the keyword splat hash (or a copy of the hash if the hash is not mutable). In addition to avoiding the crash, this new approach is much more efficient in all cases. With the included benchmark: ``` 1 miniruby: 5247879.9 i/s miniruby-before: 2474050.2 i/s - 2.12x slower 1_mutable miniruby: 1797036.5 i/s miniruby-before: 1239543.3 i/s - 1.45x slower 10 miniruby: 1094750.1 i/s miniruby-before: 365529.6 i/s - 2.99x slower 10_mutable miniruby: 407781.7 i/s miniruby-before: 225364.0 i/s - 1.81x slower 100 miniruby: 100992.3 i/s miniruby-before: 32703.6 i/s - 3.09x slower 100_mutable miniruby: 40092.3 i/s miniruby-before: 21266.9 i/s - 1.89x slower 1000 miniruby: 21694.2 i/s miniruby-before: 4949.8 i/s - 4.38x slower 1000_mutable miniruby: 5819.5 i/s miniruby-before: 2995.0 i/s - 1.94x slower ``` --- benchmark/vm_call_kw_and_kw_splat.yml | 25 +++++++ test/ruby/test_keyword.rb | 14 ++++ vm_args.c | 104 +++++++++++++++++++------- 3 files changed, 118 insertions(+), 25 deletions(-) create mode 100644 benchmark/vm_call_kw_and_kw_splat.yml diff --git a/benchmark/vm_call_kw_and_kw_splat.yml b/benchmark/vm_call_kw_and_kw_splat.yml new file mode 100644 index 0000000000..aa6e549e0c --- /dev/null +++ b/benchmark/vm_call_kw_and_kw_splat.yml @@ -0,0 +1,25 @@ +prelude: | + h1, h10, h100, h1000 = [1, 10, 100, 1000].map do |n| + h = {kw: 1} + n.times{|i| h[i.to_s.to_sym] = i} + h + end + eh = {} + def kw(kw: nil, **kws) end +benchmark: + 1: | + kw(**h1) + 1_mutable: | + kw(**eh, **h1) + 10: | + kw(**h10) + 10_mutable: | + kw(**eh, **h10) + 100: | + kw(**h100) + 100_mutable: | + kw(**eh, **h100) + 1000: | + kw(**h1000) + 1000_mutable: | + kw(**eh, **h1000) diff --git a/test/ruby/test_keyword.rb b/test/ruby/test_keyword.rb index 956cc9c56d..06bba60ac5 100644 --- a/test/ruby/test_keyword.rb +++ b/test/ruby/test_keyword.rb @@ -4002,6 +4002,20 @@ class TestKeywordArguments < Test::Unit::TestCase }, bug8964 end + def test_large_kwsplat_to_method_taking_kw_and_kwsplat + assert_separately(['-'], "#{<<~"begin;"}\n#{<<~'end;'}") + begin; + n = 100000 + x = Fiber.new do + h = {kw: 2} + n.times{|i| h[i.to_s.to_sym] = i} + def self.f(kw: 1, **kws) kws.size end + f(**h) + end.resume + assert_equal(n, x) + end; + end + def test_dynamic_symbol_keyword bug10266 = '[ruby-dev:48564] [Bug #10266]' assert_separately(['-', bug10266], "#{<<~"begin;"}\n#{<<~'end;'}") diff --git a/vm_args.c b/vm_args.c index aa8c2ec299..28c0e339a3 100644 --- a/vm_args.c +++ b/vm_args.c @@ -396,6 +396,83 @@ args_setup_kw_parameters(rb_execution_context_t *const ec, const rb_iseq_t *cons locals[key_num] = unspecified_bits_value; } +static void +args_setup_kw_parameters_from_kwsplat(rb_execution_context_t *const ec, const rb_iseq_t *const iseq, + VALUE keyword_hash, VALUE *const locals) +{ + const ID *acceptable_keywords = ISEQ_BODY(iseq)->param.keyword->table; + const int req_key_num = ISEQ_BODY(iseq)->param.keyword->required_num; + const int key_num = ISEQ_BODY(iseq)->param.keyword->num; + const VALUE * const default_values = ISEQ_BODY(iseq)->param.keyword->default_values; + VALUE missing = 0; + int i, di; + int unspecified_bits = 0; + VALUE unspecified_bits_value = Qnil; + + for (i=0; i hash */ + int j; + unspecified_bits_value = rb_hash_new(); + + for (j=0; jparam.flags.has_kwrest) { + const int rest_hash_index = key_num + 1; + locals[rest_hash_index] = keyword_hash; + } + else { + if (!RHASH_EMPTY_P(keyword_hash)) { + argument_kw_error(ec, iseq, "unknown", rb_hash_keys(keyword_hash)); + } + } + + if (NIL_P(unspecified_bits_value)) { + unspecified_bits_value = INT2FIX(unspecified_bits); + } + locals[key_num] = unspecified_bits_value; +} + static inline void args_setup_kw_rest_parameter(VALUE keyword_hash, VALUE *locals, int kw_flag) { @@ -415,22 +492,6 @@ args_setup_block_parameter(const rb_execution_context_t *ec, struct rb_calling_i *locals = rb_vm_bh_to_procval(ec, block_handler); } -struct fill_values_arg { - VALUE *keys; - VALUE *vals; - int argc; -}; - -static int -fill_keys_values(st_data_t key, st_data_t val, st_data_t ptr) -{ - struct fill_values_arg *arg = (struct fill_values_arg *)ptr; - int i = arg->argc++; - arg->keys[i] = (VALUE)key; - arg->vals[i] = (VALUE)val; - return ST_CONTINUE; -} - static inline int ignore_keyword_hash_p(VALUE keyword_hash, const rb_iseq_t * const iseq, unsigned int * kw_flag, VALUE * converted_keyword_hash) { @@ -746,15 +807,8 @@ setup_parameters_complex(rb_execution_context_t * const ec, const rb_iseq_t * co args_setup_kw_parameters(ec, iseq, args->kw_argv, kw_arg->keyword_len, kw_arg->keywords, klocals); } else if (!NIL_P(keyword_hash)) { - int kw_len = rb_long2int(RHASH_SIZE(keyword_hash)); - struct fill_values_arg arg; - /* copy kw_argv */ - arg.keys = args->kw_argv = ALLOCA_N(VALUE, kw_len * 2); - arg.vals = arg.keys + kw_len; - arg.argc = 0; - rb_hash_foreach(keyword_hash, fill_keys_values, (VALUE)&arg); - VM_ASSERT(arg.argc == kw_len); - args_setup_kw_parameters(ec, iseq, arg.vals, kw_len, arg.keys, klocals); + keyword_hash = check_kwrestarg(keyword_hash, &kw_flag); + args_setup_kw_parameters_from_kwsplat(ec, iseq, keyword_hash, klocals); } else { VM_ASSERT(args_argc(args) == 0);