From 2217e08340cbeba427fc58c7f955fc2382ab0372 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Sat, 27 Jan 2024 10:16:52 -0800 Subject: [PATCH] Optimize compilation of large literal arrays To avoid stack overflow, Ruby splits compilation of large arrays into smaller arrays, and concatenates the small arrays together. It previously used newarray/concatarray for this, which is inefficient. This switches the compilation to use pushtoarray, which is much faster. This makes almost all literal arrays only allocate a single array. For cases where there are a large number of static values in the array, Ruby will statically compile subarrays, and previously added them using concatarray. This switches to concattoarray, avoiding an array allocation for the append. Keyword splats are also supported in arrays, and ignored if the keyword splat is empty. Previously, this used newarraykwsplat and concatarray. This still uses newarraykwsplat, but switches to concattoarray to save an allocation. So large arrays with keyword splats can allocate 2 arrays instead of 1. Previously, for the following array sizes (assuming local variable access for each element), Ruby allocated the following number of arrays: 1000 elements: 7 arrays 10000 elements: 79 arrays 100000 elements: 781 arrays With these changes, only a single array is allocated (or 2 for a large array with a keyword splat). 
Results using the included benchmark: ``` array_1000 miniruby: 34770.0 i/s ./miniruby-before: 10511.7 i/s - 3.31x slower array_10000 miniruby: 4938.8 i/s ./miniruby-before: 483.8 i/s - 10.21x slower array_100000 miniruby: 727.2 i/s ./miniruby-before: 4.1 i/s - 176.98x slower ``` Co-authored-by: Nobuyoshi Nakada --- benchmark/array_large_literal.yml | 19 +++++++++++++++++++ compile.c | 27 ++++++++++++++------------- 2 files changed, 33 insertions(+), 13 deletions(-) create mode 100644 benchmark/array_large_literal.yml diff --git a/benchmark/array_large_literal.yml b/benchmark/array_large_literal.yml new file mode 100644 index 0000000000..423d68391f --- /dev/null +++ b/benchmark/array_large_literal.yml @@ -0,0 +1,19 @@ +prelude: | + def def_array(size) + Object.class_eval(<<-END) + def array_#{size} + x = 1 + [#{(['x'] * size).join(',')}] + end + END + end + def_array(100) + def_array(1000) + def_array(10000) + def_array(100000) +benchmark: + array_100: array_100 + array_1000: array_1000 + array_10000: array_10000 + array_100000: array_100000 + diff --git a/compile.c b/compile.c index eea92b3638..71b657d9f2 100644 --- a/compile.c +++ b/compile.c @@ -4822,8 +4822,8 @@ compile_array(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *node, int pop * * [x1,x2,...,x10000] => * push x1 ; push x2 ; ...; push x256; newarray 256; - * push x257; push x258; ...; push x512; newarray 256; concatarray; - * push x513; push x514; ...; push x768; newarray 256; concatarray; + * push x257; push x258; ...; push x512; pushtoarray 256; + * push x513; push x514; ...; push x768; pushtoarray 256; * ... * * - Long subarray can be optimized by pre-allocating a hidden array. 
@@ -4833,8 +4833,8 @@ compile_array(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *node, int pop * * [x, 1,2,3,...,100, z] => * push x; newarray 1; - * putobject [1,2,3,...,100] (<- hidden array); concatarray; - * push z; newarray 1; concatarray + * putobject [1,2,3,...,100] (<- hidden array); concattoarray; + * push z; pushtoarray 1; * * - If the last element is a keyword, newarraykwsplat should be emitted * to check and remove empty keyword arguments hash from array. @@ -4849,11 +4849,11 @@ compile_array(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *node, int pop int stack_len = 0; int first_chunk = 1; - /* Convert pushed elements to an array, and concatarray if needed */ -#define FLUSH_CHUNK(newarrayinsn) \ + /* Either create a new array, or push to the existing array */ +#define FLUSH_CHUNK \ if (stack_len) { \ - ADD_INSN1(ret, line_node, newarrayinsn, INT2FIX(stack_len)); \ - if (!first_chunk) ADD_INSN(ret, line_node, concatarray); \ + if (first_chunk) ADD_INSN1(ret, line_node, newarray, INT2FIX(stack_len)); \ + else ADD_INSN1(ret, line_node, pushtoarray, INT2FIX(stack_len)); \ first_chunk = stack_len = 0; \ } @@ -4877,14 +4877,14 @@ compile_array(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *node, int pop OBJ_FREEZE(ary); /* Emit optimized code */ - FLUSH_CHUNK(newarray); + FLUSH_CHUNK; if (first_chunk) { ADD_INSN1(ret, line_node, duparray, ary); first_chunk = 0; } else { ADD_INSN1(ret, line_node, putobject, ary); - ADD_INSN(ret, line_node, concatarray); + ADD_INSN(ret, line_node, concattoarray); } RB_OBJ_WRITTEN(iseq, Qundef, ary); } @@ -4901,16 +4901,17 @@ compile_array(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *node, int pop if (!RNODE_LIST(node)->nd_next && keyword_node_p(RNODE_LIST(node)->nd_head)) { /* Reached the end, and the last element is a keyword */ - FLUSH_CHUNK(newarraykwsplat); + ADD_INSN1(ret, line_node, newarraykwsplat, INT2FIX(stack_len)); + if (!first_chunk) ADD_INSN(ret, line_node, concattoarray); return 1; } 
/* If there are many pushed elements, flush them to avoid stack overflow */ - if (stack_len >= max_stack_len) FLUSH_CHUNK(newarray); + if (stack_len >= max_stack_len) FLUSH_CHUNK; } } - FLUSH_CHUNK(newarray); + FLUSH_CHUNK; #undef FLUSH_CHUNK return 1; }