Optimize compilation of large literal arrays

To avoid stack overflow, Ruby splits compilation of large arrays
into smaller arrays, and concatenates the small arrays together.
It previously used newarray/concatarray for this, which is
inefficient.  This switches the compilation to use pushtoarray,
which is much faster. This makes almost all literal arrays only
allocate a single array.

For cases where there is a large number of static values in the
array, Ruby will statically compile subarrays, and previously
added them using concatarray.  This switches to concattoarray,
avoiding an array allocation for the append.

Keyword splats are also supported in arrays, and ignored if the
keyword splat is empty.  Previously, this used newarraykwsplat and
concatarray.  This still uses newarraykwsplat, but switches to
concattoarray to save an allocation.  So large arrays with keyword
splats can allocate 2 arrays instead of 1.

Previously, for the following array sizes (assuming local variable
access for each element), Ruby allocated the following number of
arrays:

  1000 elements: 7 arrays
 10000 elements: 79 arrays
100000 elements: 781 arrays

With these changes, only a single array is allocated (or 2 for a
large array with a keyword splat).

Results using the included benchmark:

```
                       array_1000
            miniruby:     34770.0 i/s
   ./miniruby-before:     10511.7 i/s - 3.31x  slower

                      array_10000
            miniruby:      4938.8 i/s
   ./miniruby-before:       483.8 i/s - 10.21x  slower

                     array_100000
            miniruby:       727.2 i/s
   ./miniruby-before:         4.1 i/s - 176.98x  slower
```

Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
This commit is contained in:
Jeremy Evans 2024-01-27 10:16:52 -08:00 коммит произвёл GitHub
Родитель 0bac390e07
Коммит 2217e08340
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
2 изменённых файлов: 33 добавлений и 13 удалений

Просмотреть файл

@ -0,0 +1,19 @@
# Benchmark for large literal arrays.
#
# The prelude defines def_array(size), which uses Object.class_eval to define
# a method named array_<size>. That method assigns the local variable x = 1 and
# returns an array literal made of <size> references to x. Methods are
# generated for 100, 1000, 10000 and 100000 elements, and each benchmark entry
# calls the generated method of the same name.
prelude: |
def def_array(size)
Object.class_eval(<<-END)
def array_#{size}
x = 1
[#{(['x'] * size).join(',')}]
end
END
end
def_array(100)
def_array(1000)
def_array(10000)
def_array(100000)
benchmark:
array_100: array_100
array_1000: array_1000
array_10000: array_10000
array_100000: array_100000

Просмотреть файл

@ -4822,8 +4822,8 @@ compile_array(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *node, int pop
*
* [x1,x2,...,x10000] =>
* push x1 ; push x2 ; ...; push x256; newarray 256;
* push x257; push x258; ...; push x512; newarray 256; concatarray;
* push x513; push x514; ...; push x768; newarray 256; concatarray;
* push x257; push x258; ...; push x512; pushtoarray 256;
* push x513; push x514; ...; push x768; pushtoarray 256;
* ...
*
* - Long subarray can be optimized by pre-allocating a hidden array.
@ -4833,8 +4833,8 @@ compile_array(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *node, int pop
*
* [x, 1,2,3,...,100, z] =>
* push x; newarray 1;
* putobject [1,2,3,...,100] (<- hidden array); concatarray;
* push z; newarray 1; concatarray
* putobject [1,2,3,...,100] (<- hidden array); concattoarray;
* push z; pushtoarray 1;
*
* - If the last element is a keyword, newarraykwsplat should be emitted
* to check and remove empty keyword arguments hash from array.
@ -4849,11 +4849,11 @@ compile_array(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *node, int pop
int stack_len = 0;
int first_chunk = 1;
/* Convert pushed elements to an array, and concatarray if needed */
#define FLUSH_CHUNK(newarrayinsn) \
/* Either create a new array, or push to the existing array */
#define FLUSH_CHUNK \
if (stack_len) { \
ADD_INSN1(ret, line_node, newarrayinsn, INT2FIX(stack_len)); \
if (!first_chunk) ADD_INSN(ret, line_node, concatarray); \
if (first_chunk) ADD_INSN1(ret, line_node, newarray, INT2FIX(stack_len)); \
else ADD_INSN1(ret, line_node, pushtoarray, INT2FIX(stack_len)); \
first_chunk = stack_len = 0; \
}
@ -4877,14 +4877,14 @@ compile_array(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *node, int pop
OBJ_FREEZE(ary);
/* Emit optimized code */
FLUSH_CHUNK(newarray);
FLUSH_CHUNK;
if (first_chunk) {
ADD_INSN1(ret, line_node, duparray, ary);
first_chunk = 0;
}
else {
ADD_INSN1(ret, line_node, putobject, ary);
ADD_INSN(ret, line_node, concatarray);
ADD_INSN(ret, line_node, concattoarray);
}
RB_OBJ_WRITTEN(iseq, Qundef, ary);
}
@ -4901,16 +4901,17 @@ compile_array(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *node, int pop
if (!RNODE_LIST(node)->nd_next && keyword_node_p(RNODE_LIST(node)->nd_head)) {
/* Reached the end, and the last element is a keyword */
FLUSH_CHUNK(newarraykwsplat);
ADD_INSN1(ret, line_node, newarraykwsplat, INT2FIX(stack_len));
if (!first_chunk) ADD_INSN(ret, line_node, concattoarray);
return 1;
}
/* If there are many pushed elements, flush them to avoid stack overflow */
if (stack_len >= max_stack_len) FLUSH_CHUNK(newarray);
if (stack_len >= max_stack_len) FLUSH_CHUNK;
}
}
FLUSH_CHUNK(newarray);
FLUSH_CHUNK;
#undef FLUSH_CHUNK
return 1;
}