Speed up ISeq by marking via bitmaps and IC rearranging

This commit adds a bitfield to the iseq body that stores offsets inside
the iseq buffer that contain values we need to mark.  We can use this
bitfield to mark objects instead of disassembling the instructions.

This commit also groups inline storage entries by kind and adds a counter
for each kind.  This allows us to iterate over and mark each entry without
disassembling instructions.

Since we have a bitfield and grouped inline caches, we can mark all
VALUE objects associated with instructions without actually
disassembling the instructions at mark time.

[Feature #18875] [ruby-core:109042]
This commit is contained in:
Aaron Patterson 2022-06-17 15:28:14 -07:00 коммит произвёл Aaron Patterson
Родитель 6fd9cb8087
Коммит e23540e566
5 изменённых файлов: 172 добавлений и 39 удалений

116
compile.c
Просмотреть файл

@ -2069,7 +2069,7 @@ get_ivar_ic_value(rb_iseq_t *iseq,ID id)
tbl = rb_id_table_create(1);
ISEQ_COMPILE_DATA(iseq)->ivar_cache_table = tbl;
}
val = INT2FIX(ISEQ_BODY(iseq)->is_size++);
val = INT2FIX(ISEQ_BODY(iseq)->ivc_size++);
rb_id_table_insert(tbl,id,val);
return val;
}
@ -2327,14 +2327,23 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor)
generated_iseq = ALLOC_N(VALUE, code_index);
insns_info = ALLOC_N(struct iseq_insn_info_entry, insn_num);
positions = ALLOC_N(unsigned int, insn_num);
body->is_entries = ZALLOC_N(union iseq_inline_storage_entry, body->is_size);
body->is_entries = ZALLOC_N(union iseq_inline_storage_entry, ISEQ_IS_SIZE(body));
body->call_data = ZALLOC_N(struct rb_call_data, body->ci_size);
ISEQ_COMPILE_DATA(iseq)->ci_index = 0;
// Calculate the bitmask buffer size.
// Round the generated_iseq size up to the nearest multiple
// of the number of bits in an iseq_bits_t word.
// Allocate enough room for the bitmask list
iseq_bits_t * mark_offset_bits = ZALLOC_N(iseq_bits_t, ISEQ_MBITS_BUFLEN(code_index));
list = FIRST_ELEMENT(anchor);
insns_info_index = code_index = sp = 0;
while (list) {
unsigned int ic_index = 0;
switch (list->type) {
case ISEQ_ELEMENT_INSN:
{
@ -2375,6 +2384,7 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor)
rb_hash_rehash(map);
freeze_hide_obj(map);
generated_iseq[code_index + 1 + j] = map;
ISEQ_MBITS_SET(mark_offset_bits, code_index + 1 + j);
RB_OBJ_WRITTEN(iseq, Qundef, map);
FL_SET(iseqv, ISEQ_MARKABLE_ISEQ);
break;
@ -2383,30 +2393,34 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor)
case TS_NUM: /* ulong */
generated_iseq[code_index + 1 + j] = FIX2INT(operands[j]);
break;
case TS_VALUE: /* VALUE */
case TS_ISEQ: /* iseq */
case TS_VALUE: /* VALUE */
{
VALUE v = operands[j];
generated_iseq[code_index + 1 + j] = v;
/* to mark ruby object */
if (!SPECIAL_CONST_P(v)) {
RB_OBJ_WRITTEN(iseq, Qundef, v);
ISEQ_MBITS_SET(mark_offset_bits, code_index + 1 + j);
FL_SET(iseqv, ISEQ_MARKABLE_ISEQ);
}
break;
}
case TS_IC: /* inline cache */
case TS_ISE: /* inline storage entry */
/* [ TS_(ICVARC|IVC) ... | TS_ISE | TS_IC ] */
case TS_IC: /* inline cache: constants */
ic_index += body->ise_size;
case TS_ISE: /* inline storage entry: `once` insn */
ic_index += body->ivc_size;
case TS_ICVARC: /* inline cvar cache */
case TS_IVC: /* inline ivar cache */
{
unsigned int ic_index = FIX2UINT(operands[j]);
ic_index += FIX2UINT(operands[j]);
IC ic = (IC)&body->is_entries[ic_index];
if (UNLIKELY(ic_index >= body->is_size)) {
if (UNLIKELY(ic_index >= ISEQ_IS_SIZE(body))) {
BADINSN_DUMP(anchor, &iobj->link, 0);
COMPILE_ERROR(iseq, iobj->insn_info.line_no,
"iseq_set_sequence: ic_index overflow: index: %d, size: %d",
ic_index, body->is_size);
ic_index, ISEQ_IS_SIZE(body));
}
generated_iseq[code_index + 1 + j] = (VALUE)ic;
FL_SET(iseqv, ISEQ_MARKABLE_ISEQ);
@ -2491,6 +2505,7 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor)
xfree(generated_iseq);
xfree(insns_info);
xfree(positions);
xfree(mark_offset_bits);
debug_list(anchor, list);
COMPILE_ERROR(iseq, adjust->line_no,
"iseq_set_sequence: adjust bug to %d %d < %d",
@ -2510,6 +2525,7 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor)
body->iseq_encoded = (void *)generated_iseq;
body->iseq_size = code_index;
body->stack_max = stack_max;
body->mark_offset_bits = mark_offset_bits;
/* get rid of memory leak when REALLOC failed */
body->insns_info.body = insns_info;
@ -8843,7 +8859,7 @@ compile_colon2(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node,
if (rb_is_const_id(node->nd_mid)) {
/* constant */
LABEL *lend = NEW_LABEL(line);
int ic_index = ISEQ_BODY(iseq)->is_size++;
int ic_index = ISEQ_BODY(iseq)->ic_size++;
DECL_ANCHOR(pref);
DECL_ANCHOR(body);
@ -8888,7 +8904,7 @@ compile_colon3(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node,
{
const int line = nd_line(node);
LABEL *lend = NEW_LABEL(line);
int ic_index = ISEQ_BODY(iseq)->is_size++;
int ic_index = ISEQ_BODY(iseq)->ic_size++;
debugi("colon3#nd_mid", node->nd_mid);
@ -9407,7 +9423,7 @@ iseq_compile_each0(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const no
if (ISEQ_COMPILE_DATA(iseq)->option->inline_const_cache) {
LABEL *lend = NEW_LABEL(line);
int ic_index = body->is_size++;
int ic_index = body->ic_size++;
ADD_INSN2(ret, node, opt_getinlinecache, lend, INT2FIX(ic_index));
ADD_INSN1(ret, node, putobject, Qtrue);
@ -9532,7 +9548,7 @@ iseq_compile_each0(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const no
break;
}
case NODE_ONCE:{
int ic_index = body->is_size++;
int ic_index = body->ise_size++;
const rb_iseq_t *block_iseq;
block_iseq = NEW_CHILD_ISEQ(node->nd_body, make_name_for_block(iseq), ISEQ_TYPE_PLAIN, line);
@ -9763,7 +9779,7 @@ iseq_compile_each0(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const no
/* compiled to:
* ONCE{ rb_mRubyVMFrozenCore::core#set_postexe{ ... } }
*/
int is_index = body->is_size++;
int is_index = body->ise_size++;
struct rb_iseq_new_with_callback_callback_func *ifunc =
rb_iseq_new_with_callback_new_callback(build_postexe_iseq, node->nd_body);
const rb_iseq_t *once_iseq =
@ -10294,12 +10310,24 @@ iseq_build_from_ary_body(rb_iseq_t *iseq, LINK_ANCHOR *const anchor,
}
break;
case TS_ISE:
argv[j] = op;
if (NUM2UINT(op) >= ISEQ_BODY(iseq)->ise_size) {
ISEQ_BODY(iseq)->ise_size = NUM2INT(op) + 1;
}
FL_SET((VALUE)iseq, ISEQ_MARKABLE_ISEQ);
break;
case TS_IC:
argv[j] = op;
if (NUM2UINT(op) >= ISEQ_BODY(iseq)->ic_size) {
ISEQ_BODY(iseq)->ic_size = NUM2INT(op) + 1;
}
FL_SET((VALUE)iseq, ISEQ_MARKABLE_ISEQ);
break;
case TS_IVC: /* inline ivar cache */
case TS_ICVARC: /* inline cvar cache */
argv[j] = op;
if (NUM2UINT(op) >= ISEQ_BODY(iseq)->is_size) {
ISEQ_BODY(iseq)->is_size = NUM2INT(op) + 1;
if (NUM2UINT(op) >= ISEQ_BODY(iseq)->ivc_size) {
ISEQ_BODY(iseq)->ivc_size = NUM2INT(op) + 1;
}
FL_SET((VALUE)iseq, ISEQ_MARKABLE_ISEQ);
break;
@ -11110,12 +11138,12 @@ ibf_dump_code(struct ibf_dump *dump, const rb_iseq_t *iseq)
wv = (VALUE)ibf_dump_iseq(dump, (const rb_iseq_t *)op);
break;
case TS_IC:
case TS_ISE:
case TS_IVC:
case TS_ICVARC:
case TS_ISE:
{
unsigned int i;
for (i=0; i<body->is_size; i++) {
for (i=0; i<ISEQ_IS_SIZE(body); i++) {
if (op == (VALUE)&body->is_entries[i]) {
break;
}
@ -11150,7 +11178,7 @@ ibf_dump_code(struct ibf_dump *dump, const rb_iseq_t *iseq)
}
static VALUE *
ibf_load_code(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t bytecode_offset, ibf_offset_t bytecode_size, unsigned int iseq_size)
ibf_load_code(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t bytecode_offset, ibf_offset_t bytecode_size, unsigned int iseq_size, const unsigned int is_size)
{
VALUE iseqv = (VALUE)iseq;
unsigned int code_index;
@ -11161,6 +11189,12 @@ ibf_load_code(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t bytecod
struct rb_call_data *cd_entries = load_body->call_data;
union iseq_inline_storage_entry *is_entries = load_body->is_entries;
iseq_bits_t * mark_offset_bits = ZALLOC_N(iseq_bits_t, ISEQ_MBITS_BUFLEN(iseq_size));
load_body->mark_offset_bits = mark_offset_bits;
unsigned int min_ic_index, min_ise_index, min_ivc_index;
min_ic_index = min_ise_index = min_ivc_index = UINT_MAX;
for (code_index=0; code_index<iseq_size;) {
/* opcode */
const VALUE insn = code[code_index] = ibf_load_small_value(load, &reading_pos);
@ -11181,6 +11215,7 @@ ibf_load_code(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t bytecod
code[code_index] = v;
if (!SPECIAL_CONST_P(v)) {
RB_OBJ_WRITTEN(iseqv, Qundef, v);
ISEQ_MBITS_SET(mark_offset_bits, code_index);
FL_SET(iseqv, ISEQ_MARKABLE_ISEQ);
}
break;
@ -11200,6 +11235,7 @@ ibf_load_code(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t bytecod
pinned_list_store(load->current_buffer->obj_list, (long)op, v);
code[code_index] = v;
ISEQ_MBITS_SET(mark_offset_bits, code_index);
RB_OBJ_WRITTEN(iseqv, Qundef, v);
FL_SET(iseqv, ISEQ_MARKABLE_ISEQ);
break;
@ -11211,16 +11247,36 @@ ibf_load_code(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t bytecod
code[code_index] = v;
if (!SPECIAL_CONST_P(v)) {
RB_OBJ_WRITTEN(iseqv, Qundef, v);
ISEQ_MBITS_SET(mark_offset_bits, code_index);
FL_SET(iseqv, ISEQ_MARKABLE_ISEQ);
}
break;
}
case TS_ISE:
case TS_IC:
case TS_IVC:
case TS_ISE:
case TS_ICVARC:
case TS_IVC:
{
VALUE op = ibf_load_small_value(load, &reading_pos);
unsigned int op = (unsigned int)ibf_load_small_value(load, &reading_pos);
switch(operand_type)
{
case TS_IC:
if (op < min_ic_index) {
min_ic_index = op;
}
break;
case TS_ISE:
if (op < min_ise_index) {
min_ise_index = op;
}
break;
default:
if (op < min_ivc_index) {
min_ivc_index = op;
}
}
code[code_index] = (VALUE)&is_entries[op];
if (insn == BIN(opt_getinlinecache) && operand_type == TS_IC) {
@ -11257,6 +11313,19 @@ ibf_load_code(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t bytecod
rb_raise(rb_eRuntimeError, "operand size mismatch");
}
}
if (min_ic_index != UINT_MAX) {
load_body->ic_size = is_size - min_ic_index;
}
if (min_ise_index != UINT_MAX) {
load_body->ise_size = (is_size - load_body->ic_size) - min_ise_index;
}
if (min_ivc_index != UINT_MAX) {
load_body->ivc_size = (is_size - load_body->ic_size - load_body->ise_size) - min_ivc_index;
}
load_body->iseq_encoded = code;
load_body->iseq_size = code_index;
@ -11740,7 +11809,7 @@ ibf_dump_iseq_each(struct ibf_dump *dump, const rb_iseq_t *iseq)
ibf_dump_write_small_value(dump, IBF_BODY_OFFSET(outer_variables_offset));
ibf_dump_write_small_value(dump, body->variable.flip_count);
ibf_dump_write_small_value(dump, body->local_table_size);
ibf_dump_write_small_value(dump, body->is_size);
ibf_dump_write_small_value(dump, ISEQ_IS_SIZE(body));
ibf_dump_write_small_value(dump, body->ci_size);
ibf_dump_write_small_value(dump, body->stack_max);
ibf_dump_write_small_value(dump, body->catch_except_p);
@ -11876,7 +11945,6 @@ ibf_load_iseq_each(struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t offset)
load_body->param.post_num = param_post_num;
load_body->param.block_start = param_block_start;
load_body->local_table_size = local_table_size;
load_body->is_size = is_size;
load_body->ci_size = ci_size;
load_body->insns_info.size = insns_info_size;
@ -11908,7 +11976,7 @@ ibf_load_iseq_each(struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t offset)
load_body->local_iseq = ibf_load_iseq(load, (const rb_iseq_t *)(VALUE)local_iseq_index);
load_body->mandatory_only_iseq = ibf_load_iseq(load, (const rb_iseq_t *)(VALUE)mandatory_only_iseq_index);
ibf_load_code(load, iseq, bytecode_offset, bytecode_size, iseq_size);
ibf_load_code(load, iseq, bytecode_offset, bytecode_size, iseq_size, is_size);
#if VM_INSN_INFO_TABLE_IMPL == 2
rb_iseq_insns_info_encode_positions(iseq);
#endif

70
iseq.c
Просмотреть файл

@ -193,6 +193,7 @@ rb_iseq_free(const rb_iseq_t *iseq)
}
ruby_xfree((void *)body->catch_table);
ruby_xfree((void *)body->param.opt_table);
ruby_xfree((void *)body->mark_offset_bits);
if (body->param.keyword != NULL) {
ruby_xfree((void *)body->param.keyword->default_values);
@ -317,19 +318,69 @@ rb_iseq_each_value(const rb_iseq_t *iseq, iseq_value_itr_t * func, void *data)
{
unsigned int size;
VALUE *code;
size_t n;
rb_vm_insns_translator_t *const translator =
#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
(FL_TEST((VALUE)iseq, ISEQ_TRANSLATED)) ? rb_vm_insn_addr2insn2 :
#endif
rb_vm_insn_null_translator;
const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
size = body->iseq_size;
code = body->iseq_encoded;
for (n = 0; n < size;) {
n += iseq_extract_values(code, n, func, data, translator);
union iseq_inline_storage_entry *is_entries = body->is_entries;
// IVC and ICVARC entries
for (unsigned int i = 0; i < body->ivc_size; i++, is_entries++) {
IVC ivc = (IVC)is_entries;
if (ivc->entry) {
if (RB_TYPE_P(ivc->entry->class_value, T_NONE)) {
rb_bug("!! %u", ivc->entry->index);
}
VALUE nv = func(data, ivc->entry->class_value);
if (ivc->entry->class_value != nv) {
ivc->entry->class_value = nv;
}
}
}
// ISE entries
for (unsigned int i = 0; i < body->ise_size; i++, is_entries++) {
union iseq_inline_storage_entry *const is = (union iseq_inline_storage_entry *)is_entries;
if (is->once.value) {
VALUE nv = func(data, is->once.value);
if (is->once.value != nv) {
is->once.value = nv;
}
}
}
// IC Entries
for (unsigned int i = 0; i < body->ic_size; i++, is_entries++) {
IC ic = (IC)is_entries;
if (ic->entry) {
VALUE nv = func(data, (VALUE)ic->entry);
if ((VALUE)ic->entry != nv) {
ic->entry = (void *)nv;
}
}
}
// Embedded VALUEs
for (unsigned int i = 0; i < ISEQ_MBITS_BUFLEN(size); i++) {
iseq_bits_t bits = body->mark_offset_bits[i];
if (bits) {
unsigned int count = 0;
while(bits) {
if (bits & 0x1) {
unsigned int index = (i * ISEQ_MBITS_BITLENGTH) + count;
VALUE op = code[index];
VALUE newop = func(data, op);
if (newop != op) {
code[index] = newop;
}
}
bits >>= 1;
count++;
}
}
}
}
@ -588,6 +639,7 @@ rb_iseq_memsize(const rb_iseq_t *iseq)
size += body->iseq_size * sizeof(VALUE);
size += body->insns_info.size * (sizeof(struct iseq_insn_info_entry) + sizeof(unsigned int));
size += body->local_table_size * sizeof(ID);
size += ISEQ_MBITS_BUFLEN(body->iseq_size) * ISEQ_MBITS_SIZE;
if (body->catch_table) {
size += iseq_catch_table_bytes(body->catch_table->size);
}
@ -595,7 +647,7 @@ rb_iseq_memsize(const rb_iseq_t *iseq)
size += param_keyword_size(body->param.keyword);
/* body->is_entries */
size += body->is_size * sizeof(union iseq_inline_storage_entry);
size += ISEQ_IS_SIZE(body) * sizeof(union iseq_inline_storage_entry);
/* body->call_data */
size += body->ci_size * sizeof(struct rb_call_data);

6
iseq.h
Просмотреть файл

@ -17,6 +17,12 @@ RUBY_EXTERN const int ruby_api_version[];
#define ISEQ_MAJOR_VERSION ((unsigned int)ruby_api_version[0])
#define ISEQ_MINOR_VERSION ((unsigned int)ruby_api_version[1])
/*
 * Helpers for the iseq mark bitmap, an array of iseq_bits_t words.
 * Bit `i` of the bitmap corresponds to slot `i` of the instruction buffer.
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. `ptr + 1`, `cond ? a : b`) expand correctly.
 */
/* Size in bytes of one bitmap word. */
#define ISEQ_MBITS_SIZE (sizeof(iseq_bits_t))
/* Number of bits stored in one bitmap word. */
#define ISEQ_MBITS_BITLENGTH (ISEQ_MBITS_SIZE * CHAR_BIT)
/* Set bit `i` in the bitmap `buf`. */
#define ISEQ_MBITS_SET(buf, i) ((buf)[(i) / ISEQ_MBITS_BITLENGTH] |= ((iseq_bits_t)1 << ((i) % ISEQ_MBITS_BITLENGTH)))
/* Evaluates to 1 when bit `i` of the bitmap `buf` is set, 0 otherwise. */
#define ISEQ_MBITS_SET_P(buf, i) (((buf)[(i) / ISEQ_MBITS_BITLENGTH] >> ((i) % ISEQ_MBITS_BITLENGTH)) & 0x1)
/* Number of bitmap words needed to hold `size` bits (`size` rounded up to a whole word). */
#define ISEQ_MBITS_BUFLEN(size) ((((size) + (ISEQ_MBITS_BITLENGTH - 1)) & -ISEQ_MBITS_BITLENGTH) / ISEQ_MBITS_BITLENGTH)
#ifndef USE_ISEQ_NODE_ID
#define USE_ISEQ_NODE_ID 1
#endif

Просмотреть файл

@ -327,7 +327,7 @@ mjit_capture_is_entries(const struct rb_iseq_constant_body *body, union iseq_inl
{
if (is_entries == NULL)
return;
memcpy(is_entries, body->is_entries, sizeof(union iseq_inline_storage_entry) * body->is_size);
memcpy(is_entries, body->is_entries, sizeof(union iseq_inline_storage_entry) * ISEQ_IS_SIZE(body));
}
static bool
@ -492,8 +492,8 @@ init_ivar_compile_status(const struct rb_iseq_constant_body *body, struct compil
.stack_size_for_pos = (int *)alloca(sizeof(int) * body->iseq_size), \
.inlined_iseqs = compile_root_p ? \
alloca(sizeof(const struct rb_iseq_constant_body *) * body->iseq_size) : NULL, \
.is_entries = (body->is_size > 0) ? \
alloca(sizeof(union iseq_inline_storage_entry) * body->is_size) : NULL, \
.is_entries = (ISEQ_IS_SIZE(body) > 0) ? \
alloca(sizeof(union iseq_inline_storage_entry) * ISEQ_IS_SIZE(body)) : NULL, \
.cc_entries_index = (body->ci_size > 0) ? \
mjit_capture_cc_entries(status.compiled_iseq, body) : -1, \
.compiled_id = status.compiled_id, \

Просмотреть файл

@ -335,6 +335,10 @@ pathobj_realpath(VALUE pathobj)
/* Forward declarations */
struct rb_mjit_unit;
/* Word type of the bitmap used to find markable references inside an iseq. */
typedef uintptr_t iseq_bits_t;
/*
 * Total number of inline storage entries of an iseq body: the sum of the
 * IC, IVC/ICVARC and ISE counters.  `body` is parenthesized so that any
 * pointer expression (e.g. `p + 1`, `&b`) can be passed.
 */
#define ISEQ_IS_SIZE(body) ((body)->ic_size + (body)->ivc_size + (body)->ise_size)
struct rb_iseq_constant_body {
enum iseq_type {
ISEQ_TYPE_TOP,
@ -444,7 +448,7 @@ struct rb_iseq_constant_body {
const struct rb_iseq_struct *parent_iseq;
struct rb_iseq_struct *local_iseq; /* local_iseq->flip_cnt can be modified */
union iseq_inline_storage_entry *is_entries;
union iseq_inline_storage_entry *is_entries; /* [ TS_(ICVARC|IVC) ... | TS_ISE | TS_IC ] */
struct rb_call_data *call_data; //struct rb_call_data calls[ci_size];
struct {
@ -456,9 +460,12 @@ struct rb_iseq_constant_body {
} variable;
unsigned int local_table_size;
unsigned int is_size;
unsigned int ic_size; // Number of IC caches
unsigned int ise_size; // Number of ISE caches
unsigned int ivc_size; // Number of IVC and ICVARC caches
unsigned int ci_size;
unsigned int stack_max; /* for stack overflow check */
iseq_bits_t * mark_offset_bits; /* Find references for GC */
char catch_except_p; /* If a frame of this ISeq may catch exception, set TRUE */
// If true, this ISeq is leaf *and* backtraces are not used, for example,