From 50efa18c6cf102e070ba0f95edc01c72516495a3 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Fri, 19 Jun 2020 14:52:35 +0900 Subject: [PATCH] compile.c: Improve branch coverage instrumentation [Bug #16967] Formerly, branch coverage measurement counters are generated for each compilation traverse of the AST. However, ensure clause node is traversed twice; one is for normal-exit case (the resulted bytecode is embedded in its outer scope), and the other is for exceptional case (the resulted bytecode is used in catch table). Two branch coverage counters are generated for the two cases, but it is not desired. This changeset revamps the internal representation of branch coverage measurement. Branch coverage counters are generated only at the first visit of a branch node. Visiting the same node reuses the already-generated counter, so double counting is avoided. --- compile.c | 96 +++++++++++++++++++++++++++++++---------- ext/coverage/coverage.c | 76 +++++++++++++++++++++----------- thread.c | 39 +++++++++++------ 3 files changed, 149 insertions(+), 62 deletions(-) diff --git a/compile.c b/compile.c index c3c1921e98..972a2997ff 100644 --- a/compile.c +++ b/compile.c @@ -610,34 +610,74 @@ static VALUE decl_branch_base(rb_iseq_t *iseq, const NODE *node, const char *typ if (!branch_coverage_valid_p(iseq, first_lineno)) return Qundef; + /* + * if !structure[node] + * structure[node] = [type, first_lineno, first_column, last_lineno, last_column, branches = {}] + * else + * branches = structure[node][5] + * end + */ + VALUE structure = RARRAY_AREF(ISEQ_BRANCH_COVERAGE(iseq), 0); - VALUE branches = rb_ary_tmp_new(5); - rb_ary_push(structure, branches); - rb_ary_push(branches, ID2SYM(rb_intern(type))); - rb_ary_push(branches, INT2FIX(first_lineno)); - rb_ary_push(branches, INT2FIX(first_column)); - rb_ary_push(branches, INT2FIX(last_lineno)); - rb_ary_push(branches, INT2FIX(last_column)); + VALUE key = (VALUE)node | 1; // FIXNUM for hash key + VALUE branch_base = rb_hash_aref(structure, key); + VALUE branches; + + if (NIL_P(branch_base)) { + branch_base = rb_ary_tmp_new(6); + rb_hash_aset(structure, key, branch_base); + rb_ary_push(branch_base, ID2SYM(rb_intern(type))); + rb_ary_push(branch_base, INT2FIX(first_lineno)); + rb_ary_push(branch_base, INT2FIX(first_column)); + rb_ary_push(branch_base, INT2FIX(last_lineno)); + rb_ary_push(branch_base, INT2FIX(last_column)); + branches = rb_hash_new(); + rb_obj_hide(branches); + rb_ary_push(branch_base, branches); + } + else { + branches = RARRAY_AREF(branch_base, 5); + } return branches; } -static void add_trace_branch_coverage(rb_iseq_t *iseq, LINK_ANCHOR *const seq, const NODE *node, const char *type, VALUE branches) +static void add_trace_branch_coverage(rb_iseq_t *iseq, LINK_ANCHOR *const seq, const NODE *node, int branch_id, const char *type, VALUE branches) { const int first_lineno = nd_first_lineno(node), first_column = nd_first_column(node); const int last_lineno = nd_last_lineno(node), last_column = nd_last_column(node); if (!branch_coverage_valid_p(iseq, first_lineno)) return; - VALUE counters = RARRAY_AREF(ISEQ_BRANCH_COVERAGE(iseq), 1); - long counter_idx = RARRAY_LEN(counters); - rb_ary_push(counters, INT2FIX(0)); - rb_ary_push(branches, ID2SYM(rb_intern(type))); - rb_ary_push(branches, INT2FIX(first_lineno)); - rb_ary_push(branches, INT2FIX(first_column)); - rb_ary_push(branches, INT2FIX(last_lineno)); - rb_ary_push(branches, INT2FIX(last_column)); - rb_ary_push(branches, INT2FIX(counter_idx)); + /* + * if !branches[branch_id] + * branches[branch_id] = [type, first_lineno, first_column, last_lineno, last_column, counter_idx] + * else + * counter_idx= branches[branch_id][5] + * end + */ + + VALUE key = INT2FIX(branch_id); + VALUE branch = rb_hash_aref(branches, key); + long counter_idx; + + if (NIL_P(branch)) { + branch = rb_ary_tmp_new(6); + rb_hash_aset(branches, key, branch); + rb_ary_push(branch, ID2SYM(rb_intern(type))); + rb_ary_push(branch, INT2FIX(first_lineno)); + rb_ary_push(branch, INT2FIX(first_column)); + rb_ary_push(branch, INT2FIX(last_lineno)); + rb_ary_push(branch, INT2FIX(last_column)); + VALUE counters = RARRAY_AREF(ISEQ_BRANCH_COVERAGE(iseq), 1); + counter_idx = RARRAY_LEN(counters); + rb_ary_push(branch, LONG2FIX(counter_idx)); + rb_ary_push(counters, INT2FIX(0)); + } + else { + counter_idx = FIX2LONG(RARRAY_AREF(branch, 5)); + } + ADD_TRACE_WITH_DATA(seq, RUBY_EVENT_COVERAGE_BRANCH, counter_idx); ADD_INSN(seq, last_lineno, nop); } @@ -5359,6 +5399,7 @@ compile_if(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, int iseq, ret, node_body ? node_body : node, + 0, type == NODE_IF ? "then" : "else", branches); end_label = NEW_LABEL(line); @@ -5374,6 +5415,7 @@ compile_if(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, int iseq, ret, node_else ? node_else : node, + 1, type == NODE_IF ? "else" : "then", branches); } @@ -5401,6 +5443,7 @@ compile_case(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const orig_nod int line; enum node_type type; VALUE branches = Qfalse; + int branch_id = 0; INIT_ANCHOR(head); INIT_ANCHOR(body_seq); @@ -5432,6 +5475,7 @@ compile_case(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const orig_nod iseq, body_seq, node->nd_body ? node->nd_body : node, + branch_id++, "when", branches); CHECK(COMPILE_(body_seq, "when body", node->nd_body, popped)); @@ -5469,7 +5513,7 @@ compile_case(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const orig_nod if (node) { ADD_LABEL(cond_seq, elselabel); ADD_INSN(cond_seq, line, pop); - add_trace_branch_coverage(iseq, cond_seq, node, "else", branches); + add_trace_branch_coverage(iseq, cond_seq, node, branch_id, "else", branches); CHECK(COMPILE_(cond_seq, "else", node, popped)); ADD_INSNL(cond_seq, line, jump, endlabel); } @@ -5477,7 +5521,7 @@ compile_case(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const orig_nod debugs("== else (implicit)\n"); ADD_LABEL(cond_seq, elselabel); ADD_INSN(cond_seq, nd_line(orig_node), pop); - add_trace_branch_coverage(iseq, cond_seq, orig_node, "else", branches); + add_trace_branch_coverage(iseq, cond_seq, orig_node, branch_id, "else", branches); if (!popped) { ADD_INSN(cond_seq, nd_line(orig_node), putnil); } @@ -5506,6 +5550,7 @@ compile_case2(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const orig_no LABEL *endlabel; DECL_ANCHOR(body_seq); VALUE branches = Qfalse; + int branch_id = 0; branches = decl_branch_base(iseq, orig_node, "case"); @@ -5520,6 +5565,7 @@ compile_case2(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const orig_no iseq, body_seq, node->nd_body ? node->nd_body : node, + branch_id++, "when", branches); CHECK(COMPILE_(body_seq, "when", node->nd_body, popped)); @@ -5559,6 +5605,7 @@ compile_case2(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const orig_no iseq, ret, node ? node : orig_node, + branch_id, "else", branches); CHECK(COMPILE_(ret, "else", node, popped)); @@ -6220,6 +6267,7 @@ compile_case3(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const orig_no int line; enum node_type type; VALUE branches = 0; + int branch_id = 0; INIT_ANCHOR(head); INIT_ANCHOR(body_seq); @@ -6249,6 +6297,7 @@ compile_case3(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const orig_no iseq, body_seq, node->nd_body ? node->nd_body : node, + branch_id++, "in", branches); CHECK(COMPILE_(body_seq, "in body", node->nd_body, popped)); @@ -6278,14 +6327,14 @@ compile_case3(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const orig_no if (node) { ADD_LABEL(cond_seq, elselabel); ADD_INSN(cond_seq, line, pop); - add_trace_branch_coverage(iseq, cond_seq, node, "else", branches); + add_trace_branch_coverage(iseq, cond_seq, node, branch_id, "else", branches); CHECK(COMPILE_(cond_seq, "else", node, popped)); ADD_INSNL(cond_seq, line, jump, endlabel); } else { debugs("== else (implicit)\n"); ADD_LABEL(cond_seq, elselabel); - add_trace_branch_coverage(iseq, cond_seq, orig_node, "else", branches); + add_trace_branch_coverage(iseq, cond_seq, orig_node, branch_id, "else", branches); ADD_INSN1(cond_seq, nd_line(orig_node), putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); ADD_INSN1(cond_seq, nd_line(orig_node), putobject, rb_eNoMatchingPatternError); ADD_INSN1(cond_seq, nd_line(orig_node), topn, INT2FIX(2)); @@ -6349,6 +6398,7 @@ compile_loop(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, in iseq, ret, node->nd_body ? node->nd_body : node, + 0, "body", branches); CHECK(COMPILE_POPPED(ret, "while body", node->nd_body)); @@ -6923,7 +6973,7 @@ qcall_branch_start(rb_iseq_t *iseq, LINK_ANCHOR *const recv, VALUE *branches, co *branches = br; ADD_INSN(recv, line, dup); ADD_INSNL(recv, line, branchnil, else_label); - add_trace_branch_coverage(iseq, recv, node, "then", br); + add_trace_branch_coverage(iseq, recv, node, 0, "then", br); return else_label; } @@ -6935,7 +6985,7 @@ qcall_branch_end(rb_iseq_t *iseq, LINK_ANCHOR *const ret, LABEL *else_label, VAL end_label = NEW_LABEL(line); ADD_INSNL(ret, line, jump, end_label); ADD_LABEL(ret, else_label); - add_trace_branch_coverage(iseq, ret, node, "else", branches); + add_trace_branch_coverage(iseq, ret, node, 1, "else", branches); ADD_LABEL(ret, end_label); } diff --git a/ext/coverage/coverage.c b/ext/coverage/coverage.c index 34b0849fc4..49f7ffa0f8 100644 --- a/ext/coverage/coverage.c +++ b/ext/coverage/coverage.c @@ -76,36 +76,62 @@ rb_coverage_start(int argc, VALUE *argv, VALUE klass) return Qnil; } +struct branch_coverage_result_builder +{ + int id; + VALUE result; + VALUE children; + VALUE counters; +}; + +static int +branch_coverage_ii(VALUE _key, VALUE branch, VALUE v) +{ + struct branch_coverage_result_builder *b = (struct branch_coverage_result_builder *) v; + + VALUE target_label = RARRAY_AREF(branch, 0); + VALUE target_first_lineno = RARRAY_AREF(branch, 1); + VALUE target_first_column = RARRAY_AREF(branch, 2); + VALUE target_last_lineno = RARRAY_AREF(branch, 3); + VALUE target_last_column = RARRAY_AREF(branch, 4); + long counter_idx = FIX2LONG(RARRAY_AREF(branch, 5)); + rb_hash_aset(b->children, rb_ary_new_from_args(6, target_label, LONG2FIX(b->id++), target_first_lineno, target_first_column, target_last_lineno, target_last_column), RARRAY_AREF(b->counters, counter_idx)); + + return ST_CONTINUE; +} + +static int +branch_coverage_i(VALUE _key, VALUE branch_base, VALUE v) +{ + struct branch_coverage_result_builder *b = (struct branch_coverage_result_builder *) v; + + VALUE base_type = RARRAY_AREF(branch_base, 0); + VALUE base_first_lineno = RARRAY_AREF(branch_base, 1); + VALUE base_first_column = RARRAY_AREF(branch_base, 2); + VALUE base_last_lineno = RARRAY_AREF(branch_base, 3); + VALUE base_last_column = RARRAY_AREF(branch_base, 4); + VALUE branches = RARRAY_AREF(branch_base, 5); + VALUE children = rb_hash_new(); + rb_hash_aset(b->result, rb_ary_new_from_args(6, base_type, LONG2FIX(b->id++), base_first_lineno, base_first_column, base_last_lineno, base_last_column), children); + b->children = children; + rb_hash_foreach(branches, branch_coverage_ii, v); + + return ST_CONTINUE; +} + static VALUE branch_coverage(VALUE branches) { - VALUE ret = rb_hash_new(); - VALUE structure = rb_ary_dup(RARRAY_AREF(branches, 0)); - VALUE counters = rb_ary_dup(RARRAY_AREF(branches, 1)); - int i, j; - long id = 0; + VALUE structure = RARRAY_AREF(branches, 0); - for (i = 0; i < RARRAY_LEN(structure); i++) { - VALUE branches = RARRAY_AREF(structure, i); - VALUE base_type = RARRAY_AREF(branches, 0); - VALUE base_first_lineno = RARRAY_AREF(branches, 1); - VALUE base_first_column = RARRAY_AREF(branches, 2); - VALUE base_last_lineno = RARRAY_AREF(branches, 3); - VALUE base_last_column = RARRAY_AREF(branches, 4); - VALUE children = rb_hash_new(); - rb_hash_aset(ret, rb_ary_new_from_args(6, base_type, LONG2FIX(id++), base_first_lineno, base_first_column, base_last_lineno, base_last_column), children); - for (j = 5; j < RARRAY_LEN(branches); j += 6) { - VALUE target_label = RARRAY_AREF(branches, j); - VALUE target_first_lineno = RARRAY_AREF(branches, j + 1); - VALUE target_first_column = RARRAY_AREF(branches, j + 2); - VALUE target_last_lineno = RARRAY_AREF(branches, j + 3); - VALUE target_last_column = RARRAY_AREF(branches, j + 4); - int idx = FIX2INT(RARRAY_AREF(branches, j + 5)); - rb_hash_aset(children, rb_ary_new_from_args(6, target_label, LONG2FIX(id++), target_first_lineno, target_first_column, target_last_lineno, target_last_column), RARRAY_AREF(counters, idx)); - } - } + struct branch_coverage_result_builder b; + b.id = 0; + b.result = rb_hash_new(); + b.counters = RARRAY_AREF(branches, 1); - return ret; + rb_hash_foreach(structure, branch_coverage_i, (VALUE)&b); + + return b.result; } static int diff --git a/thread.c b/thread.c index 91a953b9ab..6fdddde720 100644 --- a/thread.c +++ b/thread.c @@ -5644,20 +5644,31 @@ rb_default_coverage(int n) RARRAY_ASET(coverage, COVERAGE_INDEX_LINES, lines); if (mode & COVERAGE_TARGET_BRANCHES) { - branches = rb_ary_tmp_new_fill(2); - /* internal data structures for branch coverage: - * - * [[base_type, base_first_lineno, base_first_column, base_last_lineno, base_last_column, - * target_type_1, target_first_lineno_1, target_first_column_1, target_last_lineno_1, target_last_column_1, target_counter_index_1, - * target_type_2, target_first_lineno_2, target_first_column_2, target_last_lineno_2, target_last_column_2, target_counter_index_2, ...], - * ...] - * - * Example: [[:case, 1, 0, 4, 3, - * :when, 2, 8, 2, 9, 0, - * :when, 3, 8, 3, 9, 1, ...], - * ...] - */ - RARRAY_ASET(branches, 0, rb_ary_tmp_new(0)); + branches = rb_ary_tmp_new_fill(2); + /* internal data structures for branch coverage: + * + * { branch base node => + * [base_type, base_first_lineno, base_first_column, base_last_lineno, base_last_column, { + * branch target id => + * [target_type, target_first_lineno, target_first_column, target_last_lineno, target_last_column, target_counter_index], + * ... + * }], + * ... + * } + * + * Example: + * { NODE_CASE => + * [1, 0, 4, 3, { + * NODE_WHEN => [2, 8, 2, 9, 0], + * NODE_WHEN => [3, 8, 3, 9, 1], + * ... + * }], + * ... + * } + */ + VALUE structure = rb_hash_new(); + rb_obj_hide(structure); + RARRAY_ASET(branches, 0, structure); /* branch execution counters */ RARRAY_ASET(branches, 1, rb_ary_tmp_new(0)); }