From 9581954a964c6b20bafd133a19da3e27aa135dce Mon Sep 17 00:00:00 2001 From: normal Date: Wed, 3 Dec 2014 22:16:58 +0000 Subject: [PATCH] mostly fix rb_iseq_load This allows reporters commenters of [Feature #8543] to load instruction sequences directly. Some test cases are still failing but documented in test/-ext-/iseq_load/test_iseq_load.rb. * compile.c (rb_iseq_build_from_exception): entry->sp is unsigned (iseq_build_callinfo_from_hash): account for kw_arg (iseq_build_from_ary_body): update for r35459 (CHECK_STRING, CHECK_INTEGER): remove unused checks (int_param): new function for checking new `params' hash (iseq_build_kw): new function for loading rb_iseq_param_keyword (rb_iseq_build_from_ary): account for `misc' entry and general structure changes [Feature #8543] * iseq.c (CHECK_HASH): new macro (for `misc' and `param' entries) (iseq_load): account for `misc' and `params' hashes (iseq_data_to_ary): add final opt to arg_opt_labels, fix kw support, account for unsigned entry->sp * ext/-test-/iseq_load/iseq_load.c: new ext for test * ext/-test-/iseq_load/extconf.rb: ditto * test/-ext-/iseq_load/test_iseq_load.rb: new test git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48705 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 19 ++ compile.c | 230 +++++++++++++++++++------ ext/-test-/iseq_load/extconf.rb | 1 + ext/-test-/iseq_load/iseq_load.c | 21 +++ iseq.c | 56 ++++-- iseq.h | 3 +- test/-ext-/iseq_load/test_iseq_load.rb | 95 ++++++++++ 7 files changed, 355 insertions(+), 70 deletions(-) create mode 100644 ext/-test-/iseq_load/extconf.rb create mode 100644 ext/-test-/iseq_load/iseq_load.c create mode 100644 test/-ext-/iseq_load/test_iseq_load.rb diff --git a/ChangeLog b/ChangeLog index c9debd5d76..7c7043184d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +Thu Dec 4 07:06:02 2014 Eric Wong + + * compile.c (rb_iseq_build_from_exception): entry->sp is unsigned + (iseq_build_callinfo_from_hash): account for kw_arg + (iseq_build_from_ary_body): update for r35459 + (CHECK_STRING, CHECK_INTEGER): remove unused checks + (int_param): new function for checking new `params' hash + (iseq_build_kw): new function for loading rb_iseq_param_keyword + (rb_iseq_build_from_ary): account for `misc' entry and general + structure changes + [Feature #8543] + * iseq.c (CHECK_HASH): new macro (for `misc' and `param' entries) + (iseq_load): account for `misc' and `params' hashes + (iseq_data_to_ary): add final opt to arg_opt_labels, + fix kw support, account for unsigned entry->sp + * ext/-test-/iseq_load/iseq_load.c: new ext for test + * ext/-test-/iseq_load/extconf.rb: ditto + * test/-ext-/iseq_load/test_iseq_load.rb: new test + Thu Dec 4 06:56:57 2014 Eric Wong * iseq.c (iseq_free): avoid segfault on incomplete iseq diff --git a/compile.c b/compile.c index 860d5adf3a..217c5f5e28 100644 --- a/compile.c +++ b/compile.c @@ -5732,7 +5732,7 @@ iseq_build_from_ary_exception(rb_iseq_t *iseq, struct st_table *labels_table, VALUE v, type, eiseqval; const VALUE *ptr; LABEL *lstart, *lend, *lcont; - int sp; + unsigned int sp; v = rb_convert_type(RARRAY_AREF(exception, i), T_ARRAY, "Array", "to_ary"); @@ -5751,7 +5751,7 @@ iseq_build_from_ary_exception(rb_iseq_t *iseq, struct st_table *labels_table, lstart = register_label(iseq, labels_table, ptr[2]); lend = register_label(iseq, labels_table, ptr[3]); lcont = register_label(iseq, labels_table, ptr[4]); - sp = NUM2INT(ptr[5]); + sp = NUM2UINT(ptr[5]); (void)sp; @@ -5807,14 +5807,25 @@ iseq_build_callinfo_from_hash(rb_iseq_t *iseq, VALUE op) VALUE vflag = rb_hash_aref(op, ID2SYM(rb_intern("flag"))); VALUE vorig_argc = rb_hash_aref(op, ID2SYM(rb_intern("orig_argc"))); VALUE vblock = rb_hash_aref(op, ID2SYM(rb_intern("blockptr"))); + VALUE vkw_arg = rb_hash_aref(op, ID2SYM(rb_intern("kw_arg"))); if (!NIL_P(vmid)) mid = SYM2ID(vmid); if (!NIL_P(vflag)) flag = NUM2UINT(vflag); if (!NIL_P(vorig_argc)) orig_argc = FIX2INT(vorig_argc); if (!NIL_P(vblock)) block = iseq_build_load_iseq(iseq, vblock); - } - /* TODO: support keywords */ + if (!NIL_P(vkw_arg)) { + int i; + int len = RARRAY_LENINT(vkw_arg); + size_t n = sizeof(rb_call_info_kw_arg_t) + sizeof(ID) * (len - 1); + + kw_arg = xmalloc(n); + kw_arg->keyword_len = len; + for (i = 0; i < len; i++) { + kw_arg->keywords[i] = SYM2ID(RARRAY_AREF(vkw_arg, i)); + } + } + } return (VALUE)new_callinfo(iseq, mid, orig_argc, block, flag, kw_arg); } @@ -5915,16 +5926,21 @@ iseq_build_from_ary_body(rb_iseq_t *iseq, LINK_ANCHOR *anchor, case TS_CDHASH: { int i; + VALUE map = rb_hash_new(); + + rb_hash_tbl_raw(map)->type = &cdhash_type; op = rb_convert_type(op, T_ARRAY, "Array", "to_ary"); op = rb_ary_dup(op); for (i=0; iparam.flags.has_kw = TRUE; + + iseq->param.keyword = ZALLOC(struct rb_iseq_param_keyword); + iseq->param.keyword->num = len; +#define SYM(s) ID2SYM(rb_intern(#s)) + (void)int_param(&iseq->param.keyword->bits_start, params, SYM(kwbits)); + i = iseq->param.keyword->bits_start - iseq->param.keyword->num; + iseq->param.keyword->table = &iseq->local_table[i]; +#undef SYM + + /* required args */ + for (i = 0; i < len; i++) { + VALUE val = RARRAY_AREF(keywords, i); + + if (!SYMBOL_P(val)) { + goto default_values; + } + iseq->param.keyword->table[i] = SYM2ID(val); + iseq->param.keyword->required_num++; + } + +default_values: /* note: we intentionally preserve `i' from previous loop */ + default_len = len - i; + if (default_len == 0) { + return; + } + + iseq->param.keyword->default_values = ALLOC_N(VALUE, default_len); + + for (j = 0; i < len; i++, j++) { + key = RARRAY_AREF(keywords, i); + CHECK_ARRAY(key); + + switch (RARRAY_LEN(key)) { + case 1: + sym = RARRAY_AREF(key, 0); + default_val = Qundef; + break; + case 2: + sym = RARRAY_AREF(key, 0); + default_val = RARRAY_AREF(key, 1); + break; + default: + rb_raise(rb_eTypeError, + "keyword default has unsupported len %+"PRIsVALUE, + key); + } + iseq->param.keyword->table[i] = SYM2ID(sym); + iseq->param.keyword->default_values[j] = default_val; + } +} VALUE -rb_iseq_build_from_ary(rb_iseq_t *iseq, VALUE locals, VALUE args, +rb_iseq_build_from_ary(rb_iseq_t *iseq, VALUE misc, VALUE locals, VALUE params, VALUE exception, VALUE body) { - int i; +#define SYM(s) ID2SYM(rb_intern(#s)) + int i, len; ID *tbl; struct st_table *labels_table = st_init_numtable(); + VALUE arg_opt_labels = rb_hash_aref(params, SYM(opt)); + VALUE keywords = rb_hash_aref(params, SYM(keyword)); + VALUE sym_arg_rest = ID2SYM(rb_intern("#arg_rest")); DECL_ANCHOR(anchor); INIT_ANCHOR(anchor); - iseq->local_table_size = RARRAY_LENINT(locals); + len = RARRAY_LENINT(locals); + iseq->local_table_size = len; iseq->local_table = tbl = (ID *)ALLOC_N(ID, iseq->local_table_size); iseq->local_size = iseq->local_table_size + 1; - for (i=0; iparam.size = iseq->param.lead_num = FIX2INT(args); - iseq->param.flags.has_lead = TRUE; - } - else { - int i = 0; - VALUE argc = CHECK_INTEGER(rb_ary_entry(args, i++)); - VALUE arg_opt_labels = CHECK_ARRAY(rb_ary_entry(args, i++)); - VALUE arg_post_num = CHECK_INTEGER(rb_ary_entry(args, i++)); - VALUE arg_post_start = CHECK_INTEGER(rb_ary_entry(args, i++)); - VALUE arg_rest = CHECK_INTEGER(rb_ary_entry(args, i++)); - VALUE arg_block = CHECK_INTEGER(rb_ary_entry(args, i++)); - - iseq->param.lead_num = FIX2INT(argc); - iseq->param.rest_start = FIX2INT(arg_rest); - iseq->param.post_num = FIX2INT(arg_post_num); - iseq->param.post_start = FIX2INT(arg_post_start); - iseq->param.block_start = FIX2INT(arg_block); - iseq->param.opt_num = RARRAY_LENINT(arg_opt_labels) - 1; - iseq->param.opt_table = (VALUE *)ALLOC_N(VALUE, iseq->param.opt_num + 1); - - if (iseq->param.flags.has_block) { - iseq->param.size = iseq->param.block_start + 1; - } - else if (iseq->param.flags.has_post) { - iseq->param.size = iseq->param.post_start + iseq->param.post_num; - } - else if (iseq->param.flags.has_rest) { - iseq->param.size = iseq->param.rest_start + 1; + if (sym_arg_rest == lv) { + tbl[i] = 0; } else { - iseq->param.size = iseq->param.lead_num + iseq->param.opt_num; - } - - for (i=0; iparam.opt_table[i] = (VALUE)register_label(iseq, labels_table, rb_ary_entry(arg_opt_labels, i)); + tbl[i] = FIXNUM_P(lv) ? (ID)FIX2LONG(lv) : SYM2ID(CHECK_SYMBOL(lv)); } } +#define MISC_PARAM(D,F) do { \ + if (!int_param(D, misc, SYM(F))) { \ + rb_raise(rb_eTypeError, "misc field missing: %s", #F); \ + } } while (0) + MISC_PARAM(&iseq->local_size, local_size); + /* iseq->stack_size and iseq->param.size are calculated */ +#undef MISC_PARAM + +#define INT_PARAM(F) int_param(&iseq->param.F, params, SYM(F)) + if (INT_PARAM(lead_num)) { + iseq->param.flags.has_lead = TRUE; + } + if (INT_PARAM(post_num)) iseq->param.flags.has_post = TRUE; + if (INT_PARAM(post_start)) iseq->param.flags.has_post = TRUE; + if (INT_PARAM(rest_start)) iseq->param.flags.has_rest = TRUE; + if (INT_PARAM(block_start)) iseq->param.flags.has_block = TRUE; +#undef INT_PARAM + + switch (TYPE(arg_opt_labels)) { + case T_ARRAY: + len = RARRAY_LENINT(arg_opt_labels); + iseq->param.flags.has_opt = !!(len - 1 >= 0); + + if (iseq->param.flags.has_opt) { + iseq->param.opt_num = len - 1; + iseq->param.opt_table = (VALUE *)ALLOC_N(VALUE, len); + + for (i = 0; i < len; i++) { + VALUE ent = RARRAY_AREF(arg_opt_labels, i); + LABEL *label = register_label(iseq, labels_table, ent); + + iseq->param.opt_table[i] = (VALUE)label; + } + } + case T_NIL: + break; + default: + rb_raise(rb_eTypeError, ":opt param is not an array: %+"PRIsVALUE, + arg_opt_labels); + } + + switch (TYPE(keywords)) { + case T_ARRAY: + iseq_build_kw(iseq, params, keywords); + case T_NIL: + break; + default: + rb_raise(rb_eTypeError, ":keywords param is not an array: %+"PRIsVALUE, + keywords); + } + + if (Qtrue == rb_hash_aref(params, SYM(ambiguous_param0))) { + iseq->param.flags.ambiguous_param0 = TRUE; + } + + if (int_param(&i, params, SYM(kwrest))) { + if (!iseq->param.keyword) { + iseq->param.keyword = ZALLOC(struct rb_iseq_param_keyword); + } + iseq->param.keyword->rest_start = i; + iseq->param.flags.has_kwrest = TRUE; + + } +#undef SYM + iseq_calc_param_size(iseq); + /* exception */ iseq_build_from_ary_exception(iseq, labels_table, exception); diff --git a/ext/-test-/iseq_load/extconf.rb b/ext/-test-/iseq_load/extconf.rb new file mode 100644 index 0000000000..860f30befd --- /dev/null +++ b/ext/-test-/iseq_load/extconf.rb @@ -0,0 +1 @@ +create_makefile("-test-/iseq_load/iseq_load") diff --git a/ext/-test-/iseq_load/iseq_load.c b/ext/-test-/iseq_load/iseq_load.c new file mode 100644 index 0000000000..ffdde347e1 --- /dev/null +++ b/ext/-test-/iseq_load/iseq_load.c @@ -0,0 +1,21 @@ +#include + +VALUE rb_iseq_load(VALUE data, VALUE parent, VALUE opt); + +static VALUE +iseq_load(int argc, VALUE *argv, VALUE self) +{ + VALUE data, opt = Qnil; + + rb_scan_args(argc, argv, "11", &data, &opt); + + return rb_iseq_load(data, 0, opt); +} + +void +Init_iseq_load(void) +{ + VALUE rb_cISeq = rb_eval_string("RubyVM::InstructionSequence"); + + rb_define_singleton_method(rb_cISeq, "iseq_load", iseq_load, -1); +} diff --git a/iseq.c b/iseq.c index 655bb64ef8..d31076a32f 100644 --- a/iseq.c +++ b/iseq.c @@ -468,6 +468,7 @@ rb_iseq_new_with_bopt(NODE *node, VALUE name, VALUE path, VALUE absolute_path, V } #define CHECK_ARRAY(v) rb_convert_type((v), T_ARRAY, "Array", "to_ary") +#define CHECK_HASH(v) rb_convert_type((v), T_HASH, "Hash", "to_hash") #define CHECK_STRING(v) rb_convert_type((v), T_STRING, "String", "to_str") #define CHECK_SYMBOL(v) rb_convert_type((v), T_SYMBOL, "Symbol", "to_sym") static inline VALUE CHECK_INTEGER(VALUE v) {(void)NUM2LONG(v); return v;} @@ -506,7 +507,7 @@ iseq_load(VALUE self, VALUE data, VALUE parent, VALUE opt) VALUE magic, version1, version2, format_type, misc; VALUE name, path, absolute_path, first_lineno; - VALUE type, body, locals, args, exception; + VALUE type, body, locals, params, exception; st_data_t iseq_type; rb_iseq_t *iseq; @@ -524,8 +525,8 @@ iseq_load(VALUE self, VALUE data, VALUE parent, VALUE opt) version1 = CHECK_INTEGER(rb_ary_entry(data, i++)); version2 = CHECK_INTEGER(rb_ary_entry(data, i++)); format_type = CHECK_INTEGER(rb_ary_entry(data, i++)); - misc = rb_ary_entry(data, i++); /* TODO */ - ((void)magic, (void)version1, (void)version2, (void)format_type, (void)misc); + misc = CHECK_HASH(rb_ary_entry(data, i++)); + ((void)magic, (void)version1, (void)version2, (void)format_type); name = CHECK_STRING(rb_ary_entry(data, i++)); path = CHECK_STRING(rb_ary_entry(data, i++)); @@ -535,12 +536,7 @@ iseq_load(VALUE self, VALUE data, VALUE parent, VALUE opt) type = CHECK_SYMBOL(rb_ary_entry(data, i++)); locals = CHECK_ARRAY(rb_ary_entry(data, i++)); - - args = rb_ary_entry(data, i++); - if (FIXNUM_P(args) || (args = CHECK_ARRAY(args))) { - /* */ - } - + params = CHECK_HASH(rb_ary_entry(data, i++)); exception = CHECK_ARRAY(rb_ary_entry(data, i++)); body = CHECK_ARRAY(rb_ary_entry(data, i++)); @@ -558,10 +554,11 @@ iseq_load(VALUE self, VALUE data, VALUE parent, VALUE opt) } make_compile_option(&option, opt); + prepare_iseq_build(iseq, name, path, absolute_path, first_lineno, parent, (enum iseq_type)iseq_type, 0, &option); - rb_iseq_build_from_ary(iseq, locals, args, exception, body); + rb_iseq_build_from_ary(iseq, misc, locals, params, exception, body); cleanup_iseq_build(iseq); return iseqval; @@ -1733,16 +1730,21 @@ iseq_data_to_ary(rb_iseq_t *iseq) /* params */ { - VALUE arg_opt_labels = rb_ary_new(); int j; - for (j=0; j < iseq->param.opt_num; j++) { - rb_ary_push(arg_opt_labels, register_label(labels_table, iseq->param.opt_table[j])); - } + if (iseq->param.flags.has_opt) { + int len = iseq->param.opt_num + 1; + VALUE arg_opt_labels = rb_ary_new2(len); + + for (j = 0; j < len; j++) { + VALUE l = register_label(labels_table, iseq->param.opt_table[j]); + rb_ary_push(arg_opt_labels, l); + } + rb_hash_aset(params, ID2SYM(rb_intern("opt")), arg_opt_labels); + } /* commit */ if (iseq->param.flags.has_lead) rb_hash_aset(params, ID2SYM(rb_intern("lead_num")), INT2FIX(iseq->param.lead_num)); - if (iseq->param.flags.has_opt) rb_hash_aset(params, ID2SYM(rb_intern("opt")), arg_opt_labels); if (iseq->param.flags.has_post) rb_hash_aset(params, ID2SYM(rb_intern("post_num")), INT2FIX(iseq->param.post_num)); if (iseq->param.flags.has_post) rb_hash_aset(params, ID2SYM(rb_intern("post_start")), INT2FIX(iseq->param.post_start)); if (iseq->param.flags.has_rest) rb_hash_aset(params, ID2SYM(rb_intern("rest_start")), INT2FIX(iseq->param.rest_start)); @@ -1760,6 +1762,9 @@ iseq_data_to_ary(rb_iseq_t *iseq) } rb_ary_push(keywords, key); } + + rb_hash_aset(params, ID2SYM(rb_intern("kwbits")), + INT2FIX(iseq->param.keyword->bits_start)); rb_hash_aset(params, ID2SYM(rb_intern("keyword")), keywords); } if (iseq->param.flags.has_kwrest) rb_hash_aset(params, ID2SYM(rb_intern("kwrest")), INT2FIX(iseq->param.keyword->rest_start)); @@ -1818,10 +1823,25 @@ iseq_data_to_ary(rb_iseq_t *iseq) { rb_call_info_t *ci = (rb_call_info_t *)*seq; VALUE e = rb_hash_new(); + int orig_argc = ci->orig_argc; + rb_hash_aset(e, ID2SYM(rb_intern("mid")), ci->mid ? ID2SYM(ci->mid) : Qnil); - rb_hash_aset(e, ID2SYM(rb_intern("flag")), ULONG2NUM(ci->flag)); - rb_hash_aset(e, ID2SYM(rb_intern("orig_argc")), INT2FIX(ci->orig_argc)); + rb_hash_aset(e, ID2SYM(rb_intern("flag")), UINT2NUM(ci->flag)); rb_hash_aset(e, ID2SYM(rb_intern("blockptr")), ci->blockiseq ? iseq_data_to_ary(ci->blockiseq) : Qnil); + + if (ci->kw_arg) { + int i; + VALUE kw = rb_ary_new2((long)ci->kw_arg->keyword_len); + + orig_argc -= ci->kw_arg->keyword_len; + for (i = 0; i < ci->kw_arg->keyword_len; i++) { + rb_ary_push(kw, ID2SYM(ci->kw_arg->keywords[i])); + } + rb_hash_aset(e, ID2SYM(rb_intern("kw_arg")), kw); + } + + rb_hash_aset(e, ID2SYM(rb_intern("orig_argc")), + INT2FIX(orig_argc)); rb_ary_push(ary, e); } break; @@ -1871,7 +1891,7 @@ iseq_data_to_ary(rb_iseq_t *iseq) rb_ary_push(ary, register_label(labels_table, entry->start)); rb_ary_push(ary, register_label(labels_table, entry->end)); rb_ary_push(ary, register_label(labels_table, entry->cont)); - rb_ary_push(ary, INT2FIX(entry->sp)); + rb_ary_push(ary, UINT2NUM(entry->sp)); rb_ary_push(exception, ary); } diff --git a/iseq.h b/iseq.h index bf3a714e9d..2dec51585d 100644 --- a/iseq.h +++ b/iseq.h @@ -18,7 +18,8 @@ RUBY_SYMBOL_EXPORT_BEGIN VALUE rb_iseq_compile_node(VALUE self, NODE *node); int rb_iseq_translate_threaded_code(rb_iseq_t *iseq); VALUE *rb_iseq_original_iseq(rb_iseq_t *iseq); -VALUE rb_iseq_build_from_ary(rb_iseq_t *iseq, VALUE locals, VALUE args, +VALUE rb_iseq_build_from_ary(rb_iseq_t *iseq, VALUE misc, + VALUE locals, VALUE args, VALUE exception, VALUE body); /* iseq.c */ diff --git a/test/-ext-/iseq_load/test_iseq_load.rb b/test/-ext-/iseq_load/test_iseq_load.rb new file mode 100644 index 0000000000..5bbd49efba --- /dev/null +++ b/test/-ext-/iseq_load/test_iseq_load.rb @@ -0,0 +1,95 @@ +require 'test/unit' + +class TestIseqLoad < Test::Unit::TestCase + require '-test-/iseq_load/iseq_load' + ISeq = RubyVM::InstructionSequence + + def test_bug8543 + assert_iseq_roundtrip <<-'end;' + puts "tralivali" + def funct(a, b) + a**b + end + 3.times { |i| puts "Hello, world#{funct(2,i)}!" } + end; + end + + def test_case_when + assert_iseq_roundtrip <<-'end;' + def user_mask(target) + target.each_char.inject(0) do |mask, chr| + case chr + when "u" + mask | 04700 + when "g" + mask | 02070 + when "o" + mask | 01007 + when "a" + mask | 07777 + else + raise ArgumentError, "invalid `who' symbol in file mode: #{chr}" + end + end + end + end; + end + + def test_splatsplat + assert_iseq_roundtrip('def splatsplat(**); end') + end + + def test_hidden + assert_iseq_roundtrip('def x(a, (b, *c), d: false); end') + end + + def assert_iseq_roundtrip(src) + a = ISeq.compile(src).to_a + b = ISeq.iseq_load(a).to_a + warn diff(a, b) if a != b + assert_equal a, b + assert_equal a, ISeq.iseq_load(b).to_a + end + + def test_next_in_block_in_block + skip "failing due to stack_max mismatch" + assert_iseq_roundtrip <<-'end;' + 3.times { 3.times { next } } + end; + end + + def test_break_ensure + skip "failing due to exception entry sp mismatch" + assert_iseq_roundtrip <<-'end;' + def m + bad = true + while true + begin + break + ensure + bad = false + end + end + end + end; + end + + # FIXME: still failing + def test_require_integration + skip "iseq loader require integration tests still failing" + f = File.expand_path(__FILE__) + # $(top_srcdir)/test/ruby/test_....rb + 3.times { f = File.dirname(f) } + Dir[File.join(f, 'ruby', '*.rb')].each do |f| + iseq = ISeq.compile_file(f) + orig = iseq.to_a.freeze + + loaded = ISeq.iseq_load(orig).to_a + if loaded != orig + warn f + warn diff(orig, loaded) + end + #assert_equal orig, loaded + end + end +end