From 3dbb390180a0e9f98623b6db0d71b0213359c541 Mon Sep 17 00:00:00 2001 From: ko1 Date: Tue, 8 Dec 2015 13:58:50 +0000 Subject: [PATCH] * introduce new ISeq binary format serializer/de-serializer and a pre-compilation/runtime loader sample. [Feature #11788] * iseq.c: add new methods: * RubyVM::InstructionSequence#to_binary_format(extra_data = nil) * RubyVM::InstructionSequence.from_binary_format(binary) * RubyVM::InstructionSequence.from_binary_format_extra_data(binary) * compile.c: implement body of this new feature. * load.c (rb_load_internal0), iseq.c (rb_iseq_load_iseq): call RubyVM::InstructionSequence.load_iseq(fname) with loading script name if this method is defined. We can return any ISeq object as a result value. Otherwise loading will continue as usual. This interface is not mature and is not extensible, so we don't guarantee the future compatibility of this method. Basically, you shouldn't use this method. * iseq.h: move ISEQ_MAJOR/MINOR_VERSION (and some definitions) from iseq.c. * encoding.c (rb_data_is_encoding), internal.h: added. * vm_core.h: add support for lazy loading. * add USE_LAZY_LOAD macro to enable or disable this feature. * add several fields to rb_iseq_t. * introduce new macro rb_iseq_check(). * insns.def: add some checks for the lazy loading feature. * vm_insnhelper.c: ditto. * proc.c: ditto. * vm.c: ditto. * test/lib/iseq_loader_checker.rb: enabled iff suitable environment variables are provided. * test/runner.rb: enable lib/iseq_loader_checker.rb. * sample/iseq_loader.rb: add sample compiler and loader. $ ruby sample/iseq_loader.rb [dir] will compile all ruby scripts in [dir]. With the default settings, this creates *.rb.yarb files in the same directory as the target .rb scripts. $ ruby -r sample/iseq_loader.rb [app] will run [app] with loading of compiled binary data enabled. 
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@52949 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 60 ++ NEWS | 8 + compile.c | 1431 +++++++++++++++++++++++++++++++ encoding.c | 6 + insns.def | 6 +- internal.h | 1 + iseq.c | 149 +++- iseq.h | 33 +- load.c | 16 +- proc.c | 13 +- sample/iseq_loader.rb | 240 ++++++ test/lib/iseq_loader_checker.rb | 47 +- test/runner.rb | 2 +- vm.c | 3 +- vm_core.h | 46 +- vm_insnhelper.c | 7 +- 16 files changed, 1979 insertions(+), 89 deletions(-) create mode 100644 sample/iseq_loader.rb diff --git a/ChangeLog b/ChangeLog index ab8e055f65..adbdaf79ec 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,63 @@ +Tue Dec 8 22:31:58 2015 Koichi Sasada + + * introduce new ISeq binary format serializer/de-serializer + and a pre-compilation/runtime loader sample. + [Feature #11788] + + * iseq.c: add new methods: + * RubyVM::InstructionSequence#to_binary_format(extra_data = nil) + * RubyVM::InstructionSequence.from_binary_format(binary) + * RubyVM::InstructionSequence.from_binary_format_extra_data(binary) + + * compile.c: implement body of this new feature. + + * load.c (rb_load_internal0), iseq.c (rb_iseq_load_iseq): + call RubyVM::InstructionSequence.load_iseq(fname) with + loading script name if this method is defined. + + We can return any ISeq object as a result value. + Otherwise loading will be continue as usual. + + This interface is not matured and is not extensible. + So that we don't guarantee the future compatibility of this method. + Basically, you should'nt use this method. + + * iseq.h: move ISEQ_MAJOR/MINOR_VERSION (and some definitions) + from iseq.c. + + * encoding.c (rb_data_is_encoding), internal.h: added. + + * vm_core.h: add several supports for lazy load. + * add USE_LAZY_LOAD macro to specify enable or disable of + this feature. + * add several fields to rb_iseq_t. + * introduce new macro rb_iseq_check(). + + * insns.def: some check for lazy loading feature. + + * vm_insnhelper.c: ditto. + + * proc.c: ditto. 
+ + * vm.c: ditto. + + * test/lib/iseq_loader_checker.rb: enabled iff suitable + environment variables are provided. + + * test/runner.rb: enable lib/iseq_loader_checker.rb. + + * sample/iseq_loader.rb: add sample compiler and loader. + + $ ruby sample/iseq_loader.rb [dir] + + will compile all ruby scripts in [dir]. + With default setting, this compile creates *.rb.yarb files + in same directory of target .rb scripts. + + $ ruby -r sample/iseq_loader.rb [app] + + will run with enable to load compiled binary data. + Tue Dec 8 21:21:16 2015 Kazuhiro NISHIYAMA * NEWS: mention about Enumerator::Lazy#grep_v. diff --git a/NEWS b/NEWS index 4a288ed50d..ea1782477a 100644 --- a/NEWS +++ b/NEWS @@ -116,6 +116,14 @@ with all sufficient information, see the ChangeLog file. * Regexp/String: Updated Unicode version from 7.0.0 to 8.0.0 +* RubyVM::InstructionSequence + * add the following methods as a primitive tool of iseq loader. + See sample/iseq_loader.rb for usage. + [Feature #11788] + * RubyVM::InstructionSequence#to_binary_format(extra_data = nil) + * RubyVM::InstructionSequence.from_binary_format(binary) + * RubyVM::InstructionSequence.from_binary_format_extra_data(binary) + * String * String#+@ and String#- are added to get mutable/frozen strings. 
diff --git a/compile.c b/compile.c index bb0a07c940..bd1497823d 100644 --- a/compile.c +++ b/compile.c @@ -10,6 +10,8 @@ **********************************************************************/ #include "internal.h" +#include "ruby/re.h" +#include "encindex.h" #include #define USE_INSN_STACK_INCREASE 1 @@ -17,6 +19,7 @@ #include "iseq.h" #include "insns.inc" #include "insns_info.inc" +#include "gc.h" #ifdef HAVE_DLADDR # include @@ -6785,3 +6788,1431 @@ rb_method_for_self_aset(VALUE name, VALUE arg, rb_insn_func_t func) { return method_for_self(name, arg, func, for_self_aset); } + +/* ISeq binary format */ + +typedef unsigned int ibf_offset_t; +#define IBF_OFFSET(ptr) ((ibf_offset_t)(VALUE)(ptr)) + +struct ibf_header { + char magic[4]; /* YARB */ + unsigned int major_version; + unsigned int minor_version; + unsigned int size; + unsigned int extra_size; + + unsigned int iseq_list_size; + unsigned int id_list_size; + unsigned int object_list_size; + + ibf_offset_t iseq_list_offset; + ibf_offset_t id_list_offset; + ibf_offset_t object_list_offset; +}; + +struct ibf_id_entry { + enum { + ibf_id_enc_ascii, + ibf_id_enc_utf8, + ibf_id_enc_other + } enc : 2; + char body[1]; +}; + +struct ibf_dump { + VALUE str; + VALUE iseq_list; /* [iseq0 offset, ...] */ + VALUE obj_list; /* [objs] */ + st_table *iseq_table; /* iseq -> iseq number */ + st_table *id_table; /* id -> id number */ +}; + +rb_iseq_t * iseq_alloc(void); + +struct ibf_load { + const char *buff; + const struct ibf_header *header; + ID *id_list; /* [id0, ...] */ + VALUE iseq_list; /* [iseq0, ...] */ + VALUE obj_list; /* [obj0, ...] 
*/ + VALUE loader_obj; + VALUE str; + rb_iseq_t *iseq; +}; + +static ibf_offset_t +ibf_dump_pos(struct ibf_dump *dump) +{ + return (unsigned int)rb_str_strlen(dump->str); +} + +static ibf_offset_t +ibf_dump_write(struct ibf_dump *dump, const void *buff, unsigned long size) +{ + ibf_offset_t pos = ibf_dump_pos(dump); + rb_str_cat(dump->str, (const char *)buff, size); + /* TODO: overflow check */ + return pos; +} + +static void +ibf_dump_overwrite(struct ibf_dump *dump, void *buff, unsigned int size, long offset) +{ + VALUE str = dump->str; + char *ptr = RSTRING_PTR(str); + if (size + offset > RSTRING_LEN(str)) rb_bug("ibf_dump_overwrite: overflow"); + memcpy(ptr + offset, buff, size); +} + +static void * +ibf_load_alloc(const struct ibf_load *load, ibf_offset_t offset, int size) +{ + void *buff = ruby_xmalloc(size); + memcpy(buff, load->buff + offset, size); + return buff; +} + +#define IBF_W(b, type, n) (type *)(VALUE)ibf_dump_write(dump, (b), sizeof(type) * (n)) +#define IBF_WV(variable) ibf_dump_write(dump, &(variable), sizeof(variable)) +#define IBF_WP(b, type, n) ibf_dump_write(dump, (b), sizeof(type) * (n)) +#define IBF_R(val, type, n) (type *)ibf_load_alloc(load, IBF_OFFSET(val), sizeof(type) * (n)) + +static int +ibf_table_lookup(struct st_table *table, st_data_t key) +{ + st_data_t val; + + if (st_lookup(table, key, &val)) { + return (int)val; + } + else { + return -1; + } +} + +static int +ibf_table_index(struct st_table *table, st_data_t key) +{ + int index = ibf_table_lookup(table, key); + + if (index < 0) { /* not found */ + index = (int)table->num_entries; + st_insert(table, key, (st_data_t)index); + } + + return index; +} + +/* dump/load generic */ + +static VALUE ibf_load_object(const struct ibf_load *load, VALUE object_index); +static rb_iseq_t *ibf_load_iseq(const struct ibf_load *load, const rb_iseq_t *index_iseq); + +static VALUE +ibf_dump_object(struct ibf_dump *dump, VALUE obj) +{ + long index = RARRAY_LEN(dump->obj_list); + long i; + for (i=0; 
iobj_list, i) == obj) return (VALUE)i; /* dedup */ + } + rb_ary_push(dump->obj_list, obj); + return (VALUE)index; +} + +static VALUE +ibf_dump_id(struct ibf_dump *dump, ID id) +{ + return (VALUE)ibf_table_index(dump->id_table, (st_data_t)id); +} + +static ID +ibf_load_id(const struct ibf_load *load, const ID id_index) +{ + ID id; + + if (id_index == 0) { + id = 0; + } + else { + id = load->id_list[(long)id_index]; + + if (id == 0) { + long *indices = (long *)(load->buff + load->header->id_list_offset); + VALUE str = ibf_load_object(load, indices[id_index]); + id = NIL_P(str) ? 0 : rb_intern_str(str); /* str == nil -> internal junk id */ + load->id_list[(long)id_index] = id; + } + } + + return id; +} + +/* dump/load: code */ + +static VALUE +ibf_dump_callinfo(struct ibf_dump *dump, const struct rb_call_info *ci) +{ + return (ci->flag & VM_CALL_KWARG) ? Qtrue : Qfalse; +} + +static ibf_offset_t ibf_dump_iseq_each(struct ibf_dump *dump, const rb_iseq_t *iseq); + +static rb_iseq_t * +ibf_dump_iseq(struct ibf_dump *dump, const rb_iseq_t *iseq) +{ + if (iseq == NULL) { + return (rb_iseq_t *)-1; + } + else { + int iseq_index = ibf_table_lookup(dump->iseq_table, (st_data_t)iseq); + if (iseq_index < 0) { + iseq_index = ibf_table_index(dump->iseq_table, (st_data_t)iseq); + rb_ary_store(dump->iseq_list, iseq_index, LONG2NUM(ibf_dump_iseq_each(dump, rb_iseq_check(iseq)))); + } + return (rb_iseq_t *)(VALUE)iseq_index; + } +} + +static VALUE +ibf_dump_gentry(struct ibf_dump *dump, const struct rb_global_entry *entry) +{ + return (VALUE)ibf_dump_id(dump, entry->id); +} + +static VALUE +ibf_load_gentry(const struct ibf_load *load, const struct rb_global_entry *entry) +{ + ID gid = ibf_load_id(load, (ID)(VALUE)entry); + return (VALUE)rb_global_entry(gid); +} + +static VALUE * +ibf_dump_code(struct ibf_dump *dump, const rb_iseq_t *iseq) +{ + const int iseq_size = iseq->body->iseq_size; + int code_index; + VALUE *code; + const VALUE *orig_code = rb_iseq_original_iseq(iseq); + + code 
= ALLOCA_N(VALUE, iseq_size); + + for (code_index=0; code_indexbody->is_size; i++) { + if (op == (VALUE)&iseq->body->is_entries[i]) { + break; + } + } + code[code_index] = i; + } + break; + case TS_CALLINFO: + code[code_index] = ibf_dump_callinfo(dump, (const struct rb_call_info *)op); + break; + case TS_CALLCACHE: + code[code_index] = 0; + break; + case TS_ID: + code[code_index] = ibf_dump_id(dump, (ID)op); + break; + case TS_GENTRY: + code[code_index] = ibf_dump_gentry(dump, (const struct rb_global_entry *)op); + break; + case TS_FUNCPTR: + rb_raise(rb_eRuntimeError, "TS_FUNCPTR is not supported"); + break; + default: + code[code_index] = op; + break; + } + } + assert(insn_len(insn) == op_index+1); + } + + return IBF_W(code, VALUE, iseq_size); +} + +static VALUE * +ibf_load_code(const struct ibf_load *load, const rb_iseq_t *iseq, const struct rb_iseq_constant_body *body) +{ + const int iseq_size = body->iseq_size; + int code_index; + VALUE *code = IBF_R(body->iseq_encoded, VALUE, iseq_size); + + struct rb_call_info *ci_entries = iseq->body->ci_entries; + struct rb_call_info_with_kwarg *ci_kw_entries = (struct rb_call_info_with_kwarg *)&iseq->body->ci_entries[iseq->body->ci_size]; + struct rb_call_cache *cc_entries = iseq->body->cc_entries; + union iseq_inline_storage_entry *is_entries = iseq->body->is_entries; + + for (code_index=0; code_indexbody->param.opt_num; + + if (opt_num > 0) { + return IBF_W(iseq->body->param.opt_table, VALUE, opt_num + 1); + } + else { + return NULL; + } +} + +static VALUE * +ibf_load_param_opt_table(const struct ibf_load *load, const struct rb_iseq_constant_body *body) +{ + int opt_num = body->param.opt_num; + + if (opt_num > 0) { + ibf_offset_t offset = IBF_OFFSET(body->param.opt_table); + VALUE *table = ALLOC_N(VALUE, opt_num+1); + MEMCPY(table, load->buff + offset, VALUE, opt_num+1); + return table; + } + else { + return NULL; + } +} + +static struct rb_iseq_param_keyword * +ibf_dump_param_keyword(struct ibf_dump *dump, const 
rb_iseq_t *iseq) +{ + const struct rb_iseq_param_keyword *kw = iseq->body->param.keyword; + + if (kw) { + struct rb_iseq_param_keyword dump_kw = *kw; + int dv_num = kw->num - kw->required_num; + ID *ids = kw->num > 0 ? ALLOCA_N(ID, kw->num) : NULL; + VALUE *dvs = dv_num > 0 ? ALLOCA_N(VALUE, dv_num) : NULL; + int i; + + for (i=0; inum; i++) ids[i] = (ID)ibf_dump_id(dump, kw->table[i]); + for (i=0; idefault_values[i]); + + dump_kw.table = IBF_W(ids, ID, kw->num); + dump_kw.default_values = IBF_W(dvs, VALUE, dv_num); + return IBF_W(&dump_kw, struct rb_iseq_param_keyword, 1); + } + else { + return NULL; + } +} + +static const struct rb_iseq_param_keyword * +ibf_load_param_keyword(const struct ibf_load *load, const struct rb_iseq_constant_body *body) +{ + if (body->param.keyword) { + struct rb_iseq_param_keyword *kw = IBF_R(body->param.keyword, struct rb_iseq_param_keyword, 1); + ID *ids = IBF_R(kw->table, ID, kw->num); + int dv_num = kw->num - kw->required_num; + VALUE *dvs = IBF_R(kw->default_values, VALUE, dv_num); + int i; + + for (i=0; inum; i++) { + ids[i] = ibf_load_id(load, ids[i]); + } + for (i=0; itable = ids; + kw->default_values = dvs; + return kw; + } + else { + return NULL; + } +} + +static struct iseq_line_info_entry * +ibf_dump_line_info_table(struct ibf_dump *dump, const rb_iseq_t *iseq) +{ + return IBF_W(iseq->body->line_info_table, struct iseq_line_info_entry, iseq->body->line_info_size); +} + +static struct iseq_line_info_entry * +ibf_load_line_info_table(const struct ibf_load *load, const struct rb_iseq_constant_body *body) +{ + return IBF_R(body->line_info_table, struct iseq_line_info_entry, body->line_info_size); +} + +static ID * +ibf_dump_local_table(struct ibf_dump *dump, const rb_iseq_t *iseq) +{ + const int size = iseq->body->local_size - 1; + ID *table = ALLOCA_N(ID, size); + int i; + + for (i=0; ibody->local_table[i]); + } + + return IBF_W(table, ID, size); +} + +static ID * +ibf_load_local_table(const struct ibf_load *load, const struct 
rb_iseq_constant_body *body) +{ + const int size = body->local_size - 1; + + if (size > 0) { + ID *table = IBF_R(body->local_table, ID, size); + int i; + + for (i=0; ibody->catch_table; + + if (table) { + int byte_size = iseq_catch_table_bytes(iseq->body->catch_table->size); + struct iseq_catch_table *dump_table = (struct iseq_catch_table *)ALLOCA_N(char, byte_size); + unsigned int i; + dump_table->size = table->size; + for (i=0; isize; i++) { + dump_table->entries[i] = table->entries[i]; + dump_table->entries[i].iseq = ibf_dump_iseq(dump, table->entries[i].iseq); + } + return (struct iseq_catch_table *)(VALUE)ibf_dump_write(dump, dump_table, byte_size); + } + else { + return NULL; + } +} + +static struct iseq_catch_table * +ibf_load_catch_table(const struct ibf_load *load, const struct rb_iseq_constant_body *body) +{ + if (body->catch_table) { + struct iseq_catch_table *table; + unsigned int i; + unsigned int size; + size = *(unsigned int *)(load->buff + IBF_OFFSET(body->catch_table)); + table = ibf_load_alloc(load, IBF_OFFSET(body->catch_table), iseq_catch_table_bytes(size)); + for (i=0; ientries[i].iseq = ibf_load_iseq(load, table->entries[i].iseq); + } + return table; + } + else { + return NULL; + } +} + +static struct rb_call_info * +ibf_dump_ci_entries(struct ibf_dump *dump, const rb_iseq_t *iseq) +{ + const unsigned int ci_size = iseq->body->ci_size; + const unsigned int ci_kw_size = iseq->body->ci_kw_size; + const struct rb_call_info *ci_entries = iseq->body->ci_entries; + struct rb_call_info *dump_ci_entries; + struct rb_call_info_with_kwarg *dump_ci_kw_entries; + int byte_size = ci_size * sizeof(struct rb_call_info) + + ci_kw_size * sizeof(struct rb_call_info_with_kwarg); + unsigned int i; + + dump_ci_entries = (struct rb_call_info *)ALLOCA_N(char, byte_size); + dump_ci_kw_entries = (struct rb_call_info_with_kwarg *)&dump_ci_entries[ci_size]; + memcpy(dump_ci_entries, ci_entries, byte_size); + + for (i=0; ikeyword_len); + for (j=0; jkeyword_len; j++) { + 
keywords[j] = (VALUE)ibf_dump_object(dump, kw_arg->keywords[j]); /* kw_arg->keywords[n] is Symbol */ + } + dump_ci_kw_entries[i].kw_arg = (struct rb_call_info_kw_arg *)(VALUE)ibf_dump_write(dump, &kw_arg->keyword_len, sizeof(int)); + ibf_dump_write(dump, keywords, sizeof(VALUE) * kw_arg->keyword_len); + + dump_ci_kw_entries[i].ci.mid = ibf_dump_id(dump, dump_ci_kw_entries[i].ci.mid); + } + return (struct rb_call_info *)(VALUE)ibf_dump_write(dump, dump_ci_entries, byte_size); +} + +static struct rb_call_info * +ibf_load_ci_entries(const struct ibf_load *load, const struct rb_iseq_constant_body *body) +{ + unsigned int i; + const unsigned int ci_size = body->ci_size; + const unsigned int ci_kw_size = body->ci_kw_size; + struct rb_call_info *ci_entries = ibf_load_alloc(load, IBF_OFFSET(body->ci_entries), + sizeof(struct rb_call_info) * body->ci_size + + sizeof(struct rb_call_info_with_kwarg) * body->ci_kw_size); + struct rb_call_info_with_kwarg *ci_kw_entries = (struct rb_call_info_with_kwarg *)&ci_entries[ci_size]; + + for (i=0; ibuff + kw_arg_offset); + const VALUE *keywords = (VALUE *)(load->buff + kw_arg_offset + sizeof(int)); + struct rb_call_info_kw_arg *kw_arg = ruby_xmalloc(sizeof(struct rb_call_info_kw_arg) + sizeof(VALUE) * (keyword_len - 1)); + kw_arg->keyword_len = keyword_len; + for (j=0; jkeyword_len; j++) { + kw_arg->keywords[j] = (VALUE)ibf_load_object(load, keywords[j]); + } + ci_kw_entries[i].kw_arg = kw_arg; + ci_kw_entries[i].ci.mid = ibf_load_id(load, ci_kw_entries[i].ci.mid); + } + + return ci_entries; +} + +static ibf_offset_t +ibf_dump_iseq_each(struct ibf_dump *dump, const rb_iseq_t *iseq) +{ + struct rb_iseq_constant_body dump_body; + dump_body = *iseq->body; + + dump_body.location.path = ibf_dump_object(dump, dump_body.location.path); + dump_body.location.absolute_path = ibf_dump_object(dump, dump_body.location.absolute_path); + dump_body.location.base_label = ibf_dump_object(dump, dump_body.location.base_label); + dump_body.location.label = 
ibf_dump_object(dump, dump_body.location.label); + + dump_body.iseq_encoded = ibf_dump_code(dump, iseq); + dump_body.param.opt_table = ibf_dump_param_opt_table(dump, iseq); + dump_body.param.keyword = ibf_dump_param_keyword(dump, iseq); + dump_body.line_info_table = ibf_dump_line_info_table(dump, iseq); + dump_body.local_table = ibf_dump_local_table(dump, iseq); + dump_body.catch_table = ibf_dump_catch_table(dump, iseq); + dump_body.parent_iseq = ibf_dump_iseq(dump, iseq->body->parent_iseq); + dump_body.local_iseq = ibf_dump_iseq(dump, iseq->body->local_iseq); + dump_body.is_entries = NULL; + dump_body.ci_entries = ibf_dump_ci_entries(dump, iseq); + dump_body.cc_entries = NULL; + dump_body.mark_ary = ISEQ_FLIP_CNT(iseq); + + return ibf_dump_write(dump, &dump_body, sizeof(dump_body)); +} + +static VALUE +ibf_load_location_str(const struct ibf_load *load, VALUE str_index) +{ + VALUE str = ibf_load_object(load, str_index); + if (str != Qnil) { + str = rb_fstring(str); + } + return str; +} + +static void +ibf_load_iseq_each(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t offset) +{ + struct rb_iseq_constant_body *load_body = iseq->body = ZALLOC(struct rb_iseq_constant_body); + const struct rb_iseq_constant_body *body = (struct rb_iseq_constant_body *)(load->buff + offset); + + /* memcpy(load_body, load->buff + offset, sizeof(*load_body)); */ + load_body->type = body->type; + load_body->stack_max = body->stack_max; + load_body->local_size = body->local_size; + load_body->iseq_size = body->iseq_size; + load_body->param = body->param; + load_body->local_table_size = body->local_table_size; + load_body->is_size = body->is_size; + load_body->ci_size = body->ci_size; + load_body->ci_kw_size = body->ci_kw_size; + load_body->line_info_size = body->line_info_size; + + RB_OBJ_WRITE(iseq, &load_body->mark_ary, iseq_mark_ary_create((int)body->mark_ary)); + + RB_OBJ_WRITE(iseq, &load_body->location.path, ibf_load_location_str(load, body->location.path)); + 
RB_OBJ_WRITE(iseq, &load_body->location.absolute_path, ibf_load_location_str(load, body->location.absolute_path)); + RB_OBJ_WRITE(iseq, &load_body->location.base_label, ibf_load_location_str(load, body->location.base_label)); + RB_OBJ_WRITE(iseq, &load_body->location.label, ibf_load_location_str(load, body->location.label)); + load_body->location.first_lineno = body->location.first_lineno; + + load_body->is_entries = ZALLOC_N(union iseq_inline_storage_entry, body->is_size); + load_body->ci_entries = ibf_load_ci_entries(load, body); + load_body->cc_entries = ZALLOC_N(struct rb_call_cache, body->ci_size + body->ci_kw_size); + load_body->param.opt_table = ibf_load_param_opt_table(load, body); + load_body->param.keyword = ibf_load_param_keyword(load, body); + load_body->line_info_table = ibf_load_line_info_table(load, body); + load_body->local_table = ibf_load_local_table(load, body); + load_body->catch_table = ibf_load_catch_table(load, body); + load_body->parent_iseq = ibf_load_iseq(load, body->parent_iseq); + load_body->local_iseq = ibf_load_iseq(load, body->local_iseq); + + load_body->iseq_encoded = ibf_load_code(load, iseq, body); + + rb_iseq_translate_threaded_code(iseq); +} + + +static void +ibf_dump_iseq_list(struct ibf_dump *dump, struct ibf_header *header) +{ + const long size = RARRAY_LEN(dump->iseq_list); + ibf_offset_t *list = ALLOCA_N(ibf_offset_t, size); + long i; + + for (i=0; iiseq_list, i)); + } + + header->iseq_list_offset = ibf_dump_write(dump, list, sizeof(ibf_offset_t) * size); + header->iseq_list_size = (unsigned int)size; +} + +struct ibf_dump_id_list_i_arg { + struct ibf_dump *dump; + long *list; + int current_i; +}; + +static int +ibf_dump_id_list_i(st_data_t key, st_data_t val, st_data_t ptr) +{ + struct ibf_dump_id_list_i_arg *arg = (struct ibf_dump_id_list_i_arg *)ptr; + int i = (int)val; + ID id = (ID)key; + assert(arg->current_i == i); + arg->current_i++; + + if (rb_id2name(id)) { + arg->list[i] = (long)ibf_dump_object(arg->dump, 
rb_id2str(id)); + } + else { + arg->list[i] = 0; + } + + return ST_CONTINUE; +} + +static void +ibf_dump_id_list(struct ibf_dump *dump, struct ibf_header *header) +{ + const long size = dump->id_table->num_entries; + struct ibf_dump_id_list_i_arg arg; + arg.list = ALLOCA_N(long, size); + arg.dump = dump; + arg.current_i = 0; + + st_foreach(dump->id_table, ibf_dump_id_list_i, (st_data_t)&arg); + + header->id_list_offset = ibf_dump_write(dump, arg.list, sizeof(long) * size); + header->id_list_size = (unsigned int)size; +} + +#define IBF_OBJECT_INTERNAL FL_PROMOTED0 + +/* + * Binary format + * - ibf_object_header + * - ibf_object_xxx (xxx is type) + */ + +struct ibf_object_header { + unsigned int type: 5; + unsigned int special_const: 1; + unsigned int frozen: 1; + unsigned int internal: 1; +}; + +enum ibf_object_class_index { + IBF_OBJECT_CLASS_OBJECT, + IBF_OBJECT_CLASS_ARRAY, + IBF_OBJECT_CLASS_STANDARD_ERROR +}; + +struct ibf_object_string { + long encindex; + long len; + char ptr[1]; +}; + +struct ibf_object_regexp { + long srcstr; + char option; +}; + +struct ibf_object_array { + long len; + long ary[1]; +}; + +struct ibf_object_hash { + long len; + long keyval[1]; +}; + +struct ibf_object_struct_range { + long class_index; + long len; + long beg; + long end; + int excl; +}; + +struct ibf_object_bignum { + ssize_t slen; + BDIGIT digits[1]; +}; + +enum ibf_object_data_type { + IBF_OBJECT_DATA_ENCODING +}; + +struct ibf_object_complex_rational { + long a, b; +}; + +struct ibf_object_symbol { + long str; +}; + +#define IBF_OBJHEADER(offset) (struct ibf_object_header *)(load->buff + (offset)) +#define IBF_OBJBODY(type, offset) (type *)(load->buff + sizeof(struct ibf_object_header) + (offset)) + +static void +ibf_dump_object_unsupported(struct ibf_dump *dump, VALUE obj) +{ + rb_obj_info_dump(obj); + rb_bug("ibf_dump_object_unsupported: unsupporetd"); +} + +static VALUE +ibf_load_object_unsupported(const struct ibf_load *load, const struct ibf_object_header *header, 
ibf_offset_t offset) +{ + rb_bug("unsupported"); + return Qnil; +} + +static void +ibf_dump_object_class(struct ibf_dump *dump, VALUE obj) +{ + enum ibf_object_class_index cindex; + if (obj == rb_cObject) { + cindex = IBF_OBJECT_CLASS_OBJECT; + } + else if (obj == rb_cArray) { + cindex = IBF_OBJECT_CLASS_ARRAY; + } + else if (obj == rb_eStandardError) { + cindex = IBF_OBJECT_CLASS_STANDARD_ERROR; + } + else { + rb_obj_info_dump(obj); + rb_p(obj); + rb_bug("unsupported class"); + } + ibf_dump_write(dump, &cindex, sizeof(cindex)); +} + +static VALUE +ibf_load_object_class(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + enum ibf_object_class_index *cindexp = IBF_OBJBODY(enum ibf_object_class_index, offset); + enum ibf_object_class_index cindex = *cindexp; + + switch (cindex) { + case IBF_OBJECT_CLASS_OBJECT: + return rb_cObject; + case IBF_OBJECT_CLASS_ARRAY: + return rb_cArray; + case IBF_OBJECT_CLASS_STANDARD_ERROR: + return rb_eStandardError; + } + + rb_bug("ibf_load_object_class: unknown class (%d)", (int)cindex); +} + + +static void +ibf_dump_object_float(struct ibf_dump *dump, VALUE obj) +{ + double dbl = RFLOAT_VALUE(obj); + ibf_dump_write(dump, &dbl, sizeof(dbl)); +} + +static VALUE +ibf_load_object_float(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + double *dblp = IBF_OBJBODY(double, offset); + return DBL2NUM(*dblp); +} + +static void +ibf_dump_object_string(struct ibf_dump *dump, VALUE obj) +{ + long encindex = (long)rb_enc_get_index(obj); + long len = RSTRING_LEN(obj); + const char *ptr = RSTRING_PTR(obj); + + if (encindex > RUBY_ENCINDEX_BUILTIN_MAX) { + rb_encoding *enc = rb_enc_from_index((int)encindex); + const char *enc_name = rb_enc_name(enc); + encindex = RUBY_ENCINDEX_BUILTIN_MAX + ibf_dump_object(dump, rb_str_new2(enc_name)); + } + + IBF_WV(encindex); + IBF_WV(len); + IBF_WP(ptr, char, len); +} + +static VALUE +ibf_load_object_string(const struct 
ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + const struct ibf_object_string *string = IBF_OBJBODY(struct ibf_object_string, offset); + VALUE str = rb_str_new(string->ptr, string->len); + int encindex = (int)string->encindex; + + if (encindex > RUBY_ENCINDEX_BUILTIN_MAX) { + VALUE enc_name_str = ibf_load_object(load, encindex - RUBY_ENCINDEX_BUILTIN_MAX); + encindex = rb_enc_find_index(RSTRING_PTR(enc_name_str)); + } + rb_enc_associate_index(str, encindex); + + if (header->internal) rb_obj_hide(str); + if (header->frozen) rb_obj_freeze(str); + + return str; +} + +static void +ibf_dump_object_regexp(struct ibf_dump *dump, VALUE obj) +{ + struct ibf_object_regexp regexp; + regexp.srcstr = RREGEXP_SRC(obj); + regexp.option = (char)rb_reg_options(obj); + regexp.srcstr = (long)ibf_dump_object(dump, regexp.srcstr); + IBF_WV(regexp); +} + +static VALUE +ibf_load_object_regexp(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + const struct ibf_object_regexp *regexp = IBF_OBJBODY(struct ibf_object_regexp, offset); + VALUE srcstr = ibf_load_object(load, regexp->srcstr); + VALUE reg = rb_reg_compile(srcstr, (int)regexp->option, NULL, 0); + + if (header->internal) rb_obj_hide(reg); + if (header->frozen) rb_obj_freeze(reg); + + return reg; +} + +static void +ibf_dump_object_array(struct ibf_dump *dump, VALUE obj) +{ + long i, len = (int)RARRAY_LEN(obj); + IBF_WV(len); + for (i=0; ilen); + int i; + + for (i=0; ilen; i++) { + rb_ary_push(ary, ibf_load_object(load, array->ary[i])); + } + + if (header->internal) rb_obj_hide(ary); + if (header->frozen) rb_obj_freeze(ary); + + return ary; +} + +static int +ibf_dump_object_hash_i(st_data_t key, st_data_t val, st_data_t ptr) +{ + struct ibf_dump *dump = (struct ibf_dump *)ptr; + long key_index = (long)ibf_dump_object(dump, (VALUE)key); + long val_index = (long)ibf_dump_object(dump, (VALUE)val); + IBF_WV(key_index); + IBF_WV(val_index); + return ST_CONTINUE; 
+} + +static void +ibf_dump_object_hash(struct ibf_dump *dump, VALUE obj) +{ + long len = RHASH_SIZE(obj); + IBF_WV(len); + if (len > 0) st_foreach(RHASH(obj)->ntbl, ibf_dump_object_hash_i, (st_data_t)dump); +} + +static VALUE +ibf_load_object_hash(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + const struct ibf_object_hash *hash = IBF_OBJBODY(struct ibf_object_hash, offset); + VALUE obj = rb_hash_new(); + int i; + + for (i=0; ilen; i++) { + VALUE key = ibf_load_object(load, hash->keyval[i*2 ]); + VALUE val = ibf_load_object(load, hash->keyval[i*2+1]); + rb_hash_aset(obj, key, val); + } + + if (header->internal) rb_obj_hide(obj); + if (header->frozen) rb_obj_freeze(obj); + + return obj; +} + +static void +ibf_dump_object_struct(struct ibf_dump *dump, VALUE obj) +{ + if (rb_obj_is_kind_of(obj, rb_cRange)) { + struct ibf_object_struct_range range; + VALUE beg, end; + range.len = 3; + range.class_index = 0; + + rb_range_values(obj, &beg, &end, &range.excl); + range.beg = (long)ibf_dump_object(dump, beg); + range.end = (long)ibf_dump_object(dump, end); + + IBF_WV(range); + } + else { + rb_bug("ibf_dump_object_struct: unsupported class"); + } +} + +static VALUE +ibf_load_object_struct(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + const struct ibf_object_struct_range *range = IBF_OBJBODY(struct ibf_object_struct_range, offset); + VALUE beg = ibf_load_object(load, range->beg); + VALUE end = ibf_load_object(load, range->end); + VALUE obj = rb_range_new(beg, end, range->excl); + if (header->internal) rb_obj_hide(obj); + if (header->frozen) rb_obj_freeze(obj); + return obj; +} + +static void +ibf_dump_object_bignum(struct ibf_dump *dump, VALUE obj) +{ + ssize_t len = BIGNUM_LEN(obj); + ssize_t slen = BIGNUM_SIGN(obj) > 0 ? 
len : len * -1; + BDIGIT *d = BIGNUM_DIGITS(obj); + + IBF_WV(slen); + IBF_WP(d, BDIGIT, len); +} + +static VALUE +ibf_load_object_bignum(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + const struct ibf_object_bignum *bignum = IBF_OBJBODY(struct ibf_object_bignum, offset); + int sign = bignum->slen > 0; + ssize_t len = sign > 0 ? bignum->slen : -1 * bignum->slen; + VALUE obj = rb_integer_unpack(bignum->digits, len * 2, 2, 0, + INTEGER_PACK_LITTLE_ENDIAN | (sign == 0 ? INTEGER_PACK_NEGATIVE : 0)); + if (header->internal) rb_obj_hide(obj); + if (header->frozen) rb_obj_freeze(obj); + return obj; +} + +static void +ibf_dump_object_data(struct ibf_dump *dump, VALUE obj) +{ + if (rb_data_is_encoding(obj)) { + rb_encoding *enc = rb_to_encoding(obj); + const char *name = rb_enc_name(enc); + enum ibf_object_data_type type = IBF_OBJECT_DATA_ENCODING; + long len = strlen(name) + 1; + IBF_WV(type); + IBF_WV(len); + IBF_WP(name, char, strlen(name) + 1); + } + else { + ibf_dump_object_unsupported(dump, obj); + } +} + +static VALUE +ibf_load_object_data(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + const enum ibf_object_data_type *typep = IBF_OBJBODY(enum ibf_object_data_type, offset); + /* const long *lenp = IBF_OBJBODY(long, offset + sizeof(enum ibf_object_data_type)); */ + const char *data = IBF_OBJBODY(char, offset + sizeof(enum ibf_object_data_type) + sizeof(long)); + + switch (*typep) { + case IBF_OBJECT_DATA_ENCODING: + { + VALUE encobj = rb_enc_from_encoding(rb_enc_find(data)); + return encobj; + } + } + + return ibf_load_object_unsupported(load, header, offset); +} + +static void +ibf_dump_object_complex_rational(struct ibf_dump *dump, VALUE obj) +{ + long real = (long)ibf_dump_object(dump, RCOMPLEX(obj)->real); + long imag = (long)ibf_dump_object(dump, RCOMPLEX(obj)->imag); + + IBF_WV(real); + IBF_WV(imag); +} + +static VALUE +ibf_load_object_complex_rational(const struct 
ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + const struct ibf_object_complex_rational *nums = IBF_OBJBODY(struct ibf_object_complex_rational, offset); + VALUE a = ibf_load_object(load, nums->a); + VALUE b = ibf_load_object(load, nums->b); + VALUE obj = header->type == T_COMPLEX ? + rb_complex_new(a, b) : rb_rational_new(a, b); + + if (header->internal) rb_obj_hide(obj); + if (header->frozen) rb_obj_freeze(obj); + return obj; +} + +static void +ibf_dump_object_symbol(struct ibf_dump *dump, VALUE obj) +{ + VALUE str = rb_sym2str(obj); + long str_index = (long)ibf_dump_object(dump, str); + IBF_WV(str_index); +} + +static VALUE +ibf_load_object_symbol(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + /* const struct ibf_object_header *header = IBF_OBJHEADER(offset); */ + const struct ibf_object_symbol *symbol = IBF_OBJBODY(struct ibf_object_symbol, offset); + VALUE str = ibf_load_object(load, symbol->str); + ID id = rb_intern_str(str); + return ID2SYM(id); +} + +typedef void (*ibf_dump_object_function)(struct ibf_dump *dump, VALUE obj); +static ibf_dump_object_function dump_object_functions[RUBY_T_MASK+1] = { + ibf_dump_object_unsupported, /* T_NONE */ + ibf_dump_object_unsupported, /* T_OBJECT */ + ibf_dump_object_class, /* T_CLASS */ + ibf_dump_object_unsupported, /* T_MODULE */ + ibf_dump_object_float, /* T_FLOAT */ + ibf_dump_object_string, /* T_STRING */ + ibf_dump_object_regexp, /* T_REGEXP */ + ibf_dump_object_array, /* T_ARRAY */ + ibf_dump_object_hash, /* T_HASH */ + ibf_dump_object_struct, /* T_STRUCT */ + ibf_dump_object_bignum, /* T_BIGNUM */ + ibf_dump_object_unsupported, /* T_FILE */ + ibf_dump_object_data, /* T_DATA */ + ibf_dump_object_unsupported, /* T_MATCH */ + ibf_dump_object_complex_rational, /* T_COMPLEX */ + ibf_dump_object_complex_rational, /* T_RATIONAL */ + ibf_dump_object_unsupported, /* 0x10 */ + ibf_dump_object_unsupported, /* 0x11 T_NIL */ + 
ibf_dump_object_unsupported, /* 0x12 T_TRUE */ + ibf_dump_object_unsupported, /* 0x13 T_FALSE */ + ibf_dump_object_symbol, /* 0x14 T_SYMBOL */ + ibf_dump_object_unsupported, /* T_FIXNUM */ + ibf_dump_object_unsupported, /* T_UNDEF */ + ibf_dump_object_unsupported, /* 0x17 */ + ibf_dump_object_unsupported, /* 0x18 */ + ibf_dump_object_unsupported, /* 0x19 */ + ibf_dump_object_unsupported, /* T_IMEMO 0x1a */ + ibf_dump_object_unsupported, /* T_NODE 0x1b */ + ibf_dump_object_unsupported, /* T_ICLASS 0x1c */ + ibf_dump_object_unsupported, /* T_ZOMBIE 0x1d */ + ibf_dump_object_unsupported, /* 0x1e */ + ibf_dump_object_unsupported /* 0x1f */ +}; + +static ibf_offset_t +lbf_dump_object_object(struct ibf_dump *dump, VALUE obj) +{ + struct ibf_object_header obj_header; + ibf_offset_t current_offset = ibf_dump_pos(dump); + obj_header.type = TYPE(obj); + + if (SPECIAL_CONST_P(obj)) { + if (RB_TYPE_P(obj, T_SYMBOL) || + RB_TYPE_P(obj, T_FLOAT)) { + obj_header.internal = FALSE; + goto dump_object; + } + obj_header.special_const = TRUE; + obj_header.frozen = TRUE; + obj_header.internal = TRUE; + IBF_WV(obj_header); + IBF_WV(obj); + } + else { + obj_header.internal = (RBASIC_CLASS(obj) == 0) ? TRUE : FALSE; + dump_object: + obj_header.special_const = FALSE; + obj_header.frozen = FL_TEST(obj, FL_FREEZE) ? 
TRUE : FALSE; + IBF_WV(obj_header); + (*dump_object_functions[obj_header.type])(dump, obj); + } + + return current_offset; +} + +typedef VALUE (*ibf_load_object_function)(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t); +static ibf_load_object_function load_object_functions[RUBY_T_MASK+1] = { + ibf_load_object_unsupported, /* T_NONE */ + ibf_load_object_unsupported, /* T_OBJECT */ + ibf_load_object_class, /* T_CLASS */ + ibf_load_object_unsupported, /* T_MODULE */ + ibf_load_object_float, /* T_FLOAT */ + ibf_load_object_string, /* T_STRING */ + ibf_load_object_regexp, /* T_REGEXP */ + ibf_load_object_array, /* T_ARRAY */ + ibf_load_object_hash, /* T_HASH */ + ibf_load_object_struct, /* T_STRUCT */ + ibf_load_object_bignum, /* T_BIGNUM */ + ibf_load_object_unsupported, /* T_FILE */ + ibf_load_object_data, /* T_DATA */ + ibf_load_object_unsupported, /* T_MATCH */ + ibf_load_object_complex_rational, /* T_COMPLEX */ + ibf_load_object_complex_rational, /* T_RATIONAL */ + ibf_load_object_unsupported, /* 0x10 */ + ibf_load_object_unsupported, /* T_NIL */ + ibf_load_object_unsupported, /* T_TRUE */ + ibf_load_object_unsupported, /* T_FALSE */ + ibf_load_object_symbol, + ibf_load_object_unsupported, /* T_FIXNUM */ + ibf_load_object_unsupported, /* T_UNDEF */ + ibf_load_object_unsupported, /* 0x17 */ + ibf_load_object_unsupported, /* 0x18 */ + ibf_load_object_unsupported, /* 0x19 */ + ibf_load_object_unsupported, /* T_IMEMO 0x1a */ + ibf_load_object_unsupported, /* T_NODE 0x1b */ + ibf_load_object_unsupported, /* T_ICLASS 0x1c */ + ibf_load_object_unsupported, /* T_ZOMBIE 0x1d */ + ibf_load_object_unsupported, /* 0x1e */ + ibf_load_object_unsupported /* 0x1f */ +}; + +static VALUE +ibf_load_object(const struct ibf_load *load, VALUE object_index) +{ + if (object_index == 0) { + return Qnil; + } + else { + VALUE obj = rb_ary_entry(load->obj_list, (long)object_index); + if (obj == Qnil) { /* TODO: avoid multiple Qnil load */ + ibf_offset_t 
*offsets = (ibf_offset_t *)(load->header->object_list_offset + load->buff); + ibf_offset_t offset = offsets[object_index]; + const struct ibf_object_header *header = IBF_OBJHEADER(offset); + + if (header->special_const) { + VALUE *vp = IBF_OBJBODY(VALUE, offset); + obj = *vp; + } + else { + obj = (*load_object_functions[header->type])(load, header, offset); + } + + rb_ary_store(load->obj_list, (long)object_index, obj); + } + iseq_add_mark_object(load->iseq, obj); + return obj; + } +} + +static void +ibf_dump_object_list(struct ibf_dump *dump, struct ibf_header *header) +{ + VALUE list = rb_ary_tmp_new(RARRAY_LEN(dump->obj_list)); + int i, size; + + for (i=0; iobj_list); i++) { + VALUE obj = RARRAY_AREF(dump->obj_list, i); + ibf_offset_t offset = lbf_dump_object_object(dump, obj); + rb_ary_push(list, UINT2NUM(offset)); + } + size = i; + header->object_list_offset = ibf_dump_pos(dump); + + for (i=0; iobject_list_size = size; +} + +VALUE +iseq_ibf_dump(const rb_iseq_t *iseq, VALUE opt) +{ + struct ibf_dump dump; + struct ibf_header header; + + dump.str = rb_str_new(0, 0); + dump.iseq_list = rb_ary_tmp_new(0); + dump.obj_list = rb_ary_tmp_new(1); rb_ary_push(dump.obj_list, Qnil); /* 0th is nil */ + dump.iseq_table = st_init_numtable(); /* need free */ + dump.id_table = st_init_numtable(); /* need free */ + + ibf_table_index(dump.id_table, 0); /* id_index:0 is 0 */ + + if (iseq->body->parent_iseq != NULL || + iseq->body->local_iseq != iseq) { + rb_raise(rb_eRuntimeError, "should be top of iseq"); + } + if (RTEST(ISEQ_COVERAGE(iseq))) { + rb_raise(rb_eRuntimeError, "should not compile with coverage"); + } + + ibf_dump_write(&dump, &header, sizeof(header)); + ibf_dump_write(&dump, RUBY_PLATFORM, strlen(RUBY_PLATFORM) + 1); + ibf_dump_iseq(&dump, iseq); + + header.magic[0] = 'Y'; /* YARB */ + header.magic[1] = 'A'; + header.magic[2] = 'R'; + header.magic[3] = 'B'; + header.major_version = ISEQ_MAJOR_VERSION; + header.minor_version = ISEQ_MINOR_VERSION; + 
ibf_dump_iseq_list(&dump, &header); + ibf_dump_id_list(&dump, &header); + ibf_dump_object_list(&dump, &header); + header.size = ibf_dump_pos(&dump); + + if (RTEST(opt)) { + VALUE opt_str = StringValue(opt); /* raise TypeError for non-String extra data instead of reading through nil */ + header.extra_size = RSTRING_LEN(opt_str) + 1; + ibf_dump_write(&dump, RSTRING_PTR(opt_str), header.extra_size); + } + else { + header.extra_size = 0; + } + + ibf_dump_overwrite(&dump, &header, sizeof(header), 0); + + /* release. TODO: no need to care exceptions? */ + st_free_table(dump.iseq_table); + st_free_table(dump.id_table); + return dump.str; +} + +static const ibf_offset_t * +ibf_iseq_list(const struct ibf_load *load) +{ + return (ibf_offset_t *)(load->buff + load->header->iseq_list_offset); +} + +void +ibf_load_iseq_complete(rb_iseq_t *iseq) +{ + struct ibf_load *load = RTYPEDDATA_DATA(iseq->aux.loader.obj); + rb_iseq_t *prev_src_iseq = load->iseq; + load->iseq = iseq; + ibf_load_iseq_each(load, iseq, ibf_iseq_list(load)[iseq->aux.loader.index]); + ISEQ_COMPILE_DATA(iseq) = NULL; + FL_UNSET(iseq, ISEQ_NOT_LOADED_YET); + load->iseq = prev_src_iseq; +} + +#if USE_LAZY_LOAD +const rb_iseq_t * +rb_iseq_complete(const rb_iseq_t *iseq) +{ + ibf_load_iseq_complete((rb_iseq_t *)iseq); + return iseq; +} +#endif + +static rb_iseq_t * +ibf_load_iseq(const struct ibf_load *load, const rb_iseq_t *index_iseq) +{ + int iseq_index = (int)(VALUE)index_iseq; + + if (iseq_index == -1) { + return NULL; + } + else { + VALUE iseqv = rb_ary_entry(load->iseq_list, iseq_index); + + if (iseqv != Qnil) { + return (rb_iseq_t *)iseqv; + } + else { + rb_iseq_t *iseq = iseq_imemo_alloc(); + FL_SET(iseq, ISEQ_NOT_LOADED_YET); + iseq->aux.loader.obj = load->loader_obj; + iseq->aux.loader.index = iseq_index; + rb_ary_store(load->iseq_list, iseq_index, (VALUE)iseq); + +#if !USE_LAZY_LOAD + ibf_load_iseq_complete(iseq); +#endif /* !USE_LAZY_LOAD */ + + if (load->iseq) { + iseq_add_mark_object(load->iseq, (VALUE)iseq); + } + return iseq; + } + } +} + +static void 
+ibf_setup_load(struct ibf_load *load, VALUE loader_obj, VALUE str) +{ + RB_OBJ_WRITE(loader_obj, &load->str, str); + load->loader_obj = loader_obj; + load->buff = StringValuePtr(str); + load->header = (struct ibf_header *)load->buff; + RB_OBJ_WRITE(loader_obj, &load->iseq_list, rb_ary_tmp_new(0)); + RB_OBJ_WRITE(loader_obj, &load->obj_list, rb_ary_tmp_new(0)); + load->id_list = ZALLOC_N(ID, load->header->id_list_size); + load->iseq = NULL; +} + +static void +ibf_loader_mark(void *ptr) +{ + if (ptr) { + struct ibf_load *load = (struct ibf_load *)ptr; + rb_gc_mark(load->str); + rb_gc_mark(load->iseq_list); + rb_gc_mark(load->obj_list); + } +} + +static void +ibf_loader_free(void *ptr) +{ + if (ptr) { + struct ibf_load *load = (struct ibf_load *)ptr; + ruby_xfree(load->id_list); + ruby_xfree(load); + } +} + +static size_t +ibf_loader_memsize(const void *ptr) +{ + if (ptr) { + struct ibf_load *load = (struct ibf_load *)ptr; + return sizeof(struct ibf_load) + load->header->id_list_size * sizeof(ID); + } + else { + return 0; + } +} + +static const rb_data_type_t ibf_load_type = { + "ibf_loader", + {ibf_loader_mark, ibf_loader_free, ibf_loader_memsize,}, + 0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY +}; + +const rb_iseq_t * +iseq_ibf_load(VALUE str) +{ + struct ibf_load *load; + const rb_iseq_t *iseq; + VALUE loader_obj = TypedData_Make_Struct(0, struct ibf_load, &ibf_load_type, load); + + ibf_setup_load(load, loader_obj, str); + iseq = ibf_load_iseq(load, 0); + + RB_GC_GUARD(loader_obj); + return iseq; +} + +VALUE +iseq_ibf_load_extra_data(VALUE str) +{ + struct ibf_load *load; + VALUE loader_obj = TypedData_Make_Struct(0, struct ibf_load, &ibf_load_type, load); + VALUE extra_str; + + ibf_setup_load(load, loader_obj, str); + extra_str = rb_str_new2(load->buff + load->header->size); /* the dumper appends the NUL-terminated extra data at offset header.size; extra_size is its length, not its position */ + RB_GC_GUARD(loader_obj); + return extra_str; +} + diff --git a/encoding.c b/encoding.c index eb777c90f3..b030f21875 100644 --- a/encoding.c +++ b/encoding.c @@ -86,6 
+86,12 @@ static const rb_data_type_t encoding_data_type = { #define is_data_encoding(obj) (RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == &encoding_data_type) #define is_obj_encoding(obj) (RB_TYPE_P((obj), T_DATA) && is_data_encoding(obj)) +int +rb_data_is_encoding(VALUE obj) +{ + return is_data_encoding(obj); +} + static VALUE enc_new(rb_encoding *encoding) { diff --git a/insns.def b/insns.def index 3c4d98073b..3c185bd084 100644 --- a/insns.def +++ b/insns.def @@ -928,13 +928,15 @@ defineclass rb_bug("unknown defineclass type: %d", (int)type); } + rb_iseq_check(class_iseq); + /* enter scope */ vm_push_frame(th, class_iseq, VM_FRAME_MAGIC_CLASS, klass, VM_ENVVAL_BLOCK_PTR(GET_BLOCK_PTR()), (VALUE)vm_cref_push(th, klass, NULL, FALSE), class_iseq->body->iseq_encoded, GET_SP(), - class_iseq->body->local_size, class_iseq->body->stack_max); - + class_iseq->body->local_size, + class_iseq->body->stack_max); RESTORE_REGS(); NEXT_INSN(); } diff --git a/internal.h b/internal.h index df594c171f..ca630ddbec 100644 --- a/internal.h +++ b/internal.h @@ -725,6 +725,7 @@ void rb_encdb_declare(const char *name); void rb_enc_set_base(const char *name, const char *orig); int rb_enc_set_dummy(int index); void rb_encdb_set_unicode(int index); +int rb_data_is_encoding(VALUE obj); /* enum.c */ VALUE rb_f_send(int argc, VALUE *argv, VALUE recv); diff --git a/iseq.c b/iseq.c index 84d2d14689..fea5f726a1 100644 --- a/iseq.c +++ b/iseq.c @@ -25,9 +25,6 @@ #include "insns.inc" #include "insns_info.inc" -#define ISEQ_MAJOR_VERSION 2 -#define ISEQ_MINOR_VERSION 3 - VALUE rb_cISeq; static VALUE iseqw_new(const rb_iseq_t *iseq); static const rb_iseq_t *iseqw_check(VALUE iseqw); @@ -71,30 +68,32 @@ rb_iseq_free(const rb_iseq_t *iseq) RUBY_FREE_ENTER("iseq"); if (iseq) { - ruby_xfree((void *)iseq->body->iseq_encoded); - ruby_xfree((void *)iseq->body->line_info_table); - ruby_xfree((void *)iseq->body->local_table); - ruby_xfree((void *)iseq->body->is_entries); + if (iseq->body) { + ruby_xfree((void 
*)iseq->body->iseq_encoded); + ruby_xfree((void *)iseq->body->line_info_table); + ruby_xfree((void *)iseq->body->local_table); + ruby_xfree((void *)iseq->body->is_entries); - if (iseq->body->ci_entries) { - unsigned int i; - struct rb_call_info_with_kwarg *ci_kw_entries = (struct rb_call_info_with_kwarg *)&iseq->body->ci_entries[iseq->body->ci_size]; - for (i=0; ibody->ci_kw_size; i++) { - const struct rb_call_info_kw_arg *kw_arg = ci_kw_entries[i].kw_arg; - ruby_xfree((void *)kw_arg); + if (iseq->body->ci_entries) { + unsigned int i; + struct rb_call_info_with_kwarg *ci_kw_entries = (struct rb_call_info_with_kwarg *)&iseq->body->ci_entries[iseq->body->ci_size]; + for (i=0; ibody->ci_kw_size; i++) { + const struct rb_call_info_kw_arg *kw_arg = ci_kw_entries[i].kw_arg; + ruby_xfree((void *)kw_arg); + } + ruby_xfree(iseq->body->ci_entries); + ruby_xfree(iseq->body->cc_entries); } - ruby_xfree(iseq->body->ci_entries); - ruby_xfree(iseq->body->cc_entries); - } - ruby_xfree((void *)iseq->body->catch_table); - ruby_xfree((void *)iseq->body->param.opt_table); + ruby_xfree((void *)iseq->body->catch_table); + ruby_xfree((void *)iseq->body->param.opt_table); - if (iseq->body->param.keyword != NULL) { - ruby_xfree((void *)iseq->body->param.keyword->default_values); - ruby_xfree((void *)iseq->body->param.keyword); + if (iseq->body->param.keyword != NULL) { + ruby_xfree((void *)iseq->body->param.keyword->default_values); + ruby_xfree((void *)iseq->body->param.keyword); + } + compile_data_free(ISEQ_COMPILE_DATA(iseq)); + ruby_xfree(iseq->body); } - compile_data_free(ISEQ_COMPILE_DATA(iseq)); - ruby_xfree(iseq->body); } RUBY_FREE_LEAVE("iseq"); } @@ -116,9 +115,11 @@ rb_iseq_mark(const rb_iseq_t *iseq) RUBY_MARK_UNLESS_NULL(body->location.absolute_path); } - if (ISEQ_COMPILE_DATA(iseq) != 0) { + if (FL_TEST(iseq, ISEQ_NOT_LOADED_YET)) { + rb_gc_mark(iseq->aux.loader.obj); + } + else if (ISEQ_COMPILE_DATA(iseq) != 0) { const struct iseq_compile_data *const compile_data = 
ISEQ_COMPILE_DATA(iseq); - RUBY_MARK_UNLESS_NULL(compile_data->mark_ary); RUBY_MARK_UNLESS_NULL(compile_data->err_info); RUBY_MARK_UNLESS_NULL(compile_data->catch_table_ary); @@ -205,7 +206,7 @@ iseq_memsize(const rb_iseq_t *iseq) static rb_iseq_t * iseq_alloc(void) { - rb_iseq_t *iseq = (rb_iseq_t *)rb_imemo_new(imemo_iseq, 0, 0, 0, 0); + rb_iseq_t *iseq = iseq_imemo_alloc(); iseq->body = ZALLOC(struct rb_iseq_constant_body); return iseq; } @@ -259,16 +260,6 @@ rb_iseq_add_mark_object(const rb_iseq_t *iseq, VALUE obj) rb_ary_push(ISEQ_MARK_ARY(iseq), obj); } -static VALUE -iseq_mark_ary_create(int flip_cnt) -{ - VALUE ary = rb_ary_tmp_new(3); - rb_ary_push(ary, Qnil); /* ISEQ_MARK_ARY_COVERAGE */ - rb_ary_push(ary, INT2FIX(flip_cnt)); /* ISEQ_MARK_ARY_FLIP_CNT */ - rb_ary_push(ary, Qnil); /* ISEQ_MARK_ARY_ORIGINAL_ISEQ */ - return ary; -} - static VALUE prepare_iseq_build(rb_iseq_t *iseq, VALUE name, VALUE path, VALUE absolute_path, VALUE first_lineno, @@ -485,6 +476,19 @@ rb_iseq_new_with_opt(NODE *node, VALUE name, VALUE path, VALUE absolute_path, return iseq_translate(iseq); } +const rb_iseq_t * +rb_iseq_load_iseq(VALUE fname) +{ + if (rb_respond_to(rb_cISeq, rb_intern("load_iseq"))) { + VALUE iseqv = rb_funcall(rb_cISeq, rb_intern("load_iseq"), 1, fname); + if (CLASS_OF(iseqv) == rb_cISeq) { + return iseqw_check(iseqv); + } + } + + return NULL; +} + #define CHECK_ARRAY(v) rb_convert_type((v), T_ARRAY, "Array", "to_ary") #define CHECK_HASH(v) rb_convert_type((v), T_HASH, "Hash", "to_hash") #define CHECK_STRING(v) rb_convert_type((v), T_STRING, "String", "to_str") @@ -583,8 +587,7 @@ static VALUE iseq_s_load(int argc, VALUE *argv, VALUE self) { VALUE data, opt=Qnil; - rb_scan_args(argc, argv, "11", &data, &opt); - + rb_scan_args(argc, argv, "11", &data, &opt); /* data is mandatory: it is passed to iseq_load below and must not be left uninitialized */ return iseq_load(data, NULL, opt); } @@ -892,7 +895,11 @@ iseqw_s_compile_option_get(VALUE self) static const rb_iseq_t * iseqw_check(VALUE iseqw) { - const rb_iseq_t *iseq = DATA_PTR(iseqw); + rb_iseq_t *iseq = 
DATA_PTR(iseqw); + + if (!iseq->body) { + ibf_load_iseq_complete(iseq); + } if (!iseq->body->location.label) { rb_raise(rb_eTypeError, "uninitialized InstructionSequence"); @@ -1235,7 +1242,7 @@ rb_insn_operand_intern(const rb_iseq_t *iseq, { const char *types = insn_op_types(insn); char type = types[op_no]; - VALUE ret; + VALUE ret = Qundef; switch (type) { case TS_OFFSET: /* LONG */ @@ -1281,8 +1288,8 @@ rb_insn_operand_intern(const rb_iseq_t *iseq, case TS_ISEQ: /* iseq */ { - rb_iseq_t *iseq = (rb_iseq_t *)op; - if (iseq) { + if (op) { + const rb_iseq_t *iseq = rb_iseq_check((rb_iseq_t *)op); ret = iseq->body->location.label; if (child) { rb_ary_push(child, (VALUE)iseq); @@ -1492,7 +1499,7 @@ rb_iseq_disasm(const rb_iseq_t *iseq) catch_type((int)entry->type), (int)entry->start, (int)entry->end, (int)entry->sp, (int)entry->cont); if (entry->iseq) { - rb_str_concat(str, rb_iseq_disasm(entry->iseq)); + rb_str_concat(str, rb_iseq_disasm(rb_iseq_check(entry->iseq))); } } } @@ -1561,7 +1568,7 @@ rb_iseq_disasm(const rb_iseq_t *iseq) for (l = 0; l < RARRAY_LEN(child); l++) { VALUE isv = rb_ary_entry(child, l); - rb_str_concat(str, rb_iseq_disasm((rb_iseq_t *)isv)); + rb_str_concat(str, rb_iseq_disasm(rb_iseq_check((rb_iseq_t *)isv))); } return str; @@ -1907,7 +1914,7 @@ iseq_data_to_ary(const rb_iseq_t *iseq) { const rb_iseq_t *iseq = (rb_iseq_t *)*seq; if (iseq) { - VALUE val = iseq_data_to_ary(iseq); + VALUE val = iseq_data_to_ary(rb_iseq_check(iseq)); rb_ary_push(ary, val); } else { @@ -2002,7 +2009,7 @@ iseq_data_to_ary(const rb_iseq_t *iseq) const struct iseq_catch_table_entry *entry = &iseq->body->catch_table->entries[i]; rb_ary_push(ary, exception_type2symbol(entry->type)); if (entry->iseq) { - rb_ary_push(ary, iseq_data_to_ary(entry->iseq)); + rb_ary_push(ary, iseq_data_to_ary(rb_iseq_check(entry->iseq))); } else { rb_ary_push(ary, Qnil); @@ -2325,6 +2332,51 @@ rb_iseqw_local_variables(VALUE iseqval) return rb_iseq_local_variables(iseqw_check(iseqval)); } +/* 
+ * call-seq: + * iseq.to_binary_format(extra_data = nil) -> binary str + * + * Returns serialized iseq binary format data as a String object. + * A corresponding iseq object is created by + * RubyVM::InstructionSequence.from_binary_format() method. + * + * String extra_data will be saved with binary data. + * You can access this data with + * RubyVM::InstructionSequence.from_binary_format_extra_data(binary). + */ +static VALUE +iseqw_to_binary_format(int argc, VALUE *argv, VALUE self) +{ + VALUE opt; + rb_scan_args(argc, argv, "01", &opt); + return iseq_ibf_dump(iseqw_check(self), opt); +} + +/* + * call-seq: + * RubyVM::InstructionSequence.from_binary_format(binary) -> iseq + * + * Load an iseq object from binary format String object + * created by RubyVM::InstructionSequence.to_binary_format. + */ +static VALUE +iseqw_s_from_binary_format(VALUE self, VALUE str) +{ + return iseqw_new(iseq_ibf_load(str)); +} + +/* + * call-seq: + * RubyVM::InstructionSequence.from_binary_format_extra_data(binary) -> str + * + * Load extra data embedded into binary format String object. 
+ */ +static VALUE +iseqw_s_from_binary_format_extra_data(VALUE self, VALUE str) +{ + return iseq_ibf_load_extra_data(str); +} + /* * Document-class: RubyVM::InstructionSequence * @@ -2356,6 +2408,11 @@ Init_ISeq(void) rb_define_method(rb_cISeq, "to_a", iseqw_to_a, 0); rb_define_method(rb_cISeq, "eval", iseqw_eval, 0); + rb_define_method(rb_cISeq, "to_binary_format", iseqw_to_binary_format, -1); + rb_define_singleton_method(rb_cISeq, "from_binary_format", iseqw_s_from_binary_format, 1); + rb_define_singleton_method(rb_cISeq, "from_binary_format_extra_data", iseqw_s_from_binary_format_extra_data, 1); + + /* location APIs */ rb_define_method(rb_cISeq, "path", iseqw_path, 0); rb_define_method(rb_cISeq, "absolute_path", iseqw_absolute_path, 0); diff --git a/iseq.h b/iseq.h index c55119d01e..b316ea41b9 100644 --- a/iseq.h +++ b/iseq.h @@ -12,6 +12,9 @@ #ifndef RUBY_ISEQ_H #define RUBY_ISEQ_H 1 +#define ISEQ_MAJOR_VERSION 2 +#define ISEQ_MINOR_VERSION 3 + #ifndef rb_iseq_t typedef struct rb_iseq_struct rb_iseq_t; #define rb_iseq_t rb_iseq_t @@ -29,16 +32,27 @@ enum iseq_mark_ary_index { ISEQ_MARK_ARY_ORIGINAL_ISEQ = 2, }; +static inline VALUE +iseq_mark_ary_create(int flip_cnt) +{ + VALUE ary = rb_ary_tmp_new(3); + rb_ary_push(ary, Qnil); /* ISEQ_MARK_ARY_COVERAGE */ + rb_ary_push(ary, INT2FIX(flip_cnt)); /* ISEQ_MARK_ARY_FLIP_CNT */ + rb_ary_push(ary, Qnil); /* ISEQ_MARK_ARY_ORIGINAL_ISEQ */ + return ary; +} + #define ISEQ_MARK_ARY(iseq) (iseq)->body->mark_ary #define ISEQ_COVERAGE(iseq) RARRAY_AREF(ISEQ_MARK_ARY(iseq), ISEQ_MARK_ARY_COVERAGE) #define ISEQ_COVERAGE_SET(iseq, cov) RARRAY_ASET(ISEQ_MARK_ARY(iseq), ISEQ_MARK_ARY_COVERAGE, cov) +#define ISEQ_FLIP_CNT(iseq) FIX2INT(RARRAY_AREF(ISEQ_MARK_ARY(iseq), ISEQ_MARK_ARY_FLIP_CNT)) + static inline int ISEQ_FLIP_CNT_INCREMENT(const rb_iseq_t *iseq) { - VALUE cntv = RARRAY_AREF(ISEQ_MARK_ARY(iseq), ISEQ_MARK_ARY_FLIP_CNT); - int cnt = FIX2INT(cntv); + int cnt = ISEQ_FLIP_CNT(iseq); RARRAY_ASET(ISEQ_MARK_ARY(iseq), 
ISEQ_MARK_ARY_FLIP_CNT, INT2FIX(cnt+1)); return cnt; } @@ -59,7 +73,20 @@ ISEQ_ORIGINAL_ISEQ_ALLOC(const rb_iseq_t *iseq, long size) return (VALUE *)RSTRING_PTR(str); } -#define ISEQ_COMPILE_DATA(iseq) (iseq)->compile_data_ +#define ISEQ_COMPILE_DATA(iseq) (iseq)->aux.compile_data + +static inline rb_iseq_t * +iseq_imemo_alloc(void) +{ + return (rb_iseq_t *)rb_imemo_new(imemo_iseq, 0, 0, 0, 0); +} + +#define ISEQ_NOT_LOADED_YET IMEMO_FL_USER1 + +VALUE iseq_ibf_dump(const rb_iseq_t *iseq, VALUE opt); +void ibf_load_iseq_complete(rb_iseq_t *iseq); +const rb_iseq_t *iseq_ibf_load(VALUE str); +VALUE iseq_ibf_load_extra_data(VALUE str); RUBY_SYMBOL_EXPORT_BEGIN diff --git a/load.c b/load.c index 96b92fc8e8..4558e2c6fd 100644 --- a/load.c +++ b/load.c @@ -575,6 +575,7 @@ rb_provide(const char *feature) } NORETURN(static void load_failed(VALUE)); +const rb_iseq_t *rb_iseq_load_iseq(VALUE fname); static int rb_load_internal0(rb_thread_t *th, VALUE fname, int wrap) @@ -604,12 +605,17 @@ rb_load_internal0(rb_thread_t *th, VALUE fname, int wrap) state = EXEC_TAG(); if (state == 0) { NODE *node; - rb_iseq_t *iseq; + const rb_iseq_t *iseq; - th->mild_compile_error++; - node = (NODE *)rb_load_file_str(fname); - iseq = rb_iseq_new_top(node, rb_str_new2(""), fname, rb_realpath_internal(Qnil, fname, 1), NULL); - th->mild_compile_error--; + if ((iseq = rb_iseq_load_iseq(fname)) != NULL) { + /* OK */ + } + else { + th->mild_compile_error++; + node = (NODE *)rb_load_file_str(fname); + iseq = rb_iseq_new_top(node, rb_str_new2(""), fname, rb_realpath_internal(Qnil, fname, 1), NULL); + th->mild_compile_error--; + } rb_iseq_eval(iseq); } TH_POP_TAG(); diff --git a/proc.c b/proc.c index c88c676729..c71e62e7ac 100644 --- a/proc.c +++ b/proc.c @@ -984,12 +984,15 @@ rb_proc_get_iseq(VALUE self, int *is_proc) iseq = rb_method_iseq((VALUE)ifunc->data); if (is_proc) *is_proc = 0; } + return iseq; } else if (SYMBOL_P(iseq)) { self = rb_sym_to_proc((VALUE)iseq); goto again; } - return iseq; + else 
{ + return rb_iseq_check(iseq); + } } static VALUE @@ -998,6 +1001,7 @@ iseq_location(const rb_iseq_t *iseq) VALUE loc[2]; if (!iseq) return Qnil; + rb_iseq_check(iseq); loc[0] = iseq->body->location.path; if (iseq->body->line_info_table) { loc[1] = rb_iseq_first_lineno(iseq); @@ -1142,7 +1146,7 @@ proc_to_s(VALUE self) iseq = proc->block.iseq; is_lambda = proc->is_lambda ? " (lambda)" : ""; - if (RUBY_VM_NORMAL_ISEQ_P(iseq)) { + if (RUBY_VM_NORMAL_ISEQ_P(iseq) && rb_iseq_check(iseq)) { int first_lineno = 0; if (iseq->body->line_info_table) { @@ -2152,7 +2156,7 @@ rb_method_entry_min_max_arity(const rb_method_entry_t *me, int *max) case VM_METHOD_TYPE_BMETHOD: return rb_proc_min_max_arity(def->body.proc, max); case VM_METHOD_TYPE_ISEQ: { - const rb_iseq_t *iseq = def->body.iseq.iseqptr; + const rb_iseq_t *iseq = rb_iseq_check(def->body.iseq.iseqptr); return rb_iseq_min_max_arity(iseq, max); } case VM_METHOD_TYPE_UNDEF: @@ -2289,7 +2293,7 @@ method_def_iseq(const rb_method_definition_t *def) { switch (def->type) { case VM_METHOD_TYPE_ISEQ: - return def->body.iseq.iseqptr; + return rb_iseq_check(def->body.iseq.iseqptr); case VM_METHOD_TYPE_BMETHOD: return get_proc_iseq(def->body.proc, 0); case VM_METHOD_TYPE_ALIAS: @@ -2654,6 +2658,7 @@ proc_binding(VALUE self) bind->env = envval; if (iseq) { + rb_iseq_check(iseq); bind->path = iseq->body->location.path; bind->first_lineno = FIX2INT(rb_iseq_first_lineno(iseq)); } diff --git a/sample/iseq_loader.rb b/sample/iseq_loader.rb new file mode 100644 index 0000000000..4fbf02b0f6 --- /dev/null +++ b/sample/iseq_loader.rb @@ -0,0 +1,240 @@ +# +# iseq_loader.rb - sample of compiler/loader for binary compiled file +# +# Usage as a compiler: ruby iseq_loader.rb [file or directory] ... +# +# It compiles and stores specified files. +# If directories are specified, then compiles and stores all *.rb files. 
+# (using Dir.glob) +# +# TODO: add remove option +# TODO: add verify option +# +# Usage as a loader: simply require this file with the following setting. +# +# Setting with environment variables. +# +# * RUBY_ISEQ_LOADER_STORAGE to select storage type +# * dbm: use dbm +# * fs: [default] use file system. locate a compiled binary files in same +# directory of scripts like Rubinius. foo.rb.yarb will be created for foo.rb. +# * fs2: use file system. locate compiled file in specified directory. +# * nothing: do nothing. +# +# * RUBY_ISEQ_LOADER_STORAGE_DIR to select directory +# * default: ~/.ruby_binaries/ +# +# * RUBY_ISEQ_LOADER_STORAGE_COMPILE_IF_NOT_COMPILED +# * true: store compiled file if compiled data is not available. +# * false: [default] do nothing if there is no compiled iseq data. + +class RubyVM::InstructionSequence + $ISEQ_LOADER_LOADED = 0 + $ISEQ_LOADER_COMPILED = 0 + $ISEQ_LOADER_IGNORED = 0 + LAUNCHED_TIME = Time.now + COMPILE_FILE_ENABLE = false || true + COMPILE_VERBOSE = $VERBOSE || false # || true + COMPILE_DEBUG = ENV['RUBY_ISEQ_LOADER_DEBUG'] + COMPILE_IF_NOT_COMPILED = ENV['RUBY_ISEQ_LOADER_STORAGE_COMPILE_IF_NOT_COMPILED'] == 'true' + + at_exit{ + STDERR.puts "[ISEQ_LOADER] #{Process.pid} time: #{Time.now - LAUNCHED_TIME}, " + + "loaded: #{$ISEQ_LOADER_LOADED}, " + + "compied: #{$ISEQ_LOADER_COMPILED}, " + + "ignored: #{$ISEQ_LOADER_IGNORED}" + } if COMPILE_VERBOSE + + unless cf_dir = ENV['RUBY_ISEQ_LOADER_STORAGE_DIR'] + cf_dir = File.expand_path("~/.ruby_binaries") + unless File.exist?(cf_dir) + Dir.mkdir(cf_dir) + end + end + CF_PREFIX = "#{cf_dir}/cb." 
+ + class NullStorage + def load_iseq fname; end + def compile_and_save_isq fname; end + def unlink_compiled_iseq; end + end + + class BasicStorage + def initialize + require 'digest/sha1' + end + + def load_iseq fname + iseq_key = iseq_key_name(fname) + if compiled_iseq_exist?(fname, iseq_key) && compiled_iseq_is_younger?(fname, iseq_key) + $ISEQ_LOADER_LOADED += 1 + STDERR.puts "[ISEQ_LOADER] #{Process.pid} load #{fname} from #{iseq_key}" if COMPILE_DEBUG + binary = read_compiled_iseq(fname, iseq_key) + RubyVM::InstructionSequence.from_binary_format(binary) + elsif COMPILE_IF_NOT_COMPILED + compile_and_save_iseq(fname, iseq_key) + else + $ISEQ_LOADER_IGNORED += 1 + # p fname + nil + end + end + + def extra_data fname + "SHA-1:#{::Digest::SHA1.file(fname).digest}" + end + + def compile_and_save_iseq fname, iseq_key = iseq_key_name(fname) + $ISEQ_LOADER_COMPILED += 1 + STDERR.puts "[RUBY_COMPILED_FILE] compile #{fname}" if COMPILE_DEBUG + iseq = RubyVM::InstructionSequence.compile_file(fname) + + binary = iseq.to_binary_format(extra_data(fname)) + write_compiled_iseq(fname, iseq_key, binary) + iseq + end + + # def unlink_compiled_iseq; nil; end # should implement at sub classes + + private + + def iseq_key_name fname + fname + end + + # should implement at sub classes + # def compiled_iseq_younger? fname, iseq_key; end + # def compiled_iseq_exist? fname, iseq_key; end + # def read_compiled_file fname, iseq_key; end + # def write_compiled_file fname, iseq_key, binary; end + end + + class FSStorage < BasicStorage + def initialize + super + require 'fileutils' + @dir = CF_PREFIX + "files" + unless File.directory?(@dir) + FileUtils.mkdir_p(@dir) + end + end + + def unlink_compiled_iseq + File.unlink(compile_file_path) + end + + private + + def iseq_key_name fname + "#{fname}.yarb" # same directory + end + + def compiled_iseq_exist? fname, iseq_key + File.exist?(iseq_key) + end + + def compiled_iseq_is_younger? 
fname, iseq_key + File.mtime(iseq_key) >= File.mtime(fname) + end + + def read_compiled_iseq fname, iseq_key + open(iseq_key, 'rb'){|f| f.read} + end + + def write_compiled_iseq fname, iseq_key, binary + open(iseq_key, 'wb'){|f| f.write(binary)} + end + end + + class FS2Storage < FSStorage + def iseq_key_name fname + @dir + fname.gsub(/[^A-Za-z0-9\._-]/){|c| '%02x' % c.ord} # special directory + end + end + + class DBMStorage < BasicStorage + def initialize + require 'dbm' + @db = DBM.open(CF_PREFIX+'db') + end + + def unlink_compiled_iseq + @db.delete fname + end + + private + + def date_key_name fname + "date.#{fname}" + end + + def iseq_key_name fname + "body.#{fname}" + end + + def compiled_iseq_exist? fname, iseq_key + @db.has_key? iseq_key + end + + def compiled_iseq_is_younger? fname, iseq_key + date_key = date_key_name(fname) + if @db.has_key? date_key + @db[date_key].to_i >= File.mtime(fname).to_i + end + end + + def read_compiled_iseq fname, iseq_key + @db[iseq_key] + end + + def write_compiled_iseq fname, iseq_key, binary + date_key = date_key_name(fname) + @db[iseq_key] = binary + @db[date_key] = Time.now.to_i + end + end + + STORAGE = case ENV['RUBY_ISEQ_LOADER_STORAGE'] + when 'dbm' + DBMStorage.new + when 'fs' + FSStorage.new + when 'fs2' + FS2Storage.new + when 'null' + NullStorage.new + else + FSStorage.new + end + + STDERR.puts "[ISEQ_LOADER] use #{STORAGE.class} " if COMPILE_VERBOSE + + def self.load_iseq fname + STORAGE.load_iseq(fname) + end + + def self.compile_and_save_iseq fname + STORAGE.compile_and_save_iseq fname + end + + def self.unlink_compiled_iseq fname + STORAGE.unlink_compiled_iseq fname + end +end + +if __FILE__ == $0 + ARGV.each{|path| + if File.directory?(path) + pattern = File.join(path, '**/*.rb') + Dir.glob(pattern){|file| + begin + RubyVM::InstructionSequence.compile_and_save_iseq(file) + rescue SyntaxError => e + STDERR.puts e + end + } + else + RubyVM::InstructionSequence.compile_and_save_iseq(path) + end + } +end diff 
--git a/test/lib/iseq_loader_checker.rb b/test/lib/iseq_loader_checker.rb index 0c372ca638..09df3d38be 100644 --- a/test/lib/iseq_loader_checker.rb +++ b/test/lib/iseq_loader_checker.rb @@ -1,5 +1,8 @@ -require '-test-/iseq_load/iseq_load' +begin + require '-test-/iseq_load/iseq_load' +rescue LoadError +end require 'tempfile' class RubyVM::InstructionSequence @@ -21,9 +24,6 @@ class RubyVM::InstructionSequence d2 = i2.disasm_if_possible if d1 != d2 - p i1 - return - STDERR.puts "expected:" STDERR.puts d1 STDERR.puts "actual:" @@ -37,19 +37,38 @@ class RubyVM::InstructionSequence i2 end + CHECK_TO_A = ENV['RUBY_ISEQ_DUMP_DEBUG'] == 'to_a' + CHECK_TO_BINARY = ENV['RUBY_ISEQ_DUMP_DEBUG'] == 'to_binary' + def self.translate i1 # check to_a/load_iseq - i2 = compare_dump_and_load(i1, - proc{|iseq| - ary = iseq.to_a - ary[9] == :top ? ary : nil - }, - proc{|ary| - RubyVM::InstructionSequence.iseq_load(ary) - }) + i2_ary = compare_dump_and_load(i1, + proc{|iseq| + ary = iseq.to_a + ary[9] == :top ? 
ary : nil + }, + proc{|ary| + RubyVM::InstructionSequence.iseq_load(ary) + }) if CHECK_TO_A && defined?(RubyVM::InstructionSequence.iseq_load) + + # check to_binary_format + i2_bin = compare_dump_and_load(i1, + proc{|iseq| + begin + iseq.to_binary_format + rescue RuntimeError => e # not a toplevel + # STDERR.puts [:failed, e, iseq].inspect + nil + end + }, + proc{|bin| + iseq = RubyVM::InstructionSequence.from_binary_format(bin) + # STDERR.puts iseq.inspect + iseq + }) if CHECK_TO_BINARY # return value - i1 - end + i2_bin if CHECK_TO_BINARY + end if CHECK_TO_A || CHECK_TO_BINARY end #require_relative 'x'; exit(1) diff --git a/test/runner.rb b/test/runner.rb index c3cb2d8472..13506e592e 100644 --- a/test/runner.rb +++ b/test/runner.rb @@ -22,7 +22,7 @@ ENV["GEM_SKIP"] = ENV["GEM_HOME"] = ENV["GEM_PATH"] = "".freeze require_relative 'lib/profile_test_all' if ENV.has_key?('RUBY_TEST_ALL_PROFILE') require_relative 'lib/tracepointchecker' require_relative 'lib/zombie_hunter' -# require_relative 'lib/iseq_loader_checker' +require_relative 'lib/iseq_loader_checker' if ENV['COVERAGE'] %w[doclie simplecov-html simplecov].each do |f| diff --git a/vm.c b/vm.c index d6c8f1b622..4bae830633 100644 --- a/vm.c +++ b/vm.c @@ -945,7 +945,7 @@ invoke_block_from_c_0(rb_thread_t *th, const rb_block_t *block, return Qnil; } else if (LIKELY(RUBY_VM_NORMAL_ISEQ_P(block->iseq))) { - const rb_iseq_t *iseq = block->iseq; + const rb_iseq_t *iseq = rb_iseq_check(block->iseq); int i, opt_pc; int type = block_proc_is_lambda(block->proc) ? 
VM_FRAME_MAGIC_LAMBDA : VM_FRAME_MAGIC_BLOCK; VALUE *sp = th->cfp->sp; @@ -1816,6 +1816,7 @@ vm_exec(rb_thread_t *th) if (catch_iseq != NULL) { /* found catch table */ /* enter catch scope */ + rb_iseq_check(catch_iseq); cfp->sp = vm_base_ptr(cfp) + cont_sp; cfp->pc = cfp->iseq->body->iseq_encoded + cont_pc; diff --git a/vm_core.h b/vm_core.h index aecbe613d8..8c6456abda 100644 --- a/vm_core.h +++ b/vm_core.h @@ -257,10 +257,10 @@ struct rb_call_cache { #endif typedef struct rb_iseq_location_struct { - const VALUE path; - const VALUE absolute_path; - const VALUE base_label; - const VALUE label; + VALUE path; + VALUE absolute_path; + VALUE base_label; + VALUE label; VALUE first_lineno; /* TODO: may be unsigned short */ } rb_iseq_location_t; @@ -376,7 +376,7 @@ struct rb_iseq_constant_body { */ struct rb_call_cache *cc_entries; /* size is ci_size = ci_kw_size */ - const VALUE mark_ary; /* Array: includes operands which should be GC marked */ + VALUE mark_ary; /* Array: includes operands which should be GC marked */ unsigned int local_table_size; unsigned int is_size; @@ -389,12 +389,40 @@ struct rb_iseq_constant_body { /* typedef rb_iseq_t is in method.h */ struct rb_iseq_struct { VALUE flags; - struct iseq_compile_data *compile_data_; /* used at compile time */ - struct rb_iseq_constant_body *body; VALUE reserved1; - VALUE reserved2; + struct rb_iseq_constant_body *body; + + union { /* 4, 5 words */ + struct iseq_compile_data *compile_data; /* used at compile time */ + + struct { + VALUE obj; + int index; + } loader; + } aux; }; +#define USE_LAZY_LOAD 0 + +#ifndef USE_LAZY_LOAD +#define USE_LAZY_LOAD +#endif + +#if USE_LAZY_LOAD +const rb_iseq_t *rb_iseq_complete(const rb_iseq_t *iseq); + +static inline const rb_iseq_t * +rb_iseq_check(const rb_iseq_t *iseq) +{ + if (iseq->body == NULL) { + rb_iseq_complete((rb_iseq_t *)iseq); + } + return iseq; +} +#else +#define rb_iseq_check(iseq) iseq +#endif + enum ruby_special_exceptions { ruby_error_reenter, 
ruby_error_nomemory, @@ -962,7 +990,7 @@ rb_block_t *rb_vm_control_frame_block_ptr(const rb_control_frame_t *cfp); (!RUBY_VM_VALID_CONTROL_FRAME_P((cfp), RUBY_VM_END_CONTROL_FRAME(th))) #define RUBY_VM_IFUNC_P(ptr) (RB_TYPE_P((VALUE)(ptr), T_IMEMO) && imemo_type((VALUE)ptr) == imemo_ifunc) -#define RUBY_VM_NORMAL_ISEQ_P(ptr) (RB_TYPE_P((VALUE)(ptr), T_IMEMO) && imemo_type((VALUE)ptr) == imemo_iseq) +#define RUBY_VM_NORMAL_ISEQ_P(ptr) (RB_TYPE_P((VALUE)(ptr), T_IMEMO) && imemo_type((VALUE)ptr) == imemo_iseq && rb_iseq_check((rb_iseq_t *)ptr)) #define RUBY_VM_GET_BLOCK_PTR_IN_CFP(cfp) ((rb_block_t *)(&(cfp)->self)) #define RUBY_VM_GET_CFP_FROM_BLOCK_PTR(b) \ diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 033edea5fd..cfa76ab6c9 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -1400,7 +1400,7 @@ def_iseq_ptr(rb_method_definition_t *def) #if VM_CHECK_MODE > 0 if (def->type != VM_METHOD_TYPE_ISEQ) rb_bug("def_iseq_ptr: not iseq (%d)", def->type); #endif - return def->body.iseq.iseqptr; + return rb_iseq_check(def->body.iseq.iseqptr); } static VALUE @@ -2428,15 +2428,14 @@ static VALUE vm_invoke_block(rb_thread_t *th, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling, const struct rb_call_info *ci) { const rb_block_t *block = VM_CF_BLOCK_PTR(reg_cfp); - const rb_iseq_t *iseq; VALUE type = GET_ISEQ()->body->local_iseq->body->type; if ((type != ISEQ_TYPE_METHOD && type != ISEQ_TYPE_CLASS) || block == 0) { rb_vm_localjump_error("no block given (yield)", Qnil, 0); } - iseq = block->iseq; - if (RUBY_VM_NORMAL_ISEQ_P(iseq)) { + if (RUBY_VM_NORMAL_ISEQ_P(block->iseq)) { + const rb_iseq_t *iseq = block->iseq; const int arg_size = iseq->body->param.size; int is_lambda = block_proc_is_lambda(block->proc); VALUE * const rsp = GET_SP() - calling->argc;