From 16c5ce863c06dd3ae5562f4ed86fb40ced670c69 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Thu, 3 Sep 2020 12:06:53 -0400 Subject: [PATCH] Yeah, this actually works! --- common.mk | 5 +++ compile.c | 5 +++ ...-example-header.rb => gen_ujit_examples.rb | 39 ++++++++++--------- iseq.c | 34 +++++++++++++++- iseq.h | 2 +- tool/ruby_vm/views/vm.inc.erb | 2 +- 6 files changed, 66 insertions(+), 21 deletions(-) rename gen-ujit-example-header.rb => gen_ujit_examples.rb (79%) diff --git a/common.mk b/common.mk index 872e6e0a10..45ebaab86a 100644 --- a/common.mk +++ b/common.mk @@ -1105,6 +1105,10 @@ incs: $(INSNS) {$(VPATH)}node_name.inc {$(VPATH)}known_errors.inc \ insns: $(INSNS) +ujit_examples.h: gen_ujit_examples.rb vm.$(OBJEXT) + $(ECHO) generating $@ + $(Q) $(BASERUBY) gen_ujit_examples.rb + id.h: $(tooldir)/generic_erb.rb $(srcdir)/template/id.h.tmpl $(srcdir)/defs/id.def $(ECHO) generating $@ $(Q) $(BASERUBY) $(tooldir)/generic_erb.rb --output=$@ \ @@ -7002,6 +7006,7 @@ iseq.$(OBJEXT): {$(VPATH)}util.h iseq.$(OBJEXT): {$(VPATH)}vm_callinfo.h iseq.$(OBJEXT): {$(VPATH)}vm_core.h iseq.$(OBJEXT): {$(VPATH)}vm_opts.h +iseq.$(OBJEXT): {$(VPATH)}ujit_examples.h load.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h load.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h load.$(OBJEXT): $(CCAN_DIR)/list/list.h diff --git a/compile.c b/compile.c index 1e88dc242d..49263002e3 100644 --- a/compile.c +++ b/compile.c @@ -856,6 +856,8 @@ rb_iseq_compile_node(rb_iseq_t *iseq, const NODE *node) return iseq_setup(iseq, ret); } +extern uint8_t *native_pop_code; // TODO global hack + static int rb_iseq_translate_threaded_code(rb_iseq_t *iseq) { @@ -868,6 +870,9 @@ rb_iseq_translate_threaded_code(rb_iseq_t *iseq) int insn = (int)iseq->body->iseq_encoded[i]; int len = insn_len(insn); encoded[i] = (VALUE)table[insn]; + + if (insn == BIN(pop)) encoded[i] = (VALUE)native_pop_code; + i += len; } FL_SET((VALUE)iseq, ISEQ_TRANSLATED); diff --git a/gen-ujit-example-header.rb b/gen_ujit_examples.rb 
similarity index 79% rename from gen-ujit-example-header.rb rename to gen_ujit_examples.rb index 5ad0c65f02..eaacd988b8 100644 --- a/gen-ujit-example-header.rb +++ b/gen_ujit_examples.rb @@ -2,7 +2,7 @@ def get_example_instruction_id # TODO we could get this from the script that generates vm.inc instead of doing this song and dance `dwarfdump --name='YARVINSN_ujit_call_example' vm.o`.each_line do |line| if (id = line[/DW_AT_const_value\s\((\d+\))/, 1]) - p [__method__, line] + p [__method__, line] if $DEBUG return id.to_i end end @@ -13,7 +13,7 @@ def get_fileoff # use the load command to figure out the offset to the start of the content of vm.o `otool -l vm.o`.each_line do |line| if (fileoff = line[/fileoff (\d+)/, 1]) - p [__method__, line] + p [__method__, line] if $DEBUG return fileoff.to_i end end @@ -23,7 +23,7 @@ end def get_symbol_offset(symbol) `nm vm.o`.each_line do |line| if (offset = line[Regexp.compile('(\h+).+' + Regexp.escape(symbol) + '\Z'), 1]) - p [__method__, line] + p [__method__, line] if $DEBUG return Integer(offset, 16) end end @@ -42,15 +42,16 @@ def disassemble(offset) puts "feel free to verify with --reloc" disassembly = `#{command}` instructions = [] - puts disassembly + puts disassembly if $DEBUG disassembly.each_line do |line| line = line.strip - match = /\h+: ((?:\h\h\s?)+)\s+(\w+)/.match(line) do |match_data| + match_data = /\h+: ((?:\h\h\s?)+)\s+(\w+)/.match(line) + if match_data bytes = match_data[1] mnemonic = match_data[2] instructions << [bytes, mnemonic, line] - end - if !match && !instructions.empty? + break if mnemonic == 'jmp' + elsif !instructions.empty? 
p line raise "expected a continuous sequence of disassembly lines" end @@ -70,7 +71,7 @@ def disassemble(offset) call_idx = handler_instructions.find_index { |_, mnemonic, _| mnemonic == 'call' } - puts "\n\nDisassembly for the handler:" + puts "Disassembly for the example handler:" puts handler_instructions.map{|_,_,line|line} pre_call_bytes = [] @@ -78,28 +79,30 @@ def disassemble(offset) handler_instructions.take(call_idx).each do |bytes, mnemonic, _| pre_call_bytes += bytes.split end - handler_instructions[((call_idx+1)...)].each do |bytes, _, _| + handler_instructions[call_idx + 1, handler_instructions.size].each do |bytes, _, _| post_call_bytes += bytes.split end File.write("ujit_examples.h", <<-EOF) -static const uint8_t ujit_precall_bytes[] = { #{pre_call_bytes.map{ |byte| '0x'+byte}.join(', ')} }; -static const uint8_t ujit_postall_bytes[] = { #{post_call_bytes.map{ |byte| '0x'+byte}.join(', ')} }; +static const uint8_t ujit_pre_call_bytes[] = { #{pre_call_bytes.map{ |byte| '0x'+byte}.join(', ')} }; +static const uint8_t ujit_post_call_bytes[] = { #{post_call_bytes.map{ |byte| '0x'+byte}.join(', ')} }; EOF - puts "file:" - puts File.binread("ujit_examples.h") + if $DEBUG + puts "file:" + puts File.binread("ujit_examples.h") + end end instruction_id = get_example_instruction_id fileoff = get_fileoff tc_table_offset = get_symbol_offset('vm_exec_core.insns_address_table') vm_exec_core_offset = get_symbol_offset('vm_exec_core') -p instruction_id -p fileoff -p tc_table_offset.to_s(16) +p instruction_id if $DEBUG +p fileoff if $DEBUG +p tc_table_offset.to_s(16) if $DEBUG offset_to_insn_in_tc_table = fileoff + tc_table_offset + 8 * instruction_id -p offset_to_insn_in_tc_table +p offset_to_insn_in_tc_table if $DEBUG offset_to_handler_code_from_vm_exec_core = readint8b(offset_to_insn_in_tc_table) -p offset_to_handler_code_from_vm_exec_core +p offset_to_handler_code_from_vm_exec_core if $DEBUG disassemble(vm_exec_core_offset + 
offset_to_handler_code_from_vm_exec_core) diff --git a/iseq.c b/iseq.c index 6928a711ed..67af3371dd 100644 --- a/iseq.c +++ b/iseq.c @@ -42,6 +42,10 @@ #include "builtin.h" #include "insns.inc" #include "insns_info.inc" +#include <sys/mman.h> +#include "ujit_examples.h" + +uint8_t *native_pop_code; // TODO: hack. see addr2insn VALUE rb_cISeq; static VALUE iseqw_new(const rb_iseq_t *iseq); @@ -3205,6 +3209,22 @@ rb_vm_encoded_insn_data_table_init(void) st_add_direct(encoded_insn_data, key1, (st_data_t)&insn_data[insn]); st_add_direct(encoded_insn_data, key2, (st_data_t)&insn_data[insn]); } + + native_pop_code = mmap(0, 4096, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, 0, 0); + if (native_pop_code == MAP_FAILED) rb_bug("mmap failed"); + uint8_t *head = native_pop_code; + memcpy(head, ujit_pre_call_bytes, sizeof(ujit_pre_call_bytes)); + head += sizeof(ujit_pre_call_bytes); + const uint8_t handmade_pop[] = { // TODO assemble this from a separate file + 0x48, 0x83, 0x6f, 0x08, 0x08, // subq $8, 8(%rdi) + 0x48, 0x83, 0xc6, 0x08, // addq $8, %rsi + 0x48, 0x89, 0x37, // movq %rsi, (%rdi) + 0x48, 0x89, 0xf0 // movq %rsi, %rax + }; + memcpy(head, handmade_pop, sizeof(handmade_pop)); + head += sizeof(handmade_pop); + memcpy(head, ujit_post_call_bytes, sizeof(ujit_post_call_bytes)); + // TODO this is small enough to fit in the page we allocated but that can change } int @@ -3218,6 +3238,12 @@ rb_vm_insn_addr2insn(const void *addr) return (int)e->insn; } + // TODO this is a hack. The proper way to do this is to refactor this so that it takes + // the iseq body. + if (addr && addr == native_pop_code) { + return BIN(pop); + } + rb_bug("rb_vm_insn_addr2insn: invalid insn address: %p", addr); } @@ -3248,6 +3274,12 @@ encoded_iseq_trace_instrument(VALUE *iseq_encoded_insn, rb_event_flag_t turnon, return e->insn_len; } + // TODO this is a hack. The proper way to do this is to refactor this so that it takes + // the iseq body. 
+ if (key && (uint8_t *)key == native_pop_code) { + return insn_len(BIN(pop)); + } + rb_bug("trace_instrument: invalid insn address: %p", (void *)*iseq_encoded_insn); } @@ -3456,7 +3488,7 @@ trace_set_i(void *vstart, void *vend, size_t stride, void *data) } VALUE * -rb_ujit_empty_func(rb_control_frame_t *cfp) +rb_ujit_empty_func(rb_control_frame_t *cfp, const VALUE *pc) { // okay, not really empty, so maybe think of another name. // it's put in this file instead of say, compile.c to dodge long C compile time. diff --git a/iseq.h b/iseq.h index ace5a45ba3..2da29ef7f0 100644 --- a/iseq.h +++ b/iseq.h @@ -313,7 +313,7 @@ VALUE rb_iseq_defined_string(enum defined_type type); /* vm.c */ VALUE rb_iseq_local_variables(const rb_iseq_t *iseq); -NOINLINE(VALUE *rb_ujit_empty_func(rb_control_frame_t *cfp)); +NOINLINE(VALUE *rb_ujit_empty_func(rb_control_frame_t *cfp, const VALUE *pc)); RUBY_SYMBOL_EXPORT_END diff --git a/tool/ruby_vm/views/vm.inc.erb b/tool/ruby_vm/views/vm.inc.erb index 7942a3ef87..49d6fc2e16 100644 --- a/tool/ruby_vm/views/vm.inc.erb +++ b/tool/ruby_vm/views/vm.inc.erb @@ -34,7 +34,7 @@ INSN_ENTRY(<%= insn.name %>) START_OF_ORIGINAL_INSN(<%= insn.name %>); // assumes USE_MACHINE_REGS, aka reg_pc setup, // aka #define SET_PC(x) (reg_cfp->pc = reg_pc = (x)) - reg_pc = rb_ujit_empty_func(GET_CFP()); + reg_pc = rb_ujit_empty_func(GET_CFP(), reg_pc); END_INSN(<%= insn.name %>); } % end