diff --git a/.github/workflows/yjit-ubuntu.yml b/.github/workflows/yjit-ubuntu.yml
index 79c66ffd20..7b45912843 100644
--- a/.github/workflows/yjit-ubuntu.yml
+++ b/.github/workflows/yjit-ubuntu.yml
@@ -78,6 +78,10 @@ jobs:
             configure: "--enable-yjit=dev"
             yjit_opts: "--yjit-call-threshold=1 --yjit-verify-ctx"
 
+          - test_task: "check"
+            configure: "--enable-yjit=dev"
+            yjit_opts: "--yjit-call-threshold=1 --yjit-temp-regs=5"
+
           - test_task: "test-all TESTS=--repeat-count=2"
             configure: "--enable-yjit=dev"
diff --git a/yjit.rb b/yjit.rb
index 343f5f0ed8..c744f5335c 100644
--- a/yjit.rb
+++ b/yjit.rb
@@ -268,6 +268,9 @@ module RubyVM::YJIT
       $stderr.puts "iseq_stack_too_large: " + format_number(13, stats[:iseq_stack_too_large])
       $stderr.puts "iseq_too_long: " + format_number(13, stats[:iseq_too_long])
+      $stderr.puts "temp_reg_opnd: " + format_number(13, stats[:temp_reg_opnd])
+      $stderr.puts "temp_mem_opnd: " + format_number(13, stats[:temp_mem_opnd])
+      $stderr.puts "temp_spill: " + format_number(13, stats[:temp_spill])
       $stderr.puts "bindings_allocations: " + format_number(13, stats[:binding_allocations])
       $stderr.puts "bindings_set: " + format_number(13, stats[:binding_set])
       $stderr.puts "compilation_failure: " + format_number(13, compilation_failure) if compilation_failure != 0
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index 4e5748d231..b87068df11 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -175,6 +175,13 @@ impl Assembler
         vec![X11_REG, X12_REG, X13_REG]
     }
 
+    /// Get the list of registers that can be used for stack temps.
+    pub fn get_temp_regs() -> Vec<Reg> {
+        // FIXME: arm64 is not supported yet. Insn::Store doesn't support registers
+        // in its dest operand. Currently crashing at split_memory_address.
+        vec![]
+    }
+
     /// Get a list of all of the caller-saved registers
     pub fn get_caller_save_regs() -> Vec<Reg> {
         vec![X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG]
@@ -1046,7 +1053,9 @@ impl Assembler
             Insn::CSelGE { truthy, falsy, out } => {
                 csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GE);
             }
-            Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
+            Insn::LiveReg { .. } |
+            Insn::RegTemps(_) |
+            Insn::SpillTemp(_) => (), // just a reg alloc signal, no code
             Insn::PadInvalPatch => {
                 while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < JMP_PTR_BYTES && !cb.has_dropped_bytes() {
                     nop(cb);
diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs
index e27049ee33..37fb378905 100644
--- a/yjit/src/backend/ir.rs
+++ b/yjit/src/backend/ir.rs
@@ -10,8 +10,9 @@ use std::mem::take;
 use crate::cruby::{VALUE, SIZEOF_VALUE_I32};
 use crate::virtualmem::{CodePtr};
 use crate::asm::{CodeBlock, uimm_num_bits, imm_num_bits};
-use crate::core::{Context, Type, TempMapping};
+use crate::core::{Context, Type, TempMapping, RegTemps, MAX_REG_TEMPS, MAX_TEMP_TYPES};
 use crate::options::*;
+use crate::stats::*;
 
 #[cfg(target_arch = "x86_64")]
 use crate::backend::x86_64::*;
@@ -73,7 +74,7 @@ pub enum Opnd
     InsnOut{ idx: usize, num_bits: u8 },
 
     // Pointer to a slot on the VM stack
-    Stack { idx: i32, sp_offset: i8, num_bits: u8 },
+    Stack { idx: i32, stack_size: u8, sp_offset: i8, num_bits: u8 },
 
     // Low-level operands, for lowering
     Imm(i64),            // Raw signed immediate
@@ -162,7 +163,7 @@ impl Opnd
             Opnd::Reg(reg) => Some(Opnd::Reg(reg.with_num_bits(num_bits))),
             Opnd::Mem(Mem { base, disp, .. }) => Some(Opnd::Mem(Mem { base, disp, num_bits })),
             Opnd::InsnOut { idx, .. } => Some(Opnd::InsnOut { idx, num_bits }),
-            Opnd::Stack { idx, sp_offset, .. } => Some(Opnd::Stack { idx, sp_offset, num_bits }),
+            Opnd::Stack { idx, stack_size, sp_offset, .. } => Some(Opnd::Stack { idx, stack_size, sp_offset, num_bits }),
             _ => None,
         }
     }
@@ -216,6 +217,26 @@ impl Opnd
     pub fn match_num_bits(opnds: &[Opnd]) -> u8 {
         Self::match_num_bits_iter(opnds.iter())
     }
+
+    /// Calculate Opnd::Stack's index from the stack bottom.
+    pub fn stack_idx(&self) -> u8 {
+        match self {
+            Opnd::Stack { idx, stack_size, .. } => {
+                (*stack_size as isize - *idx as isize - 1) as u8
+            },
+            _ => unreachable!(),
+        }
+    }
+
+    /// Get the index for stack temp registers.
+    pub fn reg_idx(&self) -> usize {
+        match self {
+            Opnd::Stack { .. } => {
+                self.stack_idx() as usize % get_option!(num_temp_regs)
+            },
+            _ => unreachable!(),
+        }
+    }
 }
 
 impl From<usize> for Opnd {
@@ -408,6 +429,9 @@ pub enum Insn {
     /// Take a specific register. Signal the register allocator to not use it.
     LiveReg { opnd: Opnd, out: Opnd },
 
+    /// Update live stack temps without spill
+    RegTemps(RegTemps),
+
     // A low-level instruction that loads a value into a register.
     Load { opnd: Opnd, out: Opnd },
@@ -443,6 +467,9 @@ pub enum Insn {
     /// Shift a value right by a certain amount (signed).
     RShift { opnd: Opnd, shift: Opnd, out: Opnd },
 
+    /// Spill a stack temp from a register into memory
+    SpillTemp(Opnd),
+
     // Low-level instruction to store a value to memory.
     Store { dest: Opnd, src: Opnd },
@@ -514,6 +541,7 @@ impl Insn {
             Insn::LeaLabel { .. } => "LeaLabel",
             Insn::Lea { .. } => "Lea",
             Insn::LiveReg { .. } => "LiveReg",
+            Insn::RegTemps(_) => "RegTemps",
             Insn::Load { .. } => "Load",
             Insn::LoadInto { .. } => "LoadInto",
             Insn::LoadSExt { .. } => "LoadSExt",
@@ -524,6 +552,7 @@ impl Insn {
             Insn::PadInvalPatch => "PadEntryExit",
             Insn::PosMarker(_) => "PosMarker",
             Insn::RShift { .. } => "RShift",
+            Insn::SpillTemp(_) => "SpillTemp",
             Insn::Store { .. } => "Store",
             Insn::Sub { .. } => "Sub",
             Insn::Test { .. } => "Test",
@@ -658,6 +687,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
             Insn::Jz(_) |
             Insn::Label(_) |
             Insn::LeaLabel { .. } |
+            Insn::RegTemps(_) |
             Insn::PadInvalPatch |
             Insn::PosMarker(_) => None,
             Insn::CPopInto(opnd) |
@@ -668,7 +698,8 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
             Insn::LiveReg { opnd, .. } |
             Insn::Load { opnd, .. } |
             Insn::LoadSExt { opnd, .. } |
-            Insn::Not { opnd, .. } => {
+            Insn::Not { opnd, .. } |
+            Insn::SpillTemp(opnd) => {
                 match self.idx {
                     0 => {
                         self.idx += 1;
@@ -755,6 +786,7 @@ impl<'a> InsnOpndMutIterator<'a> {
             Insn::Jz(_) |
             Insn::Label(_) |
             Insn::LeaLabel { .. } |
+            Insn::RegTemps(_) |
             Insn::PadInvalPatch |
             Insn::PosMarker(_) => None,
             Insn::CPopInto(opnd) |
@@ -765,7 +797,8 @@ impl<'a> InsnOpndMutIterator<'a> {
             Insn::LiveReg { opnd, .. } |
             Insn::Load { opnd, .. } |
             Insn::LoadSExt { opnd, .. } |
-            Insn::Not { opnd, .. } => {
+            Insn::Not { opnd, .. } |
+            Insn::SpillTemp(opnd) => {
                 match self.idx {
                     0 => {
                         self.idx += 1;
@@ -857,6 +890,10 @@ pub struct Assembler
     /// Index of the last insn using the output of this insn
     pub(super) live_ranges: Vec<usize>,
 
+    /// Parallel vec with insns
+    /// Bitmap of which temps are in a register for this insn
+    pub(super) reg_temps: Vec<RegTemps>,
+
     /// Names of labels
     pub(super) label_names: Vec<String>,
 }
@@ -871,6 +908,7 @@ impl Assembler
         Self {
             insns: Vec::default(),
             live_ranges: Vec::default(),
+            reg_temps: Vec::default(),
             label_names
         }
     }
@@ -905,8 +943,33 @@ impl Assembler
             }
         }
 
+        // Update live stack temps for this instruction
+        let mut reg_temps = self.get_reg_temps();
+        match insn {
+            Insn::RegTemps(next_temps) => {
+                reg_temps = next_temps;
+            }
+            Insn::SpillTemp(opnd) => {
+                assert_eq!(reg_temps.get(opnd.stack_idx()), true);
+                reg_temps.set(opnd.stack_idx(), false);
+            }
+            _ => {}
+        }
+        // Assert no conflict
+        for stack_idx in 0..MAX_REG_TEMPS {
+            if reg_temps.get(stack_idx) {
+                assert!(!reg_temps.conflicts_with(stack_idx));
+            }
+        }
+
         self.insns.push(insn);
         self.live_ranges.push(insn_idx);
+        self.reg_temps.push(reg_temps);
+    }
+
+    /// Get stack temps that are currently in a register
+    pub fn get_reg_temps(&self) -> RegTemps {
+        *self.reg_temps.last().unwrap_or(&RegTemps::default())
     }
 
     /// Create a new label instance that we can jump to
@@ -922,22 +985,113 @@ impl Assembler
     /// Convert Stack operands to memory operands
     pub fn lower_stack(mut self) -> Assembler
     {
+        // Convert Opnd::Stack to Opnd::Mem
+        fn mem_opnd(opnd: &Opnd) -> Opnd {
+            if let Opnd::Stack { idx, sp_offset, num_bits, .. } = *opnd {
+                incr_counter!(temp_mem_opnd);
+                Opnd::mem(num_bits, SP, (sp_offset as i32 - idx - 1) * SIZEOF_VALUE_I32)
+            } else {
+                unreachable!()
+            }
+        }
+
+        // Convert Opnd::Stack to Opnd::Reg
+        fn reg_opnd(opnd: &Opnd, regs: &Vec<Reg>) -> Opnd {
+            if let Opnd::Stack { num_bits, .. } = *opnd {
+                incr_counter!(temp_reg_opnd);
+                Opnd::Reg(regs[opnd.reg_idx()]).with_num_bits(num_bits).unwrap()
+            } else {
+                unreachable!()
+            }
+        }
+
         let mut asm = Assembler::new_with_label_names(take(&mut self.label_names));
+        let regs = Assembler::get_temp_regs();
+        let reg_temps = take(&mut self.reg_temps);
         let mut iterator = self.into_draining_iter();
 
-        while let Some((index, mut insn)) = iterator.next_unmapped() {
-            let mut opnd_iter = insn.opnd_iter_mut();
-            while let Some(opnd) = opnd_iter.next() {
-                if let Opnd::Stack { idx, sp_offset, num_bits } = *opnd {
-                    *opnd = Opnd::mem(num_bits, SP, (sp_offset as i32 - idx - 1) * SIZEOF_VALUE_I32);
+        while let Some((index, mut insn)) = iterator.next_mapped() {
+            match &insn {
+                // The original insn is pushed to the new asm to satisfy ccall's reg_temps assertion.
+                Insn::RegTemps(_) => {} // noop
+                Insn::SpillTemp(opnd) => {
+                    incr_counter!(temp_spill);
+                    asm.mov(mem_opnd(opnd), reg_opnd(opnd, &regs));
+                }
+                _ => {
+                    // next_mapped() doesn't map out_opnd. So we need to map it here.
+                    if insn.out_opnd().is_some() {
+                        let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter());
+                        let out = insn.out_opnd_mut().unwrap();
+                        *out = asm.next_opnd_out(out_num_bits);
+                    }
+
+                    // Lower Opnd::Stack to Opnd::Reg or Opnd::Mem
+                    let mut opnd_iter = insn.opnd_iter_mut();
+                    while let Some(opnd) = opnd_iter.next() {
+                        if let Opnd::Stack { idx, stack_size, sp_offset, num_bits } = *opnd {
+                            *opnd = if opnd.stack_idx() < MAX_REG_TEMPS && reg_temps[index].get(opnd.stack_idx()) {
+                                reg_opnd(opnd, &regs)
+                            } else {
+                                mem_opnd(opnd)
+                            };
+                        }
+                    }
                 }
             }
             asm.push_insn(insn);
+            iterator.map_insn_index(&mut asm);
         }
 
         asm
     }
 
+    /// Allocate a register to a stack temp if available.
+    pub fn alloc_temp_reg(&mut self, ctx: &mut Context, stack_idx: u8) {
+        if get_option!(num_temp_regs) == 0 {
+            return;
+        }
+
+        assert_eq!(self.get_reg_temps(), ctx.get_reg_temps());
+        let mut reg_temps = self.get_reg_temps();
+
+        // Allocate a register if there's no conflict.
+        if reg_temps.conflicts_with(stack_idx) {
+            assert!(!reg_temps.get(stack_idx));
+        } else {
+            reg_temps.set(stack_idx, true);
+            self.set_reg_temps(reg_temps);
+            ctx.set_reg_temps(reg_temps);
+        }
+    }
+
+    /// Spill all live stack temps from registers to the stack
+    pub fn spill_temps(&mut self, ctx: &mut Context) {
+        assert_eq!(self.get_reg_temps(), ctx.get_reg_temps());
+
+        // Forget registers above the stack top
+        let mut reg_temps = self.get_reg_temps();
+        for stack_idx in ctx.get_stack_size()..MAX_REG_TEMPS {
+            reg_temps.set(stack_idx, false);
+        }
+        self.set_reg_temps(reg_temps);
+
+        // Spill live stack temps
+        if self.get_reg_temps() != RegTemps::default() {
+            self.comment(&format!("spill_temps: {:08b} -> {:08b}", self.get_reg_temps().as_u8(), RegTemps::default().as_u8()));
+            for stack_idx in 0..u8::min(MAX_REG_TEMPS, ctx.get_stack_size()) {
+                if self.get_reg_temps().get(stack_idx) {
+                    let idx = ctx.get_stack_size() - 1 - stack_idx;
+                    self.spill_temp(ctx.stack_opnd(idx.into()));
+                }
+            }
+        }
+
+        // Every stack temp should have been spilled
+        assert_eq!(self.get_reg_temps(), RegTemps::default());
+        ctx.set_reg_temps(self.get_reg_temps());
+    }
+
     /// Sets the out field on the various instructions that require allocated
     /// registers because their output is used as the operand on a subsequent
     /// instruction. This is our implementation of the linear scan algorithm.
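[Editorial note, not part of the patch] The round-robin mapping implemented by `stack_idx()`, `reg_idx()`, and `lower_stack` above is easiest to see in isolation. A minimal, self-contained sketch follows; `NUM_TEMP_REGS` stands in for `get_option!(num_temp_regs)`, using the value 5 that the CI job above configures with `--yjit-temp-regs=5`:

    // NUM_TEMP_REGS stands in for get_option!(num_temp_regs).
    const NUM_TEMP_REGS: usize = 5;

    /// Mirrors Opnd::stack_idx(): distance of a slot from the bottom of the
    /// stack. `idx` counts down from the top (stack_opnd(0) is the top slot).
    fn stack_idx(stack_size: u8, idx: i32) -> u8 {
        (stack_size as isize - idx as isize - 1) as u8
    }

    /// Mirrors Opnd::reg_idx(): temp registers are assigned round-robin,
    /// so slots whose indexes differ by NUM_TEMP_REGS share a register.
    fn reg_idx(stack_idx: u8) -> usize {
        stack_idx as usize % NUM_TEMP_REGS
    }

    fn main() {
        // With three values on the stack, the top slot has stack_idx 2.
        assert_eq!(stack_idx(3, 0), 2);
        // Slots 0 and 5 map to the same register, so only one may be live.
        assert_eq!(reg_idx(0), reg_idx(5));
    }

Because slot N and slot N + NUM_TEMP_REGS land in the same register, only one of them may hold a live value at a time, which is the conflict that `conflicts_with` in core.rs (further down) detects.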
@@ -1318,6 +1472,7 @@ impl Assembler {
     }
 
     pub fn ccall(&mut self, fptr: *const u8, opnds: Vec<Opnd>) -> Opnd {
+        assert_eq!(self.get_reg_temps(), RegTemps::default(), "temps must be spilled before ccall");
         let out = self.next_opnd_out(Opnd::match_num_bits(&opnds));
         self.push_insn(Insn::CCall { fptr, opnds, out });
         out
@@ -1545,6 +1700,20 @@ impl Assembler {
         out
     }
 
+    /// Update which stack temps are in a register
+    pub fn set_reg_temps(&mut self, reg_temps: RegTemps) {
+        if self.get_reg_temps() != reg_temps {
+            self.comment(&format!("reg_temps: {:08b} -> {:08b}", self.get_reg_temps().as_u8(), reg_temps.as_u8()));
+            self.push_insn(Insn::RegTemps(reg_temps));
+        }
+    }
+
+    /// Spill a stack temp from a register to the stack
+    pub fn spill_temp(&mut self, opnd: Opnd) {
+        assert!(self.get_reg_temps().get(opnd.stack_idx()));
+        self.push_insn(Insn::SpillTemp(opnd));
+    }
+
     pub fn store(&mut self, dest: Opnd, src: Opnd) {
         self.push_insn(Insn::Store { dest, src });
     }
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
index 03ccc10928..0121e19142 100644
--- a/yjit/src/backend/x86_64/mod.rs
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -10,6 +10,7 @@ use crate::codegen::{JITState};
 use crate::cruby::*;
 use crate::backend::ir::*;
 use crate::codegen::CodegenGlobals;
+use crate::options::*;
 
 // Use the x86 register type for this platform
 pub type Reg = X86Reg;
@@ -97,6 +98,13 @@ impl Assembler
         ]
     }
 
+    /// Get the list of registers that can be used for stack temps.
+    pub fn get_temp_regs() -> Vec<Reg> {
+        let num_regs = get_option!(num_temp_regs);
+        let mut regs = vec![RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG];
+        regs.drain(0..num_regs).collect()
+    }
+
     /// Get a list of all of the caller-save registers
     pub fn get_caller_save_regs() -> Vec<Reg> {
         vec![RAX_REG, RCX_REG, RDX_REG, RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG, R11_REG]
@@ -709,7 +717,9 @@ impl Assembler
             Insn::CSelGE { truthy, falsy, out } => {
                 emit_csel(cb, *truthy, *falsy, *out, cmovl);
             }
-            Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
+            Insn::LiveReg { .. } |
+            Insn::RegTemps(_) |
+            Insn::SpillTemp(_) => (), // just a reg alloc signal, no code
             Insn::PadInvalPatch => {
                 let code_size = cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()));
                 if code_size < JMP_PTR_BYTES {
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index f3c671d23b..94de95906a 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -40,6 +40,13 @@ type InsnGenFn = fn(
     ocb: &mut OutlinedCb,
 ) -> CodegenStatus;
 
+/// Subset of Context that matters for generating a side exit.
+#[derive(Eq, Hash, PartialEq)]
+struct SideExitContext {
+    sp_offset: i8,
+    reg_temps: RegTemps,
+}
+
 /// Ephemeral code generation state.
 /// Represents a [core::Block] while we build it.
 pub struct JITState {
@@ -68,8 +75,8 @@ pub struct JITState {
     stack_size_for_pc: u8,
 
     /// Side exit to the instruction being compiled. See :side-exit:.
-    /// For the current PC, CodePtr is cached for each sp_offset key.
-    side_exit_for_pc: HashMap<i8, CodePtr>,
+    /// For the current PC, it's cached for each (sp_offset, reg_temps).
+    side_exit_for_pc: HashMap<SideExitContext, CodePtr>,
 
     /// Execution context when compilation started
     /// This allows us to peek at run-time values
@@ -314,6 +321,7 @@ fn jit_save_pc(jit: &JITState, asm: &mut Assembler) {
 /// Note: this will change the current value of REG_SP,
 /// which could invalidate memory operands
 fn gen_save_sp(asm: &mut Assembler, ctx: &mut Context) {
+    asm.spill_temps(ctx);
     if ctx.get_sp_offset() != 0 {
         asm.comment("save SP to CFP");
         let stack_pointer = ctx.sp_opnd(0);
@@ -470,6 +478,9 @@ fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, asm: &mut Assembler) {
         asm.comment(&format!("exit to interpreter on {}", insn_name(opcode as usize)));
     }
 
+    // Spill stack temps before returning to the interpreter
+    asm.spill_temps(&mut ctx.clone());
+
     // Generate the code to exit to the interpreters
     // Write the adjusted SP back into the CFP
     if ctx.get_sp_offset() != 0 {
@@ -517,6 +528,7 @@ fn gen_outlined_exit(exit_pc: *mut VALUE, ctx: &Context, ocb: &mut OutlinedCb) -
     let mut cb = ocb.unwrap();
     let exit_code = cb.get_write_ptr();
     let mut asm = Assembler::new();
+    asm.set_reg_temps(ctx.get_reg_temps());
 
     gen_exit(exit_pc, ctx, &mut asm);
@@ -544,10 +556,12 @@ fn side_exit(jit: &mut JITState, ctx: &Context, ocb: &mut OutlinedCb) -> Target
     // sp_offset because gen_outlined_exit uses ctx.sp_offset to move SP.
     let ctx = ctx.with_stack_size(jit.stack_size_for_pc);
 
-    match jit.side_exit_for_pc.get(&ctx.get_sp_offset()) {
+    // Cache a side exit for each (sp_offset, reg_temps).
+    let exit_ctx = SideExitContext { sp_offset: ctx.get_sp_offset(), reg_temps: ctx.get_reg_temps() };
+    match jit.side_exit_for_pc.get(&exit_ctx) {
         None => {
             let exit_code = gen_outlined_exit(jit.pc, &ctx, ocb);
-            jit.side_exit_for_pc.insert(ctx.get_sp_offset(), exit_code);
+            jit.side_exit_for_pc.insert(exit_ctx, exit_code);
             exit_code.as_side_exit()
         }
         Some(code_ptr) => code_ptr.as_side_exit()
@@ -857,6 +871,7 @@ pub fn gen_single_block(
         let chain_depth = if ctx.get_chain_depth() > 0 { format!(", chain_depth: {}", ctx.get_chain_depth()) } else { "".to_string() };
         asm.comment(&format!("Block: {} (ISEQ offset: {}{})", iseq_get_location(blockid.iseq, blockid_idx), blockid_idx, chain_depth));
     }
+    asm.set_reg_temps(ctx.get_reg_temps());
 
     // For each instruction to compile
     // NOTE: could rewrite this loop with a std::iter::Iterator
@@ -883,6 +898,14 @@ pub fn gen_single_block(
         jit.stack_size_for_pc = ctx.get_stack_size();
         jit.side_exit_for_pc.clear();
 
+        // stack_pop doesn't immediately deallocate a register for stack temps,
+        // but it's safe to do so at this instruction boundary.
+        assert_eq!(asm.get_reg_temps(), ctx.get_reg_temps());
+        for stack_idx in ctx.get_stack_size()..MAX_REG_TEMPS {
+            ctx.dealloc_temp_reg(stack_idx);
+        }
+        asm.set_reg_temps(ctx.get_reg_temps());
+
         // If previous instruction requested to record the boundary
         if jit.record_boundary_patch_point {
             // Generate an exit to this instruction and record it
@@ -905,7 +928,7 @@ pub fn gen_single_block(
             gen_counter_incr!(asm, exec_instruction);
 
             // Add a comment for the name of the YARV instruction
-            asm.comment(&format!("Insn: {}", insn_name(opcode)));
+            asm.comment(&format!("Insn: {} (stack_size: {})", insn_name(opcode), ctx.get_stack_size()));
 
             // If requested, dump instructions for debugging
             if get_option!(dump_insns) {
@@ -1003,7 +1026,7 @@ fn gen_dup(
     let dup_val = ctx.stack_opnd(0);
     let (mapping, tmp_type) = ctx.get_opnd_mapping(dup_val.into());
 
-    let loc0 = ctx.stack_push_mapping((mapping, tmp_type));
+    let loc0 = ctx.stack_push_mapping(asm, (mapping, tmp_type));
     asm.mov(loc0, dup_val);
 
     KeepCompiling
@@ -1029,10 +1052,10 @@ fn gen_dupn(
     let mapping1 = ctx.get_opnd_mapping(opnd1.into());
     let mapping0 = ctx.get_opnd_mapping(opnd0.into());
 
-    let dst1: Opnd = ctx.stack_push_mapping(mapping1);
+    let dst1: Opnd = ctx.stack_push_mapping(asm, mapping1);
     asm.mov(dst1, opnd1);
 
-    let dst0: Opnd = ctx.stack_push_mapping(mapping0);
+    let dst0: Opnd = ctx.stack_push_mapping(asm, mapping0);
     asm.mov(dst0, opnd0);
 
     KeepCompiling
@@ -1083,7 +1106,7 @@ fn gen_putnil(
 fn jit_putobject(_jit: &mut JITState, ctx: &mut Context, asm: &mut Assembler, arg: VALUE) {
     let val_type: Type = Type::from(arg);
-    let stack_top = ctx.stack_push(val_type);
+    let stack_top = ctx.stack_push(asm, val_type);
     asm.mov(stack_top, arg.into());
 }
@@ -1124,7 +1147,7 @@ fn gen_putself(
 ) -> CodegenStatus {
 
     // Write it on the stack
-    let stack_top = ctx.stack_push_self();
+    let stack_top = ctx.stack_push_self(asm);
     asm.mov(
         stack_top,
         Opnd::mem(VALUE_BITS, CFP, RUBY_OFFSET_CFP_SELF)
@@ -1142,7 +1165,7 @@ fn gen_putspecialobject(
     let object_type = jit.get_arg(0).as_usize();
 
     if object_type == VM_SPECIAL_OBJECT_VMCORE.as_usize() {
-        let stack_top = ctx.stack_push(Type::UnknownHeap);
+        let stack_top = ctx.stack_push(asm, Type::UnknownHeap);
         let frozen_core = unsafe { rb_mRubyVMFrozenCore };
         asm.mov(stack_top, frozen_core.into());
         KeepCompiling
@@ -1186,7 +1209,7 @@ fn gen_topn(
     let top_n_val = ctx.stack_opnd(n.try_into().unwrap());
     let mapping = ctx.get_opnd_mapping(top_n_val.into());
-    let loc0 = ctx.stack_push_mapping(mapping);
+    let loc0 = ctx.stack_push_mapping(asm, mapping);
     asm.mov(loc0, top_n_val);
 
     KeepCompiling
@@ -1236,7 +1259,7 @@ fn gen_opt_plus(
         asm.jo(side_exit(jit, ctx, ocb));
 
         // Push the output on the stack
-        let dst = ctx.stack_push(Type::Fixnum);
+        let dst = ctx.stack_push(asm, Type::Fixnum);
         asm.mov(dst, out_val);
 
         KeepCompiling
@@ -1278,7 +1301,7 @@ fn gen_newarray(
     );
 
     ctx.stack_pop(n.as_usize());
-    let stack_ret = ctx.stack_push(Type::CArray);
+    let stack_ret = ctx.stack_push(asm, Type::CArray);
     asm.mov(stack_ret, new_ary);
 
     KeepCompiling
@@ -1302,7 +1325,7 @@ fn gen_duparray(
         vec![ary.into()],
     );
 
-    let stack_ret = ctx.stack_push(Type::CArray);
+    let stack_ret = ctx.stack_push(asm, Type::CArray);
     asm.mov(stack_ret, new_ary);
 
     KeepCompiling
@@ -1323,7 +1346,7 @@ fn gen_duphash(
     // call rb_hash_resurrect(VALUE hash);
     let hash = asm.ccall(rb_hash_resurrect as *const u8, vec![hash.into()]);
 
-    let stack_ret = ctx.stack_push(Type::Hash);
+    let stack_ret = ctx.stack_push(asm, Type::Hash);
     asm.mov(stack_ret, hash);
 
     KeepCompiling
@@ -1348,7 +1371,7 @@ fn gen_splatarray(
     // Call rb_vm_splat_array(flag, ary)
     let ary = asm.ccall(rb_vm_splat_array as *const u8, vec![flag.into(), ary_opnd]);
 
-    let stack_ret = ctx.stack_push(Type::TArray);
+    let stack_ret = ctx.stack_push(asm, Type::TArray);
     asm.mov(stack_ret, ary);
 
     KeepCompiling
@@ -1372,7 +1395,7 @@ fn gen_concatarray(
     // Call rb_vm_concat_array(ary1, ary2st)
     let ary = asm.ccall(rb_vm_concat_array as *const u8, vec![ary1_opnd, ary2st_opnd]);
 
-    let stack_ret = ctx.stack_push(Type::TArray);
+    let stack_ret = ctx.stack_push(asm, Type::TArray);
     asm.mov(stack_ret, ary);
 
     KeepCompiling
@@ -1401,7 +1424,7 @@ fn gen_newrange(
     );
 
     ctx.stack_pop(2);
-    let stack_ret = ctx.stack_push(Type::UnknownHeap);
+    let stack_ret = ctx.stack_push(asm, Type::UnknownHeap);
     asm.mov(stack_ret, range_opnd);
 
     KeepCompiling
@@ -1572,7 +1595,7 @@ fn gen_expandarray(
         // special case for a, b = nil pattern
         // push N nils onto the stack
         for _ in 0..num {
-            let push_opnd = ctx.stack_push(Type::Nil);
+            let push_opnd = ctx.stack_push(asm, Type::Nil);
             asm.mov(push_opnd, Qnil.into());
         }
         return KeepCompiling;
@@ -1621,7 +1644,7 @@ fn gen_expandarray(
     // Loop backward through the array and push each element onto the stack.
     for i in (0..num).rev() {
-        let top = ctx.stack_push(Type::Unknown);
+        let top = ctx.stack_push(asm, Type::Unknown);
         let offset = i32::try_from(i * SIZEOF_VALUE).unwrap();
         asm.mov(top, Opnd::mem(64, ary_opnd, offset));
     }
@@ -1707,9 +1730,9 @@ fn gen_getlocal_generic(
     // Write the local at SP
     let stack_top = if level == 0 {
         let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset);
-        ctx.stack_push_local(local_idx.as_usize())
+        ctx.stack_push_local(asm, local_idx.as_usize())
     } else {
-        ctx.stack_push(Type::Unknown)
+        ctx.stack_push(asm, Type::Unknown)
     };
 
     asm.mov(stack_top, local_opnd);
@@ -1863,12 +1886,12 @@ fn gen_newhash(
         asm.cpop_into(new_hash); // x86 alignment
 
         ctx.stack_pop(num.try_into().unwrap());
-        let stack_ret = ctx.stack_push(Type::Hash);
+        let stack_ret = ctx.stack_push(asm, Type::Hash);
         asm.mov(stack_ret, new_hash);
     } else {
         // val = rb_hash_new();
         let new_hash = asm.ccall(rb_hash_new as *const u8, vec![]);
-        let stack_ret = ctx.stack_push(Type::Hash);
+        let stack_ret = ctx.stack_push(asm, Type::Hash);
         asm.mov(stack_ret, new_hash);
     }
@@ -1891,7 +1914,7 @@ fn gen_putstring(
         vec![EC, put_val.into()]
     );
 
-    let stack_top = ctx.stack_push(Type::CString);
+    let stack_top = ctx.stack_push(asm, Type::CString);
     asm.mov(stack_top, str_opnd);
 
     KeepCompiling
@@ -1931,7 +1954,7 @@ fn gen_checkkeyword(
     asm.test(bits_opnd, Opnd::Imm(bit_test));
     let ret_opnd = asm.csel_z(Qtrue.into(), Qfalse.into());
 
-    let stack_ret = ctx.stack_push(Type::UnknownImm);
+    let stack_ret = ctx.stack_push(asm, Type::UnknownImm);
     asm.mov(stack_ret, ret_opnd);
 
     KeepCompiling
@@ -2023,7 +2046,7 @@ fn gen_set_ivar(
         ],
     );
 
-    let out_opnd = ctx.stack_push(Type::Unknown);
+    let out_opnd = ctx.stack_push(asm, Type::Unknown);
     asm.mov(out_opnd, val);
 
     KeepCompiling
@@ -2090,7 +2113,7 @@ fn gen_get_ivar(
         }
 
         // Push the ivar on the stack
-        let out_opnd = ctx.stack_push(Type::Unknown);
+        let out_opnd = ctx.stack_push(asm, Type::Unknown);
         asm.mov(out_opnd, ivar_val);
 
         // Jump to next instruction. This allows guard chains to share the same successor.
@@ -2141,7 +2164,7 @@ fn gen_get_ivar(
         // when we entered the compiler.  That means we can just return
        // nil for this shape + iv name
         None => {
-            let out_opnd = ctx.stack_push(Type::Nil);
+            let out_opnd = ctx.stack_push(asm, Type::Nil);
             asm.mov(out_opnd, Qnil.into());
         }
         Some(ivar_index) => {
@@ -2153,7 +2176,7 @@ fn gen_get_ivar(
                 let ivar_opnd = Opnd::mem(64, recv, offs);
 
                 // Push the ivar on the stack
-                let out_opnd = ctx.stack_push(Type::Unknown);
+                let out_opnd = ctx.stack_push(asm, Type::Unknown);
                 asm.mov(out_opnd, ivar_opnd);
             } else {
                 // Compile time value is *not* embedded.
@@ -2164,7 +2187,7 @@ fn gen_get_ivar(
                 // Read the ivar from the extended table
                 let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32);
 
-                let out_opnd = ctx.stack_push(Type::Unknown);
+                let out_opnd = ctx.stack_push(asm, Type::Unknown);
                 asm.mov(out_opnd, ivar_opnd);
             }
         }
@@ -2344,6 +2367,7 @@ fn gen_setinstancevariable(
         exit_counter!(setivar_megamorphic),
     );
 
+    asm.spill_temps(ctx); // for ccall (must be done before write_val is popped)
     let write_val;
 
     match ivar_index {
@@ -2386,6 +2410,7 @@ fn gen_setinstancevariable(
                 if needs_extension {
                     // Generate the C call so that runtime code will increase
                     // the capacity and set the buffer.
+                    asm.spill_temps(ctx); // for ccall
                     asm.ccall(rb_ensure_iv_list_size as *const u8,
                               vec![
                                   recv,
@@ -2481,7 +2506,7 @@ fn gen_defined(
     } else {
         Type::Unknown
     };
-    let stack_ret = ctx.stack_push(out_type);
+    let stack_ret = ctx.stack_push(asm, out_type);
     asm.mov(stack_ret, out_value);
 
     KeepCompiling
@@ -2528,7 +2553,7 @@ fn gen_definedivar(
     // Push the return value onto the stack
     let out_type = if pushval.special_const_p() { Type::UnknownImm } else { Type::Unknown };
-    let stack_ret = ctx.stack_push(out_type);
+    let stack_ret = ctx.stack_push(asm, out_type);
     asm.mov(stack_ret, out_value);
 
     return KeepCompiling
@@ -2614,7 +2639,7 @@ fn gen_checktype(
         let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into());
 
         asm.write_label(ret);
-        let stack_ret = ctx.stack_push(Type::UnknownImm);
+        let stack_ret = ctx.stack_push(asm, Type::UnknownImm);
         asm.mov(stack_ret, ret_opnd);
 
         KeepCompiling
@@ -2643,7 +2668,7 @@ fn gen_concatstrings(
     );
 
     ctx.stack_pop(n);
-    let stack_ret = ctx.stack_push(Type::CString);
+    let stack_ret = ctx.stack_push(asm, Type::CString);
     asm.mov(stack_ret, return_value);
 
     KeepCompiling
@@ -2758,7 +2783,7 @@ fn gen_fixnum_cmp(
         let bool_opnd = cmov_op(asm, Qtrue.into(), Qfalse.into());
 
         // Push the output on the stack
-        let dst = ctx.stack_push(Type::UnknownImm);
+        let dst = ctx.stack_push(asm, Type::UnknownImm);
         asm.mov(dst, bool_opnd);
 
         KeepCompiling
@@ -2838,7 +2863,7 @@ fn gen_equality_specialized(
 
         // Push the output on the stack
         ctx.stack_pop(2);
-        let dst = ctx.stack_push(Type::UnknownImm);
+        let dst = ctx.stack_push(asm, Type::UnknownImm);
         asm.mov(dst, val);
 
         return Some(true);
@@ -2873,6 +2898,9 @@ fn gen_equality_specialized(
         let equal = asm.new_label("equal");
         let ret = asm.new_label("ret");
 
+        // Spill for ccall. For safety, unconditionally spill temps before branching.
+        asm.spill_temps(ctx);
+
         // If they are equal by identity, return true
         asm.cmp(a_opnd, b_opnd);
         asm.je(equal);
@@ -2904,7 +2932,7 @@ fn gen_equality_specialized(
 
         // Push the output on the stack
         ctx.stack_pop(2);
-        let dst = ctx.stack_push(Type::UnknownImm);
+        let dst = ctx.stack_push(asm, Type::UnknownImm);
         asm.mov(dst, val);
         asm.jmp(ret);
@@ -3011,6 +3039,7 @@ fn gen_opt_aref(
         // Call VALUE rb_ary_entry_internal(VALUE ary, long offset).
         // It never raises or allocates, so we don't need to write to cfp->pc.
         {
+            asm.spill_temps(ctx); // for ccall
             let idx_reg = asm.rshift(idx_reg, Opnd::UImm(1)); // Convert fixnum to int
             let val = asm.ccall(rb_ary_entry_internal as *const u8, vec![recv_opnd, idx_reg]);
 
@@ -3018,7 +3047,7 @@ fn gen_opt_aref(
             ctx.stack_pop(2);
 
             // Push the return value onto the stack
-            let stack_ret = ctx.stack_push(Type::Unknown);
+            let stack_ret = ctx.stack_push(asm, Type::Unknown);
             asm.mov(stack_ret, val);
         }
 
@@ -3058,7 +3087,7 @@ fn gen_opt_aref(
         ctx.stack_pop(2);
 
         // Push the return value onto the stack
-        let stack_ret = ctx.stack_push(Type::Unknown);
+        let stack_ret = ctx.stack_push(asm, Type::Unknown);
         asm.mov(stack_ret, val);
 
         // Jump to next instruction. This allows guard chains to share the same successor.
@@ -3135,7 +3164,7 @@ fn gen_opt_aset(
 
         // Push the return value onto the stack
         ctx.stack_pop(3);
-        let stack_ret = ctx.stack_push(Type::Unknown);
+        let stack_ret = ctx.stack_push(asm, Type::Unknown);
         asm.mov(stack_ret, val);
 
         jump_to_next_insn(jit, ctx, asm, ocb);
@@ -3166,7 +3195,7 @@ fn gen_opt_aset(
 
         // Push the return value onto the stack
         ctx.stack_pop(3);
-        let stack_ret = ctx.stack_push(Type::Unknown);
+        let stack_ret = ctx.stack_push(asm, Type::Unknown);
         asm.mov(stack_ret, ret);
 
         jump_to_next_insn(jit, ctx, asm, ocb);
@@ -3207,7 +3236,7 @@ fn gen_opt_and(
         let val = asm.and(arg0, arg1);
 
         // Push the output on the stack
-        let dst = ctx.stack_push(Type::Fixnum);
+        let dst = ctx.stack_push(asm, Type::Fixnum);
         asm.store(dst, val);
 
         KeepCompiling
@@ -3248,7 +3277,7 @@ fn gen_opt_or(
         let val = asm.or(arg0, arg1);
 
         // Push the output on the stack
-        let dst = ctx.stack_push(Type::Fixnum);
+        let dst = ctx.stack_push(asm, Type::Fixnum);
         asm.store(dst, val);
 
         KeepCompiling
@@ -3291,7 +3320,7 @@ fn gen_opt_minus(
         let val = asm.add(val_untag, Opnd::Imm(1));
 
         // Push the output on the stack
-        let dst = ctx.stack_push(Type::Fixnum);
+        let dst = ctx.stack_push(asm, Type::Fixnum);
         asm.store(dst, val);
 
         KeepCompiling
@@ -3345,6 +3374,7 @@ fn gen_opt_mod(
         guard_two_fixnums(jit, ctx, asm, ocb);
 
         // Get the operands and destination from the stack
+        asm.spill_temps(ctx); // for ccall (must be done before stack_pop)
         let arg1 = ctx.stack_pop(1);
         let arg0 = ctx.stack_pop(1);
 
@@ -3357,7 +3387,7 @@ fn gen_opt_mod(
 
         // Push the return value onto the stack
         // When the two arguments are fixnums, the modulo output is always a fixnum
-        let stack_ret = ctx.stack_push(Type::Fixnum);
+        let stack_ret = ctx.stack_push(asm, Type::Fixnum);
         asm.mov(stack_ret, ret);
 
         KeepCompiling
@@ -3420,7 +3450,7 @@ fn gen_opt_str_freeze(
     let str = jit.get_arg(0);
 
     // Push the return value onto the stack
-    let stack_ret = ctx.stack_push(Type::CString);
+    let stack_ret = ctx.stack_push(asm, Type::CString);
     asm.mov(stack_ret, str.into());
 
     KeepCompiling
@@ -3439,7 +3469,7 @@ fn gen_opt_str_uminus(
     let str = jit.get_arg(0);
 
     // Push the return value onto the stack
-    let stack_ret = ctx.stack_push(Type::CString);
+    let stack_ret = ctx.stack_push(asm, Type::CString);
     asm.mov(stack_ret, str.into());
 
     KeepCompiling
@@ -3474,7 +3504,7 @@ fn gen_opt_newarray_max(
     );
 
     ctx.stack_pop(num.as_usize());
-    let stack_ret = ctx.stack_push(Type::Unknown);
+    let stack_ret = ctx.stack_push(asm, Type::Unknown);
     asm.mov(stack_ret, val_opnd);
 
     KeepCompiling
@@ -3510,7 +3540,7 @@ fn gen_opt_newarray_min(
     );
 
     ctx.stack_pop(num.as_usize());
-    let stack_ret = ctx.stack_push(Type::Unknown);
+    let stack_ret = ctx.stack_push(asm, Type::Unknown);
     asm.mov(stack_ret, val_opnd);
 
     KeepCompiling
@@ -4009,7 +4039,7 @@ fn jit_guard_known_klass(
 // Calls to protected callees only go through when self.is_a?(klass_that_defines_the_callee).
 fn jit_protected_callee_ancestry_guard(
     jit: &mut JITState,
-    ctx: &Context,
+    ctx: &mut Context,
     asm: &mut Assembler,
     ocb: &mut OutlinedCb,
     cme: *const rb_callable_method_entry_t,
@@ -4019,6 +4049,7 @@ fn jit_protected_callee_ancestry_guard(
 
     // Note: PC isn't written to current control frame as rb_is_kind_of() shouldn't raise.
     // VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass);
+    asm.spill_temps(ctx); // for ccall
     let val = asm.ccall(
         rb_obj_is_kind_of as *mut u8,
         vec![
@@ -4050,14 +4081,14 @@ fn jit_rb_obj_not(
         Some(false) => {
             asm.comment("rb_obj_not(nil_or_false)");
             ctx.stack_pop(1);
-            let out_opnd = ctx.stack_push(Type::True);
+            let out_opnd = ctx.stack_push(asm, Type::True);
             asm.mov(out_opnd, Qtrue.into());
         },
         Some(true) => {
             // Note: recv_opnd != Type::Nil && recv_opnd != Type::False.
             asm.comment("rb_obj_not(truthy)");
             ctx.stack_pop(1);
-            let out_opnd = ctx.stack_push(Type::False);
+            let out_opnd = ctx.stack_push(asm, Type::False);
             asm.mov(out_opnd, Qfalse.into());
         },
         _ => {
@@ -4082,7 +4113,7 @@ fn jit_rb_true(
 ) -> bool {
     asm.comment("nil? == true");
     ctx.stack_pop(1);
-    let stack_ret = ctx.stack_push(Type::True);
+    let stack_ret = ctx.stack_push(asm, Type::True);
     asm.mov(stack_ret, Qtrue.into());
     true
 }
@@ -4101,7 +4132,7 @@ fn jit_rb_false(
 ) -> bool {
     asm.comment("nil? == false");
     ctx.stack_pop(1);
-    let stack_ret = ctx.stack_push(Type::False);
+    let stack_ret = ctx.stack_push(asm, Type::False);
     asm.mov(stack_ret, Qfalse.into());
     true
 }
@@ -4151,10 +4182,10 @@ fn jit_rb_kernel_is_a(
     ctx.stack_pop(2);
 
     if sample_is_a {
-        let stack_ret = ctx.stack_push(Type::True);
+        let stack_ret = ctx.stack_push(asm, Type::True);
         asm.mov(stack_ret, Qtrue.into());
     } else {
-        let stack_ret = ctx.stack_push(Type::False);
+        let stack_ret = ctx.stack_push(asm, Type::False);
         asm.mov(stack_ret, Qfalse.into());
     }
     return true;
@@ -4211,10 +4242,10 @@ fn jit_rb_kernel_instance_of(
     ctx.stack_pop(2);
 
     if sample_instance_of {
-        let stack_ret = ctx.stack_push(Type::True);
+        let stack_ret = ctx.stack_push(asm, Type::True);
         asm.mov(stack_ret, Qtrue.into());
     } else {
-        let stack_ret = ctx.stack_push(Type::False);
+        let stack_ret = ctx.stack_push(asm, Type::False);
         asm.mov(stack_ret, Qfalse.into());
     }
     return true;
@@ -4236,6 +4267,7 @@ fn jit_rb_mod_eqq(
     }
 
     asm.comment("Module#===");
+    asm.spill_temps(ctx); // for ccall
     // By being here, we know that the receiver is a T_MODULE or a T_CLASS, because Module#=== can
     // only live on these objects. With that, we can call rb_obj_is_kind_of() without
     // jit_prepare_routine_call() or a control frame push because it can't raise, allocate, or call
@@ -4248,7 +4280,7 @@ fn jit_rb_mod_eqq(
 
     // Return the result
     ctx.stack_pop(2);
-    let stack_ret = ctx.stack_push(Type::UnknownImm);
+    let stack_ret = ctx.stack_push(asm, Type::UnknownImm);
     asm.mov(stack_ret, ret);
 
     return true;
@@ -4274,7 +4306,7 @@ fn jit_rb_obj_equal(
     asm.cmp(obj1, obj2);
     let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into());
 
-    let stack_ret = ctx.stack_push(Type::UnknownImm);
+    let stack_ret = ctx.stack_push(asm, Type::UnknownImm);
     asm.mov(stack_ret, ret_opnd);
     true
 }
@@ -4317,7 +4349,7 @@ fn jit_rb_int_equal(
     asm.cmp(arg0, arg1);
     let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into());
 
-    let stack_ret = ctx.stack_push(Type::UnknownImm);
+    let stack_ret = ctx.stack_push(asm, Type::UnknownImm);
     asm.mov(stack_ret, ret_opnd);
     true
 }
@@ -4350,13 +4382,14 @@ fn jit_rb_str_uplus(
     let ret_label = asm.new_label("stack_ret");
 
     // String#+@ can only exist on T_STRING
-    let stack_ret = ctx.stack_push(Type::TString);
+    let stack_ret = ctx.stack_push(asm, Type::TString);
 
     // If the string isn't frozen, we just return it.
     asm.mov(stack_ret, recv_opnd);
     asm.jz(ret_label);
 
     // Str is frozen - duplicate it
+    asm.spill_temps(ctx); // for ccall
     let ret_opnd = asm.ccall(rb_str_dup as *const u8, vec![recv_opnd]);
     asm.mov(stack_ret, ret_opnd);
@@ -4378,10 +4411,11 @@ fn jit_rb_str_bytesize(
 ) -> bool {
     asm.comment("String#bytesize");
 
+    asm.spill_temps(ctx); // for ccall (must be done before stack_pop)
     let recv = ctx.stack_pop(1);
     let ret_opnd = asm.ccall(rb_str_bytesize as *const u8, vec![recv]);
 
-    let out_opnd = ctx.stack_push(Type::Fixnum);
+    let out_opnd = ctx.stack_push(asm, Type::Fixnum);
     asm.mov(out_opnd, ret_opnd);
 
     true
@@ -4429,7 +4463,6 @@ fn jit_rb_str_empty_p(
     );
 
     let recv_opnd = ctx.stack_pop(1);
-    let out_opnd = ctx.stack_push(Type::UnknownImm);
 
     asm.comment("get string length");
     let str_len_opnd = Opnd::mem(
@@ -4440,6 +4473,7 @@ fn jit_rb_str_empty_p(
 
     asm.cmp(str_len_opnd, Opnd::UImm(0));
     let string_empty = asm.csel_e(Qtrue.into(), Qfalse.into());
+    let out_opnd = ctx.stack_push(asm, Type::UnknownImm);
     asm.mov(out_opnd, string_empty);
 
     return true;
@@ -4494,21 +4528,23 @@ fn jit_rb_str_concat(
     );
     asm.test(flags_xor, Opnd::UImm(RUBY_ENCODING_MASK as u64));
 
-    // Push once, use the resulting operand in both branches below.
-    let stack_ret = ctx.stack_push(Type::CString);
-
     let enc_mismatch = asm.new_label("enc_mismatch");
     asm.jnz(enc_mismatch);
 
     // If encodings match, call the simple append function and jump to return
+    asm.spill_temps(ctx); // for ccall
     let ret_opnd = asm.ccall(rb_yjit_str_simple_append as *const u8, vec![recv, concat_arg]);
     let ret_label = asm.new_label("func_return");
+    let stack_ret = ctx.stack_push(asm, Type::CString);
     asm.mov(stack_ret, ret_opnd);
+    ctx.stack_pop(1); // forget stack_ret to re-push after ccall
     asm.jmp(ret_label);
 
     // If encodings are different, use a slower encoding-aware concatenate
     asm.write_label(enc_mismatch);
+    asm.spill_temps(ctx); // for ccall
     let ret_opnd = asm.ccall(rb_str_buf_append as *const u8, vec![recv, concat_arg]);
+    let stack_ret = ctx.stack_push(asm, Type::CString);
     asm.mov(stack_ret, ret_opnd);
     // Drop through to return
@@ -4536,7 +4572,7 @@ fn jit_rb_ary_empty_p(
     asm.test(len_opnd, len_opnd);
     let bool_val = asm.csel_z(Qtrue.into(), Qfalse.into());
 
-    let out_opnd = ctx.stack_push(Type::UnknownImm);
+    let out_opnd = ctx.stack_push(asm, Type::UnknownImm);
     asm.store(out_opnd, bool_val);
 
     return true;
@@ -4562,7 +4598,7 @@ fn jit_rb_ary_push(
     let ary_opnd = ctx.stack_pop(1);
     let ret = asm.ccall(rb_ary_push as *const u8, vec![ary_opnd, item_opnd]);
 
-    let ret_opnd = ctx.stack_push(Type::TArray);
+    let ret_opnd = ctx.stack_push(asm, Type::TArray);
     asm.mov(ret_opnd, ret);
     true
 }
@@ -4680,7 +4716,7 @@ fn jit_rb_f_block_given_p(
     );
 
     ctx.stack_pop(1);
-    let out_opnd = ctx.stack_push(Type::UnknownImm);
+    let out_opnd = ctx.stack_push(asm, Type::UnknownImm);
 
     // Return `block_handler != VM_BLOCK_HANDLER_NONE`
     asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into());
@@ -4710,7 +4746,7 @@ fn jit_thread_s_current(
     // thread->self
     let thread_self = Opnd::mem(64, ec_thread_opnd, RUBY_OFFSET_THREAD_SELF);
 
-    let stack_ret = ctx.stack_push(Type::UnknownHeap);
+    let stack_ret = ctx.stack_push(asm, Type::UnknownHeap);
     asm.mov(stack_ret, thread_self);
     true
 }
@@ -5126,6 +5162,7 @@ fn gen_send_cfunc(
         assert_ne!(0, unsafe { rb_IMEMO_TYPE_P(imemo_ci, imemo_callinfo) },
             "we assume all callinfos with kwargs are on the GC heap");
         let sp = asm.lea(ctx.sp_opnd(0));
+        asm.spill_temps(ctx); // for ccall
        let kwargs = asm.ccall(build_kwhash as *const u8, vec![imemo_ci.into(), sp]);
 
         // Replace the stack location at the start of kwargs with the new hash
@@ -5136,6 +5173,9 @@ fn gen_send_cfunc(
     // Copy SP because REG_SP will get overwritten
     let sp = asm.lea(ctx.sp_opnd(0));
 
+    // Arguments must be spilled before popped from ctx
+    asm.spill_temps(ctx);
+
     // Pop the C function arguments from the stack (in the caller)
     ctx.stack_pop((argc + 1).try_into().unwrap());
 
@@ -5176,7 +5216,7 @@ fn gen_send_cfunc(
     record_global_inval_patch(asm, CodegenGlobals::get_outline_full_cfunc_return_pos());
 
     // Push the return value on the Ruby stack
-    let stack_ret = ctx.stack_push(Type::Unknown);
+    let stack_ret = ctx.stack_push(asm, Type::Unknown);
     asm.mov(stack_ret, ret);
 
     // Pop the stack frame (ec->cfp++)
@@ -5280,7 +5320,7 @@ fn move_rest_args_to_stack(array: Opnd, num_args: u32, jit: &mut JITState, ctx:
     let ary_opnd = asm.csel_nz(ary_opnd, heap_ptr_opnd);
 
     for i in 0..num_args {
-        let top = ctx.stack_push(Type::Unknown);
+        let top = ctx.stack_push(asm, Type::Unknown);
         asm.mov(top, Opnd::mem(64, ary_opnd, i as i32 * SIZEOF_VALUE_I32));
     }
 }
@@ -5370,7 +5410,7 @@ fn push_splat_args(required_args: u32, jit: &mut JITState, ctx: &mut Context, as
     let ary_opnd = asm.csel_nz(ary_opnd, heap_ptr_opnd);
 
     for i in 0..required_args {
-        let top = ctx.stack_push(Type::Unknown);
+        let top = ctx.stack_push(asm, Type::Unknown);
         asm.mov(top, Opnd::mem(64, ary_opnd, i as i32 * SIZEOF_VALUE_I32));
     }
@@ -5764,7 +5804,7 @@ fn gen_send_iseq(
         let val = asm.ccall(unsafe { (*builtin_info).func_ptr as *const u8 }, args);
 
         // Push the return value
-        let stack_ret = ctx.stack_push(Type::Unknown);
+        let stack_ret = ctx.stack_push(asm, Type::Unknown);
         asm.mov(stack_ret, val);
 
         // Note: assuming that the leaf builtin doesn't change local variables here.
@@ -5809,7 +5849,7 @@ fn gen_send_iseq(
 
         for _ in 0..remaining_opt {
             // We need to push nil for the optional arguments
-            let stack_ret = ctx.stack_push(Type::Unknown);
+            let stack_ret = ctx.stack_push(asm, Type::Unknown);
             asm.mov(stack_ret, Qnil.into());
         }
     }
@@ -5851,7 +5891,7 @@ fn gen_send_iseq(
             );
 
             ctx.stack_pop(diff as usize);
-            let stack_ret = ctx.stack_push(Type::TArray);
+            let stack_ret = ctx.stack_push(asm, Type::TArray);
             asm.mov(stack_ret, array);
             // We now should have the required arguments
             // and an array of all the rest arguments
@@ -5864,8 +5904,9 @@ fn gen_send_iseq(
             move_rest_args_to_stack(array, diff, jit, ctx, asm, ocb);
 
             // We will now slice the array to give us a new array of the correct size
+            asm.spill_temps(ctx); // for ccall
             let ret = asm.ccall(rb_yjit_rb_ary_subseq_length as *const u8, vec![array, Opnd::UImm(diff as u64)]);
-            let stack_ret = ctx.stack_push(Type::TArray);
+            let stack_ret = ctx.stack_push(asm, Type::TArray);
             asm.mov(stack_ret, ret);
 
             // We now should have the required arguments
@@ -5874,7 +5915,7 @@ fn gen_send_iseq(
         } else {
             // The arguments are equal so we can just push to the stack
             assert!(non_rest_arg_count == required_num);
-            let stack_ret = ctx.stack_push(Type::TArray);
+            let stack_ret = ctx.stack_push(asm, Type::TArray);
             asm.mov(stack_ret, array);
         }
     } else {
@@ -5900,7 +5941,7 @@ fn gen_send_iseq(
                 ]
             );
             ctx.stack_pop(n.as_usize());
-            let stack_ret = ctx.stack_push(Type::CArray);
+            let stack_ret = ctx.stack_push(asm, Type::CArray);
             asm.mov(stack_ret, new_ary);
         }
     }
@@ -5967,7 +6008,7 @@ fn gen_send_iseq(
             // filling in (which is done in the next loop). Also increments
             // argc so that the callee's SP is recorded correctly.
             argc += 1;
-            let default_arg = ctx.stack_push(Type::Unknown);
+            let default_arg = ctx.stack_push(asm, Type::Unknown);
 
             // callee_idx - keyword->required_num is used in a couple of places below.
             let req_num: isize = unsafe { (*keyword).required_num }.try_into().unwrap();
@@ -6065,13 +6106,14 @@ fn gen_send_iseq(
         asm.comment("push splat arg0 onto the stack");
         ctx.stack_pop(argc.try_into().unwrap());
         for i in 0..lead_num {
-            let stack_opnd = ctx.stack_push(Type::Unknown);
+            let stack_opnd = ctx.stack_push(asm, Type::Unknown);
             asm.mov(stack_opnd, Opnd::mem(64, array_opnd, SIZEOF_VALUE_I32 * i));
         }
         argc = lead_num;
     }
-
+    // Spill stack temps to let the callee use them
+    asm.spill_temps(ctx);
 
     // Points to the receiver operand on the stack unless a captured environment is used
     let recv = match captured_opnd {
@@ -6157,7 +6199,11 @@ fn gen_send_iseq(
     // the return value in case of JIT-to-JIT return.
     let mut return_ctx = ctx.clone();
     return_ctx.stack_pop(sp_offset.try_into().unwrap());
-    return_ctx.stack_push(Type::Unknown);
+    let return_val = return_ctx.stack_push(asm, Type::Unknown);
+    if return_val.stack_idx() < MAX_REG_TEMPS {
+        // The callee writes a return value on stack. Update reg_temps accordingly.
+        return_ctx.dealloc_temp_reg(return_val.stack_idx());
+    }
     return_ctx.set_sp_offset(1);
     return_ctx.reset_chain_depth();
@@ -6242,7 +6288,7 @@ fn gen_struct_aref(
         Opnd::mem(64, rstruct_ptr, SIZEOF_VALUE_I32 * off)
     };
 
-    let ret = ctx.stack_push(Type::Unknown);
+    let ret = ctx.stack_push(asm, Type::Unknown);
     asm.mov(ret, val);
 
     jump_to_next_insn(jit, ctx, asm, ocb);
@@ -6279,12 +6325,13 @@ fn gen_struct_aset(
 
     asm.comment("struct aset");
 
+    asm.spill_temps(ctx); // for ccall (must be done before stack_pop)
     let val = ctx.stack_pop(1);
     let recv = ctx.stack_pop(1);
 
     let val = asm.ccall(RSTRUCT_SET as *const u8, vec![recv, (off as i64).into(), val]);
 
-    let ret = ctx.stack_push(Type::Unknown);
+    let ret = ctx.stack_push(asm, Type::Unknown);
     asm.mov(ret, val);
 
     jump_to_next_insn(jit, ctx, asm, ocb);
@@ -6605,6 +6652,7 @@ fn gen_send_general(
                         // values for the register allocator.
                         let name_opnd = asm.load(name_opnd);
 
+                        asm.spill_temps(ctx); // for ccall
                         let symbol_id_opnd = asm.ccall(rb_get_symbol_id as *const u8, vec![name_opnd]);
 
                         asm.comment("chain_guard_send");
@@ -6675,7 +6723,7 @@ fn gen_send_general(
 
                         ctx.stack_pop(argc as usize + 1);
 
-                        let stack_ret = ctx.stack_push(Type::Unknown);
+                        let stack_ret = ctx.stack_push(asm, Type::Unknown);
                         asm.mov(stack_ret, ret);
                         return KeepCompiling;
@@ -6921,7 +6969,7 @@ fn gen_invokeblock(
         );
 
         ctx.stack_pop(argc.try_into().unwrap());
-        let stack_ret = ctx.stack_push(Type::Unknown);
+        let stack_ret = ctx.stack_push(asm, Type::Unknown);
         asm.mov(stack_ret, ret);
 
         // cfunc calls may corrupt types
@@ -7132,7 +7180,7 @@ fn gen_getglobal(
         vec![ gid.into() ]
     );
 
-    let top = ctx.stack_push(Type::Unknown);
+    let top = ctx.stack_push(asm, Type::Unknown);
     asm.mov(top, val_opnd);
 
     KeepCompiling
@@ -7176,7 +7224,7 @@ fn gen_anytostring(
     let val = asm.ccall(rb_obj_as_string_result as *const u8, vec![str, val]);
 
     // Push the return value
-    let stack_ret = ctx.stack_push(Type::TString);
+    let stack_ret = ctx.stack_push(asm, Type::TString);
     asm.mov(stack_ret, val);
 
     KeepCompiling
@@ -7231,7 +7279,7 @@ fn gen_intern(
     let sym = asm.ccall(rb_str_intern as *const u8, vec![str]);
 
     // Push the return value
-    let stack_ret = ctx.stack_push(Type::Unknown);
+    let stack_ret = ctx.stack_push(asm, Type::Unknown);
     asm.mov(stack_ret, sym);
 
     KeepCompiling
@@ -7280,10 +7328,11 @@ fn gen_toregexp(
     asm.cpop_into(ary);
 
     // The value we want to push on the stack is in RAX right now
-    let stack_ret = ctx.stack_push(Type::UnknownHeap);
+    let stack_ret = ctx.stack_push(asm, Type::UnknownHeap);
     asm.mov(stack_ret, val);
 
     // Clear the temp array.
+    asm.spill_temps(ctx); // for ccall
     asm.ccall(rb_ary_clear as *const u8, vec![ary]);
 
     KeepCompiling
@@ -7335,7 +7384,7 @@ fn gen_getspecial(
             _ => panic!("invalid back-ref"),
         };
 
-        let stack_ret = ctx.stack_push(Type::Unknown);
+        let stack_ret = ctx.stack_push(asm, Type::Unknown);
         asm.mov(stack_ret, val);
 
         KeepCompiling
@@ -7359,7 +7408,7 @@ fn gen_getspecial(
             ]
         );
 
-        let stack_ret = ctx.stack_push(Type::Unknown);
+        let stack_ret = ctx.stack_push(asm, Type::Unknown);
         asm.mov(stack_ret, val);
 
         KeepCompiling
@@ -7385,7 +7434,7 @@ fn gen_getclassvariable(
         ],
     );
 
-    let top = ctx.stack_push(Type::Unknown);
+    let top = ctx.stack_push(asm, Type::Unknown);
     asm.mov(top, val_opnd);
 
     KeepCompiling
@@ -7443,7 +7492,7 @@ fn gen_getconstant(
         ],
     );
 
-    let top = ctx.stack_push(Type::Unknown);
+    let top = ctx.stack_push(asm, Type::Unknown);
     asm.mov(top, val_opnd);
 
     KeepCompiling
@@ -7476,6 +7525,7 @@ fn gen_opt_getconstant_path(
         let inline_cache = asm.load(Opnd::const_ptr(ic as *const u8));
 
         // Call function to verify the cache. It doesn't allocate or call methods.
+        asm.spill_temps(ctx); // for ccall
         let ret_val = asm.ccall(
             rb_vm_ic_hit_p as *const u8,
             vec![inline_cache, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)]
@@ -7501,7 +7551,7 @@ fn gen_opt_getconstant_path(
         ));
 
         // Push ic->entry->value
-        let stack_top = ctx.stack_push(Type::Unknown);
+        let stack_top = ctx.stack_push(asm, Type::Unknown);
         asm.store(stack_top, ic_entry_val);
     } else {
         // Optimize for single ractor mode.
@@ -7603,7 +7653,7 @@ fn gen_getblockparamproxy(
         // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr.
         assert!(!unsafe { rb_block_param_proxy }.special_const_p());
 
-        let top = ctx.stack_push(Type::BlockParamProxy);
+        let top = ctx.stack_push(asm, Type::BlockParamProxy);
         asm.mov(top, Opnd::const_ptr(unsafe { rb_block_param_proxy }.as_ptr()));
     }
@@ -7687,7 +7737,7 @@ fn gen_getblockparam(
     asm.write_label(frame_flag_modified);
 
     // Push the proc on the stack
-    let stack_ret = ctx.stack_push(Type::Unknown);
+    let stack_ret = ctx.stack_push(asm, Type::Unknown);
     let ep_opnd = gen_get_ep(asm, level);
     asm.mov(stack_ret, Opnd::mem(64, ep_opnd, offs));
@@ -7724,7 +7774,7 @@ fn gen_invokebuiltin(
     // Push the return value
     ctx.stack_pop(bf_argc);
-    let stack_ret = ctx.stack_push(Type::Unknown);
+    let stack_ret = ctx.stack_push(asm, Type::Unknown);
     asm.mov(stack_ret, val);
 
     KeepCompiling
@@ -7769,7 +7819,7 @@ fn gen_opt_invokebuiltin_delegate(
     let val = asm.ccall(unsafe { (*bf).func_ptr } as *const u8, args);
 
     // Push the return value
-    let stack_ret = ctx.stack_push(Type::Unknown);
+    let stack_ret = ctx.stack_push(asm, Type::Unknown);
     asm.mov(stack_ret, val);
 
     KeepCompiling
@@ -8252,7 +8302,7 @@ mod tests {
     fn test_gen_pop() {
         let (mut jit, _, mut asm, _cb, mut ocb) = setup_codegen();
         let mut context = Context::default();
-        context.stack_push(Type::Fixnum);
+        context.stack_push(&mut asm, Type::Fixnum);
         let status = gen_pop(&mut jit, &mut context, &mut asm, &mut ocb);
 
         assert_eq!(status, KeepCompiling);
@@ -8262,7 +8312,7 @@ mod tests {
     #[test]
     fn test_gen_dup() {
         let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen();
-        context.stack_push(Type::Fixnum);
+        context.stack_push(&mut asm, Type::Fixnum);
         let status = gen_dup(&mut jit, &mut context, &mut asm, &mut ocb);
 
         assert_eq!(status, KeepCompiling);
@@ -8278,8 +8328,8 @@ mod tests {
     #[test]
     fn test_gen_dupn() {
         let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen();
-        context.stack_push(Type::Fixnum);
-        context.stack_push(Type::Flonum);
+        context.stack_push(&mut asm, Type::Fixnum);
+        context.stack_push(&mut asm, Type::Flonum);
 
         let mut value_array: [u64; 2] = [0, 2]; // We only compile for n == 2
         let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
@@ -8302,8 +8352,8 @@ mod tests {
     #[test]
     fn test_gen_swap() {
         let (mut jit, mut context, mut asm, _cb, mut ocb) = setup_codegen();
-        context.stack_push(Type::Fixnum);
-        context.stack_push(Type::Flonum);
+        context.stack_push(&mut asm, Type::Fixnum);
+        context.stack_push(&mut asm, Type::Flonum);
 
         let status = gen_swap(&mut jit, &mut context, &mut asm, &mut ocb);
@@ -8393,9 +8443,9 @@ mod tests {
     #[test]
     fn test_gen_setn() {
         let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen();
-        context.stack_push(Type::Fixnum);
-        context.stack_push(Type::Flonum);
-        context.stack_push(Type::CString);
+        context.stack_push(&mut asm, Type::Fixnum);
+        context.stack_push(&mut asm, Type::Flonum);
+        context.stack_push(&mut asm, Type::CString);
 
         let mut value_array: [u64; 2] = [0, 2];
         let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
@@ -8416,8 +8466,8 @@ mod tests {
     #[test]
     fn test_gen_topn() {
         let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen();
-        context.stack_push(Type::Flonum);
-        context.stack_push(Type::CString);
+        context.stack_push(&mut asm, Type::Flonum);
+        context.stack_push(&mut asm, Type::CString);
 
         let mut value_array: [u64; 2] = [0, 1];
         let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
@@ -8438,9 +8488,9 @@ mod tests {
     #[test]
     fn test_gen_adjuststack() {
         let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen();
-        context.stack_push(Type::Flonum);
-        context.stack_push(Type::CString);
-        context.stack_push(Type::Fixnum);
+        context.stack_push(&mut asm, Type::Flonum);
+        context.stack_push(&mut asm, Type::CString);
+        context.stack_push(&mut asm, Type::Fixnum);
 
         let mut value_array: [u64; 3] = [0, 2, 0];
         let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
@@ -8460,7 +8510,7 @@ mod tests {
     fn test_gen_leave() {
         let (mut jit, mut context, mut asm, _cb, mut ocb) = setup_codegen();
         // Push return value
-        context.stack_push(Type::Fixnum);
+        context.stack_push(&mut asm, Type::Fixnum);
         gen_leave(&mut jit, &mut context, &mut asm, &mut ocb);
     }
 }
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
index 385aef3783..05e9f85b18 100644
--- a/yjit/src/core.rs
+++ b/yjit/src/core.rs
@@ -371,6 +371,45 @@ impl From<Opnd> for YARVOpnd {
     }
 }
 
+/// Maximum index of stack temps that could be in a register
+pub const MAX_REG_TEMPS: u8 = 8;
+
+/// Bitmap of which stack temps are in a register
+#[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)]
+pub struct RegTemps(u8);
+
+impl RegTemps {
+    pub fn get(&self, index: u8) -> bool {
+        assert!(index < MAX_REG_TEMPS);
+        (self.0 >> index) & 1 == 1
+    }
+
+    pub fn set(&mut self, index: u8, value: bool) {
+        assert!(index < MAX_REG_TEMPS);
+        if value {
+            self.0 = self.0 | (1 << index);
+        } else {
+            self.0 = self.0 & !(1 << index);
+        }
+    }
+
+    pub fn as_u8(&self) -> u8 {
+        self.0
+    }
+
+    /// Return true if there's a register that conflicts with a given stack_idx.
+    pub fn conflicts_with(&self, stack_idx: u8) -> bool {
+        let mut other_idx = stack_idx as isize - get_option!(num_temp_regs) as isize;
+        while other_idx >= 0 {
+            if self.get(other_idx as u8) {
+                return true;
+            }
+            other_idx -= get_option!(num_temp_regs) as isize;
+        }
+        false
+    }
+}
+
 /// Code generation context
 /// Contains information we can use to specialize/optimize code
 /// There are a lot of context objects so we try to keep the size small.
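[Editorial note, not part of the patch] A standalone sketch of how the `RegTemps` bitmap added above behaves; the types mirror the patch, and `NUM_TEMP_REGS` stands in for `get_option!(num_temp_regs)`:

    const NUM_TEMP_REGS: usize = 5; // stands in for get_option!(num_temp_regs)
    const MAX_REG_TEMPS: u8 = 8;

    #[derive(Copy, Clone, Default, Eq, PartialEq, Debug)]
    struct RegTemps(u8);

    impl RegTemps {
        fn get(&self, index: u8) -> bool {
            assert!(index < MAX_REG_TEMPS);
            (self.0 >> index) & 1 == 1
        }

        fn set(&mut self, index: u8, value: bool) {
            assert!(index < MAX_REG_TEMPS);
            if value { self.0 |= 1 << index } else { self.0 &= !(1 << index) }
        }

        /// A new slot conflicts with any lower live slot in the same residue
        /// class modulo NUM_TEMP_REGS, because those slots share a register.
        /// Checking downward is enough: allocation proceeds bottom-up.
        fn conflicts_with(&self, stack_idx: u8) -> bool {
            let mut other_idx = stack_idx as isize - NUM_TEMP_REGS as isize;
            while other_idx >= 0 {
                if self.get(other_idx as u8) { return true; }
                other_idx -= NUM_TEMP_REGS as isize;
            }
            false
        }
    }

    fn main() {
        let mut temps = RegTemps::default();
        temps.set(0, true);                // stack slot 0 is held in temp register 0
        assert!(temps.conflicts_with(5));  // slot 5 would also map to register 0
        assert!(!temps.conflicts_with(6)); // slot 6 maps to register 1, still free
        temps.set(0, false);               // once slot 0 is spilled, slot 5 is usable
        assert!(!temps.conflicts_with(5));
    }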
@@ -383,6 +422,9 @@ pub struct Context {
     // This represents how far the JIT's SP is from the "real" SP
     sp_offset: i8,
 
+    /// Bitmap of which stack temps are in a register
+    reg_temps: RegTemps,
+
     // Depth of this block in the sidechain (eg: inline-cache chain)
     chain_depth: u8,
@@ -698,7 +740,7 @@ impl PendingBranch {
         // The branch struct is uninitialized right now but has a stable address.
         // We make sure the stub runs after the branch is initialized.
         let branch_struct_addr = self.uninit_branch.as_ptr() as usize;
-        let stub_addr = gen_branch_stub(ocb, branch_struct_addr, target_idx);
+        let stub_addr = gen_branch_stub(ctx, ocb, branch_struct_addr, target_idx);
 
         if let Some(stub_addr) = stub_addr {
             // Fill the branch target with a stub
@@ -1333,6 +1375,7 @@ pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
         let mut generic_ctx = Context::default();
         generic_ctx.stack_size = ctx.stack_size;
         generic_ctx.sp_offset = ctx.sp_offset;
+        generic_ctx.reg_temps = ctx.reg_temps;
 
         debug_assert_ne!(
             TypeDiff::Incompatible,
@@ -1534,6 +1577,14 @@ impl Context {
         self.sp_offset = offset;
     }
 
+    pub fn get_reg_temps(&self) -> RegTemps {
+        self.reg_temps
+    }
+
+    pub fn set_reg_temps(&mut self, reg_temps: RegTemps) {
+        self.reg_temps = reg_temps;
+    }
+
     pub fn get_chain_depth(&self) -> u8 {
         self.chain_depth
     }
@@ -1553,12 +1604,19 @@ impl Context {
         return Opnd::mem(64, SP, offset);
     }
 
+    /// Stop using a register for a given stack temp.
+    pub fn dealloc_temp_reg(&mut self, stack_idx: u8) {
+        let mut reg_temps = self.get_reg_temps();
+        reg_temps.set(stack_idx, false);
+        self.set_reg_temps(reg_temps);
+    }
+
     /// Push one new value on the temp stack with an explicit mapping
     /// Return a pointer to the new stack top
-    pub fn stack_push_mapping(&mut self, (mapping, temp_type): (TempMapping, Type)) -> Opnd {
+    pub fn stack_push_mapping(&mut self, asm: &mut Assembler, (mapping, temp_type): (TempMapping, Type)) -> Opnd {
         // If type propagation is disabled, store no types
         if get_option!(no_type_prop) {
-            return self.stack_push_mapping((mapping, Type::Unknown));
+            return self.stack_push_mapping(asm, (mapping, Type::Unknown));
         }
 
         let stack_size: usize = self.stack_size.into();
@@ -1573,6 +1631,12 @@ impl Context {
             }
         }
 
+        // Allocate a register to the stack operand
+        assert_eq!(self.reg_temps, asm.get_reg_temps());
+        if self.stack_size < MAX_REG_TEMPS {
+            asm.alloc_temp_reg(self, self.stack_size);
+        }
+
         self.stack_size += 1;
         self.sp_offset += 1;
@@ -1581,22 +1645,22 @@ impl Context {
 
     /// Push one new value on the temp stack
     /// Return a pointer to the new stack top
-    pub fn stack_push(&mut self, val_type: Type) -> Opnd {
-        return self.stack_push_mapping((MapToStack, val_type));
+    pub fn stack_push(&mut self, asm: &mut Assembler, val_type: Type) -> Opnd {
+        return self.stack_push_mapping(asm, (MapToStack, val_type));
     }
 
     /// Push the self value on the stack
-    pub fn stack_push_self(&mut self) -> Opnd {
-        return self.stack_push_mapping((MapToSelf, Type::Unknown));
+    pub fn stack_push_self(&mut self, asm: &mut Assembler) -> Opnd {
+        return self.stack_push_mapping(asm, (MapToSelf, Type::Unknown));
     }
 
     /// Push a local variable on the stack
-    pub fn stack_push_local(&mut self, local_idx: usize) -> Opnd {
+    pub fn stack_push_local(&mut self, asm: &mut Assembler, local_idx: usize) -> Opnd {
         if local_idx >= MAX_LOCAL_TYPES {
-            return self.stack_push(Type::Unknown);
+            return self.stack_push(asm, Type::Unknown);
         }
-        return self.stack_push_mapping((MapToLocal((local_idx as u8).into()), Type::Unknown));
+        return self.stack_push_mapping(asm, (MapToLocal((local_idx as u8).into()), Type::Unknown));
     }
 
     // Pop N values off the stack
@@ -1639,7 +1703,7 @@ impl Context {
 
     /// Get an operand pointing to a slot on the temp stack
     pub fn stack_opnd(&self, idx: i32) -> Opnd {
-        Opnd::Stack { idx, sp_offset: self.sp_offset, num_bits: 64 }
+        Opnd::Stack { idx, stack_size: self.stack_size, sp_offset: self.sp_offset, num_bits: 64 }
     }
 
     /// Get the type of an instruction operand
@@ -1838,6 +1902,10 @@ impl Context {
             return TypeDiff::Incompatible;
         }
 
+        if dst.reg_temps != src.reg_temps {
+            return TypeDiff::Incompatible;
+        }
+
         // Difference sum
         let mut diff = 0;
@@ -2466,6 +2534,7 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
 /// Generate a "stub", a piece of code that calls the compiler back when run.
 /// A piece of code that redeems for more code; a thunk for code.
 fn gen_branch_stub(
+    ctx: &Context,
     ocb: &mut OutlinedCb,
     branch_struct_address: usize,
     target_idx: u32,
@@ -2476,8 +2545,18 @@ fn gen_branch_stub(
     let stub_addr = ocb.get_write_ptr();
 
     let mut asm = Assembler::new();
+    asm.set_reg_temps(ctx.reg_temps);
     asm.comment("branch stub hit");
 
+    // Save caller-saved registers before C_ARG_OPNDS get clobbered.
+    // Spill all registers for consistency with the trampoline.
+    for &reg in caller_saved_temp_regs().iter() {
+        asm.cpush(reg);
+    }
+
+    // Spill temps to the VM stack as well for jit.peek_at_stack()
+    asm.spill_temps(&mut ctx.clone());
+
     // Set up the arguments unique to this stub for:
     //
     //     branch_stub_hit(branch_ptr, target_idx, ec)
@@ -2522,6 +2601,11 @@ pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> CodePtr {
         ]
     );
 
+    // Restore caller-saved registers for stack temps
+    for &reg in caller_saved_temp_regs().iter().rev() {
+        asm.cpop_into(reg);
+    }
+
     // Jump to the address returned by the branch_stub_hit() call
     asm.jmp_opnd(jump_addr);
 
@@ -2530,6 +2614,16 @@ pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> CodePtr {
     code_ptr
 }
 
+/// Return registers to be pushed and popped on branch_stub_hit.
+/// The return value may include an extra register for x86 alignment.
+fn caller_saved_temp_regs() -> Vec<Opnd> {
+    let mut regs = Assembler::get_temp_regs();
+    if regs.len() % 2 == 1 {
+        regs.push(*regs.last().unwrap()); // x86 alignment
+    }
+    regs.iter().map(|&reg| Opnd::Reg(reg)).collect()
+}
+
 impl Assembler
 {
     /// Mark the start position of a patchable entry point in the machine code
@@ -2888,7 +2982,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
         }
 
         // Create a stub for this branch target
-        let stub_addr = gen_branch_stub(ocb, branchref.as_ptr() as usize, target_idx as u32);
+        let stub_addr = gen_branch_stub(&block.ctx, ocb, branchref.as_ptr() as usize, target_idx as u32);
 
         // In case we were unable to generate a stub (e.g. OOM). Use the block's
         // exit instead of a stub for the block. It's important that we
@@ -3031,6 +3125,33 @@ mod tests {
         assert_eq!(Type::Fixnum.diff(Type::UnknownHeap), TypeDiff::Incompatible);
     }
 
+    #[test]
+    fn reg_temps() {
+        let mut reg_temps = RegTemps(0);
+
+        // 0 means no temp is in a register
+        for stack_idx in 0..MAX_REG_TEMPS {
+            assert_eq!(reg_temps.get(stack_idx), false);
+        }
+
+        // Set 0, 2, 7 (and set then clear 3)
+        reg_temps.set(0, true);
+        reg_temps.set(2, true);
+        reg_temps.set(3, true);
+        reg_temps.set(3, false);
+        reg_temps.set(7, true);
+
+        // Get 0..8
+        assert_eq!(reg_temps.get(0), true);
+        assert_eq!(reg_temps.get(1), false);
+        assert_eq!(reg_temps.get(2), true);
+        assert_eq!(reg_temps.get(3), false);
+        assert_eq!(reg_temps.get(4), false);
+        assert_eq!(reg_temps.get(5), false);
+        assert_eq!(reg_temps.get(6), false);
+        assert_eq!(reg_temps.get(7), true);
+    }
+
     #[test]
     fn context() {
         // Valid src => dst
@@ -3038,7 +3159,7 @@ mod tests {
 
         // Try pushing an operand and getting its type
         let mut ctx = Context::default();
-        ctx.stack_push(Type::Fixnum);
+        ctx.stack_push(&mut Assembler::new(), Type::Fixnum);
         let top_type = ctx.get_opnd_type(StackOpnd(0));
         assert!(top_type == Type::Fixnum);
diff --git a/yjit/src/options.rs b/yjit/src/options.rs
index dfae06d1e7..759ed16205 100644
--- a/yjit/src/options.rs
+++ b/yjit/src/options.rs
@@ -22,6 +22,9 @@ pub struct Options {
     // 1 means always create generic versions
     pub max_versions: usize,
 
+    // The number of registers allocated for stack temps
+    pub num_temp_regs: usize,
+
     // Capture and print out stats
     pub gen_stats: bool,
 
@@ -52,6 +55,7 @@ pub static mut OPTIONS: Options = Options {
     greedy_versioning: false,
     no_type_prop: false,
     max_versions: 4,
+    num_temp_regs: 0,
     gen_stats: false,
     gen_trace_exits: false,
     pause: false,
@@ -141,6 +145,13 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
             OPTIONS.pause = true;
         },
 
+        ("temp-regs", _) => match opt_val.parse() {
+            Ok(n) => unsafe { OPTIONS.num_temp_regs = n },
+            Err(_) => {
+                return None;
+            }
+        },
+
        ("dump-disasm", _) => match opt_val.to_string().as_str() {
             "" => unsafe { OPTIONS.dump_disasm = Some(DumpDisasm::Stdout) },
             directory => {
diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs
index 79d1d66c44..55bb82b5f5 100644
--- a/yjit/src/stats.rs
+++ b/yjit/src/stats.rs
@@ -352,6 +352,11 @@ make_counters! {
 
     iseq_stack_too_large,
     iseq_too_long,
+
+    temp_reg_opnd,
+    temp_mem_opnd,
+    temp_spill,
+    temp_reload,
 }
 
 //===========================================================================
diff --git a/yjit/yjit.mk b/yjit/yjit.mk
index 634d5d56d9..f27ebf504e 100644
--- a/yjit/yjit.mk
+++ b/yjit/yjit.mk
@@ -92,8 +92,7 @@ yjit-smoke-test:
 ifneq ($(strip $(CARGO)),)
 	$(CARGO) test --all-features -q --manifest-path='$(top_srcdir)/yjit/Cargo.toml'
 endif
-	$(MAKE) btest RUN_OPTS='--yjit-call-threshold=1' BTESTS=-j
-	$(MAKE) test-all TESTS='$(top_srcdir)/test/ruby/test_yjit.rb'
+	$(MAKE) btest RUN_OPTS='--yjit-call-threshold=1 --yjit-temp-regs=5' BTESTS=-j
 
 # Generate Rust bindings. See source for details.
 # Needs `./configure --enable-yjit=dev` and Clang.
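[Editorial note, not part of the patch] With this change applied, the feature stays off by default (`num_temp_regs: 0` in options.rs), so behavior only changes when a process opts in, e.g. `ruby --yjit --yjit-temp-regs=5 --yjit-stats script.rb`. The new `temp_reg_opnd`, `temp_mem_opnd`, and `temp_spill` counters printed by `--yjit-stats` then report how often a stack temp was lowered to a register operand, lowered to a memory operand, or spilled back to the VM stack. Note that `temp_reload` is declared in stats.rs but not yet incremented anywhere in this diff, presumably reserved for follow-up work.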