Fix incorrect line numbers in GC hook

If the previous instruction is not a leaf instruction, then the PC was
incremented before the instruction was ran (meaning the currently
executing instruction is actually the previous instruction), so we
should not increment the PC otherwise we will calculate the source
line for the next instruction.

This bug can be reproduced in the following script:

```
require "objspace"

ObjectSpace.trace_object_allocations_start
a =

  1.0 / 0.0
p [ObjectSpace.allocation_sourceline(a), ObjectSpace.allocation_sourcefile(a)]
```

Which outputs: [4, "test.rb"]

This is incorrect because the object was allocated on line 10 and not
line 4. The behaviour is correct when we use a leaf instruction (e.g.
if we replaced `1.0 / 0.0` with `"hello"`), then the output is:
[10, "test.rb"].

[Bug #19456]
This commit is contained in:
Peter Zhu 2023-02-24 09:20:14 -05:00
Родитель d2631c427e
Коммит 3e09822407
8 изменённых файлов: 68 добавлений и 10 удалений

Просмотреть файл

@ -6791,6 +6791,7 @@ gc.$(OBJEXT): $(top_srcdir)/internal/basic_operators.h
gc.$(OBJEXT): $(top_srcdir)/internal/bignum.h
gc.$(OBJEXT): $(top_srcdir)/internal/bits.h
gc.$(OBJEXT): $(top_srcdir)/internal/class.h
gc.$(OBJEXT): $(top_srcdir)/internal/compile.h
gc.$(OBJEXT): $(top_srcdir)/internal/compilers.h
gc.$(OBJEXT): $(top_srcdir)/internal/complex.h
gc.$(OBJEXT): $(top_srcdir)/internal/cont.h

Просмотреть файл

@ -10264,6 +10264,18 @@ dump_disasm_list_with_cursor(const LINK_ELEMENT *link, const LINK_ELEMENT *curr,
fflush(stdout);
}
bool
rb_insns_leaf_p(int i)
{
return insn_leaf_p(i);
}
int
rb_insn_len(VALUE insn)
{
return insn_len(insn);
}
const char *
rb_insns_name(int i)
{

21
gc.c
Просмотреть файл

@ -100,6 +100,7 @@
#include "id_table.h"
#include "internal.h"
#include "internal/class.h"
#include "internal/compile.h"
#include "internal/complex.h"
#include "internal/cont.h"
#include "internal/error.h"
@ -2485,8 +2486,24 @@ gc_event_hook_body(rb_execution_context_t *ec, rb_objspace_t *objspace, const rb
{
const VALUE *pc = ec->cfp->pc;
if (pc && VM_FRAME_RUBYFRAME_P(ec->cfp)) {
/* increment PC because source line is calculated with PC-1 */
ec->cfp->pc++;
int prev_opcode = rb_vm_insn_addr2opcode((void *)*ec->cfp->iseq->body->iseq_encoded);
for (const VALUE *insn = ec->cfp->iseq->body->iseq_encoded; insn < pc; insn += rb_insn_len(prev_opcode)) {
prev_opcode = rb_vm_insn_addr2opcode((void *)*insn);
}
/* If the previous instruction is a leaf instruction, then the PC is
* the currently executing instruction. We should increment the PC
* because the source line is calculated with PC-1 in calc_pos.
*
* If the previous instruction is not a leaf instruction, then the PC
* was incremented before the instruction was ran (meaning the
* currently executing instruction is actually the previous
* instruction), so we should not increment the PC otherwise we will
* calculate the source line for the next instruction.
*/
if (rb_insns_leaf_p(prev_opcode)) {
ec->cfp->pc++;
}
}
EXEC_EVENT_HOOK(ec, event, ec->cfp->self, 0, 0, 0, data);
ec->cfp->pc = pc;

Просмотреть файл

@ -17,6 +17,8 @@ struct rb_iseq_struct; /* in vm_core.h */
/* compile.c */
int rb_dvar_defined(ID, const struct rb_iseq_struct *);
int rb_local_defined(ID, const struct rb_iseq_struct *);
bool rb_insns_leaf_p(int i);
int rb_insn_len(VALUE insn);
const char *rb_insns_name(int i);
VALUE rb_insns_name_array(void);
int rb_iseq_cdhash_cmp(VALUE val, VALUE lit);

Просмотреть файл

@ -216,6 +216,14 @@ class TestObjSpace < Test::Unit::TestCase
assert_equal(c3, ObjectSpace.allocation_generation(o3))
assert_equal(self.class.name, ObjectSpace.allocation_class_path(o3))
assert_equal(__method__, ObjectSpace.allocation_method_id(o3))
# [Bug #19456]
o4 =
# This line intentionally left blank
# This line intentionally left blank
1.0 / 0.0; line4 = __LINE__; c4 = GC.count
assert_equal(__FILE__, ObjectSpace.allocation_sourcefile(o4))
assert_equal(line4, ObjectSpace.allocation_sourceline(o4))
}
end

Просмотреть файл

@ -10,6 +10,31 @@
#include "iseq.h"
extern const bool rb_vm_insn_leaf_p[];
#ifdef RUBY_VM_INSNS_INFO
const bool rb_vm_insn_leaf_p[] = {
% RubyVM::Instructions.each_slice(20) do |insns|
<%= insns.map do |insn|
if insn.is_a?(RubyVM::BareInstructions)
insn.always_leaf? ? '1' : '0'
else
'0'
end
end.join(', ')
%>,
% end
};
#endif
CONSTFUNC(MAYBE_UNUSED(static bool insn_leaf_p(VALUE insn)));
bool
insn_leaf_p(VALUE insn)
{
return rb_vm_insn_leaf_p[insn];
}
// This is used to tell MJIT that this insn would be leaf if CHECK_INTS didn't exist.
// It should be used only when RUBY_VM_CHECK_INTS is directly written in insns.def.
static bool leafness_of_check_ints = false;

7
yjit.c
Просмотреть файл

@ -480,13 +480,6 @@ rb_insn_name(VALUE insn)
return insn_name(insn);
}
// Query the instruction length in bytes for YARV opcode insn
int
rb_insn_len(VALUE insn)
{
return insn_len(insn);
}
unsigned int
rb_vm_ci_argc(const struct rb_callinfo *ci)
{

Просмотреть файл

@ -1181,6 +1181,7 @@ extern "C" {
key: st_data_t,
pval: *mut st_data_t,
) -> ::std::os::raw::c_int;
pub fn rb_insn_len(insn: VALUE) -> ::std::os::raw::c_int;
pub fn rb_vm_insn_decode(encoded: VALUE) -> ::std::os::raw::c_int;
pub fn rb_vm_insn_addr2opcode(addr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int;
pub fn rb_iseq_line_no(iseq: *const rb_iseq_t, pos: usize) -> ::std::os::raw::c_uint;
@ -1221,7 +1222,6 @@ extern "C" {
pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char;
pub fn rb_yjit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t;
pub fn rb_insn_name(insn: VALUE) -> *const ::std::os::raw::c_char;
pub fn rb_insn_len(insn: VALUE) -> ::std::os::raw::c_int;
pub fn rb_vm_ci_argc(ci: *const rb_callinfo) -> ::std::os::raw::c_uint;
pub fn rb_vm_ci_mid(ci: *const rb_callinfo) -> ID;
pub fn rb_vm_ci_flag(ci: *const rb_callinfo) -> ::std::os::raw::c_uint;