2012-06-02 19:23:37 +04:00
|
|
|
/**********************************************************************
|
|
|
|
|
2012-06-09 02:44:01 +04:00
|
|
|
vm_backtrace.c -
|
2012-06-02 19:23:37 +04:00
|
|
|
|
|
|
|
$Author: ko1 $
|
|
|
|
created at: Sun Jun 03 00:14:20 2012
|
|
|
|
|
|
|
|
Copyright (C) 1993-2012 Yukihiro Matsumoto
|
|
|
|
|
|
|
|
**********************************************************************/
|
|
|
|
|
2012-11-29 11:05:27 +04:00
|
|
|
#include "eval_intern.h"
|
2019-12-04 11:16:30 +03:00
|
|
|
#include "internal.h"
|
2023-02-15 12:42:52 +03:00
|
|
|
#include "internal/class.h"
|
2021-02-15 09:58:45 +03:00
|
|
|
#include "internal/error.h"
|
2019-12-04 11:16:30 +03:00
|
|
|
#include "internal/vm.h"
|
2012-06-02 19:59:37 +04:00
|
|
|
#include "iseq.h"
|
2019-12-04 11:16:30 +03:00
|
|
|
#include "ruby/debug.h"
|
|
|
|
#include "ruby/encoding.h"
|
|
|
|
#include "vm_core.h"
|
2012-06-02 19:23:37 +04:00
|
|
|
|
|
|
|
static VALUE rb_cBacktrace;
|
2012-06-02 20:46:08 +04:00
|
|
|
static VALUE rb_cBacktraceLocation;
|
2012-06-02 19:23:37 +04:00
|
|
|
|
2014-10-15 18:37:54 +04:00
|
|
|
static VALUE
|
|
|
|
id2str(ID id)
|
|
|
|
{
|
|
|
|
VALUE str = rb_id2str(id);
|
2014-10-16 17:45:53 +04:00
|
|
|
if (!str) return Qnil;
|
2014-10-15 18:37:54 +04:00
|
|
|
return str;
|
|
|
|
}
|
|
|
|
#define rb_id2str(id) id2str(id)
|
|
|
|
|
Improve performance of partial backtraces
Previously, backtrace_each fully populated the rb_backtrace_t with all
backtrace frames, even if caller only requested a partial backtrace
(e.g. Kernel#caller_locations(1, 1)). This changes backtrace_each to
only add the requested frames to the rb_backtrace_t.
To do this, backtrace_each needs to be passed the starting frame and
number of frames values passed to Kernel#caller or #caller_locations.
backtrace_each works from the top of the stack to the bottom, where the
bottom is the current frame. Due to how the location for cfuncs is
tracked using the location of the previous iseq, we need to store an
extra frame for the previous iseq if we are limiting the backtrace and
the final backtrace frame (the first one stored) would be a cfunc and not
an iseq.
To limit the amount of work in this case, store the cfp for each iseq
while scanning until the start of the requested backtrace. If the first
backtrace frame we care about is a cfunc, use the stored cfp to find the
related iseq. Use a function pointer to handle the storage of the cfp
in the iteration arg, and also store the location of the extra frame
in the iteration arg.
backtrace_each needs to return int instead of void in order to signal
when a starting frame larger than the backtrace size is given, as caller
and caller_locations need to return nil and not the empty array in
these cases.
To handle cases where a range is provided with a negative end, and the
backtrace size is needed to calculate the result to pass to
rb_range_beg_len, add a backtrace_size static function to calculate
the size, which copies the logic from backtrace_each.
As backtrace_each only adds the backtrace lines requested,
backtrace_to_*_ary can be simplified to always operate on the entire
backtrace.
Previously, caller_locations(1,1) was about 6.2 times slower for an
800-deep call stack compared to an empty call stack. With this new
approach, it is only 1.3 times slower. It will always be somewhat
slower as it still needs to scan the cfps from the top of the stack
until it finds the first requested backtrace frame.
This initializes the backtrace memory to zero. I do not think this is
necessary, as from my analysis, nothing during the setting of the
backtrace entries can cause a garbage collection, but it seems the
safest approach, and it's unlikely the performance decrease is
significant.
This removes the rb_backtrace_t backtrace_base member. backtrace
and backtrace_base were initialized to the same value, and neither
is modified, so it doesn't make sense to have two pointers.
This also removes LOCATION_TYPE_IFUNC from vm_backtrace.c, as
the value is never set.
Fixes [Bug #17031]
2020-08-28 01:17:36 +03:00
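As a rough illustration of the partial backtrace requests this change targets, the sketch below asks for a single frame instead of the whole stack, so only that frame has to be materialized (the method names are illustrative, not taken from this commit):
```ruby
def inner
  # Request one location, starting one frame above the current method.
  caller_locations(1, 1)
end

def outer
  inner
end

p outer.map(&:label)  #=> ["outer"]
```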
|
|
|
#define BACKTRACE_START 0
|
|
|
|
#define ALL_BACKTRACE_LINES -1
|
|
|
|
|
2012-06-02 19:23:37 +04:00
|
|
|
inline static int
|
2021-06-08 11:34:08 +03:00
|
|
|
calc_pos(const rb_iseq_t *iseq, const VALUE *pc, int *lineno, int *node_id)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
2019-07-31 17:15:56 +03:00
|
|
|
VM_ASSERT(iseq);
|
2022-10-17 11:50:42 +03:00
|
|
|
|
|
|
|
if (pc == NULL) {
|
2022-03-23 22:19:48 +03:00
|
|
|
if (ISEQ_BODY(iseq)->type == ISEQ_TYPE_TOP) {
|
|
|
|
VM_ASSERT(! ISEQ_BODY(iseq)->local_table);
|
|
|
|
VM_ASSERT(! ISEQ_BODY(iseq)->local_table_size);
|
2021-06-08 07:22:27 +03:00
|
|
|
return 0;
|
|
|
|
}
|
2022-09-25 11:07:18 +03:00
|
|
|
if (lineno) *lineno = ISEQ_BODY(iseq)->location.first_lineno;
|
2021-06-08 11:57:44 +03:00
|
|
|
#ifdef USE_ISEQ_NODE_ID
|
2021-06-08 11:34:08 +03:00
|
|
|
if (node_id) *node_id = -1;
|
|
|
|
#endif
|
|
|
|
return 1;
|
2018-11-07 11:04:33 +03:00
|
|
|
}
|
|
|
|
else {
|
2022-10-17 11:50:42 +03:00
|
|
|
VM_ASSERT(ISEQ_BODY(iseq));
|
|
|
|
VM_ASSERT(ISEQ_BODY(iseq)->iseq_encoded);
|
|
|
|
VM_ASSERT(ISEQ_BODY(iseq)->iseq_size);
|
|
|
|
|
2022-03-23 22:19:48 +03:00
|
|
|
ptrdiff_t n = pc - ISEQ_BODY(iseq)->iseq_encoded;
|
|
|
|
VM_ASSERT(n <= ISEQ_BODY(iseq)->iseq_size);
|
2019-07-31 17:15:56 +03:00
|
|
|
VM_ASSERT(n >= 0);
|
|
|
|
ASSUME(n >= 0);
|
|
|
|
size_t pos = n; /* no overflow */
|
|
|
|
if (LIKELY(pos)) {
|
|
|
|
/* use pos-1 because the PC points to the next instruction at the beginning of an instruction */
|
|
|
|
pos--;
|
|
|
|
}
|
|
|
|
#if VMDEBUG && defined(HAVE_BUILTIN___BUILTIN_TRAP)
|
|
|
|
else {
|
|
|
|
/* SDR() is not possible; that would cause an infinite loop. */
|
|
|
|
rb_print_backtrace();
|
|
|
|
__builtin_trap();
|
|
|
|
}
|
2018-11-07 11:04:33 +03:00
|
|
|
#endif
|
2021-06-08 11:34:08 +03:00
|
|
|
if (lineno) *lineno = rb_iseq_line_no(iseq, pos);
|
2021-06-08 11:57:44 +03:00
|
|
|
#ifdef USE_ISEQ_NODE_ID
|
2021-06-08 11:34:08 +03:00
|
|
|
if (node_id) *node_id = rb_iseq_node_id(iseq, pos);
|
|
|
|
#endif
|
|
|
|
return 1;
|
2019-07-31 17:15:56 +03:00
|
|
|
}
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
2021-06-08 11:34:08 +03:00
|
|
|
inline static int
|
|
|
|
calc_lineno(const rb_iseq_t *iseq, const VALUE *pc)
|
|
|
|
{
|
|
|
|
int lineno;
|
|
|
|
if (calc_pos(iseq, pc, &lineno, NULL)) return lineno;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-06-08 11:57:44 +03:00
|
|
|
#ifdef USE_ISEQ_NODE_ID
|
2021-06-08 11:34:08 +03:00
|
|
|
inline static int
|
|
|
|
calc_node_id(const rb_iseq_t *iseq, const VALUE *pc)
|
|
|
|
{
|
|
|
|
int node_id;
|
|
|
|
if (calc_pos(iseq, pc, NULL, &node_id)) return node_id;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2012-06-02 19:23:37 +04:00
|
|
|
int
|
|
|
|
rb_vm_get_sourceline(const rb_control_frame_t *cfp)
|
|
|
|
{
|
2016-08-03 04:50:50 +03:00
|
|
|
if (VM_FRAME_RUBYFRAME_P(cfp) && cfp->iseq) {
|
2017-11-14 15:58:36 +03:00
|
|
|
const rb_iseq_t *iseq = cfp->iseq;
|
|
|
|
int line = calc_lineno(iseq, cfp->pc);
|
|
|
|
if (line != 0) {
|
|
|
|
return line;
|
|
|
|
}
|
|
|
|
else {
|
2022-09-25 11:07:18 +03:00
|
|
|
return ISEQ_BODY(iseq)->location.first_lineno;
|
2017-11-14 15:58:36 +03:00
|
|
|
}
|
2016-08-03 04:50:50 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
return 0;
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-06-02 20:46:08 +04:00
|
|
|
typedef struct rb_backtrace_location_struct {
|
|
|
|
enum LOCATION_TYPE {
|
|
|
|
LOCATION_TYPE_ISEQ = 1,
|
|
|
|
LOCATION_TYPE_CFUNC,
|
2012-06-02 19:23:37 +04:00
|
|
|
} type;
|
|
|
|
|
Make backtrace generation work outward from current frame
This fixes multiple bugs found in the partial backtrace
optimization added in 3b24b7914c16930bfadc89d6aff6326a51c54295.
These bugs occur when passing a start argument to caller where
the start argument lands on an iseq frame without a pc.
Before this commit, the following code results in the same
line being printed twice, both for the #each method.
```ruby
def a; [1].group_by { b } end
def b; puts(caller(2, 1).first, caller(3, 1).first) end
a
```
After this commit and in Ruby 2.7, the lines are different,
with the first line being for #each and the second for #group_by.
Before this commit, the following code can either segfault or
result in an infinite loop:
```ruby
def foo
caller_locations(2, 1).inspect # segfault
caller_locations(2, 1)[0].path # infinite loop
end
1.times.map { 1.times.map { foo } }
```
After this commit, this code works correctly.
This commit completely refactors the backtrace handling.
Instead of processing the backtrace from the outermost
frame working in, process it from the innermost frame
working out. This is much faster for partial backtraces,
since you only access the control frames you need to in
order to construct the backtrace.
To handle cfunc frames in the new design, they start
out with no location information. We increment a counter
for each cfunc frame added. When an iseq frame with pc
is accessed, after adding the iseq backtrace location,
we use the location for the iseq backtrace location for
all of the directly preceding cfunc backtrace locations.
If the last backtrace line is a cfunc frame, we continue
scanning for iseq frames until the end control frame, and
use the location information from the first one for the
trailing cfunc frames in the backtrace.
As only rb_ec_partial_backtrace_object uses the new
backtrace implementation, remove all of the function
pointers and inline the functions. This makes the
process easier to understand.
Restore the Ruby 2.7 implementation of backtrace_each and
use it for all the other functions that called
backtrace_each other than rb_ec_partial_backtrace_object.
All other cases requested the entire backtrace, so there
is no advantage of using the new algorithm for those.
Additionally, there are implicit assumptions in the other
code that the backtrace processing works inward instead
of outward.
Remove the cfunc/iseq union in rb_backtrace_location_t,
and remove the prev_loc member for cfunc. Both cfunc and
iseq types can now have iseq and pc entries, so the
location information can be accessed the same way for each.
This avoids the need for an extra backtrace location entry
to store an iseq backtrace location if the final entry in
the backtrace is a cfunc. This is also what fixes the
segfault and infinite loop issues in the above bugs.
Here's Ruby pseudocode for the new algorithm, where start
and length are the arguments to caller or caller_locations:
```ruby
end_cf = VM.end_control_frame.next
cf = VM.start_control_frame
size = VM.num_control_frames - 2
bt = []
cfunc_counter = 0
if length.nil? || length > size
length = size
end
while cf != end_cf && bt.size != length
if cf.iseq?
if cf.instruction_pointer?
if start > 0
start -= 1
else
bt << cf.iseq_backtrace_entry
cfunc_counter.times do |i|
bt[-1 - i].loc = cf.loc
end
cfunc_counter = 0
end
end
elsif cf.cfunc?
if start > 0
start -= 1
else
bt << cf.cfunc_backtrace_entry
cfunc_counter += 1
end
end
cf = cf.prev
end
if cfunc_counter > 0
while cf != end_cf
if (cf.iseq? && cf.instruction_pointer?)
cfunc_counter.times do |i|
bt[-1 - i].loc = cf.loc
end
end
cf = cf.prev
end
end
```
With the following benchmark, which uses a call depth of
around 100 (common in many Ruby applications):
```ruby
require 'benchmark/ips'

class T
def test(depth, &block)
if depth == 0
yield self
else
test(depth - 1, &block)
end
end
def array
Array.new
end
def first
caller_locations(1, 1)
end
def full
caller_locations
end
end
t = T.new
t.test((ARGV.first || 100).to_i) do
Benchmark.ips do |x|
x.report ('caller_loc(1, 1)') {t.first}
x.report ('caller_loc') {t.full}
x.report ('Array.new') {t.array}
x.compare!
end
end
```
Results before commit:
```
Calculating -------------------------------------
caller_loc(1, 1) 281.159k (± 0.7%) i/s - 1.426M in 5.073055s
caller_loc 15.836k (± 2.1%) i/s - 79.450k in 5.019426s
Array.new 1.852M (± 2.5%) i/s - 9.296M in 5.022511s
Comparison:
Array.new: 1852297.5 i/s
caller_loc(1, 1): 281159.1 i/s - 6.59x (± 0.00) slower
caller_loc: 15835.9 i/s - 116.97x (± 0.00) slower
```
Results after commit:
```
Calculating -------------------------------------
caller_loc(1, 1) 562.286k (± 0.8%) i/s - 2.858M in 5.083249s
caller_loc 16.402k (± 1.0%) i/s - 83.200k in 5.072963s
Array.new 1.853M (± 0.1%) i/s - 9.278M in 5.007523s
Comparison:
Array.new: 1852776.5 i/s
caller_loc(1, 1): 562285.6 i/s - 3.30x (± 0.00) slower
caller_loc: 16402.3 i/s - 112.96x (± 0.00) slower
```
This shows that the speed of caller_locations(1, 1) has roughly
doubled, and the speed of caller_locations with no arguments
has improved slightly. So this new algorithm is significantly faster,
much simpler, and fixes bugs in the previous algorithm.
Fixes [Bug #18053]
2021-07-22 02:44:56 +03:00
|
|
|
const rb_iseq_t *iseq;
|
|
|
|
const VALUE *pc;
|
|
|
|
ID mid;
|
2012-06-02 20:46:08 +04:00
|
|
|
} rb_backtrace_location_t;
|
2012-06-02 19:23:37 +04:00
|
|
|
|
|
|
|
struct valued_frame_info {
|
2012-06-02 20:46:08 +04:00
|
|
|
rb_backtrace_location_t *loc;
|
2012-06-02 19:23:37 +04:00
|
|
|
VALUE btobj;
|
|
|
|
};
|
|
|
|
|
|
|
|
static void
|
2012-06-02 20:46:08 +04:00
|
|
|
location_mark(void *ptr)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
2014-09-13 09:14:51 +04:00
|
|
|
struct valued_frame_info *vfi = (struct valued_frame_info *)ptr;
|
|
|
|
rb_gc_mark(vfi->btobj);
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2012-06-02 20:46:08 +04:00
|
|
|
location_mark_entry(rb_backtrace_location_t *fi)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
|
|
|
switch (fi->type) {
|
2012-06-02 20:46:08 +04:00
|
|
|
case LOCATION_TYPE_ISEQ:
|
2021-07-22 02:44:56 +03:00
|
|
|
rb_gc_mark_movable((VALUE)fi->iseq);
|
2012-06-02 19:23:37 +04:00
|
|
|
break;
|
2012-06-02 20:46:08 +04:00
|
|
|
case LOCATION_TYPE_CFUNC:
|
2021-07-22 02:44:56 +03:00
|
|
|
if (fi->iseq) {
|
|
|
|
rb_gc_mark_movable((VALUE)fi->iseq);
|
|
|
|
}
|
|
|
|
break;
|
2012-06-02 19:23:37 +04:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static size_t
|
2012-06-02 20:46:08 +04:00
|
|
|
location_memsize(const void *ptr)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
2012-06-02 20:46:08 +04:00
|
|
|
/* rb_backtrace_location_t *fi = (rb_backtrace_location_t *)ptr; */
|
|
|
|
return sizeof(rb_backtrace_location_t);
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
2012-06-02 20:46:08 +04:00
|
|
|
static const rb_data_type_t location_data_type = {
|
2012-06-02 19:23:37 +04:00
|
|
|
"frame_info",
|
2014-09-13 09:14:51 +04:00
|
|
|
{location_mark, RUBY_TYPED_DEFAULT_FREE, location_memsize,},
|
2023-02-03 17:20:44 +03:00
|
|
|
0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
2012-06-02 19:23:37 +04:00
|
|
|
};
|
|
|
|
|
2021-06-08 11:34:08 +03:00
|
|
|
int
|
|
|
|
rb_frame_info_p(VALUE obj)
|
|
|
|
{
|
|
|
|
return rb_typeddata_is_kind_of(obj, &location_data_type);
|
|
|
|
}
|
|
|
|
|
2012-06-02 20:46:08 +04:00
|
|
|
static inline rb_backtrace_location_t *
|
|
|
|
location_ptr(VALUE locobj)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
2012-06-02 20:46:08 +04:00
|
|
|
struct valued_frame_info *vloc;
|
|
|
|
GetCoreDataFromValue(locobj, struct valued_frame_info, vloc);
|
|
|
|
return vloc->loc;
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2012-06-02 20:46:08 +04:00
|
|
|
location_lineno(rb_backtrace_location_t *loc)
|
|
|
|
{
|
|
|
|
switch (loc->type) {
|
|
|
|
case LOCATION_TYPE_ISEQ:
|
2021-07-22 02:44:56 +03:00
|
|
|
return calc_lineno(loc->iseq, loc->pc);
|
2012-06-02 20:46:08 +04:00
|
|
|
case LOCATION_TYPE_CFUNC:
|
2021-07-22 02:44:56 +03:00
|
|
|
if (loc->iseq && loc->pc) {
|
|
|
|
return calc_lineno(loc->iseq, loc->pc);
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
default:
|
2012-06-02 20:46:08 +04:00
|
|
|
rb_bug("location_lineno: unreachable");
|
2012-06-02 19:23:37 +04:00
|
|
|
UNREACHABLE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-12-31 10:09:57 +04:00
|
|
|
/*
|
|
|
|
* Returns the line number of this frame.
|
|
|
|
*
|
|
|
|
* For example, using +caller_locations.rb+ from Thread::Backtrace::Location
|
|
|
|
*
|
|
|
|
* loc = c(0..1).first
|
|
|
|
* loc.lineno #=> 2
|
|
|
|
*/
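The +caller_locations.rb+ script referenced above is not reproduced here; a minimal sketch consistent with these examples might look as follows (the helper methods +a+, +b+, and +c+ are assumptions):
```ruby
def a(skip)
  caller_locations(skip)  # line 2 -- the line reported for frame "a"
end
def b(skip)
  a(skip)
end
def c(skip)
  b(skip)
end

loc = c(0..1).first
loc.lineno #=> 2
loc.path   #=> "caller_locations.rb" (when saved under that name)
```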
|
2012-06-02 19:23:37 +04:00
|
|
|
static VALUE
|
2012-06-02 20:46:08 +04:00
|
|
|
location_lineno_m(VALUE self)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
2012-06-02 20:46:08 +04:00
|
|
|
return INT2FIX(location_lineno(location_ptr(self)));
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2012-06-02 20:46:08 +04:00
|
|
|
location_label(rb_backtrace_location_t *loc)
|
|
|
|
{
|
|
|
|
switch (loc->type) {
|
|
|
|
case LOCATION_TYPE_ISEQ:
|
2022-03-23 22:19:48 +03:00
|
|
|
return ISEQ_BODY(loc->iseq)->location.label;
|
2012-06-02 20:46:08 +04:00
|
|
|
case LOCATION_TYPE_CFUNC:
|
2021-07-22 02:44:56 +03:00
|
|
|
return rb_id2str(loc->mid);
|
2012-06-02 19:23:37 +04:00
|
|
|
default:
|
2012-06-02 20:46:08 +04:00
|
|
|
rb_bug("location_label: unreachable");
|
2012-06-02 19:23:37 +04:00
|
|
|
UNREACHABLE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-12-31 10:09:57 +04:00
|
|
|
/*
|
|
|
|
* Returns the label of this frame.
|
|
|
|
*
|
|
|
|
* Usually consists of method, class, module, etc. names with decoration.
|
|
|
|
*
|
2013-06-17 09:31:28 +04:00
|
|
|
* Consider the following example:
|
|
|
|
*
|
|
|
|
* def foo
|
|
|
|
* puts caller_locations(0).first.label
|
|
|
|
*
|
|
|
|
* 1.times do
|
|
|
|
* puts caller_locations(0).first.label
|
|
|
|
*
|
|
|
|
* 1.times do
|
|
|
|
* puts caller_locations(0).first.label
|
|
|
|
* end
|
|
|
|
*
|
|
|
|
* end
|
|
|
|
* end
|
|
|
|
*
|
|
|
|
* The result of calling +foo+ is this:
|
|
|
|
*
|
|
|
|
* foo
|
|
|
|
* block in foo
|
|
|
|
* block (2 levels) in foo
|
2012-12-31 10:09:57 +04:00
|
|
|
*
|
|
|
|
*/
|
2012-06-02 19:23:37 +04:00
|
|
|
static VALUE
|
2012-06-02 20:46:08 +04:00
|
|
|
location_label_m(VALUE self)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
2012-06-02 20:46:08 +04:00
|
|
|
return location_label(location_ptr(self));
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2012-06-02 20:46:08 +04:00
|
|
|
location_base_label(rb_backtrace_location_t *loc)
|
|
|
|
{
|
|
|
|
switch (loc->type) {
|
|
|
|
case LOCATION_TYPE_ISEQ:
|
2022-03-23 22:19:48 +03:00
|
|
|
return ISEQ_BODY(loc->iseq)->location.base_label;
|
2012-06-02 20:46:08 +04:00
|
|
|
case LOCATION_TYPE_CFUNC:
|
2021-07-22 02:44:56 +03:00
|
|
|
return rb_id2str(loc->mid);
|
2012-06-02 19:23:37 +04:00
|
|
|
default:
|
2012-06-02 20:46:08 +04:00
|
|
|
rb_bug("location_base_label: unreachable");
|
2012-06-02 19:23:37 +04:00
|
|
|
UNREACHABLE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-12-31 10:09:57 +04:00
|
|
|
/*
|
|
|
|
* Returns the base label of this frame.
|
|
|
|
*
|
|
|
|
* Usually the same as #label, without decoration.
|
|
|
|
*/
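For instance, inside a block the label carries the block decoration while the base label does not; a small sketch (the method name +foo+ is illustrative):
```ruby
def foo
  1.times do
    loc = caller_locations(0).first
    p loc.label       #=> "block in foo"
    p loc.base_label  #=> "foo"
  end
end
foo
```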
|
2012-06-02 19:23:37 +04:00
|
|
|
static VALUE
|
2012-06-02 20:46:08 +04:00
|
|
|
location_base_label_m(VALUE self)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
2012-06-02 20:46:08 +04:00
|
|
|
return location_base_label(location_ptr(self));
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
2021-10-25 19:58:01 +03:00
|
|
|
static const rb_iseq_t *
|
|
|
|
location_iseq(rb_backtrace_location_t *loc)
|
2012-06-02 20:46:08 +04:00
|
|
|
{
|
|
|
|
switch (loc->type) {
|
|
|
|
case LOCATION_TYPE_ISEQ:
|
2021-10-25 19:58:01 +03:00
|
|
|
return loc->iseq;
|
2012-06-02 20:46:08 +04:00
|
|
|
case LOCATION_TYPE_CFUNC:
|
2021-10-25 19:58:01 +03:00
|
|
|
return loc->iseq;
|
2012-06-02 19:23:37 +04:00
|
|
|
default:
|
2021-10-25 19:58:01 +03:00
|
|
|
rb_bug("location_iseq: unreachable");
|
2012-06-02 19:23:37 +04:00
|
|
|
UNREACHABLE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-12-31 10:09:57 +04:00
|
|
|
/*
|
2020-09-24 01:52:54 +03:00
|
|
|
* Returns the file name of this frame. This will generally be an absolute
|
|
|
|
* path, unless the frame is in the main script, in which case it will be the
|
|
|
|
* script location passed on the command line.
|
2012-12-31 10:09:57 +04:00
|
|
|
*
|
|
|
|
* For example, using +caller_locations.rb+ from Thread::Backtrace::Location
|
|
|
|
*
|
|
|
|
* loc = c(0..1).first
|
|
|
|
* loc.path #=> caller_locations.rb
|
|
|
|
*/
|
2012-06-02 19:23:37 +04:00
|
|
|
static VALUE
|
2012-06-02 20:46:08 +04:00
|
|
|
location_path_m(VALUE self)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
2021-10-25 19:58:01 +03:00
|
|
|
const rb_iseq_t *iseq = location_iseq(location_ptr(self));
|
|
|
|
return iseq ? rb_iseq_path(iseq) : Qnil;
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
2021-06-08 11:57:44 +03:00
|
|
|
#ifdef USE_ISEQ_NODE_ID
|
2021-06-08 11:34:08 +03:00
|
|
|
static int
|
|
|
|
location_node_id(rb_backtrace_location_t *loc)
|
|
|
|
{
|
|
|
|
switch (loc->type) {
|
|
|
|
case LOCATION_TYPE_ISEQ:
|
2021-07-22 02:44:56 +03:00
|
|
|
return calc_node_id(loc->iseq, loc->pc);
|
2021-06-08 11:34:08 +03:00
|
|
|
case LOCATION_TYPE_CFUNC:
|
2021-07-22 02:44:56 +03:00
|
|
|
if (loc->iseq && loc->pc) {
|
|
|
|
return calc_node_id(loc->iseq, loc->pc);
|
2021-06-08 11:34:08 +03:00
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
default:
|
|
|
|
rb_bug("location_node_id: unreachable");
|
|
|
|
UNREACHABLE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2021-12-18 21:40:44 +03:00
|
|
|
int
|
|
|
|
rb_get_node_id_from_frame_info(VALUE obj)
|
2021-06-08 11:34:08 +03:00
|
|
|
{
|
2021-06-08 11:57:44 +03:00
|
|
|
#ifdef USE_ISEQ_NODE_ID
|
2021-06-08 11:34:08 +03:00
|
|
|
rb_backtrace_location_t *loc = location_ptr(obj);
|
2021-12-18 21:40:44 +03:00
|
|
|
return location_node_id(loc);
|
2021-06-08 11:34:08 +03:00
|
|
|
#else
|
2021-12-18 21:40:44 +03:00
|
|
|
return -1;
|
2021-06-08 11:34:08 +03:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2021-12-18 21:40:44 +03:00
|
|
|
const rb_iseq_t *
|
|
|
|
rb_get_iseq_from_frame_info(VALUE obj)
|
|
|
|
{
|
|
|
|
rb_backtrace_location_t *loc = location_ptr(obj);
|
|
|
|
const rb_iseq_t *iseq = location_iseq(loc);
|
|
|
|
return iseq;
|
|
|
|
}
|
|
|
|
|
2012-06-02 19:23:37 +04:00
|
|
|
static VALUE
|
2017-06-01 03:05:33 +03:00
|
|
|
location_realpath(rb_backtrace_location_t *loc)
|
2012-06-02 20:46:08 +04:00
|
|
|
{
|
|
|
|
switch (loc->type) {
|
|
|
|
case LOCATION_TYPE_ISEQ:
|
2021-07-22 02:44:56 +03:00
|
|
|
return rb_iseq_realpath(loc->iseq);
|
2012-06-02 20:46:08 +04:00
|
|
|
case LOCATION_TYPE_CFUNC:
|
2021-07-22 02:44:56 +03:00
|
|
|
if (loc->iseq) {
|
|
|
|
return rb_iseq_realpath(loc->iseq);
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
return Qnil;
|
|
|
|
default:
|
2017-06-01 03:05:33 +03:00
|
|
|
rb_bug("location_realpath: unreachable");
|
2012-06-02 19:23:37 +04:00
|
|
|
UNREACHABLE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-12-31 10:09:57 +04:00
|
|
|
/*
|
|
|
|
* Returns the full file path of this frame.
|
|
|
|
*
|
2020-09-24 01:52:54 +03:00
|
|
|
* Same as #path, except that it will return the absolute path
|
|
|
|
* even if the frame is in the main script.
|
2012-12-31 10:09:57 +04:00
|
|
|
*/
|
2012-06-02 19:23:37 +04:00
|
|
|
static VALUE
|
2012-06-02 20:46:08 +04:00
|
|
|
location_absolute_path_m(VALUE self)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
2017-06-01 03:05:33 +03:00
|
|
|
return location_realpath(location_ptr(self));
|
2012-06-02 19:23:37 +04:00
|
|
|
}
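A minimal Ruby-level sketch (not from the original source) of the difference this method exposes; the file names are illustrative and assume the script is run as `ruby example.rb` from /home/user:
```ruby
def where
  loc = caller_locations(1, 1).first
  [loc.path, loc.absolute_path]
end

p where  # => ["example.rb", "/home/user/example.rb"] for a frame in the main script
```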
|
|
|
|
|
|
|
|
static VALUE
|
2012-06-02 20:46:08 +04:00
|
|
|
location_format(VALUE file, int lineno, VALUE name)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
2014-10-15 18:37:51 +04:00
|
|
|
VALUE s = rb_enc_sprintf(rb_enc_compatible(file, name), "%s", RSTRING_PTR(file));
|
2012-06-02 20:46:08 +04:00
|
|
|
if (lineno != 0) {
|
2014-10-15 18:37:51 +04:00
|
|
|
rb_str_catf(s, ":%d", lineno);
|
|
|
|
}
|
|
|
|
rb_str_cat_cstr(s, ":in ");
|
|
|
|
if (NIL_P(name)) {
|
|
|
|
rb_str_cat_cstr(s, "unknown method");
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
else {
|
2014-10-15 18:37:51 +04:00
|
|
|
rb_str_catf(s, "`%s'", RSTRING_PTR(name));
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
2014-10-15 18:37:51 +04:00
|
|
|
return s;
|
2012-06-02 19:23:37 +04:00
|
|
|
}
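A minimal sketch (not from the original source) of the string shape location_format produces for Kernel#caller entries in this version of the code; the file name and line number are illustrative:
```ruby
def inner
  caller(1, 1).first
end

def outer
  inner
end

puts outer  # e.g. "example.rb:7:in `outer'"; a frame whose name resolves to nil would end in ":in unknown method"
```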
|
|
|
|
|
|
|
|
static VALUE
|
2012-06-02 20:46:08 +04:00
|
|
|
location_to_str(rb_backtrace_location_t *loc)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
|
|
|
VALUE file, name;
|
2012-06-02 20:46:08 +04:00
|
|
|
int lineno;
|
2012-06-02 19:23:37 +04:00
|
|
|
|
2012-06-02 20:46:08 +04:00
|
|
|
switch (loc->type) {
|
|
|
|
case LOCATION_TYPE_ISEQ:
|
2021-07-22 02:44:56 +03:00
|
|
|
file = rb_iseq_path(loc->iseq);
|
2022-03-23 22:19:48 +03:00
|
|
|
name = ISEQ_BODY(loc->iseq)->location.label;
|
2012-06-02 19:23:37 +04:00
|
|
|
|
2021-07-22 02:44:56 +03:00
|
|
|
lineno = calc_lineno(loc->iseq, loc->pc);
|
2012-06-02 19:23:37 +04:00
|
|
|
break;
|
2012-06-02 20:46:08 +04:00
|
|
|
case LOCATION_TYPE_CFUNC:
|
2021-07-22 02:44:56 +03:00
|
|
|
if (loc->iseq && loc->pc) {
|
|
|
|
file = rb_iseq_path(loc->iseq);
|
|
|
|
lineno = calc_lineno(loc->iseq, loc->pc);
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
else {
|
2017-10-29 16:19:53 +03:00
|
|
|
file = GET_VM()->progname;
|
2019-03-21 08:59:16 +03:00
|
|
|
lineno = 0;
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
2021-07-22 02:44:56 +03:00
|
|
|
name = rb_id2str(loc->mid);
|
2012-06-02 19:23:37 +04:00
|
|
|
break;
|
|
|
|
default:
|
2012-06-02 20:46:08 +04:00
|
|
|
rb_bug("location_to_str: unreachable");
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
2012-06-02 20:46:08 +04:00
|
|
|
return location_format(file, lineno, name);
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
2012-12-31 10:09:57 +04:00
|
|
|
/*
|
|
|
|
* Returns a Kernel#caller style string representing this frame.
|
|
|
|
*/
|
2012-06-02 19:23:37 +04:00
|
|
|
static VALUE
|
2012-06-02 20:46:08 +04:00
|
|
|
location_to_str_m(VALUE self)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
2012-06-02 20:46:08 +04:00
|
|
|
return location_to_str(location_ptr(self));
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
2012-12-31 10:09:57 +04:00
|
|
|
/*
|
|
|
|
* Returns the same as calling +inspect+ on the string representation of
|
|
|
|
* #to_str
|
|
|
|
*/
|
2012-12-01 16:09:17 +04:00
|
|
|
static VALUE
|
|
|
|
location_inspect_m(VALUE self)
|
|
|
|
{
|
|
|
|
return rb_str_inspect(location_to_str(location_ptr(self)));
|
|
|
|
}
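A minimal sketch (not from the original source) of how #to_s and #inspect relate at the Ruby level, per location_to_str_m and location_inspect_m above; the output shape is illustrative:
```ruby
def frame
  caller_locations(1, 1).first
end

loc = frame
puts loc.to_s     # e.g. "example.rb:6:in `<main>'"
puts loc.inspect  # the same string passed through String#inspect
loc.inspect == loc.to_s.inspect  # => true for this implementation
```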
|
|
|
|
|
2012-06-02 19:23:37 +04:00
|
|
|
typedef struct rb_backtrace_struct {
|
2012-06-02 20:46:08 +04:00
|
|
|
rb_backtrace_location_t *backtrace;
|
2012-06-02 19:23:37 +04:00
|
|
|
int backtrace_size;
|
|
|
|
VALUE strary;
|
2013-12-13 08:31:06 +04:00
|
|
|
VALUE locary;
|
2012-06-02 19:23:37 +04:00
|
|
|
} rb_backtrace_t;
|
|
|
|
|
|
|
|
static void
|
|
|
|
backtrace_mark(void *ptr)
|
|
|
|
{
|
2014-09-13 09:14:51 +04:00
|
|
|
rb_backtrace_t *bt = (rb_backtrace_t *)ptr;
|
|
|
|
size_t i, s = bt->backtrace_size;
|
2012-06-02 19:23:37 +04:00
|
|
|
|
2014-09-13 09:14:51 +04:00
|
|
|
for (i=0; i<s; i++) {
|
|
|
|
location_mark_entry(&bt->backtrace[i]);
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
2020-05-07 20:15:35 +03:00
|
|
|
rb_gc_mark_movable(bt->strary);
|
|
|
|
rb_gc_mark_movable(bt->locary);
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
backtrace_free(void *ptr)
|
|
|
|
{
|
2014-09-13 09:14:51 +04:00
|
|
|
rb_backtrace_t *bt = (rb_backtrace_t *)ptr;
|
2020-08-28 01:17:36 +03:00
|
|
|
if (bt->backtrace) ruby_xfree(bt->backtrace);
|
2014-09-13 09:14:51 +04:00
|
|
|
ruby_xfree(bt);
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
2020-05-07 20:15:35 +03:00
|
|
|
static void
|
|
|
|
location_update_entry(rb_backtrace_location_t *fi)
|
|
|
|
{
|
|
|
|
switch (fi->type) {
|
|
|
|
case LOCATION_TYPE_ISEQ:
|
2021-07-22 02:44:56 +03:00
|
|
|
fi->iseq = (rb_iseq_t*)rb_gc_location((VALUE)fi->iseq);
|
2020-05-07 20:15:35 +03:00
|
|
|
break;
|
|
|
|
case LOCATION_TYPE_CFUNC:
|
2021-07-22 02:44:56 +03:00
|
|
|
if (fi->iseq) {
|
|
|
|
fi->iseq = (rb_iseq_t*)rb_gc_location((VALUE)fi->iseq);
|
|
|
|
}
|
|
|
|
break;
|
2020-05-07 20:15:35 +03:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
backtrace_update(void *ptr)
|
|
|
|
{
|
|
|
|
rb_backtrace_t *bt = (rb_backtrace_t *)ptr;
|
|
|
|
size_t i, s = bt->backtrace_size;
|
|
|
|
|
|
|
|
for (i=0; i<s; i++) {
|
|
|
|
location_update_entry(&bt->backtrace[i]);
|
|
|
|
}
|
|
|
|
bt->strary = rb_gc_location(bt->strary);
|
|
|
|
bt->locary = rb_gc_location(bt->locary);
|
|
|
|
}
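A minimal Ruby-level check (not from the original source) of what the movable mark and the update pass above provide: saved locations remain usable after heap compaction. GC.compact may be unavailable on some builds, hence the guard:
```ruby
def snapshot
  caller_locations  # backed by an rb_backtrace_t under the hood
end

locs = snapshot
GC.compact if GC.respond_to?(:compact)
puts locs.first  # still a readable "file:line:in `label'" entry after compaction
```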
|
|
|
|
|
2012-06-02 19:23:37 +04:00
|
|
|
static size_t
|
|
|
|
backtrace_memsize(const void *ptr)
|
|
|
|
{
|
|
|
|
rb_backtrace_t *bt = (rb_backtrace_t *)ptr;
|
2012-06-02 20:46:08 +04:00
|
|
|
return sizeof(rb_backtrace_t) + sizeof(rb_backtrace_location_t) * bt->backtrace_size;
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static const rb_data_type_t backtrace_data_type = {
|
|
|
|
"backtrace",
|
2020-05-07 20:15:35 +03:00
|
|
|
{backtrace_mark, backtrace_free, backtrace_memsize, backtrace_update},
|
2023-02-06 20:41:36 +03:00
|
|
|
0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
2012-06-02 19:23:37 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
int
rb_backtrace_p(VALUE obj)
{
    return rb_typeddata_is_kind_of(obj, &backtrace_data_type);
}

static VALUE
backtrace_alloc(VALUE klass)
{
    rb_backtrace_t *bt;
    VALUE obj = TypedData_Make_Struct(klass, rb_backtrace_t, &backtrace_data_type, bt);
    return obj;
}

/* Count the control frames between the current cfp and the end of the VM stack,
 * skipping the two dummy top frames; returns -1 if the stack is not yet initialized. */
static long
backtrace_size(const rb_execution_context_t *ec)
{
    const rb_control_frame_t *last_cfp = ec->cfp;
    const rb_control_frame_t *start_cfp = RUBY_VM_END_CONTROL_FRAME(ec);

    if (start_cfp == NULL) {
        return -1;
    }

    start_cfp =
        RUBY_VM_NEXT_CONTROL_FRAME(
            RUBY_VM_NEXT_CONTROL_FRAME(start_cfp)); /* skip top frames */

    if (start_cfp < last_cfp) {
        return 0;
    }

    return start_cfp - last_cfp + 1;
}

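To make the size computation concrete, here is a Ruby-level illustration (hypothetical helper methods `a` and `b`, not part of this file): each additional method call adds exactly one frame to the stack whose size is being computed.
```ruby
# Illustration only; a and b are hypothetical helpers, not the C code above.
def a; caller_locations.size; end
def b; a; end

a_size = a
b_size = b
b_size - a_size  # => 1, the single extra frame introduced by b
```
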
/* True if the frame's iseq path starts with "<internal:", i.e. the frame comes
 * from a builtin Ruby source file. */
static bool
is_internal_location(const rb_control_frame_t *cfp)
{
    static const char prefix[] = "<internal:";
    const size_t prefix_len = sizeof(prefix) - 1;
    VALUE file = rb_iseq_path(cfp->iseq);
    return strncmp(prefix, RSTRING_PTR(file), prefix_len) == 0;
}

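The same check can be sketched at the Ruby level using the public Thread::Backtrace::Location API (an approximation, not the C implementation above):
```ruby
# Sketch: a location counts as internal when its path starts with "<internal:".
INTERNAL_PREFIX = "<internal:"

def internal_location?(loc) # loc is a Thread::Backtrace::Location
  loc.path.to_s.start_with?(INTERNAL_PREFIX)
end

caller_locations.reject { |loc| internal_location?(loc) }
```
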
static void
bt_update_cfunc_loc(unsigned long cfunc_counter, rb_backtrace_location_t *cfunc_loc, const rb_iseq_t *iseq, const VALUE *pc)
{
    for (; cfunc_counter > 0; cfunc_counter--, cfunc_loc--) {
        cfunc_loc->iseq = iseq;
        cfunc_loc->pc = pc;
    }
}
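
For readers following the Ruby pseudocode earlier, here is a small Ruby mirror of this back-fill (a sketch with a hypothetical `Loc` struct, not the real rb_backtrace_location_t): the trailing `cfunc_counter` entries, which were added without location info, receive the iseq and pc of the frame just found.
```ruby
# Hypothetical mirror of bt_update_cfunc_loc; Loc stands in for the C struct.
Loc = Struct.new(:iseq, :pc)

def backfill_cfunc_locations(bt, cfunc_counter, iseq, pc)
  cfunc_counter.times do |i|
    bt[-1 - i].iseq = iseq
    bt[-1 - i].pc = pc
  end
end
```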

static VALUE location_create(rb_backtrace_location_t *srcloc, void *btobj);

/* Yield a Thread::Backtrace::Location object for each of the num_frames entries. */
static void
bt_yield_loc(rb_backtrace_location_t *loc, long num_frames, VALUE btobj)
{
    for (; num_frames > 0; num_frames--, loc++) {
        rb_yield(location_create(loc, (void *)btobj));
    }
}
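A hedged usage sketch of this yielding path: on Rubies that expose Thread.each_caller_location (3.2+), which the do_yield/bt_yield_loc path appears to serve, locations are handed to the block one at a time instead of materializing a full array first.
```ruby
# Sketch assuming Thread.each_caller_location is available (Ruby 3.2+).
def first_non_internal_caller
  Thread.each_caller_location do |loc|
    return loc unless loc.path.to_s.start_with?("<internal:")
  end
  nil
end
```
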
static VALUE
rb_ec_partial_backtrace_object(const rb_execution_context_t *ec, long start_frame, long num_frames, int* start_too_large, bool skip_internal, bool do_yield)
{
    const rb_control_frame_t *cfp = ec->cfp;
    const rb_control_frame_t *end_cfp = RUBY_VM_END_CONTROL_FRAME(ec);
    ptrdiff_t size;
    rb_backtrace_t *bt;
    VALUE btobj = backtrace_alloc(rb_cBacktrace);
    rb_backtrace_location_t *loc = NULL;
    unsigned long cfunc_counter = 0;
    GetCoreDataFromValue(btobj, rb_backtrace_t, bt);

    // In the case the thread vm_stack or cfp is not initialized, there is no backtrace.
    if (end_cfp == NULL) {
        num_frames = 0;
    }
    else {
        end_cfp = RUBY_VM_NEXT_CONTROL_FRAME(end_cfp);

        /*
         * top frame (dummy) <- RUBY_VM_END_CONTROL_FRAME
         * top frame (dummy) <- end_cfp
         * top frame <- main script
         * top frame
         *  ...
         * 2nd frame <- lev:0
         * current frame <- ec->cfp
         */

        size = end_cfp - cfp + 1;
        if (size < 0) {
            num_frames = 0;
        }
        else if (num_frames < 0 || num_frames > size) {
            num_frames = size;
        }
    }

    bt->backtrace = ZALLOC_N(rb_backtrace_location_t, num_frames);
    bt->backtrace_size = 0;
    if (num_frames == 0) {
        if (start_too_large) *start_too_large = 0;
        return btobj;
    }

    for (; cfp != end_cfp && (bt->backtrace_size < num_frames); cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp)) {
        if (cfp->iseq) {
            if (cfp->pc) {
                if (start_frame > 0) {
                    start_frame--;
                }
                else if (!skip_internal || !is_internal_location(cfp)) {
                    const rb_iseq_t *iseq = cfp->iseq;
                    const VALUE *pc = cfp->pc;
                    loc = &bt->backtrace[bt->backtrace_size++];
                    loc->type = LOCATION_TYPE_ISEQ;
                    RB_OBJ_WRITE(btobj, &loc->iseq, iseq);
2021-07-22 02:44:56 +03:00
|
|
|
loc->pc = pc;
|
|
|
|
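/* Backfill the directly preceding cfunc entries with this iseq frame's iseq and pc. */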
bt_update_cfunc_loc(cfunc_counter, loc-1, iseq, pc);
|
2022-01-15 00:02:46 +03:00
|
|
|
if (do_yield) {
|
|
|
|
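/* When a block was given, yield this iseq location together with the cfunc locations it just resolved. */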
bt_yield_loc(loc - cfunc_counter, cfunc_counter+1, btobj);
|
|
|
|
}
|
2021-07-22 02:44:56 +03:00
|
|
|
cfunc_counter = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-08-11 00:46:17 +03:00
|
|
|
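/* cfunc frame: record an entry with no location information yet; it is filled in later from the next iseq frame that has a pc. */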
else {
|
|
|
|
VM_ASSERT(RUBYVM_CFUNC_FRAME_P(cfp));
|
2021-07-22 02:44:56 +03:00
|
|
|
if (start_frame > 0) {
|
|
|
|
start_frame--;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
loc = &bt->backtrace[bt->backtrace_size++];
|
|
|
|
loc->type = LOCATION_TYPE_CFUNC;
|
|
|
|
loc->iseq = NULL;
|
|
|
|
loc->pc = NULL;
|
|
|
|
loc->mid = rb_vm_frame_method_entry(cfp)->def->original_id;
|
|
|
|
cfunc_counter++;
|
|
|
|
}
|
|
|
|
}
|
2020-08-28 01:17:36 +03:00
|
|
|
}
|
|
|
|
|
2021-07-22 02:44:56 +03:00
|
|
|
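/* Trailing cfunc entries: keep scanning outward and take the location from the first iseq frame that has a pc. */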
if (cfunc_counter > 0) {
|
|
|
|
for (; cfp != end_cfp; cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp)) {
|
|
|
|
if (cfp->iseq && cfp->pc && (!skip_internal || !is_internal_location(cfp))) {
|
|
|
|
bt_update_cfunc_loc(cfunc_counter, loc, cfp->iseq, cfp->pc);
|
2023-02-06 20:41:36 +03:00
|
|
|
RB_OBJ_WRITTEN(btobj, Qundef, cfp->iseq);
|
2022-01-15 00:02:46 +03:00
|
|
|
if (do_yield) {
|
|
|
|
bt_yield_loc(loc - cfunc_counter, cfunc_counter, btobj);
|
|
|
|
}
|
2021-07-22 02:44:56 +03:00
|
|
|
break;
|
|
|
|
}
|
2020-10-11 14:36:25 +03:00
|
|
|
}
|
|
|
|
}
|
2012-06-02 19:23:37 +04:00
|
|
|
|
2021-07-22 02:44:56 +03:00
|
|
|
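/* Report whether the requested start frame was past the end of the stack, so caller/caller_locations can return nil instead of an empty array. */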
if (start_too_large) *start_too_large = (start_frame > 0 ? -1 : 0);
|
|
|
|
return btobj;
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
2020-08-28 01:17:36 +03:00
|
|
|
MJIT_FUNC_EXPORTED VALUE
|
|
|
|
rb_ec_backtrace_object(const rb_execution_context_t *ec)
|
|
|
|
{
|
2022-01-15 00:02:46 +03:00
|
|
|
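/* The full backtrace is just the partial path with no start offset and no length limit. */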
return rb_ec_partial_backtrace_object(ec, BACKTRACE_START, ALL_BACKTRACE_LINES, NULL, FALSE, FALSE);
|
2020-08-28 01:17:36 +03:00
|
|
|
}
|
|
|
|
|
2012-06-02 19:23:37 +04:00
|
|
|
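/* Map each collected location through func and return the results as a Ruby array. */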
static VALUE
|
2020-08-28 01:17:36 +03:00
|
|
|
backtrace_collect(rb_backtrace_t *bt, VALUE (*func)(rb_backtrace_location_t *, void *arg), void *arg)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
|
|
|
VALUE btary;
|
|
|
|
int i;
|
|
|
|
|
2021-07-22 02:44:56 +03:00
|
|
|
btary = rb_ary_new2(bt->backtrace_size);
|
2012-06-02 19:23:37 +04:00
|
|
|
|
2021-07-22 02:44:56 +03:00
|
|
|
for (i=0; i<bt->backtrace_size; i++) {
|
|
|
|
rb_backtrace_location_t *loc = &bt->backtrace[i];
|
2012-06-02 20:46:08 +04:00
|
|
|
rb_ary_push(btary, func(loc, arg));
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
return btary;
|
|
|
|
}
|
|
|
|
|
|
|
|
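/* Adapter so location_to_str can be used as a backtrace_collect callback; the extra argument is unused. */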
static VALUE
|
2012-06-02 20:46:08 +04:00
|
|
|
location_to_str_dmyarg(rb_backtrace_location_t *loc, void *dmy)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
2012-06-02 20:46:08 +04:00
|
|
|
return location_to_str(loc);
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
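/* Build the string-array form of the backtrace, one string per location. */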
static VALUE
|
2020-08-28 01:17:36 +03:00
|
|
|
backtrace_to_str_ary(VALUE self)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
2020-08-12 21:43:11 +03:00
|
|
|
VALUE r;
|
2020-08-28 01:17:36 +03:00
|
|
|
rb_backtrace_t *bt;
|
2012-06-02 19:23:37 +04:00
|
|
|
GetCoreDataFromValue(self, rb_backtrace_t, bt);
|
2020-08-28 01:17:36 +03:00
|
|
|
r = backtrace_collect(bt, location_to_str_dmyarg, 0);
|
2012-11-30 04:38:24 +04:00
|
|
|
RB_GC_GUARD(self);
|
2012-11-30 02:35:09 +04:00
|
|
|
return r;
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
2012-11-29 00:30:01 +04:00
|
|
|
VALUE
|
|
|
|
rb_backtrace_to_str_ary(VALUE self)
|
|
|
|
{
|
|
|
|
rb_backtrace_t *bt;
|
|
|
|
GetCoreDataFromValue(self, rb_backtrace_t, bt);
|
|
|
|
|
|
|
|
if (!bt->strary) {
|
2023-02-06 20:41:36 +03:00
|
|
|
RB_OBJ_WRITE(self, &bt->strary, backtrace_to_str_ary(self));
|
2012-11-29 00:30:01 +04:00
|
|
|
}
|
|
|
|
return bt->strary;
|
|
|
|
}
|
|
|
|
|
mjit_compile.c: merge initial JIT compiler
which has been developed by Takashi Kokubun <takashikkbn@gmail> as
YARV-MJIT. Many of its bugs are fixed by wanabe <s.wanabe@gmail.com>.
This JIT compiler is designed to be a safe migration path to introduce
a JIT compiler to MRI. So this commit does not include any bytecode
changes or dynamic instruction modifications, which are done in the original
MJIT.
This commit even strips off some aggressive optimizations from
YARV-MJIT, and thus it's slower than YARV-MJIT too. But it's still
noticeably faster than Ruby 2.5 in some benchmarks (attached below).
Note that this JIT compiler passes `make test`, `make test-all`, `make
test-spec` without JIT, and even with JIT. Not only is it perfectly safe
with JIT disabled, because it does not replace VM instructions unlike
MJIT, but with JIT enabled it also stably runs Ruby applications,
including Rails applications.
I consider this version just an "initial" JIT compiler. I have many
optimization ideas which are skipped for initial merging, and you may
easily replace this JIT compiler with a faster one by just replacing
mjit_compile.c. `mjit_compile` interface is designed for the purpose.
common.mk: update dependencies for mjit_compile.c.
internal.h: declare `rb_vm_insn_addr2insn` for MJIT.
vm.c: exclude some definitions if `-DMJIT_HEADER` is provided to the
compiler. This avoids including some functions that take a long time
to compile, e.g. vm_exec_core. Some of the purpose is achieved in
transform_mjit_header.rb (see `IGNORED_FUNCTIONS`) but others are
manually resolved for now. Load mjit_helper.h for MJIT header.
mjit_helper.h: New. This is a file used only by JIT-ed code. I'll
refactor `mjit_call_cfunc` later.
vm_eval.c: add some #ifdef switches to skip compiling some functions
like Init_vm_eval.
win32/mkexports.rb: export thread/ec functions, which are used by MJIT.
include/ruby/defines.h: add MJIT_FUNC_EXPORTED macro alias to clarify
that a function is exported only for MJIT.
array.c: export a function used by MJIT.
bignum.c: ditto.
class.c: ditto.
compile.c: ditto.
error.c: ditto.
gc.c: ditto.
hash.c: ditto.
iseq.c: ditto.
numeric.c: ditto.
object.c: ditto.
proc.c: ditto.
re.c: ditto.
st.c: ditto.
string.c: ditto.
thread.c: ditto.
variable.c: ditto.
vm_backtrace.c: ditto.
vm_insnhelper.c: ditto.
vm_method.c: ditto.
I would like to improve maintainability of function exports, but I
believe this way is acceptable for the initial merge if we clarify that the
new exports are for MJIT (so that we can use them as a TODO list to fix)
and add unit tests to detect unresolved symbols.
I'll add unit tests of JIT compilations in succeeding commits.
Author: Takashi Kokubun <takashikkbn@gmail.com>
Contributor: wanabe <s.wanabe@gmail.com>
Part of [Feature #14235]
---
* Known issues
* Code generated by gcc is faster than that generated by clang. The benchmark may be worse
on macOS. The following benchmark results were obtained with gcc on Linux.
* Performance is decreased when Google Chrome is running
* JIT can work on MinGW, but it doesn't improve performance at least
in short running benchmark.
* Currently it doesn't perform well with Rails. We'll try to fix this
before release.
---
* Benchmark results
Benchmarked with:
Intel 4.0GHz i7-4790K with 16GB memory under x86-64 Ubuntu 8 Cores
- 2.0.0-p0: Ruby 2.0.0-p0
- r62186: Ruby trunk (early 2.6.0), before MJIT changes
- JIT off: On this commit, but without `--jit` option
- JIT on: On this commit, and with `--jit` option
** Optcarrot fps
Benchmark: https://github.com/mame/optcarrot
| |2.0.0-p0 |r62186 |JIT off |JIT on |
|:--------|:--------|:--------|:--------|:--------|
|fps |37.32 |51.46 |51.31 |58.88 |
|vs 2.0.0 |1.00x |1.38x |1.37x |1.58x |
** MJIT benchmarks
Benchmark: https://github.com/benchmark-driver/mjit-benchmarks
(Original: https://github.com/vnmakarov/ruby/tree/rtl_mjit_branch/MJIT-benchmarks)
| |2.0.0-p0 |r62186 |JIT off |JIT on |
|:----------|:--------|:--------|:--------|:--------|
|aread |1.00 |1.09 |1.07 |2.19 |
|aref |1.00 |1.13 |1.11 |2.22 |
|aset |1.00 |1.50 |1.45 |2.64 |
|awrite |1.00 |1.17 |1.13 |2.20 |
|call |1.00 |1.29 |1.26 |2.02 |
|const2 |1.00 |1.10 |1.10 |2.19 |
|const |1.00 |1.11 |1.10 |2.19 |
|fannk |1.00 |1.04 |1.02 |1.00 |
|fib |1.00 |1.32 |1.31 |1.84 |
|ivread |1.00 |1.13 |1.12 |2.43 |
|ivwrite |1.00 |1.23 |1.21 |2.40 |
|mandelbrot |1.00 |1.13 |1.16 |1.28 |
|meteor |1.00 |2.97 |2.92 |3.17 |
|nbody |1.00 |1.17 |1.15 |1.49 |
|nest-ntimes|1.00 |1.22 |1.20 |1.39 |
|nest-while |1.00 |1.10 |1.10 |1.37 |
|norm |1.00 |1.18 |1.16 |1.24 |
|nsvb |1.00 |1.16 |1.16 |1.17 |
|red-black |1.00 |1.02 |0.99 |1.12 |
|sieve |1.00 |1.30 |1.28 |1.62 |
|trees |1.00 |1.14 |1.13 |1.19 |
|while |1.00 |1.12 |1.11 |2.41 |
** Discourse's script/bench.rb
Benchmark: https://github.com/discourse/discourse/blob/v1.8.7/script/bench.rb
NOTE: Rails performance is somewhat degraded with JIT for now.
We should fix this.
(At least I know opt_aref is performing badly in JIT and I have an idea
to fix it. Please wait for the fix.)
*** JIT off
Your Results: (note for timings- percentile is first, duration is second in millisecs)
categories_admin:
50: 17
75: 18
90: 22
99: 29
home_admin:
50: 21
75: 21
90: 27
99: 40
topic_admin:
50: 17
75: 18
90: 22
99: 32
categories:
50: 35
75: 41
90: 43
99: 77
home:
50: 39
75: 46
90: 49
99: 95
topic:
50: 46
75: 52
90: 56
99: 101
*** JIT on
Your Results: (note for timings- percentile is first, duration is second in millisecs)
categories_admin:
50: 19
75: 21
90: 25
99: 33
home_admin:
50: 24
75: 26
90: 30
99: 35
topic_admin:
50: 19
75: 20
90: 25
99: 30
categories:
50: 40
75: 44
90: 48
99: 76
home:
50: 42
75: 48
90: 51
99: 89
topic:
50: 49
75: 55
90: 58
99: 99
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62197 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2018-02-04 14:22:28 +03:00
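As a small usage sketch (not part of the commit message above, and assuming a released Ruby that ships MJIT), the JIT is opt-in via the `--jit` command-line option, and a script can check at run time whether it was enabled:
```ruby
# Run as: ruby --jit jit_check.rb   (the file name is arbitrary)
# RubyVM::MJIT.enabled? reports whether --jit was given; the defined?
# guard keeps the script working on builds without MJIT.
if defined?(RubyVM::MJIT) && RubyVM::MJIT.enabled?
  puts "MJIT is enabled"
else
  puts "MJIT is disabled; start Ruby with --jit to enable it"
end
```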
|
|
|
MJIT_FUNC_EXPORTED void
|
2017-11-09 08:22:51 +03:00
|
|
|
rb_backtrace_use_iseq_first_lineno_for_last_location(VALUE self)
|
|
|
|
{
|
|
|
|
const rb_backtrace_t *bt;
|
|
|
|
rb_backtrace_location_t *loc;
|
|
|
|
|
|
|
|
GetCoreDataFromValue(self, rb_backtrace_t, bt);
|
Make backtrace generation work outward from current frame
This fixes multiple bugs found in the partial backtrace
optimization added in 3b24b7914c16930bfadc89d6aff6326a51c54295.
These bugs occur when passing a start argument to caller where
the start argument lands on an iseq frame without a pc.
Before this commit, the following code results in the same
line being printed twice, both for the #each method.
```ruby
def a; [1].group_by { b } end
def b; puts(caller(2, 1).first, caller(3, 1).first) end
a
```
After this commit and in Ruby 2.7, the lines are different,
with the first line being for each and the second for group_by.
Before this commit, the following code can either segfault or
result in an infinite loop:
```ruby
def foo
caller_locations(2, 1).inspect # segfault
caller_locations(2, 1)[0].path # infinite loop
end
1.times.map { 1.times.map { foo } }
```
After this commit, this code works correctly.
This commit completely refactors the backtrace handling.
Instead of processing the backtrace from the outermost
frame working in, process it from the innermost frame
working out. This is much faster for partial backtraces,
since you only access the control frames you need to in
order to construct the backtrace.
To handle cfunc frames in the new design, they start
out with no location information. We increment a counter
for each cfunc frame added. When an iseq frame with pc
is accessed, after adding the iseq backtrace location,
we use the location for the iseq backtrace location for
all of the directly preceding cfunc backtrace locations.
If the last backtrace line is a cfunc frame, we continue
scanning for iseq frames until the end control frame, and
use the location information from the first one for the
trailing cfunc frames in the backtrace.
As only rb_ec_partial_backtrace_object uses the new
backtrace implementation, remove all of the function
pointers and inline the functions. This makes the
process easier to understand.
Restore the Ruby 2.7 implementation of backtrace_each and
use it for all the other functions that called
backtrace_each other than rb_ec_partial_backtrace_object.
All other cases requested the entire backtrace, so there
is no advantage to using the new algorithm for those.
Additionally, there are implicit assumptions in the other
code that the backtrace processing works inward instead
of outward.
Remove the cfunc/iseq union in rb_backtrace_location_t,
and remove the prev_loc member for cfunc. Both cfunc and
iseq types can now have iseq and pc entries, so the
location information can be accessed the same way for each.
This avoids the need for an extra backtrace location entry
to store an iseq backtrace location if the final entry in
the backtrace is a cfunc. This is also what fixes the
segfault and infinite loop issues in the above bugs.
Here's Ruby pseudocode for the new algorithm, where start
and length are the arguments to caller or caller_locations:
```ruby
end_cf = VM.end_control_frame.next
cf = VM.start_control_frame
size = VM.num_control_frames - 2
bt = []
cfunc_counter = 0
if length.nil? || length > size
length = size
end
while cf != end_cf && bt.size != length
if cf.iseq?
if cf.instruction_pointer?
if start > 0
start -= 1
else
bt << cf.iseq_backtrace_entry
cfunc_counter.times do |i|
bt[-1 - i].loc = cf.loc
end
cfunc_counter = 0
end
end
elsif cf.cfunc?
if start > 0
start -= 1
else
bt << cf.cfunc_backtrace_entry
cfunc_counter += 1
end
end
cf = cf.prev
end
if cfunc_counter > 0
while cf != end_cf
if (cf.iseq? && cf.instruction_pointer?)
cfunc_counter.times do |i|
bt[-i].loc = cf.loc
end
end
cf = cf.prev
end
end
```
With the following benchmark, which uses a call depth of
around 100 (common in many Ruby applications):
```ruby
class T
def test(depth, &block)
if depth == 0
yield self
else
test(depth - 1, &block)
end
end
def array
Array.new
end
def first
caller_locations(1, 1)
end
def full
caller_locations
end
end
t = T.new
t.test((ARGV.first || 100).to_i) do
Benchmark.ips do |x|
x.report ('caller_loc(1, 1)') {t.first}
x.report ('caller_loc') {t.full}
x.report ('Array.new') {t.array}
x.compare!
end
end
```
Results before commit:
```
Calculating -------------------------------------
caller_loc(1, 1) 281.159k (_ 0.7%) i/s - 1.426M in 5.073055s
caller_loc 15.836k (_ 2.1%) i/s - 79.450k in 5.019426s
Array.new 1.852M (_ 2.5%) i/s - 9.296M in 5.022511s
Comparison:
Array.new: 1852297.5 i/s
caller_loc(1, 1): 281159.1 i/s - 6.59x (_ 0.00) slower
caller_loc: 15835.9 i/s - 116.97x (_ 0.00) slower
```
Results after commit:
```
Calculating -------------------------------------
caller_loc(1, 1) 562.286k (_ 0.8%) i/s - 2.858M in 5.083249s
caller_loc 16.402k (_ 1.0%) i/s - 83.200k in 5.072963s
Array.new 1.853M (_ 0.1%) i/s - 9.278M in 5.007523s
Comparison:
Array.new: 1852776.5 i/s
caller_loc(1, 1): 562285.6 i/s - 3.30x (_ 0.00) slower
caller_loc: 16402.3 i/s - 112.96x (_ 0.00) slower
```
This shows that the speed of caller_locations(1, 1) has roughly
doubled, and the speed of caller_locations with no arguments
has improved slightly. So this new algorithm is significantly faster,
much simpler, and fixes bugs in the previous algorithm.
Fixes [Bug #18053]
2021-07-22 02:44:56 +03:00
|
|
|
VM_ASSERT(bt->backtrace_size > 0);
|
2017-11-09 08:22:51 +03:00
|
|
|
|
2021-07-22 02:44:56 +03:00
|
|
|
loc = &bt->backtrace[0];
|
2017-11-09 08:22:51 +03:00
|
|
|
|
|
|
|
VM_ASSERT(loc->type == LOCATION_TYPE_ISEQ);
|
|
|
|
|
2021-07-22 02:44:56 +03:00
|
|
|
loc->pc = NULL; // means location.first_lineno
|
2017-11-09 08:22:51 +03:00
|
|
|
}
|
|
|
|
|
2012-06-02 19:23:37 +04:00
|
|
|
static VALUE
|
2012-06-02 20:46:08 +04:00
|
|
|
location_create(rb_backtrace_location_t *srcloc, void *btobj)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
|
|
|
VALUE obj;
|
2012-06-02 20:46:08 +04:00
|
|
|
struct valued_frame_info *vloc;
|
|
|
|
obj = TypedData_Make_Struct(rb_cBacktraceLocation, struct valued_frame_info, &location_data_type, vloc);
|
2012-06-02 19:23:37 +04:00
|
|
|
|
2012-06-02 20:46:08 +04:00
|
|
|
vloc->loc = srcloc;
|
2023-02-03 17:20:44 +03:00
|
|
|
RB_OBJ_WRITE(obj, &vloc->btobj, (VALUE)btobj);
|
2012-06-02 19:23:37 +04:00
|
|
|
|
|
|
|
return obj;
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2020-08-28 01:17:36 +03:00
|
|
|
backtrace_to_location_ary(VALUE self)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
2020-08-12 21:43:11 +03:00
|
|
|
VALUE r;
|
2020-08-28 01:17:36 +03:00
|
|
|
rb_backtrace_t *bt;
|
2012-06-02 19:23:37 +04:00
|
|
|
GetCoreDataFromValue(self, rb_backtrace_t, bt);
|
2020-08-28 01:17:36 +03:00
|
|
|
r = backtrace_collect(bt, location_create, (void *)self);
|
2012-11-30 04:38:24 +04:00
|
|
|
RB_GC_GUARD(self);
|
2012-11-30 02:35:09 +04:00
|
|
|
return r;
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
2013-12-13 08:31:06 +04:00
|
|
|
VALUE
|
|
|
|
rb_backtrace_to_location_ary(VALUE self)
|
|
|
|
{
|
|
|
|
rb_backtrace_t *bt;
|
|
|
|
GetCoreDataFromValue(self, rb_backtrace_t, bt);
|
|
|
|
|
|
|
|
if (!bt->locary) {
|
2023-02-06 20:41:36 +03:00
|
|
|
RB_OBJ_WRITE(self, &bt->locary, backtrace_to_location_ary(self));
|
2013-12-13 08:31:06 +04:00
|
|
|
}
|
|
|
|
return bt->locary;
|
|
|
|
}
|
|
|
|
|
2012-06-02 19:23:37 +04:00
|
|
|
static VALUE
|
|
|
|
backtrace_dump_data(VALUE self)
|
|
|
|
{
|
|
|
|
VALUE str = rb_backtrace_to_str_ary(self);
|
|
|
|
return str;
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
backtrace_load_data(VALUE self, VALUE str)
|
|
|
|
{
|
|
|
|
rb_backtrace_t *bt;
|
|
|
|
GetCoreDataFromValue(self, rb_backtrace_t, bt);
|
2023-02-06 20:41:36 +03:00
|
|
|
RB_OBJ_WRITE(self, &bt->strary, str);
|
2012-06-02 19:23:37 +04:00
|
|
|
return self;
|
|
|
|
}
|
|
|
|
|
2021-12-20 10:07:25 +03:00
|
|
|
/*
|
2022-05-11 23:32:33 +03:00
|
|
|
* call-seq: Thread::Backtrace.limit -> integer
|
2021-12-20 10:07:25 +03:00
|
|
|
*
|
|
|
|
* Returns the maximum backtrace length set by the <tt>--backtrace-limit</tt>
|
2022-11-21 08:07:18 +03:00
|
|
|
* command-line option. The default is <tt>-1</tt>, which means unlimited
|
2021-12-20 10:07:25 +03:00
|
|
|
* backtraces. If the value is zero or positive, the error backtraces,
|
|
|
|
* produced by Exception#full_message, are abbreviated and the extra lines
|
|
|
|
* are replaced by <tt>... 3 levels... </tt>
|
|
|
|
*
|
|
|
|
* $ ruby -r net/http -e "p Thread::Backtrace.limit; Net::HTTP.get(URI('http://wrong.address'))"
|
|
|
|
* -1
|
|
|
|
* .../lib/ruby/3.1.0/socket.rb:227:in `getaddrinfo': Failed to open TCP connection to wrong.address:80 (getaddrinfo: Name or service not known) (SocketError)
|
|
|
|
* from .../lib/ruby/3.1.0/socket.rb:227:in `foreach'
|
|
|
|
* from .../lib/ruby/3.1.0/socket.rb:632:in `tcp'
|
|
|
|
* from .../lib/ruby/3.1.0/net/http.rb:998:in `connect'
|
|
|
|
* from .../lib/ruby/3.1.0/net/http.rb:976:in `do_start'
|
|
|
|
* from .../lib/ruby/3.1.0/net/http.rb:965:in `start'
|
|
|
|
* from .../lib/ruby/3.1.0/net/http.rb:627:in `start'
|
|
|
|
* from .../lib/ruby/3.1.0/net/http.rb:503:in `get_response'
|
|
|
|
* from .../lib/ruby/3.1.0/net/http.rb:474:in `get'
|
|
|
|
* .../lib/ruby/3.1.0/socket.rb:227:in `getaddrinfo': getaddrinfo: Name or service not known (SocketError)
|
|
|
|
* from .../lib/ruby/3.1.0/socket.rb:227:in `foreach'
|
|
|
|
* from .../lib/ruby/3.1.0/socket.rb:632:in `tcp'
|
|
|
|
* from .../lib/ruby/3.1.0/net/http.rb:998:in `connect'
|
|
|
|
* from .../lib/ruby/3.1.0/net/http.rb:976:in `do_start'
|
|
|
|
* from .../lib/ruby/3.1.0/net/http.rb:965:in `start'
|
|
|
|
* from .../lib/ruby/3.1.0/net/http.rb:627:in `start'
|
|
|
|
* from .../lib/ruby/3.1.0/net/http.rb:503:in `get_response'
|
|
|
|
* from .../lib/ruby/3.1.0/net/http.rb:474:in `get'
|
|
|
|
* from -e:1:in `<main>'
|
|
|
|
*
|
|
|
|
* $ ruby --backtrace-limit 2 -r net/http -e "p Thread::Backtrace.limit; Net::HTTP.get(URI('http://wrong.address'))"
|
|
|
|
* 2
|
|
|
|
* .../lib/ruby/3.1.0/socket.rb:227:in `getaddrinfo': Failed to open TCP connection to wrong.address:80 (getaddrinfo: Name or service not known) (SocketError)
|
|
|
|
* from .../lib/ruby/3.1.0/socket.rb:227:in `foreach'
|
|
|
|
* from .../lib/ruby/3.1.0/socket.rb:632:in `tcp'
|
|
|
|
* ... 7 levels...
|
|
|
|
* .../lib/ruby/3.1.0/socket.rb:227:in `getaddrinfo': getaddrinfo: Name or service not known (SocketError)
|
|
|
|
* from .../lib/ruby/3.1.0/socket.rb:227:in `foreach'
|
|
|
|
* from .../lib/ruby/3.1.0/socket.rb:632:in `tcp'
|
|
|
|
* ... 7 levels...
|
|
|
|
*
|
|
|
|
* $ ruby --backtrace-limit 0 -r net/http -e "p Thread::Backtrace.limit; Net::HTTP.get(URI('http://wrong.address'))"
|
|
|
|
* 0
|
|
|
|
* .../lib/ruby/3.1.0/socket.rb:227:in `getaddrinfo': Failed to open TCP connection to wrong.address:80 (getaddrinfo: Name or service not known) (SocketError)
|
|
|
|
* ... 9 levels...
|
|
|
|
* .../lib/ruby/3.1.0/socket.rb:227:in `getaddrinfo': getaddrinfo: Name or service not known (SocketError)
|
|
|
|
* ... 9 levels...
|
|
|
|
*
|
|
|
|
*/
|
2021-02-15 09:58:45 +03:00
|
|
|
static VALUE
|
|
|
|
backtrace_limit(VALUE self)
|
|
|
|
{
|
|
|
|
return LONG2NUM(rb_backtrace_length_limit);
|
|
|
|
}
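To make the limit concrete, here is a small sketch (the recursion depth and the file name are arbitrary): when the process is started with `--backtrace-limit 2`, Thread::Backtrace.limit returns 2, and Exception#full_message collapses the deep middle of the error backtrace into a "... N levels..." line as described in the comment above.
```ruby
# Run as: ruby --backtrace-limit 2 limit_demo.rb   (file name is arbitrary)
p Thread::Backtrace.limit    #=> 2, or -1 when the option is not given

def recurse(n)
  n.zero? ? raise("boom") : recurse(n - 1)
end

begin
  recurse(20)
rescue => e
  # With a limit set, the long tail of the backtrace is abbreviated.
  puts e.full_message(highlight: false)
end
```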
|
|
|
|
|
2012-06-02 19:59:37 +04:00
|
|
|
VALUE
|
2017-10-28 16:22:04 +03:00
|
|
|
rb_ec_backtrace_str_ary(const rb_execution_context_t *ec, long lev, long n)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
2022-01-15 00:02:46 +03:00
|
|
|
return rb_backtrace_to_str_ary(rb_ec_partial_backtrace_object(ec, lev, n, NULL, FALSE, FALSE));
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
2019-12-10 14:54:01 +03:00
|
|
|
VALUE
|
2020-10-11 14:36:25 +03:00
|
|
|
rb_ec_backtrace_location_ary(const rb_execution_context_t *ec, long lev, long n, bool skip_internal)
|
2012-06-02 19:23:37 +04:00
|
|
|
{
|
2022-01-15 00:02:46 +03:00
|
|
|
return rb_backtrace_to_location_ary(rb_ec_partial_backtrace_object(ec, lev, n, NULL, skip_internal, FALSE));
|
2012-06-02 19:23:37 +04:00
|
|
|
}
|
|
|
|
|
2012-06-04 06:49:37 +04:00
|
|
|
/* make old style backtrace directly */
|
2012-06-02 19:23:37 +04:00
|
|
|
|
2021-07-22 02:44:56 +03:00
|
|
|
static void
|
|
|
|
backtrace_each(const rb_execution_context_t *ec,
|
|
|
|
void (*init)(void *arg, size_t size),
|
|
|
|
void (*iter_iseq)(void *arg, const rb_control_frame_t *cfp),
|
|
|
|
void (*iter_cfunc)(void *arg, const rb_control_frame_t *cfp, ID mid),
|
|
|
|
void *arg)
|
|
|
|
{
|
|
|
|
const rb_control_frame_t *last_cfp = ec->cfp;
|
|
|
|
const rb_control_frame_t *start_cfp = RUBY_VM_END_CONTROL_FRAME(ec);
|
|
|
|
const rb_control_frame_t *cfp;
|
|
|
|
ptrdiff_t size, i;
|
|
|
|
|
|
|
|
// If the thread's vm_stack or cfp is not initialized, there is no backtrace.
|
|
|
|
if (start_cfp == NULL) {
|
|
|
|
init(arg, 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* <- start_cfp (end control frame)
|
|
|
|
* top frame (dummy)
|
|
|
|
* top frame (dummy)
|
|
|
|
* top frame <- start_cfp
|
|
|
|
* top frame
|
|
|
|
* ...
|
|
|
|
* 2nd frame <- lev:0
|
|
|
|
* current frame <- ec->cfp
|
|
|
|
*/
|
|
|
|
|
|
|
|
start_cfp =
|
|
|
|
RUBY_VM_NEXT_CONTROL_FRAME(
|
|
|
|
RUBY_VM_NEXT_CONTROL_FRAME(start_cfp)); /* skip top frames */
|
|
|
|
|
|
|
|
if (start_cfp < last_cfp) {
|
|
|
|
size = 0;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
size = start_cfp - last_cfp + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
init(arg, size);
|
|
|
|
|
|
|
|
/* SDR(); */
|
|
|
|
for (i=0, cfp = start_cfp; i<size; i++, cfp = RUBY_VM_NEXT_CONTROL_FRAME(cfp)) {
|
|
|
|
/* fprintf(stderr, "cfp: %d\n", (rb_control_frame_t *)(ec->vm_stack + ec->vm_stack_size) - cfp); */
|
|
|
|
if (cfp->iseq) {
|
|
|
|
if (cfp->pc) {
|
|
|
|
iter_iseq(arg, cfp);
|
|
|
|
}
|
|
|
|
}
|
2021-08-11 00:46:17 +03:00
|
|
|
else {
|
|
|
|
VM_ASSERT(RUBYVM_CFUNC_FRAME_P(cfp));
|
2021-07-22 02:44:56 +03:00
|
|
|
const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(cfp);
|
|
|
|
ID mid = me->def->original_id;
|
|
|
|
|
|
|
|
iter_cfunc(arg, cfp, mid);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|

struct oldbt_arg {
    VALUE filename;
    int lineno;
    void (*func)(void *data, VALUE file, int lineno, VALUE name);
    void *data; /* result */
};

static void
oldbt_init(void *ptr, size_t dmy)
{
    struct oldbt_arg *arg = (struct oldbt_arg *)ptr;
    arg->filename = GET_VM()->progname;
    arg->lineno = 0;
}

static void
oldbt_iter_iseq(void *ptr, const rb_control_frame_t *cfp)
{
    const rb_iseq_t *iseq = cfp->iseq;
    const VALUE *pc = cfp->pc;
    struct oldbt_arg *arg = (struct oldbt_arg *)ptr;
    VALUE file = arg->filename = rb_iseq_path(iseq);
    VALUE name = ISEQ_BODY(iseq)->location.label;
    int lineno = arg->lineno = calc_lineno(iseq, pc);

    (arg->func)(arg->data, file, lineno, name);
}

static void
oldbt_iter_cfunc(void *ptr, const rb_control_frame_t *cfp, ID mid)
{
    struct oldbt_arg *arg = (struct oldbt_arg *)ptr;
    VALUE file = arg->filename;
    VALUE name = rb_id2str(mid);
    int lineno = arg->lineno;

    (arg->func)(arg->data, file, lineno, name);
}

static void
oldbt_print(void *data, VALUE file, int lineno, VALUE name)
{
    FILE *fp = (FILE *)data;

    if (NIL_P(name)) {
        fprintf(fp, "\tfrom %s:%d:in unknown method\n",
                RSTRING_PTR(file), lineno);
    }
    else {
        fprintf(fp, "\tfrom %s:%d:in `%s'\n",
                RSTRING_PTR(file), lineno, RSTRING_PTR(name));
    }
}

static void
vm_backtrace_print(FILE *fp)
{
    struct oldbt_arg arg;

    arg.func = oldbt_print;
    arg.data = (void *)fp;
    backtrace_each(GET_EC(),
                   oldbt_init,
                   oldbt_iter_iseq,
                   oldbt_iter_cfunc,
                   &arg);
}

static void
oldbt_bugreport(void *arg, VALUE file, int line, VALUE method)
{
    const char *filename = NIL_P(file) ? "ruby" : RSTRING_PTR(file);
    if (!*(int *)arg) {
        fprintf(stderr, "-- Ruby level backtrace information "
                "----------------------------------------\n");
        *(int *)arg = 1;
    }
    if (NIL_P(method)) {
        fprintf(stderr, "%s:%d:in unknown method\n", filename, line);
    }
    else {
        fprintf(stderr, "%s:%d:in `%s'\n", filename, line, RSTRING_PTR(method));
    }
}

void
rb_backtrace_print_as_bugreport(void)
{
    struct oldbt_arg arg;
    int i = 0;

    arg.func = oldbt_bugreport;
    arg.data = (int *)&i;

    backtrace_each(GET_EC(),
                   oldbt_init,
                   oldbt_iter_iseq,
                   oldbt_iter_cfunc,
                   &arg);
}

void
rb_backtrace(void)
{
    vm_backtrace_print(stderr);
}

struct print_to_arg {
    VALUE (*iter)(VALUE recv, VALUE str);
    VALUE output;
};

static void
oldbt_print_to(void *data, VALUE file, int lineno, VALUE name)
{
    const struct print_to_arg *arg = data;
    VALUE str = rb_sprintf("\tfrom %"PRIsVALUE":%d:in ", file, lineno);

    if (NIL_P(name)) {
        rb_str_cat2(str, "unknown method\n");
    }
    else {
        rb_str_catf(str, " `%"PRIsVALUE"'\n", name);
    }
    (*arg->iter)(arg->output, str);
}

void
rb_backtrace_each(VALUE (*iter)(VALUE recv, VALUE str), VALUE output)
{
    struct oldbt_arg arg;
    struct print_to_arg parg;

    parg.iter = iter;
    parg.output = output;
    arg.func = oldbt_print_to;
    arg.data = &parg;
    backtrace_each(GET_EC(),
                   oldbt_init,
                   oldbt_iter_iseq,
                   oldbt_iter_cfunc,
                   &arg);
}
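
/*
 * Illustrative sketch (not part of the original file): one way an embedder
 * could use rb_backtrace_each() above to collect the Ruby-level backtrace
 * into a single String.  The names `oldbt_collect_line` and
 * `backtrace_to_string_example` are hypothetical.
 */
static VALUE
oldbt_collect_line(VALUE acc, VALUE line)
{
    /* each `line` is one formatted "\tfrom file:lineno:in `method'\n" entry */
    return rb_str_append(acc, line);
}

static VALUE
backtrace_to_string_example(void)
{
    VALUE out = rb_str_new_cstr("");
    rb_backtrace_each(oldbt_collect_line, out);
    return out;
}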

VALUE
rb_make_backtrace(void)
{
    return rb_ec_backtrace_str_ary(GET_EC(), BACKTRACE_START, ALL_BACKTRACE_LINES);
}

static VALUE
ec_backtrace_to_ary(const rb_execution_context_t *ec, int argc, const VALUE *argv, int lev_default, int lev_plus, int to_str)
{
    VALUE level, vn;
    long lev, n;
    VALUE btval;
    VALUE r;
    int too_large;

    rb_scan_args(argc, argv, "02", &level, &vn);

    if (argc == 2 && NIL_P(vn)) argc--;

    switch (argc) {
      case 0:
        lev = lev_default + lev_plus;
        n = ALL_BACKTRACE_LINES;
        break;
      case 1:
        {
            long beg, len, bt_size = backtrace_size(ec);
            switch (rb_range_beg_len(level, &beg, &len, bt_size - lev_plus, 0)) {
              case Qfalse:
                lev = NUM2LONG(level);
                if (lev < 0) {
                    rb_raise(rb_eArgError, "negative level (%ld)", lev);
                }
                lev += lev_plus;
                n = ALL_BACKTRACE_LINES;
                break;
              case Qnil:
                return Qnil;
              default:
                lev = beg + lev_plus;
                n = len;
                break;
            }
            break;
        }
      case 2:
        lev = NUM2LONG(level);
        n = NUM2LONG(vn);
        if (lev < 0) {
            rb_raise(rb_eArgError, "negative level (%ld)", lev);
        }
        if (n < 0) {
            rb_raise(rb_eArgError, "negative size (%ld)", n);
        }
        lev += lev_plus;
        break;
      default:
        lev = n = 0; /* to avoid warning */
        break;
    }

    if (n == 0) {
        return rb_ary_new();
    }

    btval = rb_ec_partial_backtrace_object(ec, lev, n, &too_large, FALSE, FALSE);

    if (too_large) {
        return Qnil;
    }

    if (to_str) {
        r = backtrace_to_str_ary(btval);
    }
    else {
        r = backtrace_to_location_ary(btval);
    }
    RB_GC_GUARD(btval);
    return r;
}
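
/*
 * Worked example (added for clarity, not in the original source): a Ruby
 * call of `caller(2, 3)` reaches ec_backtrace_to_ary() via rb_f_caller()
 * with lev_default = 1 and lev_plus = 1.  argc is 2, so the `case 2`
 * branch yields lev = 2 + 1 = 3 and n = 3: three frames are requested,
 * skipping the Kernel#caller frame itself plus the two frames the caller
 * asked to omit.
 */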

static VALUE
thread_backtrace_to_ary(int argc, const VALUE *argv, VALUE thval, int to_str)
{
    rb_thread_t *target_th = rb_thread_ptr(thval);

    if (target_th->to_kill || target_th->status == THREAD_KILLED)
        return Qnil;

    return ec_backtrace_to_ary(target_th->ec, argc, argv, 0, 0, to_str);
}

VALUE
rb_vm_thread_backtrace(int argc, const VALUE *argv, VALUE thval)
{
    return thread_backtrace_to_ary(argc, argv, thval, 1);
}

VALUE
rb_vm_thread_backtrace_locations(int argc, const VALUE *argv, VALUE thval)
{
    return thread_backtrace_to_ary(argc, argv, thval, 0);
}

VALUE
rb_vm_backtrace(int argc, const VALUE * argv, struct rb_execution_context_struct * ec)
{
    return ec_backtrace_to_ary(ec, argc, argv, 0, 0, 1);
}

VALUE
rb_vm_backtrace_locations(int argc, const VALUE * argv, struct rb_execution_context_struct * ec)
{
    return ec_backtrace_to_ary(ec, argc, argv, 0, 0, 0);
}

/*
 *  call-seq:
 *     caller(start=1, length=nil)  -> array or nil
 *     caller(range)                -> array or nil
 *
 *  Returns the current execution stack---an array containing strings in
 *  the form <code>file:line</code> or <code>file:line: in
 *  `method'</code>.
 *
 *  The optional _start_ parameter determines the number of initial stack
 *  entries to omit from the top of the stack.
 *
 *  A second optional +length+ parameter can be used to limit how many entries
 *  are returned from the stack.
 *
 *  Returns +nil+ if _start_ is greater than the size of
 *  current execution stack.
 *
 *  Optionally you can pass a range, which will return an array containing the
 *  entries within the specified range.
 *
 *     def a(skip)
 *       caller(skip)
 *     end
 *     def b(skip)
 *       a(skip)
 *     end
 *     def c(skip)
 *       b(skip)
 *     end
 *     c(0)   #=> ["prog:2:in `a'", "prog:5:in `b'", "prog:8:in `c'", "prog:10:in `<main>'"]
 *     c(1)   #=> ["prog:5:in `b'", "prog:8:in `c'", "prog:11:in `<main>'"]
 *     c(2)   #=> ["prog:8:in `c'", "prog:12:in `<main>'"]
 *     c(3)   #=> ["prog:13:in `<main>'"]
 *     c(4)   #=> []
 *     c(5)   #=> nil
 */

static VALUE
rb_f_caller(int argc, VALUE *argv, VALUE _)
{
    return ec_backtrace_to_ary(GET_EC(), argc, argv, 1, 1, 1);
}

/*
 *  call-seq:
 *     caller_locations(start=1, length=nil)  -> array or nil
 *     caller_locations(range)                -> array or nil
 *
 *  Returns the current execution stack---an array containing
 *  backtrace location objects.
 *
 *  See Thread::Backtrace::Location for more information.
 *
 *  The optional _start_ parameter determines the number of initial stack
 *  entries to omit from the top of the stack.
 *
 *  A second optional +length+ parameter can be used to limit how many entries
 *  are returned from the stack.
 *
 *  Returns +nil+ if _start_ is greater than the size of
 *  current execution stack.
 *
 *  Optionally you can pass a range, which will return an array containing the
 *  entries within the specified range.
 */
static VALUE
rb_f_caller_locations(int argc, VALUE *argv, VALUE _)
{
    return ec_backtrace_to_ary(GET_EC(), argc, argv, 1, 1, 0);
}

/*
 *  call-seq:
 *     Thread.each_caller_location{ |loc| ... } -> nil
 *
 *  Yields each frame of the current execution stack as a
 *  backtrace location object.
 */
static VALUE
each_caller_location(VALUE unused)
{
    rb_ec_partial_backtrace_object(GET_EC(), 2, ALL_BACKTRACE_LINES, NULL, FALSE, TRUE);
    return Qnil;
}

/* called from Init_vm() in vm.c */
void
Init_vm_backtrace(void)
{
    /*
     *  An internal representation of the backtrace.  The user will never interact with
     *  objects of this class directly, but class methods can be used to get backtrace
     *  settings of the current session.
     */
    rb_cBacktrace = rb_define_class_under(rb_cThread, "Backtrace", rb_cObject);
    rb_define_alloc_func(rb_cBacktrace, backtrace_alloc);
    rb_undef_method(CLASS_OF(rb_cBacktrace), "new");
    rb_marshal_define_compat(rb_cBacktrace, rb_cArray, backtrace_dump_data, backtrace_load_data);
    rb_define_singleton_method(rb_cBacktrace, "limit", backtrace_limit, 0);

    /*
     *  An object representation of a stack frame, initialized by
     *  Kernel#caller_locations.
     *
     *  For example:
     *
     *      # caller_locations.rb
     *      def a(skip)
     *        caller_locations(skip)
     *      end
     *      def b(skip)
     *        a(skip)
     *      end
     *      def c(skip)
     *        b(skip)
     *      end
     *
     *      c(0..2).map do |call|
     *        puts call.to_s
     *      end
     *
     *  Running <code>ruby caller_locations.rb</code> will produce:
     *
     *      caller_locations.rb:2:in `a'
     *      caller_locations.rb:5:in `b'
     *      caller_locations.rb:8:in `c'
     *
     *  Here's another example with a slightly different result:
     *
     *      # foo.rb
     *      class Foo
     *        attr_accessor :locations
     *        def initialize(skip)
     *          @locations = caller_locations(skip)
     *        end
     *      end
     *
     *      Foo.new(0..2).locations.map do |call|
     *        puts call.to_s
     *      end
     *
     *  Now run <code>ruby foo.rb</code> and you should see:
     *
     *      init.rb:4:in `initialize'
     *      init.rb:8:in `new'
     *      init.rb:8:in `<main>'
     */
    rb_cBacktraceLocation = rb_define_class_under(rb_cBacktrace, "Location", rb_cObject);
    rb_undef_alloc_func(rb_cBacktraceLocation);
    rb_undef_method(CLASS_OF(rb_cBacktraceLocation), "new");
    rb_define_method(rb_cBacktraceLocation, "lineno", location_lineno_m, 0);
    rb_define_method(rb_cBacktraceLocation, "label", location_label_m, 0);
    rb_define_method(rb_cBacktraceLocation, "base_label", location_base_label_m, 0);
    rb_define_method(rb_cBacktraceLocation, "path", location_path_m, 0);
    rb_define_method(rb_cBacktraceLocation, "absolute_path", location_absolute_path_m, 0);
    rb_define_method(rb_cBacktraceLocation, "to_s", location_to_str_m, 0);
    rb_define_method(rb_cBacktraceLocation, "inspect", location_inspect_m, 0);

    rb_define_global_function("caller", rb_f_caller, -1);
    rb_define_global_function("caller_locations", rb_f_caller_locations, -1);

    rb_define_singleton_method(rb_cThread, "each_caller_location", each_caller_location, 0);
}

/* debugger API */

RUBY_SYMBOL_EXPORT_BEGIN

RUBY_SYMBOL_EXPORT_END

struct rb_debug_inspector_struct {
    rb_execution_context_t *ec;
    rb_control_frame_t *cfp;
    VALUE backtrace;
    VALUE contexts; /* [[klass, binding, iseq, cfp], ...] */
    long backtrace_size;
};

enum {
    CALLER_BINDING_SELF,
    CALLER_BINDING_CLASS,
    CALLER_BINDING_BINDING,
    CALLER_BINDING_ISEQ,
    CALLER_BINDING_CFP,
    CALLER_BINDING_DEPTH,
};

struct collect_caller_bindings_data {
    VALUE ary;
    const rb_execution_context_t *ec;
};

static void
collect_caller_bindings_init(void *arg, size_t size)
{
    /* */
}

static VALUE
get_klass(const rb_control_frame_t *cfp)
{
    VALUE klass;
    if (rb_vm_control_frame_id_and_class(cfp, 0, 0, &klass)) {
        if (RB_TYPE_P(klass, T_ICLASS)) {
            return RBASIC(klass)->klass;
        }
        else {
            return klass;
        }
    }
    else {
        return Qnil;
    }
}

static int
frame_depth(const rb_execution_context_t *ec, const rb_control_frame_t *cfp)
{
    VM_ASSERT(RUBY_VM_END_CONTROL_FRAME(ec) >= cfp);
    return (int)(RUBY_VM_END_CONTROL_FRAME(ec) - cfp);
}

static void
collect_caller_bindings_iseq(void *arg, const rb_control_frame_t *cfp)
{
    struct collect_caller_bindings_data *data = (struct collect_caller_bindings_data *)arg;
    VALUE frame = rb_ary_new2(6);

    rb_ary_store(frame, CALLER_BINDING_SELF, cfp->self);
    rb_ary_store(frame, CALLER_BINDING_CLASS, get_klass(cfp));
    rb_ary_store(frame, CALLER_BINDING_BINDING, GC_GUARDED_PTR(cfp)); /* create later */
    rb_ary_store(frame, CALLER_BINDING_ISEQ, cfp->iseq ? (VALUE)cfp->iseq : Qnil);
    rb_ary_store(frame, CALLER_BINDING_CFP, GC_GUARDED_PTR(cfp));
    rb_ary_store(frame, CALLER_BINDING_DEPTH, INT2FIX(frame_depth(data->ec, cfp)));

    rb_ary_push(data->ary, frame);
}

static void
collect_caller_bindings_cfunc(void *arg, const rb_control_frame_t *cfp, ID mid)
{
    struct collect_caller_bindings_data *data = (struct collect_caller_bindings_data *)arg;
    VALUE frame = rb_ary_new2(6);

    rb_ary_store(frame, CALLER_BINDING_SELF, cfp->self);
    rb_ary_store(frame, CALLER_BINDING_CLASS, get_klass(cfp));
    rb_ary_store(frame, CALLER_BINDING_BINDING, Qnil); /* not available */
    rb_ary_store(frame, CALLER_BINDING_ISEQ, Qnil); /* not available */
    rb_ary_store(frame, CALLER_BINDING_CFP, GC_GUARDED_PTR(cfp));
    rb_ary_store(frame, CALLER_BINDING_DEPTH, INT2FIX(frame_depth(data->ec, cfp)));

    rb_ary_push(data->ary, frame);
}

static VALUE
collect_caller_bindings(const rb_execution_context_t *ec)
{
    int i;
    VALUE result;
    struct collect_caller_bindings_data data = {
        rb_ary_new(), ec
    };

    backtrace_each(ec,
                   collect_caller_bindings_init,
                   collect_caller_bindings_iseq,
                   collect_caller_bindings_cfunc,
                   &data);

    result = rb_ary_reverse(data.ary);

    /* bindings should be created from top of frame */
    for (i=0; i<RARRAY_LEN(result); i++) {
        VALUE entry = rb_ary_entry(result, i);
        VALUE cfp_val = rb_ary_entry(entry, CALLER_BINDING_BINDING);

        if (!NIL_P(cfp_val)) {
            rb_control_frame_t *cfp = GC_GUARDED_PTR_REF(cfp_val);
            rb_ary_store(entry, CALLER_BINDING_BINDING, rb_vm_make_binding(ec, cfp));
        }
    }

    return result;
}

/*
 * Note that the passed `rb_debug_inspector_t' will be disabled
 * after `rb_debug_inspector_open'.
 */

VALUE
rb_debug_inspector_open(rb_debug_inspector_func_t func, void *data)
{
    rb_debug_inspector_t dbg_context;
    rb_execution_context_t *ec = GET_EC();
    enum ruby_tag_type state;
    volatile VALUE MAYBE_UNUSED(result);

    /* escape all env to heap */
    rb_vm_stack_to_heap(ec);

    dbg_context.ec = ec;
    dbg_context.cfp = dbg_context.ec->cfp;
    dbg_context.backtrace = rb_ec_backtrace_location_ary(ec, BACKTRACE_START, ALL_BACKTRACE_LINES, FALSE);
    dbg_context.backtrace_size = RARRAY_LEN(dbg_context.backtrace);
    dbg_context.contexts = collect_caller_bindings(ec);

    EC_PUSH_TAG(ec);
    if ((state = EC_EXEC_TAG()) == TAG_NONE) {
        result = (*func)(&dbg_context, data);
    }
    EC_POP_TAG();

    /* invalidate bindings? */

    if (state) {
        EC_JUMP_TAG(ec, state);
    }

    return result;
}

static VALUE
frame_get(const rb_debug_inspector_t *dc, long index)
{
    if (index < 0 || index >= dc->backtrace_size) {
        rb_raise(rb_eArgError, "no such frame");
    }
    return rb_ary_entry(dc->contexts, index);
}

VALUE
rb_debug_inspector_frame_self_get(const rb_debug_inspector_t *dc, long index)
{
    VALUE frame = frame_get(dc, index);
    return rb_ary_entry(frame, CALLER_BINDING_SELF);
}

VALUE
rb_debug_inspector_frame_class_get(const rb_debug_inspector_t *dc, long index)
{
    VALUE frame = frame_get(dc, index);
    return rb_ary_entry(frame, CALLER_BINDING_CLASS);
}

VALUE
rb_debug_inspector_frame_binding_get(const rb_debug_inspector_t *dc, long index)
{
    VALUE frame = frame_get(dc, index);
    return rb_ary_entry(frame, CALLER_BINDING_BINDING);
}

VALUE
rb_debug_inspector_frame_iseq_get(const rb_debug_inspector_t *dc, long index)
{
    VALUE frame = frame_get(dc, index);
    VALUE iseq = rb_ary_entry(frame, CALLER_BINDING_ISEQ);

    return RTEST(iseq) ? rb_iseqw_new((rb_iseq_t *)iseq) : Qnil;
}

VALUE
rb_debug_inspector_frame_depth(const rb_debug_inspector_t *dc, long index)
{
    VALUE frame = frame_get(dc, index);
    return rb_ary_entry(frame, CALLER_BINDING_DEPTH);
}

VALUE
rb_debug_inspector_current_depth(void)
{
    rb_execution_context_t *ec = GET_EC();
    return INT2FIX(frame_depth(ec, ec->cfp));
}

VALUE
rb_debug_inspector_backtrace_locations(const rb_debug_inspector_t *dc)
{
    return dc->backtrace;
}
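
/*
 * Illustrative sketch (not part of the original file): a debugger
 * extension normally drives this API through rb_debug_inspector_open()
 * with a callback.  `collect_frame_bindings_example` is a hypothetical
 * name; the callback's return value becomes the return value of
 * rb_debug_inspector_open().
 */
static VALUE
collect_frame_bindings_example(const rb_debug_inspector_t *dc, void *data)
{
    /* gather the Binding of every captured frame into an Array */
    VALUE bindings = rb_ary_new();
    long i, size = RARRAY_LEN(rb_debug_inspector_backtrace_locations(dc));

    for (i = 0; i < size; i++) {
        rb_ary_push(bindings, rb_debug_inspector_frame_binding_get(dc, i));
    }
    return bindings;
}
/* usage: VALUE bindings = rb_debug_inspector_open(collect_frame_bindings_example, NULL); */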

int
rb_profile_frames(int start, int limit, VALUE *buff, int *lines)
{
    int i;
    const rb_execution_context_t *ec = GET_EC();
    const rb_control_frame_t *cfp = ec->cfp, *end_cfp = RUBY_VM_END_CONTROL_FRAME(ec);
    const rb_callable_method_entry_t *cme;

    // If this function is called inside a thread after thread creation, but
    // before the CFP has been created, just return 0.  This can happen when
    // sampling via signals.  Threads can be interrupted randomly by the
    // signal, including during the time after the thread has been created, but
    // before the CFP has been allocated
    if (!cfp) {
        return 0;
    }

    // Skip dummy frame; see `rb_ec_partial_backtrace_object` for details
    end_cfp = RUBY_VM_NEXT_CONTROL_FRAME(end_cfp);

    for (i=0; i<limit && cfp != end_cfp;) {
        if (VM_FRAME_RUBYFRAME_P(cfp) && cfp->pc != 0) {
            if (start > 0) {
                start--;
                continue;
            }

            /* record frame info */
            cme = rb_vm_frame_method_entry(cfp);
            if (cme && cme->def->type == VM_METHOD_TYPE_ISEQ) {
                buff[i] = (VALUE)cme;
            }
            else {
                buff[i] = (VALUE)cfp->iseq;
            }

            if (lines) lines[i] = calc_lineno(cfp->iseq, cfp->pc);

            i++;
        }
        else {
            cme = rb_vm_frame_method_entry(cfp);
            if (cme && cme->def->type == VM_METHOD_TYPE_CFUNC) {
                buff[i] = (VALUE)cme;
                if (lines) lines[i] = 0;
                i++;
            }
        }
        cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
    }

    return i;
}

static const rb_iseq_t *
frame2iseq(VALUE frame)
{
    if (NIL_P(frame)) return NULL;

    if (RB_TYPE_P(frame, T_IMEMO)) {
        switch (imemo_type(frame)) {
          case imemo_iseq:
            return (const rb_iseq_t *)frame;
          case imemo_ment:
            {
                const rb_callable_method_entry_t *cme = (rb_callable_method_entry_t *)frame;
                switch (cme->def->type) {
                  case VM_METHOD_TYPE_ISEQ:
                    return cme->def->body.iseq.iseqptr;
                  default:
                    return NULL;
                }
            }
          default:
            break;
        }
    }
    rb_bug("frame2iseq: unreachable");
}

VALUE
rb_profile_frame_path(VALUE frame)
{
    const rb_iseq_t *iseq = frame2iseq(frame);
    return iseq ? rb_iseq_path(iseq) : Qnil;
}

static const rb_callable_method_entry_t *
cframe(VALUE frame)
{
    if (NIL_P(frame)) return NULL;

    if (RB_TYPE_P(frame, T_IMEMO)) {
        switch (imemo_type(frame)) {
          case imemo_ment:
            {
                const rb_callable_method_entry_t *cme = (rb_callable_method_entry_t *)frame;
                switch (cme->def->type) {
                  case VM_METHOD_TYPE_CFUNC:
                    return cme;
                  default:
                    return NULL;
                }
            }
          default:
            return NULL;
        }
    }

    return NULL;
}

VALUE
rb_profile_frame_absolute_path(VALUE frame)
{
    if (cframe(frame)) {
        static VALUE cfunc_str = Qfalse;
        if (!cfunc_str) {
            cfunc_str = rb_str_new_literal("<cfunc>");
            rb_gc_register_mark_object(cfunc_str);
        }
        return cfunc_str;
    }
    const rb_iseq_t *iseq = frame2iseq(frame);
    return iseq ? rb_iseq_realpath(iseq) : Qnil;
}

VALUE
rb_profile_frame_label(VALUE frame)
{
    const rb_iseq_t *iseq = frame2iseq(frame);
    return iseq ? rb_iseq_label(iseq) : Qnil;
}

VALUE
rb_profile_frame_base_label(VALUE frame)
{
    const rb_iseq_t *iseq = frame2iseq(frame);
    return iseq ? rb_iseq_base_label(iseq) : Qnil;
}

VALUE
rb_profile_frame_first_lineno(VALUE frame)
{
    const rb_iseq_t *iseq = frame2iseq(frame);
    return iseq ? rb_iseq_first_lineno(iseq) : Qnil;
}

static VALUE
frame2klass(VALUE frame)
{
    if (NIL_P(frame)) return Qnil;

    if (RB_TYPE_P(frame, T_IMEMO)) {
        const rb_callable_method_entry_t *cme = (rb_callable_method_entry_t *)frame;

        if (imemo_type(frame) == imemo_ment) {
            return cme->defined_class;
        }
    }
    return Qnil;
}

VALUE
rb_profile_frame_classpath(VALUE frame)
{
    VALUE klass = frame2klass(frame);

    if (klass && !NIL_P(klass)) {
        if (RB_TYPE_P(klass, T_ICLASS)) {
            klass = RBASIC(klass)->klass;
        }
        else if (FL_TEST(klass, FL_SINGLETON)) {
            klass = RCLASS_ATTACHED_OBJECT(klass);
            if (!RB_TYPE_P(klass, T_CLASS) && !RB_TYPE_P(klass, T_MODULE))
                return rb_sprintf("#<%s:%p>", rb_class2name(rb_obj_class(klass)), (void*)klass);
        }
        return rb_class_path(klass);
    }
    else {
        return Qnil;
    }
}

VALUE
rb_profile_frame_singleton_method_p(VALUE frame)
{
    VALUE klass = frame2klass(frame);

    return RBOOL(klass && !NIL_P(klass) && FL_TEST(klass, FL_SINGLETON));
}

VALUE
rb_profile_frame_method_name(VALUE frame)
{
    const rb_callable_method_entry_t *cme = cframe(frame);
    if (cme) {
        ID mid = cme->def->original_id;
        return id2str(mid);
    }
    const rb_iseq_t *iseq = frame2iseq(frame);
    return iseq ? rb_iseq_method_name(iseq) : Qnil;
}

static VALUE
qualified_method_name(VALUE frame, VALUE method_name)
{
    if (method_name != Qnil) {
        VALUE classpath = rb_profile_frame_classpath(frame);
        VALUE singleton_p = rb_profile_frame_singleton_method_p(frame);

        if (classpath != Qnil) {
            return rb_sprintf("%"PRIsVALUE"%s%"PRIsVALUE,
                              classpath, singleton_p == Qtrue ? "." : "#", method_name);
        }
        else {
            return method_name;
        }
    }
    else {
        return Qnil;
    }
}

VALUE
rb_profile_frame_qualified_method_name(VALUE frame)
{
    VALUE method_name = rb_profile_frame_method_name(frame);

    return qualified_method_name(frame, method_name);
}

VALUE
rb_profile_frame_full_label(VALUE frame)
{
    const rb_callable_method_entry_t *cme = cframe(frame);
    if (cme) {
        ID mid = cme->def->original_id;
        VALUE method_name = id2str(mid);
        return qualified_method_name(frame, method_name);
    }

    VALUE label = rb_profile_frame_label(frame);
    VALUE base_label = rb_profile_frame_base_label(frame);
    VALUE qualified_method_name = rb_profile_frame_qualified_method_name(frame);

    if (NIL_P(qualified_method_name) || base_label == qualified_method_name) {
        return label;
    }
    else {
        long label_length = RSTRING_LEN(label);
        long base_label_length = RSTRING_LEN(base_label);
        int prefix_len = rb_long2int(label_length - base_label_length);

        return rb_sprintf("%.*s%"PRIsVALUE, prefix_len, RSTRING_PTR(label), qualified_method_name);
    }
}
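
/*
 * Illustrative sketch (not part of the original file): how a sampling
 * profiler might combine rb_profile_frames() with the rb_profile_frame_*
 * accessors above.  `profile_dump_current_stack` and the fixed buffer
 * size are hypothetical.
 */
#define EXAMPLE_MAX_FRAMES 128

static void
profile_dump_current_stack(FILE *out)
{
    VALUE frames[EXAMPLE_MAX_FRAMES];
    int lines[EXAMPLE_MAX_FRAMES];
    int i, n = rb_profile_frames(0, EXAMPLE_MAX_FRAMES, frames, lines);

    for (i = 0; i < n; i++) {
        VALUE name = rb_profile_frame_full_label(frames[i]);
        fprintf(out, "%s:%d\n",
                NIL_P(name) ? "(unknown)" : RSTRING_PTR(name), lines[i]);
    }
}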