move ADD_PC around to optimize PC manipulations

This commit introduces a new attribute handles_frame and, if that is
_not_ the case, places ADD_PC right after INC_SP.  This improves
locality of PC manipulations to prevent unnecessary register spill-
outs. As a result, it reduces the size of vm_exec_core from 32,688
bytes to 32,384 bytes on my machine.

Speedup is very faint, but certain.

-----------------------------------------------------------
benchmark results:
minimum results in each 3 measurements.
Execution time (sec)
name    before  after
so_ackermann     0.476  0.464
so_array         0.742  0.728
so_binary_trees  5.493  5.466
so_concatenate   3.619  3.395
so_count_words   0.190  0.184
so_exception     0.249  0.239
so_fannkuch      0.994  0.953
so_fasta         1.369  1.374
so_k_nucleotide  1.111  1.111
so_lists         0.470  0.481
so_mandelbrot    2.059  2.050
so_matrix        0.466  0.465
so_meteor_contest        2.712  2.781
so_nbody         1.154  1.204
so_nested_loop   0.852  0.846
so_nsieve        1.636  1.623
so_nsieve_bits   2.073  2.039
so_object        0.616  0.584
so_partial_sums  1.464  1.481
so_pidigits      1.075  1.082
so_random        0.321  0.317
so_reverse_complement    0.555  0.558
so_sieve         0.495  0.490
so_spectralnorm  1.634  1.627

Speedup ratio: compare with the result of `before' (greater is better)
name    after
so_ackermann    1.025
so_array        1.019
so_binary_trees 1.005
so_concatenate  1.066
so_count_words  1.030
so_exception    1.040
so_fannkuch     1.043
so_fasta        0.996
so_k_nucleotide 1.000
so_lists        0.978
so_mandelbrot   1.004
so_matrix       1.001
so_meteor_contest       0.975
so_nbody        0.959
so_nested_loop  1.007
so_nsieve       1.008
so_nsieve_bits  1.017
so_object       1.056
so_partial_sums 0.989
so_pidigits     0.994
so_random       1.014
so_reverse_complement   0.996
so_sieve        1.010
so_spectralnorm 1.004

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62051 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
shyouhei 2018-01-26 06:30:58 +00:00
Родитель b56f6a6b8c
Коммит 3234245ae3
3 изменённых файлов: 19 добавлений и 5 удалений

Просмотреть файл

@ -680,6 +680,7 @@ defineclass
(ID id, ISEQ class_iseq, rb_num_t flags)
(VALUE cbase, VALUE super)
(VALUE val)
// attr bool handles_frame = true;
{
VALUE klass = vm_find_or_create_class_by_id(id, flags, cbase, super);
@ -829,6 +830,7 @@ leave
()
(VALUE val)
(VALUE val)
// attr bool handles_frame = true;
{
if (OPT_CHECKED_RUN) {
const VALUE *const bp = vm_base_ptr(reg_cfp);
@ -1380,6 +1382,7 @@ opt_call_c_function
(rb_insn_func_t funcptr)
()
()
// attr bool handles_frame = true;
{
reg_cfp = (funcptr)(ec, reg_cfp);

Просмотреть файл

@ -33,6 +33,7 @@ class RubyVM::BareInstructions
h[a.key] = a
}
@attrs_orig = @attrs.dup
predefine_attributes
end
def pretty_name
@ -109,8 +110,8 @@ class RubyVM::BareInstructions
}.join
end
def pushs_frame?
opes.any? {|o| /CALL_INFO/ =~ o[:type] }
def handles_frame?
/\b(false|0)\b/ !~ @attrs['handles_frame'].expr.expr
end
def inspect
@ -126,7 +127,13 @@ class RubyVM::BareInstructions
type: t, \
location: [], \
expr: v.to_s + ';'
return @attrs[k] = attr
return @attrs[k] ||= attr
end
def predefine_attributes
generate_attribute 'sp_inc', 'rb_snum_t', rets.size - pops.size
generate_attribute 'handles_frame', 'bool', \
opes.any? {|o| /CALL_INFO/ =~ o[:type] }
end
def typesplit a

Просмотреть файл

@ -28,8 +28,9 @@ INSN_ENTRY(<%= insn.name %>)
<%= pop[:name] %> = <%= insn.cast_from_VALUE pop, "TOPN(#{i})"%>;
% end
DEBUG_ENTER_INSN(<%=cstr insn.name %>);
% if insn.handles_frame?
ADD_PC(<%= insn.width %>);
PREFETCH(GET_PC());
% end
% unless insn.pops.empty?
POPN(<%= insn.pops.size %>);
% end
@ -44,7 +45,10 @@ INSN_ENTRY(<%= insn.name %>)
PUSH(<%= insn.cast_to_VALUE ret %>);
% end
% end
%
% unless insn.handles_frame?
ADD_PC(<%= insn.width %>);
PREFETCH(GET_PC());
% end
END_INSN(<%= insn.name %>);
}
#undef NAME_OF_CURRENT_INSN