Optimize Marshal dump/load for large (> 31-bit) FIXNUM (#6229)

* Optimize Marshal dump of large fixnum

Marshal's FIXNUM type only supports 31-bit fixnums, so on 64-bit
platforms the 63-bit fixnums need to be represented in Marshal's
BIGNUM.

Previously this was done by converting to a bugnum and serializing the
bignum object.

This commit avoids allocating the intermediate bignum object, instead
outputting the T_FIXNUM directly to a Marshal bignum. This maintains the
same representation as the previous implementation, including not using
LINKs for these large fixnums (an artifact of the previous
implementation always allocating a new BIGNUM).

This commit also avoids unnecessary st_lookups on immediate values,
which we know will not be in that table.

* Fastpath for loading FIXNUM from Marshal bignum

* Run update-deps
This commit is contained in:
John Hawthorn 2022-08-15 16:14:12 -07:00 коммит произвёл GitHub
Родитель e49db0f760
Коммит 0608a9a086
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 148 добавлений и 25 удалений

Просмотреть файл

@ -0,0 +1,22 @@
prelude: |
smallint_array = 1000.times.map { |x| x }
bigint32_array = 1000.times.map { |x| x + 2**32 }
bigint64_array = 1000.times.map { |x| x + 2**64 }
smallint_dump = Marshal.dump(smallint_array)
bigint32_dump = Marshal.dump(bigint32_array)
bigint64_dump = Marshal.dump(bigint64_array)
benchmark:
marshal_dump_integer_small: |
Marshal.dump(smallint_array)
marshal_dump_integer_over_32_bit: |
Marshal.dump(bigint32_array)
marshal_dump_integer_over_64_bit: |
Marshal.dump(bigint64_array)
marshal_load_integer_small: |
Marshal.load(smallint_dump)
marshal_load_integer_over_32_bit: |
Marshal.load(bigint32_dump)
marshal_load_integer_over_64_bit: |
Marshal.load(bigint64_dump)
loop_count: 4000

Просмотреть файл

@ -8715,12 +8715,15 @@ main.$(OBJEXT): {$(VPATH)}vm_debug.h
marshal.$(OBJEXT): $(hdrdir)/ruby/ruby.h
marshal.$(OBJEXT): $(top_srcdir)/internal/array.h
marshal.$(OBJEXT): $(top_srcdir)/internal/bignum.h
marshal.$(OBJEXT): $(top_srcdir)/internal/bits.h
marshal.$(OBJEXT): $(top_srcdir)/internal/class.h
marshal.$(OBJEXT): $(top_srcdir)/internal/compilers.h
marshal.$(OBJEXT): $(top_srcdir)/internal/encoding.h
marshal.$(OBJEXT): $(top_srcdir)/internal/error.h
marshal.$(OBJEXT): $(top_srcdir)/internal/fixnum.h
marshal.$(OBJEXT): $(top_srcdir)/internal/gc.h
marshal.$(OBJEXT): $(top_srcdir)/internal/hash.h
marshal.$(OBJEXT): $(top_srcdir)/internal/numeric.h
marshal.$(OBJEXT): $(top_srcdir)/internal/object.h
marshal.$(OBJEXT): $(top_srcdir)/internal/serial.h
marshal.$(OBJEXT): $(top_srcdir)/internal/static_assert.h

126
marshal.c
Просмотреть файл

@ -28,6 +28,7 @@
#include "internal/encoding.h"
#include "internal/error.h"
#include "internal/hash.h"
#include "internal/numeric.h"
#include "internal/object.h"
#include "internal/struct.h"
#include "internal/symbol.h"
@ -171,6 +172,7 @@ struct dump_arg {
st_table *data;
st_table *compat_tbl;
st_table *encodings;
unsigned long num_entries;
};
struct dump_call_arg {
@ -754,6 +756,60 @@ w_objivar(VALUE obj, struct dump_call_arg *arg)
w_ivar_each(obj, num, arg);
}
// Optimized dump for fixnum larger than 31-bits
static void
w_bigfixnum(VALUE obj, struct dump_arg *arg)
{
RUBY_ASSERT(FIXNUM_P(obj));
w_byte(TYPE_BIGNUM, arg);
#if SIZEOF_LONG == SIZEOF_VALUE
long num, slen_num;
num = FIX2LONG(obj);
#else
long long num, slen_num;
num = NUM2LL(obj);
#endif
char sign = num < 0 ? '-' : '+';
w_byte(sign, arg);
// Guaranteed not to overflow, as FIXNUM is 1-bit less than long
if (num < 0) num = -num;
// calculate the size in shorts
int slen = 0;
{
slen_num = num;
while (slen_num) {
slen++;
slen_num = SHORTDN(slen_num);
}
}
RUBY_ASSERT(slen > 0 && slen <= SIZEOF_LONG / 2);
w_long((long)slen, arg);
for (int i = 0; i < slen; i++) {
w_short(num & SHORTMASK, arg);
num = SHORTDN(num);
}
// We aren't adding this object to the link table, but we need to increment
// the index.
arg->num_entries++;
RUBY_ASSERT(num == 0);
}
static void
w_remember(VALUE obj, struct dump_arg *arg)
{
st_add_direct(arg->data, obj, arg->num_entries++);
}
static void
w_object(VALUE obj, struct dump_arg *arg, int limit)
{
@ -767,17 +823,6 @@ w_object(VALUE obj, struct dump_arg *arg, int limit)
rb_raise(rb_eArgError, "exceed depth limit");
}
if (limit > 0) limit--;
c_arg.limit = limit;
c_arg.arg = arg;
c_arg.obj = obj;
if (st_lookup(arg->data, obj, &num)) {
w_byte(TYPE_LINK, arg);
w_long((long)num, arg);
return;
}
if (NIL_P(obj)) {
w_byte(TYPE_NIL, arg);
}
@ -797,19 +842,32 @@ w_object(VALUE obj, struct dump_arg *arg, int limit)
w_long(FIX2LONG(obj), arg);
}
else {
w_object(rb_int2big(FIX2LONG(obj)), arg, limit);
w_bigfixnum(obj, arg);
}
#endif
}
else if (SYMBOL_P(obj)) {
w_symbol(obj, arg);
}
else if (FLONUM_P(obj)) {
st_add_direct(arg->data, obj, arg->data->num_entries);
w_byte(TYPE_FLOAT, arg);
w_float(RFLOAT_VALUE(obj), arg);
}
else {
if (st_lookup(arg->data, obj, &num)) {
w_byte(TYPE_LINK, arg);
w_long((long)num, arg);
return;
}
if (limit > 0) limit--;
c_arg.limit = limit;
c_arg.arg = arg;
c_arg.obj = obj;
if (FLONUM_P(obj)) {
w_remember(obj, arg);
w_byte(TYPE_FLOAT, arg);
w_float(RFLOAT_VALUE(obj), arg);
return;
}
VALUE v;
if (!RBASIC_CLASS(obj)) {
@ -818,7 +876,7 @@ w_object(VALUE obj, struct dump_arg *arg, int limit)
}
if (rb_obj_respond_to(obj, s_mdump, TRUE)) {
st_add_direct(arg->data, obj, arg->data->num_entries);
w_remember(obj, arg);
v = dump_funcall(arg, obj, s_mdump, 0, 0);
w_class(TYPE_USRMARSHAL, obj, arg, FALSE);
@ -848,11 +906,11 @@ w_object(VALUE obj, struct dump_arg *arg, int limit)
if (hasiv) {
w_ivar(hasiv, ivobj, encname, &c_arg);
}
st_add_direct(arg->data, obj, arg->data->num_entries);
w_remember(obj, arg);
return;
}
st_add_direct(arg->data, obj, arg->data->num_entries);
w_remember(obj, arg);
hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj);
{
@ -1044,6 +1102,7 @@ clear_dump_arg(struct dump_arg *arg)
arg->symbols = 0;
st_free_table(arg->data);
arg->data = 0;
arg->num_entries = 0;
if (arg->compat_tbl) {
st_free_table(arg->compat_tbl);
arg->compat_tbl = 0;
@ -1126,6 +1185,7 @@ rb_marshal_dump_limited(VALUE obj, VALUE port, int limit)
arg->dest = 0;
arg->symbols = st_init_numtable();
arg->data = rb_init_identtable();
arg->num_entries = 0;
arg->compat_tbl = 0;
arg->encodings = 0;
arg->str = rb_str_buf_new(0);
@ -1881,10 +1941,28 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
sign = r_byte(arg);
len = r_long(arg);
data = r_bytes0(len * 2, arg);
v = rb_integer_unpack(RSTRING_PTR(data), len, 2, 0,
INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0));
rb_str_resize(data, 0L);
if (SIZEOF_VALUE >= 8 && len <= 4) {
// Representable within uintptr, likely FIXNUM
VALUE num = 0;
for (int i = 0; i < len; i++) {
num |= (VALUE)r_byte(arg) << (i * 16);
num |= (VALUE)r_byte(arg) << (i * 16 + 8);
}
#if SIZEOF_VALUE == SIZEOF_LONG
v = ULONG2NUM(num);
#else
v = ULL2NUM(num);
#endif
if (sign == '-') {
v = rb_int_uminus(v);
}
} else {
data = r_bytes0(len * 2, arg);
v = rb_integer_unpack(RSTRING_PTR(data), len, 2, 0,
INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0));
rb_str_resize(data, 0L);
}
v = r_entry(v, arg);
v = r_leave(v, arg, false);
}

Просмотреть файл

@ -33,7 +33,7 @@ class TestMarshal < Test::Unit::TestCase
end
def test_marshal
a = [1, 2, 3, [4,5,"foo"], {1=>"bar"}, 2.5, fact(30)]
a = [1, 2, 3, 2**32, 2**64, [4,5,"foo"], {1=>"bar"}, 2.5, fact(30)]
assert_equal a, Marshal.load(Marshal.dump(a))
[[1,2,3,4], [81, 2, 118, 3146]].each { |w,x,y,z|
@ -47,6 +47,26 @@ class TestMarshal < Test::Unit::TestCase
}
end
def test_marshal_integers
a = []
[-2, -1, 0, 1, 2].each do |i|
0.upto(65).map do |exp|
a << 2**exp + i
end
end
assert_equal a, Marshal.load(Marshal.dump(a))
a = [2**32, []]*2
assert_equal a, Marshal.load(Marshal.dump(a))
a = [2**32, 2**32, []]*2
assert_equal a, Marshal.load(Marshal.dump(a))
end
def test_marshal_small_bignum_backref
assert_equal [2**32, 2**32], Marshal.load("\x04\b[\al+\b\x00\x00\x00\x00\x01\x00@\x06")
end
StrClone = String.clone
def test_marshal_cloned_class
assert_instance_of(StrClone, Marshal.load(Marshal.dump(StrClone.new("abc"))))