* marshal.c: add marshal readahead. marshalized Array, Hash and Struct

have size at least number of its elements, marshal readahead will
  read the certain readable length and buffer when it needs more bytes.
  marshal readahead prevents many calls to IO#getbyte and IO#read,
  then it enables performace improvement.
  [ruby-dev:45637] [Feature #6440]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37772 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
glass 2012-11-20 15:17:15 +00:00
Родитель 2fff359537
Коммит 132eb25a79
2 изменённых файлов: 141 добавлений и 16 удалений

Просмотреть файл

@ -1,3 +1,12 @@
Tue Nov 20 23:28:26 2012 Masaki Matsushita <glass.saga@gmail.com>
* marshal.c: add marshal readahead. marshalized Array, Hash and Struct
have size at least number of its elements, marshal readahead will
read the certain readable length and buffer when it needs more bytes.
marshal readahead prevents many calls to IO#getbyte and IO#read,
then it enables performace improvement.
[ruby-dev:45637] [Feature #6440]
Tue Nov 20 22:35:02 2012 NARUSE, Yui <naruse@ruby-lang.org>
* Makefile.in (.d.h): replace char * to const char * because somehow

148
marshal.c
Просмотреть файл

@ -969,6 +969,9 @@ marshal_dump(int argc, VALUE *argv)
struct load_arg {
VALUE src;
char *buf;
long buflen;
long readable;
long offset;
st_table *symbols;
st_table *data;
@ -1022,6 +1025,13 @@ static VALUE r_object(struct load_arg *arg);
static ID r_symbol(struct load_arg *arg);
static VALUE path2class(VALUE path);
NORETURN(static void too_short(void));
static void
too_short(void)
{
rb_raise(rb_eArgError, "marshal data too short");
}
static st_index_t
r_prepare(struct load_arg *arg)
{
@ -1031,6 +1041,27 @@ r_prepare(struct load_arg *arg)
return idx;
}
static unsigned char
r_byte1_buffered(struct load_arg *arg)
{
if (arg->buflen == 0) {
long readable = arg->readable < BUFSIZ ? arg->readable : BUFSIZ;
VALUE str, n = LONG2NUM(readable);
str = rb_funcall2(arg->src, s_read, 1, &n);
check_load_arg(arg, s_read);
if (NIL_P(str)) too_short();
StringValue(str);
arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
memcpy(arg->buf, RSTRING_PTR(str), RSTRING_LEN(str));
arg->offset = 0;
arg->buflen = RSTRING_LEN(str);
}
arg->buflen--;
return arg->buf[arg->offset++];
}
static int
r_byte(struct load_arg *arg)
{
@ -1041,15 +1072,19 @@ r_byte(struct load_arg *arg)
c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++];
}
else {
rb_raise(rb_eArgError, "marshal data too short");
too_short();
}
}
else {
VALUE src = arg->src;
VALUE v = rb_funcall2(src, s_getbyte, 0, 0);
check_load_arg(arg, s_getbyte);
if (NIL_P(v)) rb_eof_error();
c = (unsigned char)NUM2CHR(v);
if (arg->readable >0 || arg->buflen > 0) {
c = r_byte1_buffered(arg);
}
else {
VALUE v = rb_funcall2(arg->src, s_getbyte, 0, 0);
check_load_arg(arg, s_getbyte);
if (NIL_P(v)) rb_eof_error();
c = (unsigned char)NUM2CHR(v);
}
}
return c;
}
@ -1102,6 +1137,68 @@ r_long(struct load_arg *arg)
return x;
}
static VALUE
r_bytes1(long len, struct load_arg *arg)
{
VALUE str, n = LONG2NUM(len);
str = rb_funcall2(arg->src, s_read, 1, &n);
check_load_arg(arg, s_read);
if (NIL_P(str)) too_short();
StringValue(str);
if (RSTRING_LEN(str) != len) too_short();
arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
return str;
}
static VALUE
r_bytes1_buffered(long len, struct load_arg *arg)
{
VALUE str;
if (len <= arg->buflen) {
str = rb_str_new(arg->buf+arg->offset, len);
arg->offset += len;
arg->buflen -= len;
}
else {
long buflen = arg->buflen;
long readable = arg->readable + 1;
long tmp_len, read_len, need_len = len - buflen;
VALUE tmp, n;
readable = readable < BUFSIZ ? readable : BUFSIZ;
read_len = need_len > readable ? need_len : readable;
n = LONG2NUM(read_len);
tmp = rb_funcall2(arg->src, s_read, 1, &n);
check_load_arg(arg, s_read);
if (NIL_P(tmp)) too_short();
StringValue(tmp);
tmp_len = RSTRING_LEN(tmp);
if (tmp_len < need_len) too_short();
arg->infection |= (int)FL_TEST(tmp, MARSHAL_INFECTION);
str = rb_str_new(arg->buf+arg->offset, buflen);
rb_str_cat(str, RSTRING_PTR(tmp), need_len);
if (tmp_len > need_len) {
buflen = tmp_len - need_len;
memcpy(arg->buf, RSTRING_PTR(tmp)+need_len, buflen);
arg->buflen = buflen;
}
else {
arg->buflen = 0;
}
arg->offset = 0;
}
return str;
}
#define r_bytes(arg) r_bytes0(r_long(arg), (arg))
static VALUE
@ -1116,19 +1213,16 @@ r_bytes0(long len, struct load_arg *arg)
arg->offset += len;
}
else {
too_short:
rb_raise(rb_eArgError, "marshal data too short");
too_short();
}
}
else {
VALUE src = arg->src;
VALUE n = LONG2NUM(len);
str = rb_funcall2(src, s_read, 1, &n);
check_load_arg(arg, s_read);
if (NIL_P(str)) goto too_short;
StringValue(str);
if (RSTRING_LEN(str) != len) goto too_short;
arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
if (arg->readable > 0 || arg->buflen > 0) {
str = r_bytes1_buffered(len, arg);
}
else {
str = r_bytes1(len, arg);
}
}
return str;
}
@ -1545,10 +1639,13 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
v = rb_ary_new2(len);
v = r_entry(v, arg);
arg->readable += len - 1;
while (len--) {
rb_ary_push(v, r_object(arg));
arg->readable--;
}
v = r_leave(v, arg);
arg->readable++;
}
break;
@ -1559,11 +1656,14 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
v = rb_hash_new();
v = r_entry(v, arg);
arg->readable += (len - 1) * 2;
while (len--) {
VALUE key = r_object(arg);
VALUE value = r_object(arg);
rb_hash_aset(v, key, value);
arg->readable -= 2;
}
arg->readable += 2;
if (type == TYPE_HASH_DEF) {
RHASH_IFNONE(v) = r_object(arg);
}
@ -1590,6 +1690,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
rb_class2name(klass));
}
arg->readable += (len - 1) * 2;
v = r_entry0(v, idx, arg);
values = rb_ary_new2(len);
for (i=0; i<len; i++) {
@ -1602,9 +1703,11 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
rb_id2name(SYM2ID(RARRAY_PTR(mem)[i])));
}
rb_ary_push(values, r_object(arg));
arg->readable -= 2;
}
rb_struct_initialize(v, values);
v = r_leave(v, arg);
arg->readable += 2;
}
break;
@ -1751,6 +1854,13 @@ r_object(struct load_arg *arg)
static void
clear_load_arg(struct load_arg *arg)
{
if (arg->buf) {
xfree(arg->buf);
arg->buf = 0;
}
arg->buflen = 0;
arg->offset = 0;
arg->readable = 0;
if (!arg->symbols) return;
st_free_table(arg->symbols);
arg->symbols = 0;
@ -1803,6 +1913,12 @@ marshal_load(int argc, VALUE *argv)
arg->data = st_init_numtable();
arg->compat_tbl = st_init_numtable();
arg->proc = 0;
arg->readable = 0;
if (NIL_P(v))
arg->buf = xmalloc(BUFSIZ);
else
arg->buf = 0;
major = r_byte(arg);
minor = r_byte(arg);