Fix memory leak when parsing invalid hash symbol

For example:

    10.times do
      100_000.times do
        eval('{"\xC3": 1}')
      rescue EncodingError
      end

      puts `ps -o rss= -p #{$$}`
    end

Before:

    32032
    48464
    66112
    84192
    100592
    117520
    134096
    150656
    167168
    183760

After:

    17120
    17120
    17120
    17120
    18560
    18560
    18560
    18560
    18560
    18560
This commit is contained in:
Peter Zhu 2024-02-08 10:43:50 -05:00
Родитель e4272fd292
Коммит a71d1ed838
5 изменённых файлов: 67 добавлений и 5 удалений

33
parse.y
Просмотреть файл

@ -15549,6 +15549,23 @@ nd_value(struct parser_params *p, NODE *node)
}
}
/*
 * Bail out of duplicate-key checking when a hash-literal key is a symbol
 * node whose bytes are not valid in its encoding.
 *
 * Nothing happens unless `key` is a T_NODE of type NODE_SYM whose string has
 * an ASCII-compatible encoding and a broken coderange.  In that case
 * `literal_keys` is released with st_free_table() *before* the call that is
 * expected to raise, so the table is not left allocated when control leaves
 * this function non-locally.
 *
 * NOTE(review): `p` is not named in the body, but the rb_* helpers appear to
 * be resolved through `p->config` in some build configurations -- keep the
 * parameter in scope.
 */
static void
warn_duplicate_keys_check_key(struct parser_params *p, st_data_t key, st_table *literal_keys)
{
    rb_parser_string_t *sym_str;
    struct RString fake;
    VALUE fake_val;

    /* Only symbol nodes are of interest. */
    if (OBJ_BUILTIN_TYPE(key) != T_NODE) return;
    if (nd_type(key) != NODE_SYM) return;

    sym_str = RNODE_SYM(key)->string;
    fake_val = rb_setup_fake_str(&fake, sym_str->ptr, sym_str->len, sym_str->enc);

    if (!rb_enc_asciicompat(sym_str->enc)) return;
    if (rb_enc_str_coderange(fake_val) != ENC_CODERANGE_BROKEN) return;

    /* Free the table first: the next call is not expected to return. */
    st_free_table(literal_keys);

    /* With an ASCII-compatible encoding and a broken coderange,
     * sym_check_asciionly should raise an EncodingError here. */
    rb_check_id_cstr(sym_str->ptr, sym_str->len, sym_str->enc);
    rb_bug("unreachable");
}
static void
warn_duplicate_keys(struct parser_params *p, NODE *hash)
{
@ -15567,12 +15584,18 @@ warn_duplicate_keys(struct parser_params *p, NODE *hash)
if (!head) {
key = (st_data_t)value;
}
else if (nd_type_st_key_enable_p(head) &&
st_delete(literal_keys, (key = (st_data_t)nd_st_key(p, head), &key), &data)) {
rb_compile_warn(p->ruby_sourcefile, nd_line((NODE *)data),
"key %+"PRIsVALUE" is duplicated and overwritten on line %d",
nd_value(p, head), nd_line(head));
else if (nd_type_st_key_enable_p(head)) {
warn_duplicate_keys_check_key(p, (st_data_t)head, literal_keys);
key = (st_data_t)nd_st_key(p, head);
if (st_delete(literal_keys, &key, &data)) {
rb_compile_warn(p->ruby_sourcefile, nd_line((NODE *)data),
"key %+"PRIsVALUE" is duplicated and overwritten on line %d",
nd_value(p, head), nd_line(head));
}
}
warn_duplicate_keys_check_key(p, key, literal_keys);
st_insert(literal_keys, (st_data_t)key, (st_data_t)hash);
hash = next;
}

Просмотреть файл

@ -292,6 +292,18 @@ enc_symname_type(const char *name, long len, void *enc, unsigned int allowed_att
return rb_enc_symname_type(name, len, (rb_encoding *)enc, allowed_attrset);
}
static ID
check_id_cstr(const char *ptr, long len, void *enc)
{
return rb_check_id_cstr(ptr, len, (rb_encoding *)enc);
}
static VALUE
setup_fake_str(struct RString *fake_str, const char *name, long len, void *enc)
{
return rb_setup_fake_str(fake_str, name, len, (rb_encoding *)enc);
}
typedef struct {
struct parser_params *parser;
rb_encoding *enc;
@ -551,6 +563,7 @@ static const rb_parser_config_t rb_global_parser_config = {
.id2str = rb_id2str,
.id2sym = rb_id2sym,
.sym2id = rb_sym2id,
.check_id_cstr = check_id_cstr,
.str_catf = rb_str_catf,
.str_cat_cstr = rb_str_cat_cstr,
@ -565,10 +578,12 @@ static const rb_parser_config_t rb_global_parser_config = {
.str_resize = rb_str_resize,
.str_new = rb_str_new,
.str_new_cstr = rb_str_new_cstr,
.setup_fake_str = setup_fake_str,
.fstring = rb_fstring,
.is_ascii_string = is_ascii_string2,
.enc_str_new = enc_str_new,
.enc_str_buf_cat = enc_str_buf_cat,
.enc_str_coderange = rb_enc_str_coderange,
.str_buf_append = rb_str_buf_append,
.str_vcatf = rb_str_vcatf,
.string_value_cstr = rb_string_value_cstr,
@ -628,6 +643,7 @@ static const rb_parser_config_t rb_global_parser_config = {
.encoding_set = encoding_set,
.encoding_is_ascii8bit = encoding_is_ascii8bit,
.usascii_encoding = usascii_encoding,
.enc_coderange_broken = ENC_CODERANGE_BROKEN,
.ractor_make_shareable = rb_ractor_make_shareable,

Просмотреть файл

@ -1277,6 +1277,7 @@ typedef struct rb_parser_config_struct {
VALUE (*id2str)(ID id);
VALUE (*id2sym)(ID x);
ID (*sym2id)(VALUE sym);
ID (*check_id_cstr)(const char *ptr, long len, rb_encoding *enc);
/* String */
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
@ -1293,10 +1294,12 @@ typedef struct rb_parser_config_struct {
VALUE (*str_resize)(VALUE str, long len);
VALUE (*str_new)(const char *ptr, long len);
VALUE (*str_new_cstr)(const char *ptr);
VALUE (*setup_fake_str)(struct RString *fake_str, const char *name, long len, rb_encoding *enc);
VALUE (*fstring)(VALUE);
int (*is_ascii_string)(VALUE str);
VALUE (*enc_str_new)(const char *ptr, long len, rb_encoding *enc);
VALUE (*enc_str_buf_cat)(VALUE str, const char *ptr, long len, rb_encoding *enc);
int (*enc_str_coderange)(VALUE str);
VALUE (*str_buf_append)(VALUE str, VALUE str2);
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 0)
VALUE (*str_vcatf)(VALUE str, const char *fmt, va_list ap);
@ -1361,6 +1364,7 @@ typedef struct rb_parser_config_struct {
void (*encoding_set)(VALUE obj, int encindex);
int (*encoding_is_ascii8bit)(VALUE obj);
rb_encoding *(*usascii_encoding)(void);
int enc_coderange_broken;
/* Ractor */
VALUE (*ractor_make_shareable)(VALUE obj);

Просмотреть файл

@ -1365,6 +1365,21 @@ eom
assert_valid_syntax 'p :foo, {proc do end => proc do end, b: proc do end}', bug13073
end
# Regression test: evaluating a hash literal whose symbol key contains an
# invalid UTF-8 byte ("\xC3") must not leak memory.
#
# The first heredoc (terminated by `begin;`) is the preparation script: it
# builds the source string, wraps `eval` in a proc that expects an
# EncodingError (reaching `raise "unreachable"` means eval did not raise),
# and runs it 1_000 times. The second heredoc (terminated by `end;`) is the
# measured script, which runs the same proc 1_000_000 times. `rss: true` is
# passed through to assert_no_memory_leak.
def test_invalid_symbol_in_hash_memory_leak
assert_no_memory_leak([], "#{<<-'begin;'}", "#{<<-'end;'}", rss: true)
str = '{"\xC3": 1}'.force_encoding("UTF-8")
code = proc do
eval(str)
raise "unreachable"
rescue EncodingError
end
1_000.times(&code)
begin;
1_000_000.times(&code)
end;
end
def test_do_after_local_variable
obj = Object.new
def obj.m; yield; end

Просмотреть файл

@ -173,6 +173,7 @@ struct rb_imemo_tmpbuf_struct {
/* Each name below is redirected to the matching member of p->config, so a
 * variable named `p` must be in scope wherever one of these names is used. */
#define ID2SYM p->config->id2sym
#undef SYM2ID
#define SYM2ID p->config->sym2id
#define rb_check_id_cstr p->config->check_id_cstr
#define rb_str_catf p->config->str_catf
#undef rb_str_cat_cstr
@ -191,10 +192,12 @@ struct rb_imemo_tmpbuf_struct {
/* String helpers: each name expands to a member of p->config, so the
 * expansion only compiles where a variable named `p` is in scope. */
#define rb_str_new p->config->str_new
#undef rb_str_new_cstr
#define rb_str_new_cstr p->config->str_new_cstr
#define rb_setup_fake_str p->config->setup_fake_str
#define rb_fstring p->config->fstring
#define is_ascii_string p->config->is_ascii_string
#define rb_enc_str_new p->config->enc_str_new
#define rb_enc_str_buf_cat p->config->enc_str_buf_cat
#define rb_enc_str_coderange p->config->enc_str_coderange
#define rb_str_buf_append p->config->str_buf_append
#define rb_str_vcatf p->config->str_vcatf
#undef StringValueCStr
@ -259,6 +262,7 @@ struct rb_imemo_tmpbuf_struct {
/* Encoding-related names, redirected to members of p->config (requires a
 * variable named `p` in scope at the point of use). */
#define ENCODING_SET p->config->encoding_set
#define ENCODING_IS_ASCII8BIT p->config->encoding_is_ascii8bit
#define rb_usascii_encoding p->config->usascii_encoding
/* Unlike its neighbours, this one names a config member that is read as a
 * value (it is compared against a coderange result), not called. */
#define ENC_CODERANGE_BROKEN p->config->enc_coderange_broken
#define rb_ractor_make_shareable p->config->ractor_make_shareable