Fix memory leak in String#start_with? when regexp times out

[Bug #20653]

This commit refactors how Onigmo handles timeouts. Instead of raising a
timeout error itself, onig_search now returns ONIGERR_TIMEOUT so that the
caller can free its memory first and then raise the timeout error.

This fixes a memory leak in String#start_with? when the regexp times out.
For example:

    regex = Regexp.new("^#{"(a*)" * 10_000}x$", timeout: 0.000001)
    str = "a" * 1000000 + "x"

    10.times do
      100.times do
        str.start_with?(regex)
      rescue
      end

      puts `ps -o rss= -p #{$$}`
    end

Before:

    33216
    51936
    71152
    81728
    97152
    103248
    120384
    133392
    133520
    133616

After:

    14912
    15376
    15824
    15824
    16128
    16128
    16144
    16144
    16160
    16160
This commit is contained in:
Peter Zhu 2024-07-25 15:28:25 -04:00
Родитель 67e1ea0028
Коммит 7464514ca5
4 изменённых файлов: 32 добавлений и 54 удалений

63
re.c
Просмотреть файл

@ -1719,10 +1719,16 @@ rb_reg_onig_match(VALUE re, VALUE str,
if (result < 0) {
onig_region_free(regs, 0);
if (result != ONIG_MISMATCH) {
switch (result) {
case ONIG_MISMATCH:
break;
case ONIGERR_TIMEOUT:
rb_raise(rb_eRegexpTimeoutError, "regexp match timeout");
default: {
onig_errmsg_buffer err = "";
onig_error_code_to_str((UChar*)err, (int)result);
rb_reg_raise(err, re);
}
}
}
@ -1783,23 +1789,6 @@ reg_onig_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_p
ONIG_OPTION_NONE);
}
struct rb_reg_onig_match_args {
VALUE re;
VALUE str;
struct reg_onig_search_args args;
struct re_registers regs;
OnigPosition result;
};
static VALUE
rb_reg_onig_match_try(VALUE value_args)
{
struct rb_reg_onig_match_args *args = (struct rb_reg_onig_match_args *)value_args;
args->result = rb_reg_onig_match(args->re, args->str, reg_onig_search, &args->args, &args->regs);
return Qnil;
}
/* returns byte offset */
static long
rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *set_match)
@ -1810,38 +1799,22 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back
return -1;
}
struct rb_reg_onig_match_args args = {
.re = re,
.str = str,
.args = {
.pos = pos,
.range = reverse ? 0 : len,
},
.regs = {0}
struct reg_onig_search_args args = {
.pos = pos,
.range = reverse ? 0 : len,
};
struct re_registers regs = {0};
/* If there is a timeout set, then rb_reg_onig_match could raise a
* Regexp::TimeoutError so we want to protect it from leaking memory. */
if (rb_reg_match_time_limit) {
int state;
rb_protect(rb_reg_onig_match_try, (VALUE)&args, &state);
if (state) {
onig_region_free(&args.regs, false);
rb_jump_tag(state);
}
}
else {
rb_reg_onig_match_try((VALUE)&args);
}
OnigPosition result = rb_reg_onig_match(re, str, reg_onig_search, &args, &regs);
if (args.result == ONIG_MISMATCH) {
if (result == ONIG_MISMATCH) {
rb_backref_set(Qnil);
return ONIG_MISMATCH;
}
VALUE match = match_alloc(rb_cMatch);
rb_matchext_t *rm = RMATCH_EXT(match);
rm->regs = args.regs;
rm->regs = regs;
if (set_backref_str) {
RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str));
@ -1858,7 +1831,7 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back
rb_backref_set(match);
if (set_match) *set_match = match;
return args.result;
return result;
}
long
@ -4720,12 +4693,6 @@ rb_reg_timeout_p(regex_t *reg, void *end_time_)
return false;
}
void
rb_reg_raise_timeout(void)
{
rb_raise(rb_eRegexpTimeoutError, "regexp match timeout");
}
/*
* call-seq:
* Regexp.timeout -> float or nil

Просмотреть файл

@ -5575,8 +5575,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
timeout:
MATCH_ARG_FREE(msa);
onig_region_free(region, false);
HANDLE_REG_TIMEOUT_IN_MATCH_AT;
return ONIGERR_TIMEOUT;
}
extern OnigPosition

Просмотреть файл

@ -163,9 +163,6 @@
rb_thread_check_ints(); \
} \
} while(0)
# define HANDLE_REG_TIMEOUT_IN_MATCH_AT do { \
rb_reg_raise_timeout(); \
} while (0)
# define onig_st_init_table st_init_table
# define onig_st_init_table_with_size st_init_table_with_size
# define onig_st_init_numtable st_init_numtable
@ -1002,7 +999,6 @@ extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, c
extern size_t onig_memsize(const regex_t *reg);
extern size_t onig_region_memsize(const struct re_registers *regs);
bool rb_reg_timeout_p(regex_t *reg, void *end_time);
NORETURN(void rb_reg_raise_timeout(void));
#endif
RUBY_SYMBOL_EXPORT_END

Просмотреть файл

@ -1971,6 +1971,22 @@ CODE
assert_nil($&)
end
def test_start_with_timeout_memory_leak
assert_no_memory_leak([], "#{<<~"begin;"}", "#{<<~'end;'}", "[Bug #20653]", rss: true)
regex = Regexp.new("^#{"(a*)" * 10_000}x$", timeout: 0.000001)
str = "a" * 1_000_000 + "x"
code = proc do
str.start_with?(regex)
rescue
end
10.times(&code)
begin;
1_000.times(&code)
end;
end
def test_strip
assert_equal(S("x"), S(" x ").strip)
assert_equal(S("x"), S(" \n\r\t x \t\r\n\n ").strip)