зеркало из https://github.com/github/ruby.git
Fix memory leak in String#start_with? when regexp times out
[Bug #20653]

This commit refactors how Onigmo handles timeouts. Instead of raising a timeout error itself, onig_search now returns ONIGERR_TIMEOUT, so the caller can free memory and then raise the timeout error. This fixes a memory leak in String#start_with? when the regexp times out.

For example:

    regex = Regexp.new("^#{"(a*)" * 10_000}x$", timeout: 0.000001)
    str = "a" * 1000000 + "x"

    10.times do
      100.times do
        str.start_with?(regex)
      rescue
      end

      puts `ps -o rss= -p #{$$}`
    end

Before:

    33216
    51936
    71152
    81728
    97152
    103248
    120384
    133392
    133520
    133616

After:

    14912
    15376
    15824
    15824
    16128
    16128
    16144
    16144
    16160
    16160
This commit is contained in:
Родитель
67e1ea0028
Коммит
7464514ca5
63
re.c
63
re.c
|
@ -1719,10 +1719,16 @@ rb_reg_onig_match(VALUE re, VALUE str,
|
|||
if (result < 0) {
|
||||
onig_region_free(regs, 0);
|
||||
|
||||
if (result != ONIG_MISMATCH) {
|
||||
switch (result) {
|
||||
case ONIG_MISMATCH:
|
||||
break;
|
||||
case ONIGERR_TIMEOUT:
|
||||
rb_raise(rb_eRegexpTimeoutError, "regexp match timeout");
|
||||
default: {
|
||||
onig_errmsg_buffer err = "";
|
||||
onig_error_code_to_str((UChar*)err, (int)result);
|
||||
rb_reg_raise(err, re);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1783,23 +1789,6 @@ reg_onig_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_p
|
|||
ONIG_OPTION_NONE);
|
||||
}
|
||||
|
||||
struct rb_reg_onig_match_args {
|
||||
VALUE re;
|
||||
VALUE str;
|
||||
struct reg_onig_search_args args;
|
||||
struct re_registers regs;
|
||||
|
||||
OnigPosition result;
|
||||
};
|
||||
|
||||
static VALUE
|
||||
rb_reg_onig_match_try(VALUE value_args)
|
||||
{
|
||||
struct rb_reg_onig_match_args *args = (struct rb_reg_onig_match_args *)value_args;
|
||||
args->result = rb_reg_onig_match(args->re, args->str, reg_onig_search, &args->args, &args->regs);
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
/* returns byte offset */
|
||||
static long
|
||||
rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *set_match)
|
||||
|
@ -1810,38 +1799,22 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back
|
|||
return -1;
|
||||
}
|
||||
|
||||
struct rb_reg_onig_match_args args = {
|
||||
.re = re,
|
||||
.str = str,
|
||||
.args = {
|
||||
.pos = pos,
|
||||
.range = reverse ? 0 : len,
|
||||
},
|
||||
.regs = {0}
|
||||
struct reg_onig_search_args args = {
|
||||
.pos = pos,
|
||||
.range = reverse ? 0 : len,
|
||||
};
|
||||
struct re_registers regs = {0};
|
||||
|
||||
/* If there is a timeout set, then rb_reg_onig_match could raise a
|
||||
* Regexp::TimeoutError so we want to protect it from leaking memory. */
|
||||
if (rb_reg_match_time_limit) {
|
||||
int state;
|
||||
rb_protect(rb_reg_onig_match_try, (VALUE)&args, &state);
|
||||
if (state) {
|
||||
onig_region_free(&args.regs, false);
|
||||
rb_jump_tag(state);
|
||||
}
|
||||
}
|
||||
else {
|
||||
rb_reg_onig_match_try((VALUE)&args);
|
||||
}
|
||||
OnigPosition result = rb_reg_onig_match(re, str, reg_onig_search, &args, &regs);
|
||||
|
||||
if (args.result == ONIG_MISMATCH) {
|
||||
if (result == ONIG_MISMATCH) {
|
||||
rb_backref_set(Qnil);
|
||||
return ONIG_MISMATCH;
|
||||
}
|
||||
|
||||
VALUE match = match_alloc(rb_cMatch);
|
||||
rb_matchext_t *rm = RMATCH_EXT(match);
|
||||
rm->regs = args.regs;
|
||||
rm->regs = regs;
|
||||
|
||||
if (set_backref_str) {
|
||||
RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str));
|
||||
|
@ -1858,7 +1831,7 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back
|
|||
rb_backref_set(match);
|
||||
if (set_match) *set_match = match;
|
||||
|
||||
return args.result;
|
||||
return result;
|
||||
}
|
||||
|
||||
long
|
||||
|
@ -4720,12 +4693,6 @@ rb_reg_timeout_p(regex_t *reg, void *end_time_)
|
|||
return false;
|
||||
}
|
||||
|
||||
void
|
||||
rb_reg_raise_timeout(void)
|
||||
{
|
||||
rb_raise(rb_eRegexpTimeoutError, "regexp match timeout");
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* Regexp.timeout -> float or nil
|
||||
|
|
|
@ -5575,8 +5575,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
|
|||
|
||||
timeout:
|
||||
MATCH_ARG_FREE(msa);
|
||||
onig_region_free(region, false);
|
||||
HANDLE_REG_TIMEOUT_IN_MATCH_AT;
|
||||
return ONIGERR_TIMEOUT;
|
||||
}
|
||||
|
||||
extern OnigPosition
|
||||
|
|
4
regint.h
4
regint.h
|
@ -163,9 +163,6 @@
|
|||
rb_thread_check_ints(); \
|
||||
} \
|
||||
} while(0)
|
||||
# define HANDLE_REG_TIMEOUT_IN_MATCH_AT do { \
|
||||
rb_reg_raise_timeout(); \
|
||||
} while (0)
|
||||
# define onig_st_init_table st_init_table
|
||||
# define onig_st_init_table_with_size st_init_table_with_size
|
||||
# define onig_st_init_numtable st_init_numtable
|
||||
|
@ -1002,7 +999,6 @@ extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, c
|
|||
extern size_t onig_memsize(const regex_t *reg);
|
||||
extern size_t onig_region_memsize(const struct re_registers *regs);
|
||||
bool rb_reg_timeout_p(regex_t *reg, void *end_time);
|
||||
NORETURN(void rb_reg_raise_timeout(void));
|
||||
#endif
|
||||
|
||||
RUBY_SYMBOL_EXPORT_END
|
||||
|
|
|
@ -1971,6 +1971,22 @@ CODE
|
|||
assert_nil($&)
|
||||
end
|
||||
|
||||
def test_start_with_timeout_memory_leak
|
||||
assert_no_memory_leak([], "#{<<~"begin;"}", "#{<<~'end;'}", "[Bug #20653]", rss: true)
|
||||
regex = Regexp.new("^#{"(a*)" * 10_000}x$", timeout: 0.000001)
|
||||
str = "a" * 1_000_000 + "x"
|
||||
|
||||
code = proc do
|
||||
str.start_with?(regex)
|
||||
rescue
|
||||
end
|
||||
|
||||
10.times(&code)
|
||||
begin;
|
||||
1_000.times(&code)
|
||||
end;
|
||||
end
|
||||
|
||||
def test_strip
|
||||
assert_equal(S("x"), S(" x ").strip)
|
||||
assert_equal(S("x"), S(" \n\r\t x \t\r\n\n ").strip)
|
||||
|
|
Загрузка…
Ссылка в новой задаче