[Bug #20650] Fix memory leak in Regexp capture group when timeout (#11244)

Fix memory leak in Regexp capture group when timeout

[Bug #20650]

Memory allocated for capture groups during a match is leaked when the match times out.

For example:

    re = Regexp.new("^#{"(a*)" * 10_000}x$", timeout: 0.000001)
    str = "a" * 1000000 + "x"

    10.times do
      100.times do
        re =~ str
      rescue Regexp::TimeoutError
      end

      puts `ps -o rss= -p #{$$}`
    end

Before:

    34688
    56416
    78288
    100368
    120784
    140704
    161904
    183568
    204320
    224800

After:

    16288
    16288
    16880
    16896
    16912
    16928
    16944
    17184
    17184
    17200
This commit is contained in:
Peter Zhu 2024-07-25 12:14:26 -04:00 коммит произвёл GitHub
Родитель 4667f8ec10
Коммит 7571ad42f4
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
3 изменённых файлов: 66 добавлений и 24 удалений

Просмотреть файл

@ -636,6 +636,7 @@ ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET -21
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
#define ONIGERR_TIMEOUT -23
/* general error */
#define ONIGERR_INVALID_ARGUMENT -30
/* syntax error */

Просмотреть файл

@ -4220,7 +4220,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
xfree(xmalloc_base);
if (stk_base != stk_alloc || IS_NOT_NULL(msa->stack_p))
xfree(stk_base);
HANDLE_REG_TIMEOUT_IN_MATCH_AT;
return ONIGERR_TIMEOUT;
}
@ -5212,44 +5212,64 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
# define MATCH_AND_RETURN_CHECK(upper_range) \
r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
if (r != ONIG_MISMATCH) {\
if (r >= 0) {\
if (! IS_FIND_LONGEST(reg->options)) {\
goto match;\
switch (r) { \
case ONIG_MISMATCH: \
break; \
case ONIGERR_TIMEOUT: \
goto timeout; \
default: \
if (r >= 0) { \
if (! IS_FIND_LONGEST(reg->options)) { \
goto match; \
}\
}\
}\
else goto finish; /* error */ \
else goto finish; /* error */ \
}
# else
# define MATCH_AND_RETURN_CHECK(upper_range) \
r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
if (r != ONIG_MISMATCH) {\
if (r >= 0) {\
goto match;\
}\
else goto finish; /* error */ \
switch (r) { \
case ONIG_MISMATCH: \
break; \
case ONIGERR_TIMEOUT: \
goto timeout; \
default: \
if (r >= 0) { \
goto match; \
}\
else goto finish; /* error */ \
}
# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
#else
# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
# define MATCH_AND_RETURN_CHECK(none) \
r = match_at(reg, str, end, s, prev, &msa);\
if (r != ONIG_MISMATCH) {\
if (r >= 0) {\
if (! IS_FIND_LONGEST(reg->options)) {\
goto match;\
}\
}\
else goto finish; /* error */ \
switch (r) { \
case ONIG_MISMATCH: \
break; \
case ONIGERR_TIMEOUT: \
goto timeout; \
default: \
if (r >= 0) { \
if (! IS_FIND_LONGEST(reg->options)) { \
goto match; \
} \
} \
else goto finish; /* error */ \
}
# else
# define MATCH_AND_RETURN_CHECK(none) \
r = match_at(reg, str, end, s, prev, &msa);\
if (r != ONIG_MISMATCH) {\
if (r >= 0) {\
goto match;\
}\
else goto finish; /* error */ \
switch (r) { \
case ONIG_MISMATCH: \
break; \
case ONIGERR_TIMEOUT: \
goto timeout; \
default: \
if (r >= 0) { \
goto match; \
} \
else goto finish; /* error */ \
}
# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
@ -5552,6 +5572,11 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
match:
MATCH_ARG_FREE(msa);
return s - str;
timeout:
MATCH_ARG_FREE(msa);
onig_region_free(region, false);
HANDLE_REG_TIMEOUT_IN_MATCH_AT;
}
extern OnigPosition

Просмотреть файл

@ -1895,6 +1895,22 @@ class TestRegexp < Test::Unit::TestCase
end;
end
# Regression test for [Bug #20650]: memory leak when a Regexp match with
# capture groups times out.
#
# Two heredoc scripts are handed to assert_no_memory_leak (with rss: true and
# a 100 timeout for the assertion itself):
#   * the text up to "begin;" builds a Regexp of 10,000 "(a*)" groups with
#     timeout: 0.000001, a string of one million "a" characters followed by
#     "x", and a proc that matches them while swallowing any StandardError
#     (bare rescue); it then runs that proc 10 times;
#   * the text between "begin;" and "end;" runs the same proc 1,000 times.
# NOTE(review): presumably the first script is the warm-up/baseline and the
# second is the measured workload -- confirm against assert_no_memory_leak.
#
# Everything after the assert_no_memory_leak line is heredoc content, i.e. it
# is part of the scripts being run, so do not add comments or reformat there.
def test_timeout_memory_leak
assert_no_memory_leak([], "#{<<~"begin;"}", "#{<<~'end;'}", "[Bug #20650]", timeout: 100, rss: true)
regex = Regexp.new("^#{"(a*)" * 10_000}x$", timeout: 0.000001)
str = "a" * 1_000_000 + "x"
code = proc do
regex =~ str
rescue
end
10.times(&code)
begin;
1_000.times(&code)
end;
end
def test_match_cache_exponential
assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
timeout = #{ EnvUtil.apply_timeout_scale(10).inspect }