From 7571ad42f42939d172ec9a68dfe56aac724ee2ef Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 25 Jul 2024 12:14:26 -0400 Subject: [PATCH] [Bug #20650] Fix memory leak in Regexp capture group when timeout (#11244) Fix memory leak in Regexp capture group when timeout [Bug #20650] The capture group allocates memory that is leaked when it times out. For example: re = Regexp.new("^#{"(a*)" * 10_000}x$", timeout: 0.000001) str = "a" * 1000000 + "x" 10.times do 100.times do re =~ str rescue Regexp::TimeoutError end puts `ps -o rss= -p #{$$}` end Before: 34688 56416 78288 100368 120784 140704 161904 183568 204320 224800 After: 16288 16288 16880 16896 16912 16928 16944 17184 17184 17200 --- include/ruby/onigmo.h | 1 + regexec.c | 73 +++++++++++++++++++++++++++------------- test/ruby/test_regexp.rb | 16 +++++++++ 3 files changed, 66 insertions(+), 24 deletions(-) diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index d233336316..db290cd47a 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -636,6 +636,7 @@ ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16 #define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET -21 #define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 +#define ONIGERR_TIMEOUT -23 /* general error */ #define ONIGERR_INVALID_ARGUMENT -30 /* syntax error */ diff --git a/regexec.c b/regexec.c index 6d82429e03..9833eeff25 100644 --- a/regexec.c +++ b/regexec.c @@ -4220,7 +4220,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, xfree(xmalloc_base); if (stk_base != stk_alloc || IS_NOT_NULL(msa->stack_p)) xfree(stk_base); - HANDLE_REG_TIMEOUT_IN_MATCH_AT; + return ONIGERR_TIMEOUT; } @@ -5212,44 +5212,64 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE # define MATCH_AND_RETURN_CHECK(upper_range) \ r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - if (! IS_FIND_LONGEST(reg->options)) {\ - goto match;\ + switch (r) { \ + case ONIG_MISMATCH: \ + break; \ + case ONIGERR_TIMEOUT: \ + goto timeout; \ + default: \ + if (r >= 0) { \ + if (! IS_FIND_LONGEST(reg->options)) { \ + goto match; \ + }\ }\ - }\ - else goto finish; /* error */ \ + else goto finish; /* error */ \ } # else # define MATCH_AND_RETURN_CHECK(upper_range) \ r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - goto match;\ - }\ - else goto finish; /* error */ \ + switch (r) { \ + case ONIG_MISMATCH: \ + break; \ + case ONIGERR_TIMEOUT: \ + goto timeout; \ + default: \ + if (r >= 0) { \ + goto match; \ + }\ + else goto finish; /* error */ \ } # endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ #else # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE # define MATCH_AND_RETURN_CHECK(none) \ r = match_at(reg, str, end, s, prev, &msa);\ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - if (! IS_FIND_LONGEST(reg->options)) {\ - goto match;\ - }\ - }\ - else goto finish; /* error */ \ + switch (r) { \ + case ONIG_MISMATCH: \ + break; \ + case ONIGERR_TIMEOUT: \ + goto timeout; \ + default: \ + if (r >= 0) { \ + if (! IS_FIND_LONGEST(reg->options)) { \ + goto match; \ + } \ + } \ + else goto finish; /* error */ \ } # else # define MATCH_AND_RETURN_CHECK(none) \ r = match_at(reg, str, end, s, prev, &msa);\ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - goto match;\ - }\ - else goto finish; /* error */ \ + switch (r) { \ + case ONIG_MISMATCH: \ + break; \ + case ONIGERR_TIMEOUT: \ + goto timeout; \ + default: \ + if (r >= 0) { \ + goto match; \ + } \ + else goto finish; /* error */ \ } # endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ @@ -5552,6 +5572,11 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, match: MATCH_ARG_FREE(msa); return s - str; + +timeout: + MATCH_ARG_FREE(msa); + onig_region_free(region, false); + HANDLE_REG_TIMEOUT_IN_MATCH_AT; } extern OnigPosition diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index c8caca2891..6b9efcb555 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -1895,6 +1895,22 @@ class TestRegexp < Test::Unit::TestCase end; end + def test_timeout_memory_leak + assert_no_memory_leak([], "#{<<~"begin;"}", "#{<<~'end;'}", "[Bug #20650]", timeout: 100, rss: true) + regex = Regexp.new("^#{"(a*)" * 10_000}x$", timeout: 0.000001) + str = "a" * 1_000_000 + "x" + + code = proc do + regex =~ str + rescue + end + + 10.times(&code) + begin; + 1_000.times(&code) + end; + end + def test_match_cache_exponential assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") timeout = #{ EnvUtil.apply_timeout_scale(10).inspect }