Refactor `Regexp#match` cache implementation (#7724)

* Refactor Regexp#match cache implementation

Improved variable and function names
Fixed [Bug 19537] (Maybe fixed in https://github.com/ruby/ruby/pull/7694)

* Add a glossary comment for "match cache"

* Skip resetting the match cache on null check when there is no cache point
This commit is contained in:
TSUYUSATO Kitsune 2023-04-19 13:08:28 +09:00 коммит произвёл GitHub
Родитель 8023da746c
Коммит a1c2c274ee
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 437 добавлений и 334 удалений

Просмотреть файл

@ -744,8 +744,6 @@ typedef struct {
typedef struct {
int lower;
int upper;
long base_num;
long inner_num;
} OnigRepeatRange;
typedef void (*OnigWarnFunc)(const char* s);

719
regexec.c

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -35,19 +35,15 @@
/* #define ONIG_DEBUG_COMPILE */
/* #define ONIG_DEBUG_SEARCH */
/* #define ONIG_DEBUG_MATCH */
/* #define ONIG_DEBUG_MATCH_CACHE */
/* #define ONIG_DEBUG_MEMLEAK */
/* #define ONIG_DONT_OPTIMIZE */
/* for byte-code statistical data. */
/* #define ONIG_DEBUG_STATISTICS */
/* enable matching optimization by using cache. */
#define USE_CACHE_MATCH_OPT
#ifdef USE_CACHE_MATCH_OPT
# define NUM_CACHE_OPCODE_FAIL -1
# define NUM_CACHE_OPCODE_UNINIT -2
#endif
/* enable the match optimization by using a cache. */
#define USE_MATCH_CACHE
#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \
@ -880,12 +876,14 @@ typedef struct _OnigStackType {
} u;
} OnigStackType;
#ifdef USE_CACHE_MATCH_OPT
#ifdef USE_MATCH_CACHE
typedef struct {
UChar *addr;
long num;
int outer_repeat;
} OnigCacheIndex;
long cache_point;
int outer_repeat_mem;
long num_cache_points_at_outer_repeat;
long num_cache_points_in_outer_repeat;
} OnigCacheOpcode;
#endif
typedef struct {
@ -910,16 +908,18 @@ typedef struct {
#else
uint64_t end_time;
#endif
#ifdef USE_CACHE_MATCH_OPT
long num_fail;
int enable_cache_match_opt;
long num_cache_opcode;
long num_cache_table;
OnigCacheIndex* cache_index_table;
uint8_t* match_cache;
#ifdef USE_MATCH_CACHE
int enable_match_cache;
long num_fails;
long num_cache_opcodes;
OnigCacheOpcode* cache_opcodes;
long num_cache_points;
uint8_t* match_cache_buf;
#endif
} OnigMatchArg;
#define NUM_CACHE_OPCODES_IMPOSSIBLE -1
#define NUM_CACHE_OPCODES_UNINIT -2
#define IS_CODE_SB_WORD(enc,code) \
(ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))

Просмотреть файл

@ -1733,7 +1733,7 @@ class TestRegexp < Test::Unit::TestCase
end;
end
def test_cache_optimization_exponential
def test_match_cache_exponential
assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
timeout = #{ EnvUtil.apply_timeout_scale(10).inspect }
begin;
@ -1743,7 +1743,7 @@ class TestRegexp < Test::Unit::TestCase
end;
end
def test_cache_optimization_square
def test_match_cache_square
assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
timeout = #{ EnvUtil.apply_timeout_scale(10).inspect }
begin;
@ -1753,7 +1753,7 @@ class TestRegexp < Test::Unit::TestCase
end;
end
def test_cache_index_initialize
def test_cache_opcodes_initialize
str = 'test1-test2-test3-test4-test_5'
re = '^([0-9a-zA-Z\-/]*){1,256}$'
100.times do
@ -1781,6 +1781,14 @@ class TestRegexp < Test::Unit::TestCase
assert_equal("123456789".match(/(?:x?\dx?){2,}/)[0], "123456789")
end
# Regression check for bug 19537: the subject 'aac' must never be reported
# as matching the pattern below.
def test_bug_19537
  subject = 'aac'
  pattern = '^([ab]{1,3})(a?)*$'
  # A fresh Regexp is compiled on every pass; presumably the repetition is
  # there to surface an intermittent failure -- confirm against the bug report.
  100.times {
    matched = Regexp.new(pattern).match?(subject)
    assert !matched
  }
end
def test_linear_time_p
assert_send [Regexp, :linear_time?, /a/]
assert_send [Regexp, :linear_time?, 'a']