зеркало из https://github.com/github/ruby.git
Add function rb_reg_onig_match
rb_reg_onig_match performs preparation, error handling, and cleanup for matching a regex against a string. This reduces repetitive code and removes the need for StringScanner to access internal data of regex.
This commit is contained in:
Родитель
e5effa4bd0
Коммит
7193b404a1
|
@ -3,6 +3,7 @@ require 'mkmf'
|
|||
if RUBY_ENGINE == 'ruby'
|
||||
$INCFLAGS << " -I$(top_srcdir)" if $extmk
|
||||
have_func("onig_region_memsize", "ruby.h")
|
||||
have_func("rb_reg_onig_match", "ruby.h")
|
||||
create_makefile 'strscan'
|
||||
else
|
||||
File.write('Makefile', dummy_makefile("").join)
|
||||
|
|
|
@ -539,6 +539,68 @@ adjust_register_position(struct strscanner *p, long position)
|
|||
}
|
||||
}
|
||||
|
||||
/* rb_reg_onig_match is available in Ruby 3.3 and later. */
|
||||
#ifndef HAVE_RB_REG_ONIG_MATCH
|
||||
static OnigPosition
|
||||
rb_reg_onig_match(VALUE re, VALUE str,
|
||||
OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
|
||||
void *args, struct re_registers *regs)
|
||||
{
|
||||
regex_t *reg = rb_reg_prepare_re(re, str);
|
||||
|
||||
bool tmpreg = reg != RREGEXP_PTR(re);
|
||||
if (!tmpreg) RREGEXP(re)->usecnt++;
|
||||
|
||||
OnigPosition result = match(reg, str, regs, args);
|
||||
|
||||
if (!tmpreg) RREGEXP(re)->usecnt--;
|
||||
if (tmpreg) {
|
||||
if (RREGEXP(re)->usecnt) {
|
||||
onig_free(reg);
|
||||
}
|
||||
else {
|
||||
onig_free(RREGEXP_PTR(re));
|
||||
RREGEXP_PTR(re) = reg;
|
||||
}
|
||||
}
|
||||
|
||||
if (result < 0) {
|
||||
if (result != ONIG_MISMATCH) {
|
||||
rb_raise(ScanError, "regexp buffer overflow");
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
static OnigPosition
|
||||
strscan_match(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr)
|
||||
{
|
||||
struct strscanner *p = (struct strscanner *)args_ptr;
|
||||
|
||||
return onig_match(reg,
|
||||
match_target(p),
|
||||
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
||||
(UChar* )CURPTR(p),
|
||||
regs,
|
||||
ONIG_OPTION_NONE);
|
||||
}
|
||||
|
||||
static OnigPosition
|
||||
strscan_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr)
|
||||
{
|
||||
struct strscanner *p = (struct strscanner *)args_ptr;
|
||||
|
||||
return onig_search(reg,
|
||||
match_target(p),
|
||||
(UChar *)(CURPTR(p) + S_RESTLEN(p)),
|
||||
(UChar *)CURPTR(p),
|
||||
(UChar *)(CURPTR(p) + S_RESTLEN(p)),
|
||||
regs,
|
||||
ONIG_OPTION_NONE);
|
||||
}
|
||||
|
||||
static VALUE
|
||||
strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
|
||||
{
|
||||
|
@ -560,47 +622,14 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
|
|||
}
|
||||
|
||||
if (RB_TYPE_P(pattern, T_REGEXP)) {
|
||||
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
||||
regex_t *re;
|
||||
long ret;
|
||||
int tmpreg;
|
||||
|
||||
p->regex = pattern;
|
||||
re = rb_reg_prepare_re(pattern, p->str);
|
||||
tmpreg = re != RREGEXP_PTR(pattern);
|
||||
if (!tmpreg) RREGEXP(pattern)->usecnt++;
|
||||
OnigPosition ret = rb_reg_onig_match(pattern,
|
||||
p->str,
|
||||
headonly ? strscan_match : strscan_search,
|
||||
(void *)p,
|
||||
&(p->regs));
|
||||
|
||||
if (headonly) {
|
||||
ret = onig_match(re,
|
||||
match_target(p),
|
||||
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
||||
(UChar* )CURPTR(p),
|
||||
&(p->regs),
|
||||
ONIG_OPTION_NONE);
|
||||
}
|
||||
else {
|
||||
ret = onig_search(re,
|
||||
match_target(p),
|
||||
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
||||
(UChar* )CURPTR(p),
|
||||
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
||||
&(p->regs),
|
||||
ONIG_OPTION_NONE);
|
||||
}
|
||||
if (!tmpreg) RREGEXP(pattern)->usecnt--;
|
||||
if (tmpreg) {
|
||||
if (RREGEXP(pattern)->usecnt) {
|
||||
onig_free(re);
|
||||
}
|
||||
else {
|
||||
onig_free(RREGEXP_PTR(pattern));
|
||||
RREGEXP_PTR(pattern) = re;
|
||||
}
|
||||
}
|
||||
|
||||
if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
|
||||
if (ret < 0) {
|
||||
/* not matched */
|
||||
if (ret == ONIG_MISMATCH) {
|
||||
return Qnil;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "ruby/onigmo.h"
|
||||
#include "ruby/regex.h"
|
||||
#include "ruby/internal/core/rmatch.h"
|
||||
#include "ruby/internal/dllexport.h"
|
||||
|
@ -105,25 +106,28 @@ long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir);
|
|||
VALUE rb_reg_quote(VALUE str);
|
||||
|
||||
/**
|
||||
* Exercises various checks and preprocesses so that the given regular
|
||||
* expression can be applied to the given string. The preprocess here includes
|
||||
* (but not limited to) for instance encoding conversion.
|
||||
* Runs a regular expression match using function `match`. Performs preparation,
|
||||
* error handling, and memory cleanup.
|
||||
*
|
||||
* @param[in] re Target regular expression.
|
||||
* @param[in] str What `re` is about to run on.
|
||||
* @param[in] match The function to run to match `str` against `re`.
|
||||
* @param[in] args Pointer to arguments to pass into `match`.
|
||||
* @param[out] regs Registers on a successful match.
|
||||
* @exception rb_eArgError `re` does not fit for `str`.
|
||||
* @exception rb_eEncCompatError `re` and `str` are incompatible.
|
||||
* @exception rb_eRegexpError `re` is malformed.
|
||||
* @return A preprocessesed pattern buffer ready to be applied to `str`.
|
||||
* @note The return value is manages by our GC. Don't free.
|
||||
* @return Match position on a successful match, `ONIG_MISMATCH` otherwise.
|
||||
*
|
||||
* @internal
|
||||
*
|
||||
* The return type, `regex_t *`, is defined in `<ruby/onigmo.h>`, _and_
|
||||
* The type `regex_t *` is defined in `<ruby/onigmo.h>`, _and_
|
||||
* _conflicts_ with POSIX's `<regex.h>`. We can no longer save the situation
|
||||
* at this point. Just don't mix the two.
|
||||
*/
|
||||
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
||||
OnigPosition rb_reg_onig_match(VALUE re, VALUE str,
|
||||
OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
|
||||
void *args, struct re_registers *regs);
|
||||
|
||||
/**
|
||||
* Duplicates a match data. This is roughly the same as `onig_region_copy()`,
|
||||
|
|
242
re.c
242
re.c
|
@ -1575,8 +1575,8 @@ rb_reg_prepare_enc(VALUE re, VALUE str, int warn)
|
|||
return enc;
|
||||
}
|
||||
|
||||
regex_t *
|
||||
rb_reg_prepare_re0(VALUE re, VALUE str, onig_errmsg_buffer err)
|
||||
static regex_t *
|
||||
rb_reg_prepare_re(VALUE re, VALUE str, onig_errmsg_buffer err)
|
||||
{
|
||||
regex_t *reg = RREGEXP_PTR(re);
|
||||
int r;
|
||||
|
@ -1620,11 +1620,40 @@ rb_reg_prepare_re0(VALUE re, VALUE str, onig_errmsg_buffer err)
|
|||
return reg;
|
||||
}
|
||||
|
||||
regex_t *
|
||||
rb_reg_prepare_re(VALUE re, VALUE str)
|
||||
OnigPosition
|
||||
rb_reg_onig_match(VALUE re, VALUE str,
|
||||
OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
|
||||
void *args, struct re_registers *regs)
|
||||
{
|
||||
onig_errmsg_buffer err = "";
|
||||
return rb_reg_prepare_re0(re, str, err);
|
||||
regex_t *reg = rb_reg_prepare_re(re, str, err);
|
||||
|
||||
bool tmpreg = reg != RREGEXP_PTR(re);
|
||||
if (!tmpreg) RREGEXP(re)->usecnt++;
|
||||
|
||||
OnigPosition result = match(reg, str, regs, args);
|
||||
|
||||
if (!tmpreg) RREGEXP(re)->usecnt--;
|
||||
if (tmpreg) {
|
||||
if (RREGEXP(re)->usecnt) {
|
||||
onig_free(reg);
|
||||
}
|
||||
else {
|
||||
onig_free(RREGEXP_PTR(re));
|
||||
RREGEXP_PTR(re) = reg;
|
||||
}
|
||||
}
|
||||
|
||||
if (result < 0) {
|
||||
onig_region_free(regs, 0);
|
||||
|
||||
if (result != ONIG_MISMATCH) {
|
||||
onig_error_code_to_str((UChar*)err, (int)result);
|
||||
rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
long
|
||||
|
@ -1658,65 +1687,52 @@ rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int reverse)
|
|||
return pos;
|
||||
}
|
||||
|
||||
struct reg_onig_search_args {
|
||||
long pos;
|
||||
long range;
|
||||
};
|
||||
|
||||
static OnigPosition
|
||||
reg_onig_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr)
|
||||
{
|
||||
struct reg_onig_search_args *args = (struct reg_onig_search_args *)args_ptr;
|
||||
const char *ptr;
|
||||
long len;
|
||||
RSTRING_GETMEM(str, ptr, len);
|
||||
|
||||
return onig_search(
|
||||
reg,
|
||||
(UChar *)ptr,
|
||||
(UChar *)(ptr + len),
|
||||
(UChar *)(ptr + args->pos),
|
||||
(UChar *)(ptr + args->range),
|
||||
regs,
|
||||
ONIG_OPTION_NONE);
|
||||
}
|
||||
|
||||
/* returns byte offset */
|
||||
static long
|
||||
rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *set_match)
|
||||
{
|
||||
long result;
|
||||
VALUE match;
|
||||
struct re_registers regi, *regs = ®i;
|
||||
char *start, *range;
|
||||
long len;
|
||||
regex_t *reg;
|
||||
int tmpreg;
|
||||
onig_errmsg_buffer err = "";
|
||||
|
||||
RSTRING_GETMEM(str, start, len);
|
||||
range = start;
|
||||
long len = RSTRING_LEN(str);
|
||||
if (pos > len || pos < 0) {
|
||||
rb_backref_set(Qnil);
|
||||
return -1;
|
||||
}
|
||||
|
||||
reg = rb_reg_prepare_re0(re, str, err);
|
||||
tmpreg = reg != RREGEXP_PTR(re);
|
||||
if (!tmpreg) RREGEXP(re)->usecnt++;
|
||||
struct reg_onig_search_args args = {
|
||||
.pos = pos,
|
||||
.range = reverse ? 0 : len,
|
||||
};
|
||||
|
||||
MEMZERO(regs, struct re_registers, 1);
|
||||
if (!reverse) {
|
||||
range += len;
|
||||
}
|
||||
result = onig_search(reg,
|
||||
(UChar*)start,
|
||||
((UChar*)(start + len)),
|
||||
((UChar*)(start + pos)),
|
||||
((UChar*)range),
|
||||
regs, ONIG_OPTION_NONE);
|
||||
if (!tmpreg) RREGEXP(re)->usecnt--;
|
||||
if (tmpreg) {
|
||||
if (RREGEXP(re)->usecnt) {
|
||||
onig_free(reg);
|
||||
}
|
||||
else {
|
||||
onig_free(RREGEXP_PTR(re));
|
||||
RREGEXP_PTR(re) = reg;
|
||||
}
|
||||
}
|
||||
if (result < 0) {
|
||||
if (regs == ®i)
|
||||
onig_region_free(regs, 0);
|
||||
if (result == ONIG_MISMATCH) {
|
||||
rb_backref_set(Qnil);
|
||||
return result;
|
||||
}
|
||||
else {
|
||||
onig_error_code_to_str((UChar*)err, (int)result);
|
||||
rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re);
|
||||
}
|
||||
}
|
||||
VALUE match = match_alloc(rb_cMatch);
|
||||
struct re_registers *regs = RMATCH_REGS(match);
|
||||
|
||||
match = match_alloc(rb_cMatch);
|
||||
memcpy(RMATCH_REGS(match), regs, sizeof(struct re_registers));
|
||||
OnigPosition result = rb_reg_onig_match(re, str, reg_onig_search, &args, regs);
|
||||
if (result == ONIG_MISMATCH) {
|
||||
rb_backref_set(Qnil);
|
||||
return ONIG_MISMATCH;
|
||||
}
|
||||
|
||||
if (set_backref_str) {
|
||||
RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str));
|
||||
|
@ -1748,69 +1764,35 @@ rb_reg_search(VALUE re, VALUE str, long pos, int reverse)
|
|||
return rb_reg_search0(re, str, pos, reverse, 1);
|
||||
}
|
||||
|
||||
bool
|
||||
rb_reg_start_with_p(VALUE re, VALUE str)
|
||||
static OnigPosition
|
||||
reg_onig_match(regex_t *reg, VALUE str, struct re_registers *regs, void *_)
|
||||
{
|
||||
long result;
|
||||
VALUE match;
|
||||
struct re_registers regi, *regs = ®i;
|
||||
regex_t *reg;
|
||||
int tmpreg;
|
||||
onig_errmsg_buffer err = "";
|
||||
|
||||
reg = rb_reg_prepare_re0(re, str, err);
|
||||
tmpreg = reg != RREGEXP_PTR(re);
|
||||
if (!tmpreg) RREGEXP(re)->usecnt++;
|
||||
|
||||
match = rb_backref_get();
|
||||
if (!NIL_P(match)) {
|
||||
if (FL_TEST(match, MATCH_BUSY)) {
|
||||
match = Qnil;
|
||||
}
|
||||
else {
|
||||
regs = RMATCH_REGS(match);
|
||||
}
|
||||
}
|
||||
if (NIL_P(match)) {
|
||||
MEMZERO(regs, struct re_registers, 1);
|
||||
}
|
||||
const char *ptr;
|
||||
long len;
|
||||
RSTRING_GETMEM(str, ptr, len);
|
||||
result = onig_match(reg,
|
||||
(UChar*)(ptr),
|
||||
((UChar*)(ptr + len)),
|
||||
(UChar*)(ptr),
|
||||
regs, ONIG_OPTION_NONE);
|
||||
if (!tmpreg) RREGEXP(re)->usecnt--;
|
||||
if (tmpreg) {
|
||||
if (RREGEXP(re)->usecnt) {
|
||||
onig_free(reg);
|
||||
}
|
||||
else {
|
||||
onig_free(RREGEXP_PTR(re));
|
||||
RREGEXP_PTR(re) = reg;
|
||||
}
|
||||
}
|
||||
if (result < 0) {
|
||||
if (regs == ®i)
|
||||
onig_region_free(regs, 0);
|
||||
if (result == ONIG_MISMATCH) {
|
||||
rb_backref_set(Qnil);
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
onig_error_code_to_str((UChar*)err, (int)result);
|
||||
rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re);
|
||||
}
|
||||
|
||||
return onig_match(
|
||||
reg,
|
||||
(UChar *)ptr,
|
||||
(UChar *)(ptr + len),
|
||||
(UChar *)ptr,
|
||||
regs,
|
||||
ONIG_OPTION_NONE);
|
||||
}
|
||||
|
||||
bool
|
||||
rb_reg_start_with_p(VALUE re, VALUE str)
|
||||
{
|
||||
VALUE match = rb_backref_get();
|
||||
if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) {
|
||||
match = match_alloc(rb_cMatch);
|
||||
}
|
||||
|
||||
if (NIL_P(match)) {
|
||||
int err;
|
||||
match = match_alloc(rb_cMatch);
|
||||
err = rb_reg_region_copy(RMATCH_REGS(match), regs);
|
||||
onig_region_free(regs, 0);
|
||||
if (err) rb_memerror();
|
||||
struct re_registers *regs = RMATCH_REGS(match);
|
||||
|
||||
if (rb_reg_onig_match(re, str, reg_onig_match, NULL, regs) == ONIG_MISMATCH) {
|
||||
rb_backref_set(Qnil);
|
||||
return false;
|
||||
}
|
||||
|
||||
RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str));
|
||||
|
@ -3784,12 +3766,6 @@ rb_reg_match_m_p(int argc, VALUE *argv, VALUE re)
|
|||
VALUE
|
||||
rb_reg_match_p(VALUE re, VALUE str, long pos)
|
||||
{
|
||||
regex_t *reg;
|
||||
onig_errmsg_buffer err = "";
|
||||
OnigPosition result;
|
||||
const UChar *start, *end;
|
||||
int tmpreg;
|
||||
|
||||
if (NIL_P(str)) return Qfalse;
|
||||
str = SYMBOL_P(str) ? rb_sym2str(str) : StringValue(str);
|
||||
if (pos) {
|
||||
|
@ -3804,33 +3780,13 @@ rb_reg_match_p(VALUE re, VALUE str, long pos)
|
|||
pos = beg - RSTRING_PTR(str);
|
||||
}
|
||||
}
|
||||
reg = rb_reg_prepare_re0(re, str, err);
|
||||
tmpreg = reg != RREGEXP_PTR(re);
|
||||
if (!tmpreg) RREGEXP(re)->usecnt++;
|
||||
start = ((UChar*)RSTRING_PTR(str));
|
||||
end = start + RSTRING_LEN(str);
|
||||
result = onig_search(reg, start, end, start + pos, end,
|
||||
NULL, ONIG_OPTION_NONE);
|
||||
if (!tmpreg) RREGEXP(re)->usecnt--;
|
||||
if (tmpreg) {
|
||||
if (RREGEXP(re)->usecnt) {
|
||||
onig_free(reg);
|
||||
}
|
||||
else {
|
||||
onig_free(RREGEXP_PTR(re));
|
||||
RREGEXP_PTR(re) = reg;
|
||||
}
|
||||
}
|
||||
if (result < 0) {
|
||||
if (result == ONIG_MISMATCH) {
|
||||
return Qfalse;
|
||||
}
|
||||
else {
|
||||
onig_error_code_to_str((UChar*)err, (int)result);
|
||||
rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re);
|
||||
}
|
||||
}
|
||||
return Qtrue;
|
||||
|
||||
struct reg_onig_search_args args = {
|
||||
.pos = pos,
|
||||
.range = RSTRING_LEN(str),
|
||||
};
|
||||
|
||||
return rb_reg_onig_match(re, str, reg_onig_search, &args, NULL) == ONIG_MISMATCH ? Qfalse : Qtrue;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -888,12 +888,16 @@ onig_region_free(OnigRegion* r, int free_self)
|
|||
if (r->allocated > 0) {
|
||||
xfree(r->beg);
|
||||
xfree(r->end);
|
||||
r->allocated = 0;
|
||||
}
|
||||
#ifdef USE_CAPTURE_HISTORY
|
||||
history_root_free(r);
|
||||
#endif
|
||||
if (free_self) xfree(r);
|
||||
if (free_self) {
|
||||
xfree(r);
|
||||
}
|
||||
else {
|
||||
memset(r, 0, sizeof(OnigRegion));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче