зеркало из https://github.com/github/ruby.git
remove GNU regex API
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7993 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
02b8414d98
Коммит
1034d1407e
13
ChangeLog
13
ChangeLog
|
@ -1,3 +1,16 @@
|
|||
Thu Feb 17 22:15:34 2005 K.Kosako <sndgk@ybb.ne.jp>
|
||||
|
||||
* ext/strscan/strscan.c: calls Oniguruma API directly.
|
||||
|
||||
Thu Feb 17 21:53:12 2005 K.Kosako <sndgk@ybb.ne.jp>
|
||||
|
||||
* common.mk, LEGAL: remove reggnu.c.
|
||||
|
||||
Thu Feb 17 21:53:12 2005 Kazuo Saito <ksaito@uranus.dti.ne.jp>
|
||||
|
||||
* gc.c, re.c: now ruby calls Oniguruma API directly, bypassing
|
||||
GNU compatible APIs.
|
||||
|
||||
Thu Feb 17 20:09:23 2005 Hirokazu Yamamoto <ocean@m2.ccsnet.ne.jp>
|
||||
|
||||
* lib/drb/drb.rb (DRbServer.default_safe_level): fix typo.
|
||||
|
|
5
LEGAL
5
LEGAL
|
@ -12,7 +12,6 @@ regenc.[ch]:
|
|||
regerror.c:
|
||||
regex.c:
|
||||
regexec.c:
|
||||
reggnu.c:
|
||||
regint.h:
|
||||
regparse.[ch]:
|
||||
ascii.c:
|
||||
|
@ -20,11 +19,11 @@ euc_jp.c:
|
|||
sjis.c:
|
||||
utf8.c:
|
||||
|
||||
Oniguruma ---- (C) K.Kosako <kosako@sofnec.co.jp>
|
||||
Oniguruma ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
|
||||
http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/
|
||||
http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
|
||||
http://www.geocities.jp/kosako1/oniguruma/
|
||||
http://www.geocities.jp/kosako3/oniguruma/
|
||||
|
||||
When this software is partly used or it is distributed with Ruby,
|
||||
this of Ruby follows the license of Ruby.
|
||||
|
|
|
@ -38,7 +38,6 @@ OBJS = array.$(OBJEXT) \
|
|||
regenc.$(OBJEXT) \
|
||||
regerror.$(OBJEXT) \
|
||||
regexec.$(OBJEXT) \
|
||||
reggnu.$(OBJEXT) \
|
||||
regparse.$(OBJEXT) \
|
||||
ruby.$(OBJEXT) \
|
||||
signal.$(OBJEXT) \
|
||||
|
@ -260,9 +259,6 @@ regerror.$(OBJEXT): {$(VPATH)}regerror.c {$(VPATH)}regint.h \
|
|||
{$(VPATH)}regenc.h {$(VPATH)}oniguruma.h config.h
|
||||
regexec.$(OBJEXT): {$(VPATH)}regexec.c {$(VPATH)}regint.h \
|
||||
{$(VPATH)}regenc.h {$(VPATH)}oniguruma.h config.h
|
||||
reggnu.$(OBJEXT): {$(VPATH)}reggnu.c {$(VPATH)}regint.h \
|
||||
{$(VPATH)}regenc.h {$(VPATH)}oniguruma.h {$(VPATH)}oniggnu.h \
|
||||
config.h
|
||||
regparse.$(OBJEXT): {$(VPATH)}regparse.c {$(VPATH)}oniguruma.h \
|
||||
{$(VPATH)}regint.h {$(VPATH)}regparse.h {$(VPATH)}regenc.h config.h
|
||||
ruby.$(OBJEXT): {$(VPATH)}ruby.c {$(VPATH)}ruby.h config.h \
|
||||
|
|
|
@ -171,7 +171,7 @@ static void
|
|||
strscan_free(p)
|
||||
struct strscanner *p;
|
||||
{
|
||||
re_free_registers(&(p->regs));
|
||||
onig_region_free(&(p->regs), 0);
|
||||
memset(p, sizeof(struct strscanner), 0);
|
||||
free(p);
|
||||
}
|
||||
|
@ -440,17 +440,15 @@ strscan_do_scan(self, regex, succptr, getstr, headonly)
|
|||
}
|
||||
strscan_prepare_re(regex);
|
||||
if (headonly) {
|
||||
ret = re_match(RREGEXP(regex)->ptr,
|
||||
CURPTR(p), S_RESTLEN(p),
|
||||
0,
|
||||
&(p->regs));
|
||||
ret = onig_match(RREGEXP(regex)->ptr, (UChar* )CURPTR(p),
|
||||
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
||||
(UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE);
|
||||
}
|
||||
else {
|
||||
ret = re_search(RREGEXP(regex)->ptr,
|
||||
CURPTR(p), S_RESTLEN(p),
|
||||
0,
|
||||
S_RESTLEN(p),
|
||||
&(p->regs));
|
||||
ret = onig_search(RREGEXP(regex)->ptr,
|
||||
(UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
||||
(UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
||||
&(p->regs), ONIG_OPTION_NONE);
|
||||
}
|
||||
|
||||
if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
|
||||
|
|
5
gc.c
5
gc.c
|
@ -52,7 +52,6 @@ extern unsigned long __libc_ia64_register_backing_store_base;
|
|||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
void re_free_registers _((struct re_registers*));
|
||||
int rb_io_fptr_finalize _((struct OpenFile*));
|
||||
|
||||
#if !defined(setjmp) && defined(HAVE__SETJMP)
|
||||
|
@ -1166,7 +1165,7 @@ obj_free(obj)
|
|||
break;
|
||||
case T_REGEXP:
|
||||
if (RANY(obj)->as.regexp.ptr) {
|
||||
re_free_pattern(RANY(obj)->as.regexp.ptr);
|
||||
onig_free(RANY(obj)->as.regexp.ptr);
|
||||
}
|
||||
if (RANY(obj)->as.regexp.str) {
|
||||
RUBY_CRITICAL(free(RANY(obj)->as.regexp.str));
|
||||
|
@ -1184,7 +1183,7 @@ obj_free(obj)
|
|||
break;
|
||||
case T_MATCH:
|
||||
if (RANY(obj)->as.match.regs) {
|
||||
re_free_registers(RANY(obj)->as.match.regs);
|
||||
onig_region_free(RANY(obj)->as.match.regs, 0);
|
||||
RUBY_CRITICAL(free(RANY(obj)->as.match.regs));
|
||||
}
|
||||
break;
|
||||
|
|
162
re.c
162
re.c
|
@ -208,16 +208,16 @@ kcode_set_option(re)
|
|||
if (reg_kcode == curr_kcode) return;
|
||||
switch (curr_kcode) {
|
||||
case KCODE_NONE:
|
||||
re_mbcinit(MBCTYPE_ASCII);
|
||||
onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
|
||||
break;
|
||||
case KCODE_EUC:
|
||||
re_mbcinit(MBCTYPE_EUC);
|
||||
onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP);
|
||||
break;
|
||||
case KCODE_SJIS:
|
||||
re_mbcinit(MBCTYPE_SJIS);
|
||||
onigenc_set_default_encoding(ONIG_ENCODING_SJIS);
|
||||
break;
|
||||
case KCODE_UTF8:
|
||||
re_mbcinit(MBCTYPE_UTF8);
|
||||
onigenc_set_default_encoding(ONIG_ENCODING_UTF8);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -228,16 +228,16 @@ kcode_reset_option()
|
|||
if (reg_kcode == curr_kcode) return;
|
||||
switch (reg_kcode) {
|
||||
case KCODE_NONE:
|
||||
re_mbcinit(MBCTYPE_ASCII);
|
||||
onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
|
||||
break;
|
||||
case KCODE_EUC:
|
||||
re_mbcinit(MBCTYPE_EUC);
|
||||
onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP);
|
||||
break;
|
||||
case KCODE_SJIS:
|
||||
re_mbcinit(MBCTYPE_SJIS);
|
||||
onigenc_set_default_encoding(ONIG_ENCODING_SJIS);
|
||||
break;
|
||||
case KCODE_UTF8:
|
||||
re_mbcinit(MBCTYPE_UTF8);
|
||||
onigenc_set_default_encoding(ONIG_ENCODING_UTF8);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -335,11 +335,11 @@ rb_reg_desc(s, len, re)
|
|||
rb_str_buf_cat2(str, "/");
|
||||
if (re) {
|
||||
rb_reg_check(re);
|
||||
if (RREGEXP(re)->ptr->options & RE_OPTION_MULTILINE)
|
||||
if (RREGEXP(re)->ptr->options & ONIG_OPTION_MULTILINE)
|
||||
rb_str_buf_cat2(str, "m");
|
||||
if (RREGEXP(re)->ptr->options & RE_OPTION_IGNORECASE)
|
||||
if (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE)
|
||||
rb_str_buf_cat2(str, "i");
|
||||
if (RREGEXP(re)->ptr->options & RE_OPTION_EXTENDED)
|
||||
if (RREGEXP(re)->ptr->options & ONIG_OPTION_EXTEND)
|
||||
rb_str_buf_cat2(str, "x");
|
||||
|
||||
if (FL_TEST(re, KCODE_FIXED)) {
|
||||
|
@ -430,7 +430,7 @@ rb_reg_to_s(re)
|
|||
VALUE re;
|
||||
{
|
||||
int options;
|
||||
const int embeddable = RE_OPTION_MULTILINE|RE_OPTION_IGNORECASE|RE_OPTION_EXTENDED;
|
||||
const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
|
||||
long len;
|
||||
const char* ptr;
|
||||
VALUE str = rb_str_buf_new2("(?");
|
||||
|
@ -447,13 +447,13 @@ rb_reg_to_s(re)
|
|||
if ((len -= 2) > 0) {
|
||||
do {
|
||||
if (*ptr == 'm') {
|
||||
options |= RE_OPTION_MULTILINE;
|
||||
options |= ONIG_OPTION_MULTILINE;
|
||||
}
|
||||
else if (*ptr == 'i') {
|
||||
options |= RE_OPTION_IGNORECASE;
|
||||
options |= ONIG_OPTION_IGNORECASE;
|
||||
}
|
||||
else if (*ptr == 'x') {
|
||||
options |= RE_OPTION_EXTENDED;
|
||||
options |= ONIG_OPTION_EXTEND;
|
||||
}
|
||||
else break;
|
||||
++ptr;
|
||||
|
@ -464,13 +464,13 @@ rb_reg_to_s(re)
|
|||
--len;
|
||||
do {
|
||||
if (*ptr == 'm') {
|
||||
options &= ~RE_OPTION_MULTILINE;
|
||||
options &= ~ONIG_OPTION_MULTILINE;
|
||||
}
|
||||
else if (*ptr == 'i') {
|
||||
options &= ~RE_OPTION_IGNORECASE;
|
||||
options &= ~ONIG_OPTION_IGNORECASE;
|
||||
}
|
||||
else if (*ptr == 'x') {
|
||||
options &= ~RE_OPTION_EXTENDED;
|
||||
options &= ~ONIG_OPTION_EXTEND;
|
||||
}
|
||||
else break;
|
||||
++ptr;
|
||||
|
@ -485,12 +485,17 @@ rb_reg_to_s(re)
|
|||
int r;
|
||||
Regexp *rp;
|
||||
kcode_set_option(re);
|
||||
r = re_alloc_pattern(&rp);
|
||||
r = onig_alloc_init(&rp, ONIG_OPTION_DEFAULT,
|
||||
ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
|
||||
onigenc_get_default_encoding(),
|
||||
OnigDefaultSyntax);
|
||||
if (r == 0) {
|
||||
err = (re_compile_pattern(++ptr, len -= 2, rp, NULL) != 0);
|
||||
++ptr;
|
||||
len -= 2;
|
||||
err = (onig_compile(rp, ptr, ptr + len, NULL) != 0);
|
||||
}
|
||||
kcode_reset_option();
|
||||
re_free_pattern(rp);
|
||||
onig_free(rp);
|
||||
}
|
||||
if (err) {
|
||||
options = RREGEXP(re)->ptr->options;
|
||||
|
@ -499,15 +504,15 @@ rb_reg_to_s(re)
|
|||
}
|
||||
}
|
||||
|
||||
if (options & RE_OPTION_MULTILINE) rb_str_buf_cat2(str, "m");
|
||||
if (options & RE_OPTION_IGNORECASE) rb_str_buf_cat2(str, "i");
|
||||
if (options & RE_OPTION_EXTENDED) rb_str_buf_cat2(str, "x");
|
||||
if (options & ONIG_OPTION_MULTILINE) rb_str_buf_cat2(str, "m");
|
||||
if (options & ONIG_OPTION_IGNORECASE) rb_str_buf_cat2(str, "i");
|
||||
if (options & ONIG_OPTION_EXTEND) rb_str_buf_cat2(str, "x");
|
||||
|
||||
if ((options & embeddable) != embeddable) {
|
||||
rb_str_buf_cat2(str, "-");
|
||||
if (!(options & RE_OPTION_MULTILINE)) rb_str_buf_cat2(str, "m");
|
||||
if (!(options & RE_OPTION_IGNORECASE)) rb_str_buf_cat2(str, "i");
|
||||
if (!(options & RE_OPTION_EXTENDED)) rb_str_buf_cat2(str, "x");
|
||||
if (!(options & ONIG_OPTION_MULTILINE)) rb_str_buf_cat2(str, "m");
|
||||
if (!(options & ONIG_OPTION_IGNORECASE)) rb_str_buf_cat2(str, "i");
|
||||
if (!(options & ONIG_OPTION_EXTEND)) rb_str_buf_cat2(str, "x");
|
||||
}
|
||||
|
||||
rb_str_buf_cat2(str, ":");
|
||||
|
@ -547,7 +552,7 @@ rb_reg_casefold_p(re)
|
|||
VALUE re;
|
||||
{
|
||||
rb_reg_check(re);
|
||||
if (RREGEXP(re)->ptr->options & RE_OPTION_IGNORECASE) return Qtrue;
|
||||
if (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE) return Qtrue;
|
||||
return Qfalse;
|
||||
}
|
||||
|
||||
|
@ -626,6 +631,7 @@ make_regexp(s, len, flags, ce)
|
|||
Regexp *rp;
|
||||
char err[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
int r;
|
||||
OnigErrorInfo einfo;
|
||||
|
||||
/* Handle escaped characters first. */
|
||||
|
||||
|
@ -634,18 +640,19 @@ make_regexp(s, len, flags, ce)
|
|||
from that.
|
||||
*/
|
||||
|
||||
r = re_alloc_pattern(&rp);
|
||||
r = onig_alloc_init(&rp, flags,
|
||||
ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
|
||||
onigenc_get_default_encoding(),
|
||||
OnigDefaultSyntax);
|
||||
if (r) {
|
||||
re_error_code_to_str((UChar* )err, r);
|
||||
onig_error_code_to_str((UChar* )err, r);
|
||||
rb_reg_raise(s, len, err, 0, ce);
|
||||
}
|
||||
|
||||
if (flags) {
|
||||
rp->options = flags;
|
||||
}
|
||||
r = re_compile_pattern(s, len, rp, err);
|
||||
r = onig_compile(rp, (UChar* )s, (UChar* )(s + len), &einfo);
|
||||
|
||||
if (r != 0) {
|
||||
(void )onig_error_code_to_str((UChar* )err, r, &einfo);
|
||||
rb_reg_raise(s, len, err, 0, ce);
|
||||
}
|
||||
return rp;
|
||||
|
@ -694,9 +701,9 @@ match_init_copy(obj, orig)
|
|||
rb_raise(rb_eTypeError, "wrong argument class");
|
||||
}
|
||||
RMATCH(obj)->str = RMATCH(orig)->str;
|
||||
re_free_registers(RMATCH(obj)->regs);
|
||||
onig_region_free(RMATCH(obj)->regs, 0);
|
||||
RMATCH(obj)->regs->allocated = 0;
|
||||
re_copy_registers(RMATCH(obj)->regs, RMATCH(orig)->regs);
|
||||
onig_region_copy(RMATCH(obj)->regs, RMATCH(orig)->regs);
|
||||
|
||||
return obj;
|
||||
}
|
||||
|
@ -830,12 +837,12 @@ rb_reg_prepare_re(re)
|
|||
/* ignorecase status */
|
||||
if (ruby_ignorecase && !state) {
|
||||
FL_SET(re, REG_CASESTATE);
|
||||
RREGEXP(re)->ptr->options |= RE_OPTION_IGNORECASE;
|
||||
RREGEXP(re)->ptr->options |= ONIG_OPTION_IGNORECASE;
|
||||
need_recompile = 1;
|
||||
}
|
||||
if (!ruby_ignorecase && state) {
|
||||
FL_UNSET(re, REG_CASESTATE);
|
||||
RREGEXP(re)->ptr->options &= ~RE_OPTION_IGNORECASE;
|
||||
RREGEXP(re)->ptr->options &= ~ONIG_OPTION_IGNORECASE;
|
||||
need_recompile = 1;
|
||||
}
|
||||
|
||||
|
@ -849,13 +856,22 @@ rb_reg_prepare_re(re)
|
|||
if (need_recompile) {
|
||||
char err[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
int r;
|
||||
OnigErrorInfo einfo;
|
||||
regex_t *reg;
|
||||
UChar *pattern;
|
||||
|
||||
if (FL_TEST(re, KCODE_FIXED))
|
||||
kcode_set_option(re);
|
||||
rb_reg_check(re);
|
||||
r = re_recompile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr, err);
|
||||
reg = RREGEXP(re)->ptr;
|
||||
pattern = ((UChar* )RREGEXP(re)->str);
|
||||
r = onig_recompile(reg, pattern, pattern + RREGEXP(re)->len,
|
||||
reg->options, onigenc_get_default_encoding(),
|
||||
OnigDefaultSyntax, &einfo);
|
||||
|
||||
if (r != 0) {
|
||||
rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, re, Qfalse);
|
||||
(void )onig_error_code_to_str((UChar* )err, r, &einfo);
|
||||
rb_reg_raise(pattern, RREGEXP(re)->len, err, re, Qfalse);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -866,6 +882,8 @@ rb_reg_adjust_startpos(re, str, pos, reverse)
|
|||
long pos, reverse;
|
||||
{
|
||||
long range;
|
||||
OnigEncoding enc;
|
||||
UChar *p, *string;
|
||||
|
||||
rb_reg_check(re);
|
||||
if (may_need_recompile) rb_reg_prepare_re(re);
|
||||
|
@ -881,9 +899,22 @@ rb_reg_adjust_startpos(re, str, pos, reverse)
|
|||
else {
|
||||
range = RSTRING(str)->len - pos;
|
||||
}
|
||||
return re_adjust_startpos(RREGEXP(re)->ptr,
|
||||
RSTRING(str)->ptr, RSTRING(str)->len,
|
||||
pos, range);
|
||||
|
||||
enc = (RREGEXP(re)->ptr)->enc;
|
||||
|
||||
if (pos > 0 && ONIGENC_MBC_MAXLEN(enc) != 1 && pos < RSTRING(str)->len) {
|
||||
string = (UChar* )RSTRING(str)->ptr;
|
||||
|
||||
if (range > 0) {
|
||||
p = onigenc_get_right_adjust_char_head(enc, string, string + pos);
|
||||
}
|
||||
else {
|
||||
p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, string, string + pos);
|
||||
}
|
||||
return p - string;
|
||||
}
|
||||
|
||||
return pos;
|
||||
}
|
||||
|
||||
long
|
||||
|
@ -915,8 +946,13 @@ rb_reg_search(re, str, pos, reverse)
|
|||
else {
|
||||
range = RSTRING(str)->len - pos;
|
||||
}
|
||||
result = re_search(RREGEXP(re)->ptr,RSTRING(str)->ptr,RSTRING(str)->len,
|
||||
pos, range, &regs);
|
||||
|
||||
result = onig_search(RREGEXP(re)->ptr,
|
||||
(UChar* )(RSTRING(str)->ptr),
|
||||
((UChar* )(RSTRING(str)->ptr) + RSTRING(str)->len),
|
||||
((UChar* )(RSTRING(str)->ptr) + pos),
|
||||
((UChar* )(RSTRING(str)->ptr) + pos + range),
|
||||
&regs, ONIG_OPTION_NONE);
|
||||
|
||||
if (FL_TEST(re, KCODE_FIXED))
|
||||
kcode_reset_option();
|
||||
|
@ -928,7 +964,7 @@ rb_reg_search(re, str, pos, reverse)
|
|||
}
|
||||
else {
|
||||
char err[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
re_error_code_to_str((UChar* )err, result);
|
||||
onig_error_code_to_str((UChar* )err, result);
|
||||
rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, 0, Qfalse);
|
||||
}
|
||||
}
|
||||
|
@ -944,7 +980,7 @@ rb_reg_search(re, str, pos, reverse)
|
|||
FL_UNSET(match, FL_TAINT);
|
||||
}
|
||||
|
||||
re_copy_registers(RMATCH(match)->regs, &regs);
|
||||
onig_region_copy(RMATCH(match)->regs, &regs);
|
||||
RMATCH(match)->str = rb_str_new4(str);
|
||||
rb_backref_set(match);
|
||||
|
||||
|
@ -1338,7 +1374,7 @@ rb_reg_initialize(obj, s, len, options, ce)
|
|||
{
|
||||
struct RRegexp *re = RREGEXP(obj);
|
||||
|
||||
if (re->ptr) re_free_pattern(re->ptr);
|
||||
if (re->ptr) onig_free(re->ptr);
|
||||
if (re->str) free(re->str);
|
||||
re->ptr = 0;
|
||||
re->str = 0;
|
||||
|
@ -1366,7 +1402,7 @@ rb_reg_initialize(obj, s, len, options, ce)
|
|||
kcode_set_option((VALUE)re);
|
||||
}
|
||||
if (ruby_ignorecase) {
|
||||
options |= RE_OPTION_IGNORECASE;
|
||||
options |= ONIG_OPTION_IGNORECASE;
|
||||
FL_SET(re, REG_CASESTATE);
|
||||
}
|
||||
re->ptr = make_regexp(s, len, options & 0xf, ce);
|
||||
|
@ -1734,7 +1770,7 @@ rb_reg_initialize_m(argc, argv, self)
|
|||
else {
|
||||
if (argc >= 2) {
|
||||
if (FIXNUM_P(argv[1])) flags = FIX2INT(argv[1]);
|
||||
else if (RTEST(argv[1])) flags = RE_OPTION_IGNORECASE;
|
||||
else if (RTEST(argv[1])) flags = ONIG_OPTION_IGNORECASE;
|
||||
}
|
||||
if (argc == 3 && !NIL_P(argv[2])) {
|
||||
char *kcode = StringValuePtr(argv[2]);
|
||||
|
@ -1924,7 +1960,7 @@ rb_reg_options(re)
|
|||
|
||||
rb_reg_check(re);
|
||||
options = RREGEXP(re)->ptr->options &
|
||||
(RE_OPTION_IGNORECASE|RE_OPTION_MULTILINE|RE_OPTION_EXTENDED);
|
||||
(ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND);
|
||||
if (FL_TEST(re, KCODE_FIXED)) {
|
||||
options |= rb_reg_get_kcode(re);
|
||||
}
|
||||
|
@ -2157,17 +2193,17 @@ rb_set_kcode(code)
|
|||
case 'E':
|
||||
case 'e':
|
||||
reg_kcode = KCODE_EUC;
|
||||
re_mbcinit(MBCTYPE_EUC);
|
||||
onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP);
|
||||
break;
|
||||
case 'S':
|
||||
case 's':
|
||||
reg_kcode = KCODE_SJIS;
|
||||
re_mbcinit(MBCTYPE_SJIS);
|
||||
onigenc_set_default_encoding(ONIG_ENCODING_SJIS);
|
||||
break;
|
||||
case 'U':
|
||||
case 'u':
|
||||
reg_kcode = KCODE_UTF8;
|
||||
re_mbcinit(MBCTYPE_UTF8);
|
||||
onigenc_set_default_encoding(ONIG_ENCODING_UTF8);
|
||||
break;
|
||||
default:
|
||||
case 'N':
|
||||
|
@ -2176,7 +2212,7 @@ rb_set_kcode(code)
|
|||
case 'a':
|
||||
set_no_conversion:
|
||||
reg_kcode = KCODE_NONE;
|
||||
re_mbcinit(MBCTYPE_ASCII);
|
||||
onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -2271,17 +2307,17 @@ Init_Regexp()
|
|||
{
|
||||
rb_eRegexpError = rb_define_class("RegexpError", rb_eStandardError);
|
||||
|
||||
re_set_casetable(casetable);
|
||||
onigenc_set_default_caseconv_table((UChar* )casetable);
|
||||
#if DEFAULT_KCODE == KCODE_EUC
|
||||
re_mbcinit(MBCTYPE_EUC);
|
||||
onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP);
|
||||
#else
|
||||
#if DEFAULT_KCODE == KCODE_SJIS
|
||||
re_mbcinit(MBCTYPE_SJIS);
|
||||
onigenc_set_default_encoding(ONIG_ENCODING_SJIS);
|
||||
#else
|
||||
#if DEFAULT_KCODE == KCODE_UTF8
|
||||
re_mbcinit(MBCTYPE_UTF8);
|
||||
onigenc_set_default_encoding(ONIG_ENCODING_UTF8);
|
||||
#else
|
||||
re_mbcinit(MBCTYPE_ASCII);
|
||||
onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
@ -2320,9 +2356,9 @@ Init_Regexp()
|
|||
rb_define_method(rb_cRegexp, "options", rb_reg_options_m, 0);
|
||||
rb_define_method(rb_cRegexp, "kcode", rb_reg_kcode_m, 0);
|
||||
|
||||
rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(RE_OPTION_IGNORECASE));
|
||||
rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(RE_OPTION_EXTENDED));
|
||||
rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(RE_OPTION_MULTILINE));
|
||||
rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE));
|
||||
rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND));
|
||||
rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE));
|
||||
|
||||
rb_global_variable(&reg_cache);
|
||||
|
||||
|
|
276
reggnu.c
276
reggnu.c
|
@ -1,276 +0,0 @@
|
|||
/**********************************************************************
|
||||
reggnu.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
#ifndef ONIGGNU_H /* name changes from oniggnu.h to regex.h in ruby. */
|
||||
#include "oniggnu.h"
|
||||
#endif
|
||||
|
||||
#if defined(RUBY_PLATFORM) || defined(RUBY)
|
||||
#ifndef ONIG_RUBY_M17N
|
||||
#define USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
|
||||
#endif
|
||||
#endif
|
||||
|
||||
extern void
|
||||
re_free_registers(OnigRegion* r)
|
||||
{
|
||||
/* 0: don't free self */
|
||||
onig_region_free(r, 0);
|
||||
}
|
||||
|
||||
extern int
|
||||
re_adjust_startpos(regex_t* reg, const char* string, int size,
|
||||
int startpos, int range)
|
||||
{
|
||||
if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) {
|
||||
UChar *p;
|
||||
UChar *s = (UChar* )string + startpos;
|
||||
|
||||
if (range > 0) {
|
||||
p = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, s);
|
||||
}
|
||||
else {
|
||||
p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s);
|
||||
}
|
||||
return p - (UChar* )string;
|
||||
}
|
||||
|
||||
return startpos;
|
||||
}
|
||||
|
||||
extern int
|
||||
re_match(regex_t* reg, const char* str, int size, int pos,
|
||||
struct re_registers* regs)
|
||||
{
|
||||
return onig_match(reg, (UChar* )str, (UChar* )(str + size),
|
||||
(UChar* )(str + pos), regs, ONIG_OPTION_NONE);
|
||||
}
|
||||
|
||||
extern int
|
||||
re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
|
||||
struct re_registers* regs)
|
||||
{
|
||||
return onig_search(bufp, (UChar* )string, (UChar* )(string + size),
|
||||
(UChar* )(string + startpos),
|
||||
(UChar* )(string + startpos + range),
|
||||
regs, ONIG_OPTION_NONE);
|
||||
}
|
||||
|
||||
extern int
|
||||
re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
|
||||
{
|
||||
int r;
|
||||
OnigErrorInfo einfo;
|
||||
|
||||
r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo);
|
||||
if (r != 0) {
|
||||
if (IS_NOT_NULL(ebuf))
|
||||
(void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
extern int
|
||||
re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
|
||||
{
|
||||
int r;
|
||||
OnigErrorInfo einfo;
|
||||
OnigEncoding enc;
|
||||
|
||||
/* I think encoding and options should be arguments of this function.
|
||||
But this is adapted to present re.c. (2002/11/29)
|
||||
*/
|
||||
enc = OnigEncDefaultCharEncoding;
|
||||
|
||||
r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size),
|
||||
reg->options, enc, OnigDefaultSyntax, &einfo);
|
||||
if (r != 0) {
|
||||
if (IS_NOT_NULL(ebuf))
|
||||
(void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
extern void
|
||||
re_free_pattern(regex_t* reg)
|
||||
{
|
||||
onig_free(reg);
|
||||
}
|
||||
|
||||
extern int
|
||||
re_alloc_pattern(regex_t** reg)
|
||||
{
|
||||
return onig_alloc_init(reg, ONIG_OPTION_DEFAULT,
|
||||
ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
|
||||
OnigEncDefaultCharEncoding,
|
||||
OnigDefaultSyntax);
|
||||
}
|
||||
|
||||
extern void
|
||||
re_set_casetable(const char* table)
|
||||
{
|
||||
onigenc_set_default_caseconv_table((UChar* )table);
|
||||
}
|
||||
|
||||
#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
|
||||
static const unsigned char mbctab_ascii[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
static const unsigned char mbctab_euc[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
|
||||
};
|
||||
|
||||
static const unsigned char mbctab_sjis[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
|
||||
};
|
||||
|
||||
static const unsigned char mbctab_utf8[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0,
|
||||
};
|
||||
|
||||
const unsigned char *re_mbctab = mbctab_ascii;
|
||||
#endif
|
||||
|
||||
extern void
|
||||
#ifdef ONIG_RUBY_M17N
|
||||
re_mbcinit(OnigEncoding enc)
|
||||
#else
|
||||
re_mbcinit(int mb_code)
|
||||
#endif
|
||||
{
|
||||
#ifdef ONIG_RUBY_M17N
|
||||
|
||||
onigenc_set_default_encoding(enc);
|
||||
|
||||
#else
|
||||
|
||||
OnigEncoding enc;
|
||||
|
||||
switch (mb_code) {
|
||||
case MBCTYPE_ASCII:
|
||||
enc = ONIG_ENCODING_ASCII;
|
||||
break;
|
||||
case MBCTYPE_EUC:
|
||||
enc = ONIG_ENCODING_EUC_JP;
|
||||
break;
|
||||
case MBCTYPE_SJIS:
|
||||
enc = ONIG_ENCODING_SJIS;
|
||||
break;
|
||||
case MBCTYPE_UTF8:
|
||||
enc = ONIG_ENCODING_UTF8;
|
||||
break;
|
||||
default:
|
||||
return ;
|
||||
break;
|
||||
}
|
||||
|
||||
onigenc_set_default_encoding(enc);
|
||||
#endif
|
||||
|
||||
#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
|
||||
switch (mb_code) {
|
||||
case MBCTYPE_ASCII:
|
||||
re_mbctab = mbctab_ascii;
|
||||
break;
|
||||
case MBCTYPE_EUC:
|
||||
re_mbctab = mbctab_euc;
|
||||
break;
|
||||
case MBCTYPE_SJIS:
|
||||
re_mbctab = mbctab_sjis;
|
||||
break;
|
||||
case MBCTYPE_UTF8:
|
||||
re_mbctab = mbctab_utf8;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
Загрузка…
Ссылка в новой задаче