From 1034d1407e433b3fa7fe163c87e0b81e9b3f5787 Mon Sep 17 00:00:00 2001 From: kosako Date: Thu, 17 Feb 2005 14:43:38 +0000 Subject: [PATCH] remove GNU regex API git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7993 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 13 ++ LEGAL | 5 +- common.mk | 4 - ext/strscan/strscan.c | 18 ++- gc.c | 5 +- re.c | 162 +++++++++++++++---------- reggnu.c | 276 ------------------------------------------ 7 files changed, 124 insertions(+), 359 deletions(-) delete mode 100644 reggnu.c diff --git a/ChangeLog b/ChangeLog index b28a2386b3..4a56a148bc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +Thu Feb 17 22:15:34 2005 K.Kosako + + * ext/strscan/strscan.c: calls Oniguruma API directly. + +Thu Feb 17 21:53:12 2005 K.Kosako + + * common.mk, LEGAL: remove reggnu.c. + +Thu Feb 17 21:53:12 2005 Kazuo Saito + + * gc.c, re.c: now ruby calls Oniguruma API directly, bypassing + GNU compatible APIs. + Thu Feb 17 20:09:23 2005 Hirokazu Yamamoto * lib/drb/drb.rb (DRbServer.default_safe_level): fix typo. diff --git a/LEGAL b/LEGAL index 121c5a7184..852a6bdf60 100644 --- a/LEGAL +++ b/LEGAL @@ -12,7 +12,6 @@ regenc.[ch]: regerror.c: regex.c: regexec.c: -reggnu.c: regint.h: regparse.[ch]: ascii.c: @@ -20,11 +19,11 @@ euc_jp.c: sjis.c: utf8.c: -Oniguruma ---- (C) K.Kosako +Oniguruma ---- (C) K.Kosako http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ -http://www.geocities.jp/kosako1/oniguruma/ +http://www.geocities.jp/kosako3/oniguruma/ When this software is partly used or it is distributed with Ruby, this of Ruby follows the license of Ruby. 
diff --git a/common.mk b/common.mk index 88f8afec82..86dd449028 100644 --- a/common.mk +++ b/common.mk @@ -38,7 +38,6 @@ OBJS = array.$(OBJEXT) \ regenc.$(OBJEXT) \ regerror.$(OBJEXT) \ regexec.$(OBJEXT) \ - reggnu.$(OBJEXT) \ regparse.$(OBJEXT) \ ruby.$(OBJEXT) \ signal.$(OBJEXT) \ @@ -260,9 +259,6 @@ regerror.$(OBJEXT): {$(VPATH)}regerror.c {$(VPATH)}regint.h \ {$(VPATH)}regenc.h {$(VPATH)}oniguruma.h config.h regexec.$(OBJEXT): {$(VPATH)}regexec.c {$(VPATH)}regint.h \ {$(VPATH)}regenc.h {$(VPATH)}oniguruma.h config.h -reggnu.$(OBJEXT): {$(VPATH)}reggnu.c {$(VPATH)}regint.h \ - {$(VPATH)}regenc.h {$(VPATH)}oniguruma.h {$(VPATH)}oniggnu.h \ - config.h regparse.$(OBJEXT): {$(VPATH)}regparse.c {$(VPATH)}oniguruma.h \ {$(VPATH)}regint.h {$(VPATH)}regparse.h {$(VPATH)}regenc.h config.h ruby.$(OBJEXT): {$(VPATH)}ruby.c {$(VPATH)}ruby.h config.h \ diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c index db1a9c64e6..158fab321f 100644 --- a/ext/strscan/strscan.c +++ b/ext/strscan/strscan.c @@ -171,7 +171,7 @@ static void strscan_free(p) struct strscanner *p; { - re_free_registers(&(p->regs)); + onig_region_free(&(p->regs), 0); memset(p, sizeof(struct strscanner), 0); free(p); } @@ -440,17 +440,15 @@ strscan_do_scan(self, regex, succptr, getstr, headonly) } strscan_prepare_re(regex); if (headonly) { - ret = re_match(RREGEXP(regex)->ptr, - CURPTR(p), S_RESTLEN(p), - 0, - &(p->regs)); + ret = onig_match(RREGEXP(regex)->ptr, (UChar* )CURPTR(p), + (UChar* )(CURPTR(p) + S_RESTLEN(p)), + (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE); } else { - ret = re_search(RREGEXP(regex)->ptr, - CURPTR(p), S_RESTLEN(p), - 0, - S_RESTLEN(p), - &(p->regs)); + ret = onig_search(RREGEXP(regex)->ptr, + (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)), + (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)), + &(p->regs), ONIG_OPTION_NONE); } if (ret == -2) rb_raise(ScanError, "regexp buffer overflow"); diff --git a/gc.c b/gc.c index 7aa42da67f..4197defda4 100644 --- a/gc.c 
+++ b/gc.c @@ -52,7 +52,6 @@ extern unsigned long __libc_ia64_register_backing_store_base; #include #endif -void re_free_registers _((struct re_registers*)); int rb_io_fptr_finalize _((struct OpenFile*)); #if !defined(setjmp) && defined(HAVE__SETJMP) @@ -1166,7 +1165,7 @@ obj_free(obj) break; case T_REGEXP: if (RANY(obj)->as.regexp.ptr) { - re_free_pattern(RANY(obj)->as.regexp.ptr); + onig_free(RANY(obj)->as.regexp.ptr); } if (RANY(obj)->as.regexp.str) { RUBY_CRITICAL(free(RANY(obj)->as.regexp.str)); @@ -1184,7 +1183,7 @@ obj_free(obj) break; case T_MATCH: if (RANY(obj)->as.match.regs) { - re_free_registers(RANY(obj)->as.match.regs); + onig_region_free(RANY(obj)->as.match.regs, 0); RUBY_CRITICAL(free(RANY(obj)->as.match.regs)); } break; diff --git a/re.c b/re.c index b8bb851be1..8de3ddf8fa 100644 --- a/re.c +++ b/re.c @@ -208,16 +208,16 @@ kcode_set_option(re) if (reg_kcode == curr_kcode) return; switch (curr_kcode) { case KCODE_NONE: - re_mbcinit(MBCTYPE_ASCII); + onigenc_set_default_encoding(ONIG_ENCODING_ASCII); break; case KCODE_EUC: - re_mbcinit(MBCTYPE_EUC); + onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP); break; case KCODE_SJIS: - re_mbcinit(MBCTYPE_SJIS); + onigenc_set_default_encoding(ONIG_ENCODING_SJIS); break; case KCODE_UTF8: - re_mbcinit(MBCTYPE_UTF8); + onigenc_set_default_encoding(ONIG_ENCODING_UTF8); break; } } @@ -228,16 +228,16 @@ kcode_reset_option() if (reg_kcode == curr_kcode) return; switch (reg_kcode) { case KCODE_NONE: - re_mbcinit(MBCTYPE_ASCII); + onigenc_set_default_encoding(ONIG_ENCODING_ASCII); break; case KCODE_EUC: - re_mbcinit(MBCTYPE_EUC); + onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP); break; case KCODE_SJIS: - re_mbcinit(MBCTYPE_SJIS); + onigenc_set_default_encoding(ONIG_ENCODING_SJIS); break; case KCODE_UTF8: - re_mbcinit(MBCTYPE_UTF8); + onigenc_set_default_encoding(ONIG_ENCODING_UTF8); break; } } @@ -335,11 +335,11 @@ rb_reg_desc(s, len, re) rb_str_buf_cat2(str, "/"); if (re) { rb_reg_check(re); - if 
(RREGEXP(re)->ptr->options & RE_OPTION_MULTILINE) + if (RREGEXP(re)->ptr->options & ONIG_OPTION_MULTILINE) rb_str_buf_cat2(str, "m"); - if (RREGEXP(re)->ptr->options & RE_OPTION_IGNORECASE) + if (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE) rb_str_buf_cat2(str, "i"); - if (RREGEXP(re)->ptr->options & RE_OPTION_EXTENDED) + if (RREGEXP(re)->ptr->options & ONIG_OPTION_EXTEND) rb_str_buf_cat2(str, "x"); if (FL_TEST(re, KCODE_FIXED)) { @@ -430,7 +430,7 @@ rb_reg_to_s(re) VALUE re; { int options; - const int embeddable = RE_OPTION_MULTILINE|RE_OPTION_IGNORECASE|RE_OPTION_EXTENDED; + const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND; long len; const char* ptr; VALUE str = rb_str_buf_new2("(?"); @@ -447,13 +447,13 @@ rb_reg_to_s(re) if ((len -= 2) > 0) { do { if (*ptr == 'm') { - options |= RE_OPTION_MULTILINE; + options |= ONIG_OPTION_MULTILINE; } else if (*ptr == 'i') { - options |= RE_OPTION_IGNORECASE; + options |= ONIG_OPTION_IGNORECASE; } else if (*ptr == 'x') { - options |= RE_OPTION_EXTENDED; + options |= ONIG_OPTION_EXTEND; } else break; ++ptr; @@ -464,13 +464,13 @@ rb_reg_to_s(re) --len; do { if (*ptr == 'm') { - options &= ~RE_OPTION_MULTILINE; + options &= ~ONIG_OPTION_MULTILINE; } else if (*ptr == 'i') { - options &= ~RE_OPTION_IGNORECASE; + options &= ~ONIG_OPTION_IGNORECASE; } else if (*ptr == 'x') { - options &= ~RE_OPTION_EXTENDED; + options &= ~ONIG_OPTION_EXTEND; } else break; ++ptr; @@ -485,12 +485,17 @@ rb_reg_to_s(re) int r; Regexp *rp; kcode_set_option(re); - r = re_alloc_pattern(&rp); + r = onig_alloc_init(&rp, ONIG_OPTION_DEFAULT, + ONIGENC_AMBIGUOUS_MATCH_DEFAULT, + onigenc_get_default_encoding(), + OnigDefaultSyntax); if (r == 0) { - err = (re_compile_pattern(++ptr, len -= 2, rp, NULL) != 0); + ++ptr; + len -= 2; + err = (onig_compile(rp, ptr, ptr + len, NULL) != 0); } kcode_reset_option(); - re_free_pattern(rp); + onig_free(rp); } if (err) { options = RREGEXP(re)->ptr->options; @@ -499,15 +504,15 @@ 
rb_reg_to_s(re) } } - if (options & RE_OPTION_MULTILINE) rb_str_buf_cat2(str, "m"); - if (options & RE_OPTION_IGNORECASE) rb_str_buf_cat2(str, "i"); - if (options & RE_OPTION_EXTENDED) rb_str_buf_cat2(str, "x"); + if (options & ONIG_OPTION_MULTILINE) rb_str_buf_cat2(str, "m"); + if (options & ONIG_OPTION_IGNORECASE) rb_str_buf_cat2(str, "i"); + if (options & ONIG_OPTION_EXTEND) rb_str_buf_cat2(str, "x"); if ((options & embeddable) != embeddable) { rb_str_buf_cat2(str, "-"); - if (!(options & RE_OPTION_MULTILINE)) rb_str_buf_cat2(str, "m"); - if (!(options & RE_OPTION_IGNORECASE)) rb_str_buf_cat2(str, "i"); - if (!(options & RE_OPTION_EXTENDED)) rb_str_buf_cat2(str, "x"); + if (!(options & ONIG_OPTION_MULTILINE)) rb_str_buf_cat2(str, "m"); + if (!(options & ONIG_OPTION_IGNORECASE)) rb_str_buf_cat2(str, "i"); + if (!(options & ONIG_OPTION_EXTEND)) rb_str_buf_cat2(str, "x"); } rb_str_buf_cat2(str, ":"); @@ -547,7 +552,7 @@ rb_reg_casefold_p(re) VALUE re; { rb_reg_check(re); - if (RREGEXP(re)->ptr->options & RE_OPTION_IGNORECASE) return Qtrue; + if (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE) return Qtrue; return Qfalse; } @@ -626,6 +631,7 @@ make_regexp(s, len, flags, ce) Regexp *rp; char err[ONIG_MAX_ERROR_MESSAGE_LEN]; int r; + OnigErrorInfo einfo; /* Handle escaped characters first. */ @@ -634,18 +640,19 @@ make_regexp(s, len, flags, ce) from that. 
*/ - r = re_alloc_pattern(&rp); + r = onig_alloc_init(&rp, flags, + ONIGENC_AMBIGUOUS_MATCH_DEFAULT, + onigenc_get_default_encoding(), + OnigDefaultSyntax); if (r) { - re_error_code_to_str((UChar* )err, r); + onig_error_code_to_str((UChar* )err, r); rb_reg_raise(s, len, err, 0, ce); } - if (flags) { - rp->options = flags; - } - r = re_compile_pattern(s, len, rp, err); + r = onig_compile(rp, (UChar* )s, (UChar* )(s + len), &einfo); if (r != 0) { + (void )onig_error_code_to_str((UChar* )err, r, &einfo); rb_reg_raise(s, len, err, 0, ce); } return rp; @@ -694,9 +701,9 @@ match_init_copy(obj, orig) rb_raise(rb_eTypeError, "wrong argument class"); } RMATCH(obj)->str = RMATCH(orig)->str; - re_free_registers(RMATCH(obj)->regs); + onig_region_free(RMATCH(obj)->regs, 0); RMATCH(obj)->regs->allocated = 0; - re_copy_registers(RMATCH(obj)->regs, RMATCH(orig)->regs); + onig_region_copy(RMATCH(obj)->regs, RMATCH(orig)->regs); return obj; } @@ -830,12 +837,12 @@ rb_reg_prepare_re(re) /* ignorecase status */ if (ruby_ignorecase && !state) { FL_SET(re, REG_CASESTATE); - RREGEXP(re)->ptr->options |= RE_OPTION_IGNORECASE; + RREGEXP(re)->ptr->options |= ONIG_OPTION_IGNORECASE; need_recompile = 1; } if (!ruby_ignorecase && state) { FL_UNSET(re, REG_CASESTATE); - RREGEXP(re)->ptr->options &= ~RE_OPTION_IGNORECASE; + RREGEXP(re)->ptr->options &= ~ONIG_OPTION_IGNORECASE; need_recompile = 1; } @@ -849,13 +856,22 @@ rb_reg_prepare_re(re) if (need_recompile) { char err[ONIG_MAX_ERROR_MESSAGE_LEN]; int r; + OnigErrorInfo einfo; + regex_t *reg; + UChar *pattern; if (FL_TEST(re, KCODE_FIXED)) kcode_set_option(re); rb_reg_check(re); - r = re_recompile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr, err); + reg = RREGEXP(re)->ptr; + pattern = ((UChar* )RREGEXP(re)->str); + r = onig_recompile(reg, pattern, pattern + RREGEXP(re)->len, + reg->options, onigenc_get_default_encoding(), + OnigDefaultSyntax, &einfo); + if (r != 0) { - rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, re, 
Qfalse); + (void )onig_error_code_to_str((UChar* )err, r, &einfo); + rb_reg_raise(pattern, RREGEXP(re)->len, err, re, Qfalse); } } } @@ -866,6 +882,8 @@ rb_reg_adjust_startpos(re, str, pos, reverse) long pos, reverse; { long range; + OnigEncoding enc; + UChar *p, *string; rb_reg_check(re); if (may_need_recompile) rb_reg_prepare_re(re); @@ -881,9 +899,22 @@ rb_reg_adjust_startpos(re, str, pos, reverse) else { range = RSTRING(str)->len - pos; } - return re_adjust_startpos(RREGEXP(re)->ptr, - RSTRING(str)->ptr, RSTRING(str)->len, - pos, range); + + enc = (RREGEXP(re)->ptr)->enc; + + if (pos > 0 && ONIGENC_MBC_MAXLEN(enc) != 1 && pos < RSTRING(str)->len) { + string = (UChar* )RSTRING(str)->ptr; + + if (range > 0) { + p = onigenc_get_right_adjust_char_head(enc, string, string + pos); + } + else { + p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, string, string + pos); + } + return p - string; + } + + return pos; } long @@ -915,8 +946,13 @@ rb_reg_search(re, str, pos, reverse) else { range = RSTRING(str)->len - pos; } - result = re_search(RREGEXP(re)->ptr,RSTRING(str)->ptr,RSTRING(str)->len, - pos, range, &regs); + + result = onig_search(RREGEXP(re)->ptr, + (UChar* )(RSTRING(str)->ptr), + ((UChar* )(RSTRING(str)->ptr) + RSTRING(str)->len), + ((UChar* )(RSTRING(str)->ptr) + pos), + ((UChar* )(RSTRING(str)->ptr) + pos + range), + &regs, ONIG_OPTION_NONE); if (FL_TEST(re, KCODE_FIXED)) kcode_reset_option(); @@ -928,7 +964,7 @@ rb_reg_search(re, str, pos, reverse) } else { char err[ONIG_MAX_ERROR_MESSAGE_LEN]; - re_error_code_to_str((UChar* )err, result); + onig_error_code_to_str((UChar* )err, result); rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, 0, Qfalse); } } @@ -944,7 +980,7 @@ rb_reg_search(re, str, pos, reverse) FL_UNSET(match, FL_TAINT); } - re_copy_registers(RMATCH(match)->regs, &regs); + onig_region_copy(RMATCH(match)->regs, &regs); RMATCH(match)->str = rb_str_new4(str); rb_backref_set(match); @@ -1338,7 +1374,7 @@ rb_reg_initialize(obj, s, len, options, ce) { struct RRegexp 
*re = RREGEXP(obj); - if (re->ptr) re_free_pattern(re->ptr); + if (re->ptr) onig_free(re->ptr); if (re->str) free(re->str); re->ptr = 0; re->str = 0; @@ -1366,7 +1402,7 @@ rb_reg_initialize(obj, s, len, options, ce) kcode_set_option((VALUE)re); } if (ruby_ignorecase) { - options |= RE_OPTION_IGNORECASE; + options |= ONIG_OPTION_IGNORECASE; FL_SET(re, REG_CASESTATE); } re->ptr = make_regexp(s, len, options & 0xf, ce); @@ -1734,7 +1770,7 @@ rb_reg_initialize_m(argc, argv, self) else { if (argc >= 2) { if (FIXNUM_P(argv[1])) flags = FIX2INT(argv[1]); - else if (RTEST(argv[1])) flags = RE_OPTION_IGNORECASE; + else if (RTEST(argv[1])) flags = ONIG_OPTION_IGNORECASE; } if (argc == 3 && !NIL_P(argv[2])) { char *kcode = StringValuePtr(argv[2]); @@ -1924,7 +1960,7 @@ rb_reg_options(re) rb_reg_check(re); options = RREGEXP(re)->ptr->options & - (RE_OPTION_IGNORECASE|RE_OPTION_MULTILINE|RE_OPTION_EXTENDED); + (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND); if (FL_TEST(re, KCODE_FIXED)) { options |= rb_reg_get_kcode(re); } @@ -2157,17 +2193,17 @@ rb_set_kcode(code) case 'E': case 'e': reg_kcode = KCODE_EUC; - re_mbcinit(MBCTYPE_EUC); + onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP); break; case 'S': case 's': reg_kcode = KCODE_SJIS; - re_mbcinit(MBCTYPE_SJIS); + onigenc_set_default_encoding(ONIG_ENCODING_SJIS); break; case 'U': case 'u': reg_kcode = KCODE_UTF8; - re_mbcinit(MBCTYPE_UTF8); + onigenc_set_default_encoding(ONIG_ENCODING_UTF8); break; default: case 'N': @@ -2176,7 +2212,7 @@ rb_set_kcode(code) case 'a': set_no_conversion: reg_kcode = KCODE_NONE; - re_mbcinit(MBCTYPE_ASCII); + onigenc_set_default_encoding(ONIG_ENCODING_ASCII); break; } } @@ -2271,17 +2307,17 @@ Init_Regexp() { rb_eRegexpError = rb_define_class("RegexpError", rb_eStandardError); - re_set_casetable(casetable); + onigenc_set_default_caseconv_table((UChar* )casetable); #if DEFAULT_KCODE == KCODE_EUC - re_mbcinit(MBCTYPE_EUC); + onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP); 
#else #if DEFAULT_KCODE == KCODE_SJIS - re_mbcinit(MBCTYPE_SJIS); + onigenc_set_default_encoding(ONIG_ENCODING_SJIS); #else #if DEFAULT_KCODE == KCODE_UTF8 - re_mbcinit(MBCTYPE_UTF8); + onigenc_set_default_encoding(ONIG_ENCODING_UTF8); #else - re_mbcinit(MBCTYPE_ASCII); + onigenc_set_default_encoding(ONIG_ENCODING_ASCII); #endif #endif #endif @@ -2320,9 +2356,9 @@ Init_Regexp() rb_define_method(rb_cRegexp, "options", rb_reg_options_m, 0); rb_define_method(rb_cRegexp, "kcode", rb_reg_kcode_m, 0); - rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(RE_OPTION_IGNORECASE)); - rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(RE_OPTION_EXTENDED)); - rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(RE_OPTION_MULTILINE)); + rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE)); + rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND)); + rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE)); rb_global_variable(&reg_cache); diff --git a/reggnu.c b/reggnu.c deleted file mode 100644 index de7468f36a..0000000000 --- a/reggnu.c +++ /dev/null @@ -1,276 +0,0 @@ -/********************************************************************** - reggnu.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2004 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regint.h" - -#ifndef ONIGGNU_H /* name changes from oniggnu.h to regex.h in ruby. */ -#include "oniggnu.h" -#endif - -#if defined(RUBY_PLATFORM) || defined(RUBY) -#ifndef ONIG_RUBY_M17N -#define USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY -#endif -#endif - -extern void -re_free_registers(OnigRegion* r) -{ - /* 0: don't free self */ - onig_region_free(r, 0); -} - -extern int -re_adjust_startpos(regex_t* reg, const char* string, int size, - int startpos, int range) -{ - if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) { - UChar *p; - UChar *s = (UChar* )string + startpos; - - if (range > 0) { - p = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, s); - } - else { - p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s); - } - return p - (UChar* )string; - } - - return startpos; -} - -extern int -re_match(regex_t* reg, const char* str, int size, int pos, - struct re_registers* regs) -{ - return onig_match(reg, (UChar* )str, (UChar* )(str + size), - (UChar* )(str + pos), regs, ONIG_OPTION_NONE); -} - -extern int -re_search(regex_t* bufp, const char* string, int size, int startpos, int range, - struct re_registers* regs) -{ - return 
onig_search(bufp, (UChar* )string, (UChar* )(string + size), - (UChar* )(string + startpos), - (UChar* )(string + startpos + range), - regs, ONIG_OPTION_NONE); -} - -extern int -re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) -{ - int r; - OnigErrorInfo einfo; - - r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo); - if (r != 0) { - if (IS_NOT_NULL(ebuf)) - (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); - } - - return r; -} - -extern int -re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) -{ - int r; - OnigErrorInfo einfo; - OnigEncoding enc; - - /* I think encoding and options should be arguments of this function. - But this is adapted to present re.c. (2002/11/29) - */ - enc = OnigEncDefaultCharEncoding; - - r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size), - reg->options, enc, OnigDefaultSyntax, &einfo); - if (r != 0) { - if (IS_NOT_NULL(ebuf)) - (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); - } - return r; -} - -extern void -re_free_pattern(regex_t* reg) -{ - onig_free(reg); -} - -extern int -re_alloc_pattern(regex_t** reg) -{ - return onig_alloc_init(reg, ONIG_OPTION_DEFAULT, - ONIGENC_AMBIGUOUS_MATCH_DEFAULT, - OnigEncDefaultCharEncoding, - OnigDefaultSyntax); -} - -extern void -re_set_casetable(const char* table) -{ - onigenc_set_default_caseconv_table((UChar* )table); -} - -#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY -static const unsigned char mbctab_ascii[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static const unsigned char mbctab_euc[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, -}; - -static const unsigned char mbctab_sjis[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 0, 0, 0 -}; - -static const unsigned char mbctab_utf8[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0, -}; - -const unsigned char *re_mbctab = mbctab_ascii; -#endif - -extern void -#ifdef ONIG_RUBY_M17N -re_mbcinit(OnigEncoding enc) -#else -re_mbcinit(int mb_code) -#endif -{ -#ifdef ONIG_RUBY_M17N - - onigenc_set_default_encoding(enc); - -#else - - OnigEncoding enc; - - switch (mb_code) { - case MBCTYPE_ASCII: - enc = ONIG_ENCODING_ASCII; - break; - case MBCTYPE_EUC: - enc = ONIG_ENCODING_EUC_JP; - break; - case MBCTYPE_SJIS: - enc = ONIG_ENCODING_SJIS; - break; - case MBCTYPE_UTF8: - enc = ONIG_ENCODING_UTF8; - break; - default: - return ; - break; - } - - onigenc_set_default_encoding(enc); -#endif - -#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY - switch (mb_code) { - case MBCTYPE_ASCII: - re_mbctab = mbctab_ascii; - break; - case MBCTYPE_EUC: - re_mbctab = mbctab_euc; - break; - case MBCTYPE_SJIS: - re_mbctab = mbctab_sjis; - break; - case MBCTYPE_UTF8: - re_mbctab = mbctab_utf8; - break; - } -#endif -}