From dbfe25c939c98f7ec93ca5de3d92682b16086d03 Mon Sep 17 00:00:00 2001 From: ksaito Date: Tue, 16 Mar 2004 15:25:28 +0000 Subject: [PATCH] * oniguruma.h, regparse.c: imported Oniguruma 2.2.5. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5963 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 5 ++++ oniguruma.h | 16 ++++++++--- regparse.c | 78 ++++++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 79 insertions(+), 20 deletions(-) diff --git a/ChangeLog b/ChangeLog index 61c1988389..eb5032951c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Wed Mar 17 00:22:03 2004 Kazuo Saito + + * oniguruma.h: imported Oniguruma 2.2.5. + * regparse.c: ditto. + Tue Mar 16 11:14:17 Hirokazu Yamamoto * dir.c (fnmatch_helper): File.fnmatch('\.', '.') should return true. diff --git a/oniguruma.h b/oniguruma.h index 338f71357a..b9c5ad8cd4 100644 --- a/oniguruma.h +++ b/oniguruma.h @@ -11,7 +11,7 @@ #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 2 #define ONIGURUMA_VERSION_MINOR 2 -#define ONIGURUMA_VERSION_TEENY 4 +#define ONIGURUMA_VERSION_TEENY 5 #ifndef P_ #if defined(__STDC__) || defined(_WIN32) @@ -464,7 +464,9 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIG_SYN_OP2_ESC_V_VTAB (1<<13) /* \v as VTAB */ #define ONIG_SYN_OP2_ESC_U_HEX4 (1<<14) /* \uHHHH */ #define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1<<15) /* \`, \' */ -#define ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */ +#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */ +#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1<<17) /* \p{^..}, \P{^..} */ +#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1<<18) /* \p{IsXDigit} */ /* syntax (behavior) */ #define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1<<31) /* not implemented */ @@ -503,7 +505,10 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIG_NORMAL 0 #define ONIG_MISMATCH -1 #define ONIG_NO_SUPPORT_CONFIG -2 + /* internal error */ +#define ONIGERR_MEMORY -5 +#define ONIGERR_TYPE_BUG -6 #define ONIGERR_PARSER_BUG -11 #define ONIGERR_STACK_BUG -12 #define ONIGERR_UNDEFINED_BYTECODE -13 @@ -558,8 +563,11 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_NEVER_ENDING_RECURSION -221 #define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222 #define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223 +#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 +#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401 + /* errors related to thread */ -#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 +#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 /* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */ @@ -706,7 +714,7 @@ void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior) ONIG_EXTERN void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options)); ONIG_EXTERN -int onig_set_meta_char P_((unsigned int what, unsigned int c)); +int onig_set_meta_char P_((unsigned int what, OnigCodePoint code)); ONIG_EXTERN int onig_end P_((void)); ONIG_EXTERN diff --git a/regparse.c b/regparse.c index ead0bce12b..3a5b402f1e 100644 --- a/regparse.c +++ b/regparse.c @@ -96,7 +96,7 @@ OnigSyntaxType OnigSyntaxJava = { ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 | - ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY ) + ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY ) , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND ) , ONIG_OPTION_SINGLELINE }; @@ -109,7 +109,9 @@ OnigSyntaxType OnigSyntaxPerl = { & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | - ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY ) + ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | + ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | + ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS ) , SYN_GNU_REGEX_BV , ONIG_OPTION_SINGLELINE }; @@ -192,26 +194,30 @@ OnigMetaCharTableType OnigMetaCharTable = { }; #ifdef USE_VARIABLE_META_CHARS -extern int onig_set_meta_char(unsigned int what, unsigned int c) +extern int onig_set_meta_char(unsigned int what, OnigCodePoint code) { + if (code >= 256) { /* restricted by current implementation. */ + return ONIGERR_INVALID_ARGUMENT; + } + switch (what) { case ONIG_META_CHAR_ESCAPE: - OnigMetaCharTable.esc = c; + OnigMetaCharTable.esc = (UChar )code; break; case ONIG_META_CHAR_ANYCHAR: - OnigMetaCharTable.anychar = c; + OnigMetaCharTable.anychar = (UChar )code; break; case ONIG_META_CHAR_ANYTIME: - OnigMetaCharTable.anytime = c; + OnigMetaCharTable.anytime = (UChar )code; break; case ONIG_META_CHAR_ZERO_OR_ONE_TIME: - OnigMetaCharTable.zero_or_one_time = c; + OnigMetaCharTable.zero_or_one_time = (UChar )code; break; case ONIG_META_CHAR_ONE_OR_MORE_TIME: - OnigMetaCharTable.one_or_more_time = c; + OnigMetaCharTable.one_or_more_time = (UChar )code; break; case ONIG_META_CHAR_ANYCHAR_ANYTIME: - OnigMetaCharTable.anychar_anytime = c; + OnigMetaCharTable.anychar_anytime = (UChar )code; break; default: return ONIGERR_INVALID_ARGUMENT; @@ -2574,10 +2580,20 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case 'p': case 'P': if (PPEEK == '{' && - IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY)) { + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { PINC; tok->type = TK_CHAR_PROPERTY; tok->u.prop.not = (c == 'P' ? 1 : 0); + + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { + int c2; + PFETCH(c2); + if (c2 == '^') { + tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); + } + else + PUNFETCH; + } } break; @@ -3055,10 +3071,20 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case 'p': case 'P': if (PPEEK == '{' && - IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY)) { + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { PINC; tok->type = TK_CHAR_PROPERTY; tok->u.prop.not = (c == 'P' ? 1 : 0); + + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { + int c2; + PFETCH(c2); + if (c2 == '^') { + tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); + } + else + PUNFETCH; + } } break; @@ -3483,22 +3509,40 @@ property_name_to_ctype(UChar* p, UChar* end) return pb->ctype; } - return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + return -1; } static int fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) { int ctype; - UChar *prev, *p = *src; - int c = 0; + UChar *prev, *start, *p = *src; + int c; + + /* 'IsXXXX' => 'XXXX' */ + if (!PEND && + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS)) { + c = PPEEK; + if (c == 'I') { + PINC; + if (! PEND) { + c = PPEEK; + if (c == 's') + PINC; + else + PUNFETCH; + } + } + } + + start = prev = p; while (!PEND) { prev = p; PFETCH(c); if (c == '}') { - ctype = property_name_to_ctype(*src, prev); - if (ctype < 0) return ctype; + ctype = property_name_to_ctype(start, prev); + if (ctype < 0) break; *src = p; return ctype; @@ -3507,6 +3551,8 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) break; } + onig_scan_env_set_error_string(env, ONIGERR_INVALID_CHAR_PROPERTY_NAME, + *src, prev); return ONIGERR_INVALID_CHAR_PROPERTY_NAME; }