* ascii.c, euc_jp.c, oniggnu.h, oniguruma.h, regcomp.c, regenc.c, regenc.h, regerror.c, regexec.c, reggnu.c, regint.h, regparse.c, regparse.h, sjis.c, utf8.c:

imported Oni Guruma 3.4.0.

* parse.y, re.c: Now mbclen() takes unsigned char as its argument.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7206 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
ksaito 2004-11-04 14:43:08 +00:00
Родитель 5e853c811c
Коммит 82cb9eaa3b
9 изменённых файлов: 1884 добавлений и 1286 удалений

Просмотреть файл

@ -1,3 +1,13 @@
Thu Nov 4 23:41:55 2004 Kazuo Saito <ksaito@uranus.dti.ne.jp>
* ascii.c, euc_jp.c, oniggnu.h, oniguruma.h, regcomp.c,
regenc.c, regenc.h, regerror.c, regexec.c, reggnu.c,
regint.h, regparse.c, regparse.h, sjis.c, utf8.c:
imported Oni Guruma 3.4.0.
* parse.y, re.c: Now mbclen() takes unsigned char as
its argument.
Thu Nov 4 21:25:38 2004 Yukihiro Matsumoto <matz@ruby-lang.org>
* string.c (str_gsub): string modify check no longer based on

Просмотреть файл

@ -1,17 +1,38 @@
/**********************************************************************
oniguruma.h - Oniguruma (regular expression library)
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef ONIGURUMA_H
#define ONIGURUMA_H
/**********************************************************************
oniguruma.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 2
#define ONIGURUMA_VERSION_MINOR 2
#define ONIGURUMA_VERSION_TEENY 8
#define ONIGURUMA_VERSION_MAJOR 3
#define ONIGURUMA_VERSION_MINOR 4
#define ONIGURUMA_VERSION_TEENY 0
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
@ -56,12 +77,56 @@ typedef struct {
OnigCodePoint to;
} OnigCodePointRange;
#define ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE 16
/* ambiguous match flag */
#define ONIGENC_AMBIGUOUS_MATCH_NONE 0
#define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE (1<<0)
#define ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE (1<<1)
/* #define ONIGENC_AMBIGUOUS_MATCH_ACCENT (1<<2) */
/* #define ONIGENC_AMBIGUOUS_MATCH_HIRAGANA_KATAKANA (1<<3) */
/* #define ONIGENC_AMBIGUOUS_MATCH_KATAKANA_WIDTH (1<<4) */
#define ONIGENC_AMBIGUOUS_MATCH_LIMIT (1<<1)
#define ONIGENC_AMBIGUOUS_MATCH_COMPOUND (1<<30)
#define ONIGENC_AMBIGUOUS_MATCH_FULL \
( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | \
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | \
ONIGENC_AMBIGUOUS_MATCH_COMPOUND )
#define ONIGENC_AMBIGUOUS_MATCH_DEFAULT \
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | \
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | \
ONIGENC_AMBIGUOUS_MATCH_COMPOUND )
typedef unsigned int OnigAmbigType;
#define ONIGENC_MAX_COMP_AMBIG_CODE_LEN 3
#define ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM 4
typedef struct {
int target_num;
int target_byte_len[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
UChar* target_str[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
} OnigEncFoldMatchInfo;
int len;
OnigCodePoint code[ONIGENC_MAX_COMP_AMBIG_CODE_LEN];
} OnigCompAmbigCodeItem;
/*
 * A single code point paired with a fixed-capacity array of
 * OnigCompAmbigCodeItem entries (each item is itself a short sequence of
 * code points).  This is the element type handed back through the
 * get_all_comp_ambig_codes() encoding callback.
 * NOTE(review): presumably `items` lists the compound (multi-code-point)
 * sequences that match `code` under ambiguous matching -- confirm against
 * the encoding implementations.
 */
typedef struct {
int n; /* presumably the number of entries of items[] in use -- TODO confirm */
OnigCodePoint code; /* the single code point this entry describes */
OnigCompAmbigCodeItem items[ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM]; /* capacity fixed at compile time */
} OnigCompAmbigCodes;
/*
 * A pair of code points.  This is the element type handed back through the
 * get_all_pair_ambig_codes() encoding callback.
 * NOTE(review): presumably `from` matches `to` under the requested ambiguity
 * flag (e.g. case folding) -- confirm against the encoding implementations.
 */
typedef struct {
OnigCodePoint from; /* first code point of the pair */
OnigCodePoint to; /* second code point of the pair */
} OnigPairAmbigCodes;
/*
 * Table of meta characters, each stored as a code point.  An instance is
 * embedded in every OnigEncodingType as `meta_char_table`.
 * NOTE(review): the per-field meanings below are inferred from the field
 * names only -- confirm against the parser that consumes this table.
 */
typedef struct {
OnigCodePoint esc; /* presumably the escape character */
OnigCodePoint anychar; /* presumably "match any character" */
OnigCodePoint anytime; /* presumably "repeat zero or more times" */
OnigCodePoint zero_or_one_time; /* presumably "repeat zero or one time" */
OnigCodePoint one_or_more_time; /* presumably "repeat one or more times" */
OnigCodePoint anychar_anytime; /* presumably "any character, any number of times" */
} OnigMetaCharTableType;
#if defined(RUBY_PLATFORM) && defined(M17N_H)
@ -72,23 +137,24 @@ typedef m17n_encoding* OnigEncoding;
#else
typedef struct {
const char len_table[256];
int (*mbc_enc_len)(UChar* p);
const char* name;
int max_enc_len;
int is_fold_match;
int ctype_support_level; /* sb-only/full */
int is_continuous_sb_mb; /* code point is continuous from sb to mb */
int min_enc_len;
OnigAmbigType support_ambig_flag;
OnigMetaCharTableType meta_char_table;
int (*is_mbc_newline)(UChar* p, UChar* end);
OnigCodePoint (*mbc_to_code)(UChar* p, UChar* end);
int (*code_to_mbclen)(OnigCodePoint code);
int (*code_to_mbc)(OnigCodePoint code, UChar *buf);
int (*mbc_to_lower)(UChar* p, UChar* lower);
int (*mbc_is_case_ambig)(UChar* p);
int (*code_is_ctype)(OnigCodePoint code, unsigned int ctype);
int (*mbc_to_normalize)(OnigAmbigType flag, UChar** pp, UChar* end, UChar* to);
int (*is_mbc_ambiguous)(OnigAmbigType flag, UChar** pp, UChar* end);
int (*get_all_pair_ambig_codes)(OnigAmbigType flag, OnigPairAmbigCodes** acs);
int (*get_all_comp_ambig_codes)(OnigAmbigType flag, OnigCompAmbigCodes** acs);
int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype);
int (*get_ctype_code_range)(int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]);
UChar* (*left_adjust_char_head)(UChar* start, UChar* s);
int (*is_allowed_reverse_match)(UChar* p, UChar* e);
int (*get_all_fold_match_code)(OnigCodePoint** codes);
int (*get_fold_match_info)(UChar* p, UChar* end, OnigEncFoldMatchInfo** info);
UChar* (*left_adjust_char_head)(UChar* start, UChar* p);
int (*is_allowed_reverse_match)(UChar* p, UChar* end);
} OnigEncodingType;
typedef OnigEncodingType* OnigEncoding;
@ -110,6 +176,10 @@ ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF8;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_BE;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_LE;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_BE;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_LE;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR;
@ -136,6 +206,10 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15)
#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16)
#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8)
#define ONIG_ENCODING_UTF16_BE (&OnigEncodingUTF16_BE)
#define ONIG_ENCODING_UTF16_LE (&OnigEncodingUTF16_LE)
#define ONIG_ENCODING_UTF32_BE (&OnigEncodingUTF32_BE)
#define ONIG_ENCODING_UTF32_LE (&OnigEncodingUTF32_LE)
#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP)
#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW)
#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR)
@ -152,34 +226,31 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
/* work size */
#define ONIGENC_CODE_TO_MBC_MAXLEN 7
#define ONIGENC_MBC_TO_LOWER_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN
#define ONIGENC_MBC_NORMALIZE_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN
/* character types */
#define ONIGENC_CTYPE_ALPHA (1<< 0)
#define ONIGENC_CTYPE_BLANK (1<< 1)
#define ONIGENC_CTYPE_CNTRL (1<< 2)
#define ONIGENC_CTYPE_DIGIT (1<< 3)
#define ONIGENC_CTYPE_GRAPH (1<< 4)
#define ONIGENC_CTYPE_LOWER (1<< 5)
#define ONIGENC_CTYPE_PRINT (1<< 6)
#define ONIGENC_CTYPE_PUNCT (1<< 7)
#define ONIGENC_CTYPE_SPACE (1<< 8)
#define ONIGENC_CTYPE_UPPER (1<< 9)
#define ONIGENC_CTYPE_XDIGIT (1<<10)
#define ONIGENC_CTYPE_WORD (1<<11)
#define ONIGENC_CTYPE_ASCII (1<<12)
#define ONIGENC_CTYPE_NEWLINE (1<< 0)
#define ONIGENC_CTYPE_ALPHA (1<< 1)
#define ONIGENC_CTYPE_BLANK (1<< 2)
#define ONIGENC_CTYPE_CNTRL (1<< 3)
#define ONIGENC_CTYPE_DIGIT (1<< 4)
#define ONIGENC_CTYPE_GRAPH (1<< 5)
#define ONIGENC_CTYPE_LOWER (1<< 6)
#define ONIGENC_CTYPE_PRINT (1<< 7)
#define ONIGENC_CTYPE_PUNCT (1<< 8)
#define ONIGENC_CTYPE_SPACE (1<< 9)
#define ONIGENC_CTYPE_UPPER (1<<10)
#define ONIGENC_CTYPE_XDIGIT (1<<11)
#define ONIGENC_CTYPE_WORD (1<<12)
#define ONIGENC_CTYPE_ASCII (1<<13)
#define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT)
/* ctype support level */
#define ONIGENC_CTYPE_SUPPORT_LEVEL_SB 0
#define ONIGENC_CTYPE_SUPPORT_LEVEL_FULL 1
#define enc_len(enc,byte) ONIGENC_MBC_LEN_BY_HEAD(enc,byte)
#define enc_len(enc,p) ONIGENC_MBC_ENC_LEN(enc,p)
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
#define ONIGENC_IS_MBC_HEAD(enc,byte) (ONIGENC_MBC_LEN_BY_HEAD(enc,byte) != 1)
#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1)
#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
#define ONIGENC_IS_CODE_SB_WORD(enc,code) \
@ -192,31 +263,33 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
#include <ctype.h> /* for isblank(), isgraph() */
#define ONIGENC_MBC_TO_LOWER(enc,p,buf) onigenc_mbc_to_lower(enc,p,buf)
#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) onigenc_mbc_is_case_ambig(enc,p)
#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \
onigenc_mbc_to_normalize(enc,flag,pp,end,buf)
#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \
onigenc_is_mbc_ambiguous(enc,flag,pp,end)
#define ONIGENC_IS_FOLD_MATCH(enc) FALSE
#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) FALSE
#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ONIGENC_CTYPE_SUPPORT_LEVEL_SB
#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
onigenc_is_allowed_reverse_match(enc, s, end)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
onigenc_get_left_adjust_char_head(enc, start, s)
#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) 0
#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) ONIG_NO_SUPPORT_CONFIG
#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, ambig_flag, acs) 0
#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, ambig_flag, acs) 0
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
ONIG_NO_SUPPORT_CONFIG
#define ONIGENC_MBC_LEN_BY_HEAD(enc,b) m17n_mbclen(enc,(int )b)
#define ONIGENC_MBC_ENC_LEN(enc,p) m17n_mbclen(enc,(int )(*p))
#define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc)
#define ONIGENC_MBC_MAXLEN_DIST(enc) \
(ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \
: ONIG_INFINITE_DISTANCE)
#define ONIGENC_MBC_MINLEN(enc) 1
#define ONIGENC_MBC_TO_CODE(enc,p,e) m17n_codepoint((enc),(p),(e))
#define ONIGENC_CODE_TO_MBCLEN(enc,code) m17n_codelen((enc),(code))
#define ONIGENC_CODE_TO_MBC(enc,code,buf) onigenc_code_to_mbc(enc, code, buf)
#if 0
#define ONIGENC_STEP_BACK(enc,start,s,n) /* !! not supported !! */
#if 0 /* !! not supported !! */
#define ONIGENC_IS_MBC_NEWLINE(enc,p,end)
#define ONIGENC_STEP_BACK(enc,start,s,n)
#endif
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \
@ -251,9 +324,9 @@ int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype));
ONIG_EXTERN
int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
ONIG_EXTERN
int onigenc_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* buf));
int onigenc_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, UChar** pp, UChar* end, UChar* buf));
ONIG_EXTERN
int onigenc_mbc_is_case_ambig P_((OnigEncoding enc, UChar* p));
int onigenc_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, UChar** pp, UChar* end));
ONIG_EXTERN
int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end));
@ -261,32 +334,35 @@ int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end)
#define ONIGENC_NAME(enc) ((enc)->name)
#define ONIGENC_MBC_TO_LOWER(enc,p,buf) (enc)->mbc_to_lower(p,buf)
#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) (enc)->mbc_is_case_ambig(p)
#define ONIGENC_IS_FOLD_MATCH(enc) ((enc)->is_fold_match)
#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) ((enc)->is_continuous_sb_mb)
#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ((enc)->ctype_support_level)
#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \
(enc)->mbc_to_normalize(flag,pp,end,buf)
#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \
(enc)->is_mbc_ambiguous(flag,pp,end)
#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ((enc)->support_ambig_flag)
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
(enc)->is_allowed_reverse_match(s,end)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
(enc)->left_adjust_char_head(start, s)
#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) \
(enc)->get_all_fold_match_code(codes)
#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) \
(enc)->get_fold_match_info(p,end,info)
#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc,ambig_flag,acs) \
(enc)->get_all_pair_ambig_codes(ambig_flag,acs)
#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc,ambig_flag,acs) \
(enc)->get_all_comp_ambig_codes(ambig_flag,acs)
#define ONIGENC_STEP_BACK(enc,start,s,n) \
onigenc_step_back((enc),(start),(s),(n))
#define ONIGENC_MBC_LEN_BY_HEAD(enc,byte) ((enc)->len_table[(int )(byte)])
#define ONIGENC_MBC_ENC_LEN(enc,p) (enc)->mbc_enc_len(p)
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
#define ONIGENC_MBC_TO_CODE(enc,p,e) (enc)->mbc_to_code((p),(e))
#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end))
#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end))
#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code)
#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf)
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->code_is_ctype(code,ctype)
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype)
#define ONIGENC_IS_CODE_NEWLINE(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
#define ONIGENC_IS_CODE_GRAPH(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
#define ONIGENC_IS_CODE_PRINT(enc,code) \
@ -340,6 +416,12 @@ ONIG_EXTERN
UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
ONIG_EXTERN
UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
ONIG_EXTERN
int onigenc_strlen P_((OnigEncoding enc, UChar* p, UChar* end));
ONIG_EXTERN
int onigenc_strlen_null P_((OnigEncoding enc, UChar* p));
ONIG_EXTERN
int onigenc_str_bytelen_null P_((OnigEncoding enc, UChar* p));
@ -353,13 +435,6 @@ UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UC
/* constants */
#define ONIG_MAX_ERROR_MESSAGE_LEN 90
#if defined(RUBY_PLATFORM) && !defined(ONIG_RUBY_M17N)
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
#undef ismbchar
#define ismbchar(c) (mbclen((c)) != 1)
#define mbclen(c) (OnigEncDefaultCharEncoding->len_table[(unsigned char )(c)])
#endif
typedef unsigned int OnigOptionType;
#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
@ -467,6 +542,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */
#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1<<17) /* \p{^..}, \P{^..} */
#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1<<18) /* \p{IsXDigit} */
#define ONIG_SYN_OP2_ESC_H_XDIGIT (1<<19) /* \h, \H */
/* syntax (behavior) */
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1<<31) /* not implemented */
@ -479,6 +555,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1<<6) /* (?<=a|bc) */
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1<<7) /* see doc/RE */
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1<<8) /* (?<x>)(?<x>) */
#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1<<9) /* a{n}?=(?:a{n})? */
/* syntax (behavior) in char class [...] */
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1<<20) /* [^...] */
@ -565,6 +642,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
/* errors related to thread */
#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
@ -575,6 +653,15 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
/*
 * Node of a capture tree.  Each node refers to an array of pointers to
 * further nodes of the same type, so nodes nest recursively.
 * NOTE(review): field meanings other than `group` are inferred from their
 * names -- confirm against the matcher that builds the tree.
 */
typedef struct OnigCaptureTreeNodeStruct {
int group; /* group number */
int beg; /* presumably the start position of this capture -- TODO confirm units */
int end; /* presumably the end position of this capture -- TODO confirm units */
int allocated; /* presumably the allocated capacity of childs[] -- TODO confirm */
int num_childs; /* presumably the number of entries of childs[] in use -- TODO confirm */
struct OnigCaptureTreeNodeStruct** childs; /* array of pointers to child nodes */
} OnigCaptureTreeNode;
/* match result region type */
struct re_registers {
int allocated;
@ -582,9 +669,16 @@ struct re_registers {
int* beg;
int* end;
/* extended */
struct re_registers** list; /* capture history. list[1]-list[31] */
OnigCaptureTreeNode* history_root; /* capture history tree root */
};
/* capture tree traverse */
#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1
#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2
#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST )
#define ONIG_REGION_NOTPOS -1
typedef struct re_registers OnigRegion;
@ -635,6 +729,7 @@ typedef struct re_pattern_buffer {
OnigEncoding enc;
OnigOptionType options;
OnigSyntaxType* syntax;
OnigAmbigType ambig_flag;
void* name_table;
/* optimization info (string search, char-map and anchors) */
@ -657,6 +752,15 @@ typedef struct re_pattern_buffer {
} regex_t;
/*
 * Bundle of compile parameters passed by pointer to onig_new_deluxe() and
 * onig_recompile_deluxe() in place of the separate option/encoding/syntax
 * arguments taken by onig_new() and onig_recompile().
 */
typedef struct {
int num_of_elements; /* NOTE(review): presumably a count of the fields that follow, for versioning -- confirm */
OnigEncoding pattern_enc; /* encoding of the pattern, going by the name -- TODO confirm */
OnigEncoding target_enc; /* encoding of the strings to be matched, going by the name -- TODO confirm */
OnigSyntaxType* syntax; /* syntax definition */
OnigOptionType option; /* compile options */
OnigAmbigType ambig_flag; /* ambiguous-match flags (ONIGENC_AMBIGUOUS_MATCH_*) */
} OnigCompileInfo;
/* Oniguruma Native API */
ONIG_EXTERN
int onig_init P_((void));
@ -669,10 +773,14 @@ void onig_set_verb_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
int onig_new P_((regex_t**, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
int onig_new_deluxe P_((regex_t** reg, UChar* pattern, UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
ONIG_EXTERN
void onig_free P_((regex_t*));
ONIG_EXTERN
int onig_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
int onig_recompile_deluxe P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
ONIG_EXTERN
int onig_search P_((regex_t*, UChar* str, UChar* end, UChar* start, UChar* range, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
int onig_match P_((regex_t*, UChar* str, UChar* end, UChar* at, OnigRegion* region, OnigOptionType option));
@ -696,16 +804,34 @@ int onig_foreach_name P_((regex_t* reg, int (*func)(UChar*,UChar*,int,int*,regex
ONIG_EXTERN
int onig_number_of_names P_((regex_t* reg));
ONIG_EXTERN
int onig_number_of_captures P_((regex_t* reg));
ONIG_EXTERN
int onig_number_of_capture_histories P_((regex_t* reg));
ONIG_EXTERN
OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region));
ONIG_EXTERN
int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg));
ONIG_EXTERN
OnigEncoding onig_get_encoding P_((regex_t* reg));
ONIG_EXTERN
OnigOptionType onig_get_options P_((regex_t* reg));
ONIG_EXTERN
OnigAmbigType onig_get_ambig_flag P_((regex_t* reg));
ONIG_EXTERN
OnigSyntaxType* onig_get_syntax P_((regex_t* reg));
ONIG_EXTERN
int onig_set_default_syntax P_((OnigSyntaxType* syntax));
ONIG_EXTERN
void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
ONIG_EXTERN
unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax));
ONIG_EXTERN
unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax));
ONIG_EXTERN
unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax));
ONIG_EXTERN
OnigOptionType onig_get_syntax_options P_((OnigSyntaxType* syntax));
ONIG_EXTERN
void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
ONIG_EXTERN
void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
@ -714,7 +840,9 @@ void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior)
ONIG_EXTERN
void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
ONIG_EXTERN
int onig_set_meta_char P_((unsigned int what, OnigCodePoint code));
int onig_set_meta_char P_((OnigEncoding enc, unsigned int what, OnigCodePoint code));
ONIG_EXTERN
void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from));
ONIG_EXTERN
unsigned int onig_get_match_stack_limit_size P_((void));
ONIG_EXTERN
@ -723,5 +851,7 @@ ONIG_EXTERN
int onig_end P_((void));
ONIG_EXTERN
const char* onig_version P_((void));
ONIG_EXTERN
const char* onig_copyright P_((void));
#endif /* ONIGURUMA_H */

46
parse.y
Просмотреть файл

@ -4853,8 +4853,10 @@ parser_tokadd_string(parser, func, term, paren, nest)
long *nest;
{
int c;
unsigned char uc;
while ((c = nextc()) != -1) {
uc = (unsigned char)c;
if (paren && c == paren) {
++*nest;
}
@ -4905,8 +4907,8 @@ parser_tokadd_string(parser, func, term, paren, nest)
}
}
}
else if (ismbchar(c)) {
int i, len = mbclen(c)-1;
else if (ismbchar(uc)) {
int i, len = mbclen(uc)-1;
for (i = 0; i < len; i++) {
tokadd(c);
@ -5002,6 +5004,7 @@ parser_heredoc_identifier(parser)
struct parser_params *parser;
{
int c = nextc(), term, func = 0, len;
unsigned int uc;
if (c == '-') {
c = nextc();
@ -5019,7 +5022,8 @@ parser_heredoc_identifier(parser)
tokadd(func);
term = c;
while ((c = nextc()) != -1 && c != term) {
len = mbclen(c);
uc = (unsigned int)c;
len = mbclen(uc);
do {tokadd(c);} while (--len > 0 && (c = nextc()) != -1);
}
if (c == -1) {
@ -5029,7 +5033,8 @@ parser_heredoc_identifier(parser)
break;
default:
if (!is_identchar(c)) {
uc = (unsigned int)c;
if (!is_identchar(uc)) {
pushback(c);
if (func & STR_FUNC_INDENT) {
pushback('-');
@ -5040,9 +5045,11 @@ parser_heredoc_identifier(parser)
term = '"';
tokadd(func |= str_dquote);
do {
len = mbclen(c);
uc = (unsigned int)c;
len = mbclen(uc);
do {tokadd(c);} while (--len > 0 && (c = nextc()) != -1);
} while ((c = nextc()) != -1 && is_identchar(c));
} while ((c = nextc()) != -1 &&
(uc = (unsigned char)c, is_identchar(uc)));
pushback(c);
break;
}
@ -5233,6 +5240,7 @@ parser_yylex(parser)
register int c;
int space_seen = 0;
int cmd_state;
unsigned char uc;
#ifdef RIPPER
int fallthru = Qfalse;
#endif
@ -5519,6 +5527,7 @@ parser_yylex(parser)
rb_compile_error(PARSER_ARG "incomplete character syntax");
return 0;
}
uc = (unsigned char)c;
if (ISSPACE(c)){
if (!IS_ARG()){
int c2 = 0;
@ -5551,7 +5560,7 @@ parser_yylex(parser)
lex_state = EXPR_TERNARY;
return '?';
}
else if (ismbchar(c)) {
else if (ismbchar(uc)) {
rb_warnI("multibyte character literal not supported yet; use ?\\%.3o", c);
goto ternary;
}
@ -6098,7 +6107,8 @@ parser_yylex(parser)
}
else {
term = nextc();
if (ISALNUM(term) || ismbchar(term)) {
uc = (unsigned char)c;
if (ISALNUM(term) || ismbchar(uc)) {
yyerror("unknown type of %string");
return 0;
}
@ -6177,7 +6187,8 @@ parser_yylex(parser)
switch (c) {
case '_': /* $_: last read line string */
c = nextc();
if (is_identchar(c)) {
uc = (unsigned char)c;
if (is_identchar(uc)) {
tokadd('$');
tokadd('_');
break;
@ -6243,7 +6254,8 @@ parser_yylex(parser)
return tNTH_REF;
default:
if (!is_identchar(c)) {
uc = (unsigned char)c;
if (!is_identchar(uc)) {
pushback(c);
return '$';
}
@ -6268,7 +6280,8 @@ parser_yylex(parser)
rb_compile_error(PARSER_ARG "`@@%c' is not allowed as a class variable name", c);
}
}
if (!is_identchar(c)) {
uc = (unsigned char)c;
if (!is_identchar(uc)) {
pushback(c);
return '@';
}
@ -6290,7 +6303,8 @@ parser_yylex(parser)
break;
default:
if (!is_identchar(c)) {
uc = (unsigned char)c;
if (!is_identchar(uc)) {
rb_compile_error(PARSER_ARG "Invalid char `\\%03o' in expression", c);
goto retry;
}
@ -6299,10 +6313,11 @@ parser_yylex(parser)
break;
}
uc = (unsigned char)c;
do {
tokadd(c);
if (ismbchar(c)) {
int i, len = mbclen(c)-1;
if (ismbchar(uc)) {
int i, len = mbclen(uc)-1;
for (i = 0; i < len; i++) {
c = nextc();
@ -6310,7 +6325,8 @@ parser_yylex(parser)
}
}
c = nextc();
} while (is_identchar(c));
uc = (unsigned char)c;
} while (is_identchar(uc));
if ((c == '!' || c == '?') && is_identchar(tok()[0]) && !peek('=')) {
tokadd(c);
}

31
re.c
Просмотреть файл

@ -248,11 +248,12 @@ rb_reg_mbclen2(c, re)
VALUE re;
{
int len;
unsigned char uc = (unsigned char)c;
if (!FL_TEST(re, KCODE_FIXED))
return mbclen(c);
return mbclen(uc);
kcode_set_option(re);
len = mbclen(c);
len = mbclen(uc);
kcode_reset_option();
return len;
}
@ -1775,8 +1776,8 @@ rb_reg_quote(str)
send = s + RSTRING(str)->len;
for (; s < send; s++) {
c = *s;
if (ismbchar(c)) {
int n = mbclen(c);
if (ismbchar(*s)) {
int n = mbclen(*s);
while (n-- && s < send)
s++;
@ -1804,8 +1805,8 @@ rb_reg_quote(str)
for (; s < send; s++) {
c = *s;
if (ismbchar(c)) {
int n = mbclen(c);
if (ismbchar(*s)) {
int n = mbclen(*s);
while (n-- && s < send)
*t++ = *s++;
@ -2044,21 +2045,23 @@ rb_reg_regsub(str, src, regs)
struct re_registers *regs;
{
VALUE val = 0;
char *p, *s, *e, c;
char *p, *s, *e;
unsigned char uc;
int no;
p = s = RSTRING(str)->ptr;
e = s + RSTRING(str)->len;
while (s < e) {
char *ss = s;
c = *s++;
if (ismbchar(c)) {
s += mbclen(c) - 1;
uc = (unsigned char)*s++;
if (ismbchar(uc)) {
s += mbclen(uc) - 1;
continue;
}
if (c != '\\' || s == e) continue;
if (uc != '\\' || s == e) continue;
if (!val) {
val = rb_str_buf_new(ss-p);
@ -2068,12 +2071,12 @@ rb_reg_regsub(str, src, regs)
rb_str_buf_cat(val, p, ss-p);
}
c = *s++;
uc = (unsigned char)*s++;
p = s;
switch (c) {
switch (uc) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
no = c - '0';
no = uc - '0';
break;
case '&':
no = 0;

704
regcomp.c

Разница между файлами не показана из-за своего большого размера Загрузить разницу

767
regexec.c

Разница между файлами не показана из-за своего большого размера Загрузить разницу

112
regint.h
Просмотреть файл

@ -1,12 +1,33 @@
/**********************************************************************
regint.h - Oniguruma (regular expression library)
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef REGINT_H
#define REGINT_H
/**********************************************************************
regint.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* for debug */
/* #define ONIG_DEBUG_PARSE_TREE */
@ -19,7 +40,8 @@
/* #define ONIG_DEBUG_STATISTICS */
#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_STATISTICS)
defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \
defined(ONIG_DEBUG_STATISTICS)
#ifndef ONIG_DEBUG
#define ONIG_DEBUG
#endif
@ -36,7 +58,6 @@
/* spec. config */
#define USE_NAMED_GROUP
#define USE_SUBEXP_CALL
#define USE_FOLD_MATCH /* ess-tsett etc... */
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
@ -51,12 +72,14 @@
/* interface to external system */
#ifdef NOT_RUBY /* given from Makefile */
#include "config.h"
#define USE_CAPTURE_HISTORY
#define USE_VARIABLE_META_CHARS
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
#define THREAD_ATOMIC_START /* depend on thread system */
#define THREAD_ATOMIC_END /* depend on thread system */
#define THREAD_PASS /* depend on thread system */
#define CHECK_INTERRUPT /* depend on application */
#define xmalloc malloc
#define xrealloc realloc
#define xfree free
@ -67,6 +90,14 @@
#define THREAD_ATOMIC_START DEFER_INTS
#define THREAD_ATOMIC_END ENABLE_INTS
#define THREAD_PASS rb_thread_schedule()
/*
 * Ruby-side definition of the interrupt check: calls rb_trap_exec() when
 * rb_trap_pending is set and rb_prohibit_interrupt is not set.
 * Wrapped in do { ... } while (0) so that the macro expands to a single
 * statement and can be followed by a semicolon at the call site.
 */
#define CHECK_INTERRUPT do {\
if (rb_trap_pending) {\
if (! rb_prohibit_interrupt) {\
rb_trap_exec();\
}\
}\
} while (0)
#define DEFAULT_WARN_FUNCTION rb_warn
#define DEFAULT_VERB_WARN_FUNCTION rb_warning
@ -108,7 +139,9 @@
#endif
#include <ctype.h>
#ifndef __BORLANDC__
#include <sys/types.h>
#endif
#ifdef ONIG_DEBUG
# include <stdio.h>
@ -291,6 +324,8 @@ typedef unsigned int BitStatusType;
/* ignore-case and multibyte status are included in compiled code. */
#define IS_DYNAMIC_OPTION(option) 0
#define REPEAT_INFINITE -1
#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
/* bitset */
#define BITS_PER_BYTE 8
@ -530,11 +565,11 @@ enum OpCode {
#define ARG_MEMNUM 4
#define ARG_OPTION 5
typedef short int RelAddrType;
typedef short int AbsAddrType;
typedef short int LengthType;
typedef short int MemNumType;
typedef int RelAddrType;
typedef int AbsAddrType;
typedef int LengthType;
typedef int RepeatNumType;
typedef short int MemNumType;
#define SIZE_OPCODE 1
#define SIZE_RELADDR sizeof(RelAddrType)
@ -575,6 +610,7 @@ typedef int RepeatNumType;
option = *((OnigOptionType* )(p));\
(p) += SIZE_OPTION;\
} while(0)
#else
#define GET_RELADDR_INC(addr,p) GET_SHORT_INC(addr,p)
@ -637,23 +673,37 @@ typedef int RepeatNumType;
#define SIZE_OP_RETURN SIZE_OPCODE
typedef struct {
UChar esc;
UChar anychar;
UChar anytime;
UChar zero_or_one_time;
UChar one_or_more_time;
UChar anychar_anytime;
} OnigMetaCharTableType;
#define MC_ESC(enc) (enc)->meta_char_table.esc
#define MC_ANYCHAR(enc) (enc)->meta_char_table.anychar
#define MC_ANYTIME(enc) (enc)->meta_char_table.anytime
#define MC_ZERO_OR_ONE_TIME(enc) (enc)->meta_char_table.zero_or_one_time
#define MC_ONE_OR_MORE_TIME(enc) (enc)->meta_char_table.one_or_more_time
#define MC_ANYCHAR_ANYTIME(enc) (enc)->meta_char_table.anychar_anytime
extern OnigMetaCharTableType OnigMetaCharTable;
#define SYN_POSIX_COMMON_OP \
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
ONIG_SYN_OP_DECIMAL_BACKREF | \
ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \
ONIG_SYN_OP_LINE_ANCHOR | \
ONIG_SYN_OP_ESC_CONTROL_CHARS )
#define MC_ESC OnigMetaCharTable.esc
#define MC_ANYCHAR OnigMetaCharTable.anychar
#define MC_ANYTIME OnigMetaCharTable.anytime
#define MC_ZERO_OR_ONE_TIME OnigMetaCharTable.zero_or_one_time
#define MC_ONE_OR_MORE_TIME OnigMetaCharTable.one_or_more_time
#define MC_ANYCHAR_ANYTIME OnigMetaCharTable.anychar_anytime
#define SYN_GNU_REGEX_OP \
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \
ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \
ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \
ONIG_SYN_OP_VBAR_ALT | \
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \
ONIG_SYN_OP_QMARK_ZERO_ONE | \
ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \
ONIG_SYN_OP_ESC_W_WORD | \
ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \
ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \
ONIG_SYN_OP_LINE_ANCHOR )
#define SYN_GNU_REGEX_BV \
( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \
ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
#define SYN_POSIX_COMMON_OP \
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
@ -691,7 +741,7 @@ typedef struct {
extern OnigOpInfoType OnigOpInfo[];
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp));
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, OnigEncoding enc));
#ifdef ONIG_DEBUG_STATISTICS
extern void onig_statistics_init P_((void));
@ -703,9 +753,11 @@ extern char* onig_error_code_to_format P_((int code));
extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...));
extern UChar* onig_strdup P_((UChar* s, UChar* end));
extern int onig_bbuf_init P_((BBuf* buf, int size));
extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax));
extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax));
extern int onig_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigErrorInfo* einfo));
extern void onig_chain_reduce P_((regex_t* reg));
extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
extern void onig_transfer P_((regex_t* to, regex_t* from));
extern int onig_is_in_code_range P_((UChar* p, OnigCodePoint code));
#endif /* REGINT_H */

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,12 +1,33 @@
/**********************************************************************
regparse.h - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef REGPARSE_H
#define REGPARSE_H
/**********************************************************************
regparse.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
@ -43,7 +64,8 @@
#define CTYPE_NOT_WHITE_SPACE (1<<3)
#define CTYPE_DIGIT (1<<4)
#define CTYPE_NOT_DIGIT (1<<5)
#define CTYPE_XDIGIT (1<<6)
#define CTYPE_NOT_XDIGIT (1<<7)
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL)
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
@ -52,23 +74,23 @@
#define EFFECT_OPTION (1<<1)
#define EFFECT_STOP_BACKTRACK (1<<2)
#define REPEAT_INFINITE -1
#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
#define NODE_STR_MARGIN 16
#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE 7
#define NSTR_RAW (1<<0) /* by backslashed number */
#define NSTR_CASE_AMBIG (1<<1)
#define NSTR_AMBIG (1<<1)
#define NSTR_AMBIG_REDUCE (1<<2)
#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
#define NSTRING_SET_CASE_AMBIG(node) (node)->u.str.flag |= NSTR_CASE_AMBIG
#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
#define NSTRING_SET_AMBIG_REDUCE(node) (node)->u.str.flag |= NSTR_AMBIG_REDUCE
#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
#define NSTRING_IS_CASE_AMBIG(node) \
(((node)->u.str.flag & NSTR_CASE_AMBIG) != 0)
#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
#define NSTRING_IS_AMBIG_REDUCE(node) \
(((node)->u.str.flag & NSTR_AMBIG_REDUCE) != 0)
/* Yields (br)->back_dynamic when IS_NOT_NULL() accepts it, otherwise
 * (br)->back_static.
 *
 * The expansion is a bare parenthesized expression: the terminating
 * semicolon belongs to the caller, so the macro can be used inside a
 * larger expression as well as on the right-hand side of an assignment.
 * Note that `br` is evaluated more than once; pass an expression without
 * side effects. */
#define BACKREFS_P(br) \
  (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
@ -115,14 +137,13 @@ typedef struct {
#define NST_MARK1 (1<<3)
#define NST_MARK2 (1<<4)
#define NST_MEM_BACKREFED (1<<5)
#define NST_SIMPLE_REPEAT (1<<6) /* for stop backtrack optimization */
#define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
#define NST_RECURSION (1<<7)
#define NST_CALLED (1<<8)
#define NST_ADDR_FIXED (1<<9)
#define NST_NAMED_GROUP (1<<10)
#define NST_NAME_REF (1<<11)
#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in match stack. */
#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f)
#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f)
@ -135,7 +156,8 @@ typedef struct {
#define IS_EFFECT_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
#define IS_EFFECT_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
#define IS_EFFECT_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
#define IS_EFFECT_SIMPLE_REPEAT(en) (((en)->state & NST_SIMPLE_REPEAT) != 0)
#define IS_EFFECT_STOP_BT_SIMPLE_REPEAT(en) \
(((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
#define IS_EFFECT_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
@ -228,6 +250,7 @@ typedef struct _Node {
typedef struct {
OnigOptionType option;
OnigAmbigType ambig_flag;
OnigEncoding enc;
OnigSyntaxType* syntax;
BitStatusType capture_history;
@ -267,6 +290,9 @@ extern int onig_node_str_cat P_((Node* node, UChar* s, UChar* end));
/* Parse-tree node helpers exported by the parser.  Every parameter list
 * is wrapped in P_((...)), following the convention used by the other
 * declarations in this header. */
extern void onig_node_free P_((Node* node));
extern Node* onig_node_new_effect P_((int type));
extern Node* onig_node_new_anchor P_((int type));
extern Node* onig_node_new_str P_((UChar* s, UChar* end));
extern Node* onig_node_new_list P_((Node* left, Node* right));
extern void onig_node_str_clear P_((Node* node));
/* Declared with an explicit (void) list: empty parentheses would declare
 * the function without a prototype, which disables argument checking at
 * call sites.  NOTE(review): assumes the definition takes no arguments --
 * confirm against the definition in the parser source. */
extern int onig_free_node_list P_((void));
extern int onig_names_free P_((regex_t* reg));
extern int onig_parse_make_tree P_((Node** root, UChar* pattern, UChar* end, regex_t* reg, ScanEnv* env));