зеркало из https://github.com/github/ruby.git
* ascii.c, euc_jp.c, oniggnu.h, oniguruma.h, regcomp.c, regenc.c, regenc.h, regerror.c, regexec.c, reggnu.c, regint.h, regparse.c, regparse.h, sjis.c, utf8.c:
  imported Oni Guruma 3.4.0.
* parse.y, re.c: Now mbclen() takes unsigned char as its argument.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7206 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
5e853c811c
Коммит
82cb9eaa3b
10
ChangeLog
10
ChangeLog
|
@ -1,3 +1,13 @@
|
|||
Thu Nov 4 23:41:55 2004 Kazuo Saito <ksaito@uranus.dti.ne.jp>
|
||||
|
||||
* ascii.c, euc_jp.c, oniggnu.h, oniguruma.h, regcomp.c,
|
||||
regenc.c, regenc.h, regerror.c, regexec.c, reggnu.c,
|
||||
regint.h, regparse.c, regparse.h, sjis.c, utf8.c:
|
||||
imported Oni Guruma 3.4.0.
|
||||
|
||||
* parse.y, re.c: Now mbclen() takes unsigned char as
|
||||
its argument.
|
||||
|
||||
Thu Nov 4 21:25:38 2004 Yukihiro Matsumoto <matz@ruby-lang.org>
|
||||
|
||||
* string.c (str_gsub): string modify check no longer based on
|
||||
|
|
290
oniguruma.h
290
oniguruma.h
|
@ -1,17 +1,38 @@
|
|||
/**********************************************************************
|
||||
|
||||
oniguruma.h - Oniguruma (regular expression library)
|
||||
|
||||
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
|
||||
|
||||
**********************************************************************/
|
||||
#ifndef ONIGURUMA_H
|
||||
#define ONIGURUMA_H
|
||||
/**********************************************************************
|
||||
oniguruma.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define ONIGURUMA
|
||||
#define ONIGURUMA_VERSION_MAJOR 2
|
||||
#define ONIGURUMA_VERSION_MINOR 2
|
||||
#define ONIGURUMA_VERSION_TEENY 8
|
||||
#define ONIGURUMA_VERSION_MAJOR 3
|
||||
#define ONIGURUMA_VERSION_MINOR 4
|
||||
#define ONIGURUMA_VERSION_TEENY 0
|
||||
|
||||
#ifndef P_
|
||||
#if defined(__STDC__) || defined(_WIN32)
|
||||
|
@ -56,12 +77,56 @@ typedef struct {
|
|||
OnigCodePoint to;
|
||||
} OnigCodePointRange;
|
||||
|
||||
#define ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE 16
|
||||
|
||||
/* ambiguous match flag */
|
||||
#define ONIGENC_AMBIGUOUS_MATCH_NONE 0
|
||||
#define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE (1<<0)
|
||||
#define ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE (1<<1)
|
||||
/* #define ONIGENC_AMBIGUOUS_MATCH_ACCENT (1<<2) */
|
||||
/* #define ONIGENC_AMBIGUOUS_MATCH_HIRAGANA_KATAKANA (1<<3) */
|
||||
/* #define ONIGENC_AMBIGUOUS_MATCH_KATAKANA_WIDTH (1<<4) */
|
||||
|
||||
#define ONIGENC_AMBIGUOUS_MATCH_LIMIT (1<<1)
|
||||
#define ONIGENC_AMBIGUOUS_MATCH_COMPOUND (1<<30)
|
||||
|
||||
#define ONIGENC_AMBIGUOUS_MATCH_FULL \
|
||||
( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | \
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | \
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND )
|
||||
#define ONIGENC_AMBIGUOUS_MATCH_DEFAULT \
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | \
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | \
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND )
|
||||
|
||||
typedef unsigned int OnigAmbigType;
|
||||
|
||||
#define ONIGENC_MAX_COMP_AMBIG_CODE_LEN 3
|
||||
#define ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM 4
|
||||
|
||||
typedef struct {
|
||||
int target_num;
|
||||
int target_byte_len[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
|
||||
UChar* target_str[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
|
||||
} OnigEncFoldMatchInfo;
|
||||
int len;
|
||||
OnigCodePoint code[ONIGENC_MAX_COMP_AMBIG_CODE_LEN];
|
||||
} OnigCompAmbigCodeItem;
|
||||
|
||||
typedef struct {
|
||||
int n;
|
||||
OnigCodePoint code;
|
||||
OnigCompAmbigCodeItem items[ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM];
|
||||
} OnigCompAmbigCodes;
|
||||
|
||||
typedef struct {
|
||||
OnigCodePoint from;
|
||||
OnigCodePoint to;
|
||||
} OnigPairAmbigCodes;
|
||||
|
||||
typedef struct {
|
||||
OnigCodePoint esc;
|
||||
OnigCodePoint anychar;
|
||||
OnigCodePoint anytime;
|
||||
OnigCodePoint zero_or_one_time;
|
||||
OnigCodePoint one_or_more_time;
|
||||
OnigCodePoint anychar_anytime;
|
||||
} OnigMetaCharTableType;
|
||||
|
||||
|
||||
#if defined(RUBY_PLATFORM) && defined(M17N_H)
|
||||
|
@ -72,23 +137,24 @@ typedef m17n_encoding* OnigEncoding;
|
|||
#else
|
||||
|
||||
typedef struct {
|
||||
const char len_table[256];
|
||||
int (*mbc_enc_len)(UChar* p);
|
||||
const char* name;
|
||||
int max_enc_len;
|
||||
int is_fold_match;
|
||||
int ctype_support_level; /* sb-only/full */
|
||||
int is_continuous_sb_mb; /* code point is continuous from sb to mb */
|
||||
int min_enc_len;
|
||||
OnigAmbigType support_ambig_flag;
|
||||
OnigMetaCharTableType meta_char_table;
|
||||
int (*is_mbc_newline)(UChar* p, UChar* end);
|
||||
OnigCodePoint (*mbc_to_code)(UChar* p, UChar* end);
|
||||
int (*code_to_mbclen)(OnigCodePoint code);
|
||||
int (*code_to_mbc)(OnigCodePoint code, UChar *buf);
|
||||
int (*mbc_to_lower)(UChar* p, UChar* lower);
|
||||
int (*mbc_is_case_ambig)(UChar* p);
|
||||
int (*code_is_ctype)(OnigCodePoint code, unsigned int ctype);
|
||||
int (*mbc_to_normalize)(OnigAmbigType flag, UChar** pp, UChar* end, UChar* to);
|
||||
int (*is_mbc_ambiguous)(OnigAmbigType flag, UChar** pp, UChar* end);
|
||||
int (*get_all_pair_ambig_codes)(OnigAmbigType flag, OnigPairAmbigCodes** acs);
|
||||
int (*get_all_comp_ambig_codes)(OnigAmbigType flag, OnigCompAmbigCodes** acs);
|
||||
int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype);
|
||||
int (*get_ctype_code_range)(int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]);
|
||||
UChar* (*left_adjust_char_head)(UChar* start, UChar* s);
|
||||
int (*is_allowed_reverse_match)(UChar* p, UChar* e);
|
||||
int (*get_all_fold_match_code)(OnigCodePoint** codes);
|
||||
int (*get_fold_match_info)(UChar* p, UChar* end, OnigEncFoldMatchInfo** info);
|
||||
UChar* (*left_adjust_char_head)(UChar* start, UChar* p);
|
||||
int (*is_allowed_reverse_match)(UChar* p, UChar* end);
|
||||
} OnigEncodingType;
|
||||
|
||||
typedef OnigEncodingType* OnigEncoding;
|
||||
|
@ -110,6 +176,10 @@ ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14;
|
|||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingUTF8;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_BE;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_LE;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_BE;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_LE;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR;
|
||||
|
@ -136,6 +206,10 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
|
|||
#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15)
|
||||
#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16)
|
||||
#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8)
|
||||
#define ONIG_ENCODING_UTF16_BE (&OnigEncodingUTF16_BE)
|
||||
#define ONIG_ENCODING_UTF16_LE (&OnigEncodingUTF16_LE)
|
||||
#define ONIG_ENCODING_UTF32_BE (&OnigEncodingUTF32_BE)
|
||||
#define ONIG_ENCODING_UTF32_LE (&OnigEncodingUTF32_LE)
|
||||
#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP)
|
||||
#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW)
|
||||
#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR)
|
||||
|
@ -152,34 +226,31 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
|
|||
|
||||
/* work size */
|
||||
#define ONIGENC_CODE_TO_MBC_MAXLEN 7
|
||||
#define ONIGENC_MBC_TO_LOWER_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN
|
||||
#define ONIGENC_MBC_NORMALIZE_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN
|
||||
|
||||
/* character types */
|
||||
#define ONIGENC_CTYPE_ALPHA (1<< 0)
|
||||
#define ONIGENC_CTYPE_BLANK (1<< 1)
|
||||
#define ONIGENC_CTYPE_CNTRL (1<< 2)
|
||||
#define ONIGENC_CTYPE_DIGIT (1<< 3)
|
||||
#define ONIGENC_CTYPE_GRAPH (1<< 4)
|
||||
#define ONIGENC_CTYPE_LOWER (1<< 5)
|
||||
#define ONIGENC_CTYPE_PRINT (1<< 6)
|
||||
#define ONIGENC_CTYPE_PUNCT (1<< 7)
|
||||
#define ONIGENC_CTYPE_SPACE (1<< 8)
|
||||
#define ONIGENC_CTYPE_UPPER (1<< 9)
|
||||
#define ONIGENC_CTYPE_XDIGIT (1<<10)
|
||||
#define ONIGENC_CTYPE_WORD (1<<11)
|
||||
#define ONIGENC_CTYPE_ASCII (1<<12)
|
||||
#define ONIGENC_CTYPE_NEWLINE (1<< 0)
|
||||
#define ONIGENC_CTYPE_ALPHA (1<< 1)
|
||||
#define ONIGENC_CTYPE_BLANK (1<< 2)
|
||||
#define ONIGENC_CTYPE_CNTRL (1<< 3)
|
||||
#define ONIGENC_CTYPE_DIGIT (1<< 4)
|
||||
#define ONIGENC_CTYPE_GRAPH (1<< 5)
|
||||
#define ONIGENC_CTYPE_LOWER (1<< 6)
|
||||
#define ONIGENC_CTYPE_PRINT (1<< 7)
|
||||
#define ONIGENC_CTYPE_PUNCT (1<< 8)
|
||||
#define ONIGENC_CTYPE_SPACE (1<< 9)
|
||||
#define ONIGENC_CTYPE_UPPER (1<<10)
|
||||
#define ONIGENC_CTYPE_XDIGIT (1<<11)
|
||||
#define ONIGENC_CTYPE_WORD (1<<12)
|
||||
#define ONIGENC_CTYPE_ASCII (1<<13)
|
||||
#define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT)
|
||||
|
||||
/* ctype support level */
|
||||
#define ONIGENC_CTYPE_SUPPORT_LEVEL_SB 0
|
||||
#define ONIGENC_CTYPE_SUPPORT_LEVEL_FULL 1
|
||||
|
||||
|
||||
#define enc_len(enc,byte) ONIGENC_MBC_LEN_BY_HEAD(enc,byte)
|
||||
#define enc_len(enc,p) ONIGENC_MBC_ENC_LEN(enc,p)
|
||||
|
||||
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
|
||||
#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
|
||||
#define ONIGENC_IS_MBC_HEAD(enc,byte) (ONIGENC_MBC_LEN_BY_HEAD(enc,byte) != 1)
|
||||
#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1)
|
||||
#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
|
||||
#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
|
||||
#define ONIGENC_IS_CODE_SB_WORD(enc,code) \
|
||||
|
@ -192,31 +263,33 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
|
|||
|
||||
#include <ctype.h> /* for isblank(), isgraph() */
|
||||
|
||||
#define ONIGENC_MBC_TO_LOWER(enc,p,buf) onigenc_mbc_to_lower(enc,p,buf)
|
||||
#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) onigenc_mbc_is_case_ambig(enc,p)
|
||||
#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \
|
||||
onigenc_mbc_to_normalize(enc,flag,pp,end,buf)
|
||||
#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \
|
||||
onigenc_is_mbc_ambiguous(enc,flag,pp,end)
|
||||
|
||||
#define ONIGENC_IS_FOLD_MATCH(enc) FALSE
|
||||
#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) FALSE
|
||||
#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ONIGENC_CTYPE_SUPPORT_LEVEL_SB
|
||||
#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
|
||||
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
|
||||
onigenc_is_allowed_reverse_match(enc, s, end)
|
||||
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
|
||||
onigenc_get_left_adjust_char_head(enc, start, s)
|
||||
#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) 0
|
||||
#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) ONIG_NO_SUPPORT_CONFIG
|
||||
#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, ambig_flag, acs) 0
|
||||
#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, ambig_flag, acs) 0
|
||||
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
|
||||
ONIG_NO_SUPPORT_CONFIG
|
||||
#define ONIGENC_MBC_LEN_BY_HEAD(enc,b) m17n_mbclen(enc,(int )b)
|
||||
#define ONIGENC_MBC_ENC_LEN(enc,p) m17n_mbclen(enc,(int )(*p))
|
||||
#define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc)
|
||||
#define ONIGENC_MBC_MAXLEN_DIST(enc) \
|
||||
(ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \
|
||||
: ONIG_INFINITE_DISTANCE)
|
||||
#define ONIGENC_MBC_MINLEN(enc) 1
|
||||
#define ONIGENC_MBC_TO_CODE(enc,p,e) m17n_codepoint((enc),(p),(e))
|
||||
#define ONIGENC_CODE_TO_MBCLEN(enc,code) m17n_codelen((enc),(code))
|
||||
#define ONIGENC_CODE_TO_MBC(enc,code,buf) onigenc_code_to_mbc(enc, code, buf)
|
||||
|
||||
#if 0
|
||||
#define ONIGENC_STEP_BACK(enc,start,s,n) /* !! not supported !! */
|
||||
#if 0 /* !! not supported !! */
|
||||
#define ONIGENC_IS_MBC_NEWLINE(enc,p,end)
|
||||
#define ONIGENC_STEP_BACK(enc,start,s,n)
|
||||
#endif
|
||||
|
||||
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \
|
||||
|
@ -251,9 +324,9 @@ int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype));
|
|||
ONIG_EXTERN
|
||||
int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
|
||||
ONIG_EXTERN
|
||||
int onigenc_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* buf));
|
||||
int onigenc_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, UChar** pp, UChar* end, UChar* buf));
|
||||
ONIG_EXTERN
|
||||
int onigenc_mbc_is_case_ambig P_((OnigEncoding enc, UChar* p));
|
||||
int onigenc_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, UChar** pp, UChar* end));
|
||||
ONIG_EXTERN
|
||||
int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end));
|
||||
|
||||
|
@ -261,32 +334,35 @@ int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end)
|
|||
|
||||
#define ONIGENC_NAME(enc) ((enc)->name)
|
||||
|
||||
#define ONIGENC_MBC_TO_LOWER(enc,p,buf) (enc)->mbc_to_lower(p,buf)
|
||||
#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) (enc)->mbc_is_case_ambig(p)
|
||||
|
||||
#define ONIGENC_IS_FOLD_MATCH(enc) ((enc)->is_fold_match)
|
||||
#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) ((enc)->is_continuous_sb_mb)
|
||||
#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ((enc)->ctype_support_level)
|
||||
#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \
|
||||
(enc)->mbc_to_normalize(flag,pp,end,buf)
|
||||
#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \
|
||||
(enc)->is_mbc_ambiguous(flag,pp,end)
|
||||
#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ((enc)->support_ambig_flag)
|
||||
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
|
||||
(enc)->is_allowed_reverse_match(s,end)
|
||||
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
|
||||
(enc)->left_adjust_char_head(start, s)
|
||||
#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) \
|
||||
(enc)->get_all_fold_match_code(codes)
|
||||
#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) \
|
||||
(enc)->get_fold_match_info(p,end,info)
|
||||
#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc,ambig_flag,acs) \
|
||||
(enc)->get_all_pair_ambig_codes(ambig_flag,acs)
|
||||
#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc,ambig_flag,acs) \
|
||||
(enc)->get_all_comp_ambig_codes(ambig_flag,acs)
|
||||
#define ONIGENC_STEP_BACK(enc,start,s,n) \
|
||||
onigenc_step_back((enc),(start),(s),(n))
|
||||
|
||||
#define ONIGENC_MBC_LEN_BY_HEAD(enc,byte) ((enc)->len_table[(int )(byte)])
|
||||
#define ONIGENC_MBC_ENC_LEN(enc,p) (enc)->mbc_enc_len(p)
|
||||
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
|
||||
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
|
||||
#define ONIGENC_MBC_TO_CODE(enc,p,e) (enc)->mbc_to_code((p),(e))
|
||||
#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
|
||||
#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end))
|
||||
#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end))
|
||||
#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code)
|
||||
#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf)
|
||||
|
||||
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->code_is_ctype(code,ctype)
|
||||
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype)
|
||||
|
||||
#define ONIGENC_IS_CODE_NEWLINE(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
|
||||
#define ONIGENC_IS_CODE_GRAPH(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
|
||||
#define ONIGENC_IS_CODE_PRINT(enc,code) \
|
||||
|
@ -340,6 +416,12 @@ ONIG_EXTERN
|
|||
UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
|
||||
ONIG_EXTERN
|
||||
UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
|
||||
ONIG_EXTERN
|
||||
int onigenc_strlen P_((OnigEncoding enc, UChar* p, UChar* end));
|
||||
ONIG_EXTERN
|
||||
int onigenc_strlen_null P_((OnigEncoding enc, UChar* p));
|
||||
ONIG_EXTERN
|
||||
int onigenc_str_bytelen_null P_((OnigEncoding enc, UChar* p));
|
||||
|
||||
|
||||
|
||||
|
@ -353,13 +435,6 @@ UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UC
|
|||
/* constants */
|
||||
#define ONIG_MAX_ERROR_MESSAGE_LEN 90
|
||||
|
||||
#if defined(RUBY_PLATFORM) && !defined(ONIG_RUBY_M17N)
|
||||
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
|
||||
#undef ismbchar
|
||||
#define ismbchar(c) (mbclen((c)) != 1)
|
||||
#define mbclen(c) (OnigEncDefaultCharEncoding->len_table[(unsigned char )(c)])
|
||||
#endif
|
||||
|
||||
typedef unsigned int OnigOptionType;
|
||||
|
||||
#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
|
||||
|
@ -467,6 +542,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
|||
#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */
|
||||
#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1<<17) /* \p{^..}, \P{^..} */
|
||||
#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1<<18) /* \p{IsXDigit} */
|
||||
#define ONIG_SYN_OP2_ESC_H_XDIGIT (1<<19) /* \h, \H */
|
||||
|
||||
/* syntax (behavior) */
|
||||
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1<<31) /* not implemented */
|
||||
|
@ -479,6 +555,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
|||
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1<<6) /* (?<=a|bc) */
|
||||
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1<<7) /* see doc/RE */
|
||||
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1<<8) /* (?<x>)(?<x>) */
|
||||
#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1<<9) /* a{n}?=(?:a{n})? */
|
||||
|
||||
/* syntax (behavior) in char class [...] */
|
||||
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1<<20) /* [^...] */
|
||||
|
@ -565,6 +642,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
|||
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
|
||||
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
|
||||
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
|
||||
#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
|
||||
|
||||
/* errors related to thread */
|
||||
#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
|
||||
|
@ -575,6 +653,15 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
|||
#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
|
||||
((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
|
||||
|
||||
typedef struct OnigCaptureTreeNodeStruct {
|
||||
int group; /* group number */
|
||||
int beg;
|
||||
int end;
|
||||
int allocated;
|
||||
int num_childs;
|
||||
struct OnigCaptureTreeNodeStruct** childs;
|
||||
} OnigCaptureTreeNode;
|
||||
|
||||
/* match result region type */
|
||||
struct re_registers {
|
||||
int allocated;
|
||||
|
@ -582,9 +669,16 @@ struct re_registers {
|
|||
int* beg;
|
||||
int* end;
|
||||
/* extended */
|
||||
struct re_registers** list; /* capture history. list[1]-list[31] */
|
||||
OnigCaptureTreeNode* history_root; /* capture history tree root */
|
||||
};
|
||||
|
||||
/* capture tree traverse */
|
||||
#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1
|
||||
#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2
|
||||
#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
|
||||
( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST )
|
||||
|
||||
|
||||
#define ONIG_REGION_NOTPOS -1
|
||||
|
||||
typedef struct re_registers OnigRegion;
|
||||
|
@ -635,6 +729,7 @@ typedef struct re_pattern_buffer {
|
|||
OnigEncoding enc;
|
||||
OnigOptionType options;
|
||||
OnigSyntaxType* syntax;
|
||||
OnigAmbigType ambig_flag;
|
||||
void* name_table;
|
||||
|
||||
/* optimization info (string search, char-map and anchors) */
|
||||
|
@ -657,6 +752,15 @@ typedef struct re_pattern_buffer {
|
|||
} regex_t;
|
||||
|
||||
|
||||
typedef struct {
|
||||
int num_of_elements;
|
||||
OnigEncoding pattern_enc;
|
||||
OnigEncoding target_enc;
|
||||
OnigSyntaxType* syntax;
|
||||
OnigOptionType option;
|
||||
OnigAmbigType ambig_flag;
|
||||
} OnigCompileInfo;
|
||||
|
||||
/* Oniguruma Native API */
|
||||
ONIG_EXTERN
|
||||
int onig_init P_((void));
|
||||
|
@ -669,10 +773,14 @@ void onig_set_verb_warn_func P_((OnigWarnFunc f));
|
|||
ONIG_EXTERN
|
||||
int onig_new P_((regex_t**, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
int onig_new_deluxe P_((regex_t** reg, UChar* pattern, UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
void onig_free P_((regex_t*));
|
||||
ONIG_EXTERN
|
||||
int onig_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
int onig_recompile_deluxe P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
int onig_search P_((regex_t*, UChar* str, UChar* end, UChar* start, UChar* range, OnigRegion* region, OnigOptionType option));
|
||||
ONIG_EXTERN
|
||||
int onig_match P_((regex_t*, UChar* str, UChar* end, UChar* at, OnigRegion* region, OnigOptionType option));
|
||||
|
@ -696,16 +804,34 @@ int onig_foreach_name P_((regex_t* reg, int (*func)(UChar*,UChar*,int,int*,regex
|
|||
ONIG_EXTERN
|
||||
int onig_number_of_names P_((regex_t* reg));
|
||||
ONIG_EXTERN
|
||||
int onig_number_of_captures P_((regex_t* reg));
|
||||
ONIG_EXTERN
|
||||
int onig_number_of_capture_histories P_((regex_t* reg));
|
||||
ONIG_EXTERN
|
||||
OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region));
|
||||
ONIG_EXTERN
|
||||
int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg));
|
||||
ONIG_EXTERN
|
||||
OnigEncoding onig_get_encoding P_((regex_t* reg));
|
||||
ONIG_EXTERN
|
||||
OnigOptionType onig_get_options P_((regex_t* reg));
|
||||
ONIG_EXTERN
|
||||
OnigAmbigType onig_get_ambig_flag P_((regex_t* reg));
|
||||
ONIG_EXTERN
|
||||
OnigSyntaxType* onig_get_syntax P_((regex_t* reg));
|
||||
ONIG_EXTERN
|
||||
int onig_set_default_syntax P_((OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
OnigOptionType onig_get_syntax_options P_((OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
|
||||
|
@ -714,7 +840,9 @@ void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior)
|
|||
ONIG_EXTERN
|
||||
void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
|
||||
ONIG_EXTERN
|
||||
int onig_set_meta_char P_((unsigned int what, OnigCodePoint code));
|
||||
int onig_set_meta_char P_((OnigEncoding enc, unsigned int what, OnigCodePoint code));
|
||||
ONIG_EXTERN
|
||||
void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from));
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_match_stack_limit_size P_((void));
|
||||
ONIG_EXTERN
|
||||
|
@ -723,5 +851,7 @@ ONIG_EXTERN
|
|||
int onig_end P_((void));
|
||||
ONIG_EXTERN
|
||||
const char* onig_version P_((void));
|
||||
ONIG_EXTERN
|
||||
const char* onig_copyright P_((void));
|
||||
|
||||
#endif /* ONIGURUMA_H */
|
||||
|
|
46
parse.y
46
parse.y
|
@ -4853,8 +4853,10 @@ parser_tokadd_string(parser, func, term, paren, nest)
|
|||
long *nest;
|
||||
{
|
||||
int c;
|
||||
unsigned char uc;
|
||||
|
||||
while ((c = nextc()) != -1) {
|
||||
uc = (unsigned char)c;
|
||||
if (paren && c == paren) {
|
||||
++*nest;
|
||||
}
|
||||
|
@ -4905,8 +4907,8 @@ parser_tokadd_string(parser, func, term, paren, nest)
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (ismbchar(c)) {
|
||||
int i, len = mbclen(c)-1;
|
||||
else if (ismbchar(uc)) {
|
||||
int i, len = mbclen(uc)-1;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
tokadd(c);
|
||||
|
@ -5002,6 +5004,7 @@ parser_heredoc_identifier(parser)
|
|||
struct parser_params *parser;
|
||||
{
|
||||
int c = nextc(), term, func = 0, len;
|
||||
unsigned int uc;
|
||||
|
||||
if (c == '-') {
|
||||
c = nextc();
|
||||
|
@ -5019,7 +5022,8 @@ parser_heredoc_identifier(parser)
|
|||
tokadd(func);
|
||||
term = c;
|
||||
while ((c = nextc()) != -1 && c != term) {
|
||||
len = mbclen(c);
|
||||
uc = (unsigned int)c;
|
||||
len = mbclen(uc);
|
||||
do {tokadd(c);} while (--len > 0 && (c = nextc()) != -1);
|
||||
}
|
||||
if (c == -1) {
|
||||
|
@ -5029,7 +5033,8 @@ parser_heredoc_identifier(parser)
|
|||
break;
|
||||
|
||||
default:
|
||||
if (!is_identchar(c)) {
|
||||
uc = (unsigned int)c;
|
||||
if (!is_identchar(uc)) {
|
||||
pushback(c);
|
||||
if (func & STR_FUNC_INDENT) {
|
||||
pushback('-');
|
||||
|
@ -5040,9 +5045,11 @@ parser_heredoc_identifier(parser)
|
|||
term = '"';
|
||||
tokadd(func |= str_dquote);
|
||||
do {
|
||||
len = mbclen(c);
|
||||
uc = (unsigned int)c;
|
||||
len = mbclen(uc);
|
||||
do {tokadd(c);} while (--len > 0 && (c = nextc()) != -1);
|
||||
} while ((c = nextc()) != -1 && is_identchar(c));
|
||||
} while ((c = nextc()) != -1 &&
|
||||
(uc = (unsigned char)c, is_identchar(uc)));
|
||||
pushback(c);
|
||||
break;
|
||||
}
|
||||
|
@ -5233,6 +5240,7 @@ parser_yylex(parser)
|
|||
register int c;
|
||||
int space_seen = 0;
|
||||
int cmd_state;
|
||||
unsigned char uc;
|
||||
#ifdef RIPPER
|
||||
int fallthru = Qfalse;
|
||||
#endif
|
||||
|
@ -5519,6 +5527,7 @@ parser_yylex(parser)
|
|||
rb_compile_error(PARSER_ARG "incomplete character syntax");
|
||||
return 0;
|
||||
}
|
||||
uc = (unsigned char)c;
|
||||
if (ISSPACE(c)){
|
||||
if (!IS_ARG()){
|
||||
int c2 = 0;
|
||||
|
@ -5551,7 +5560,7 @@ parser_yylex(parser)
|
|||
lex_state = EXPR_TERNARY;
|
||||
return '?';
|
||||
}
|
||||
else if (ismbchar(c)) {
|
||||
else if (ismbchar(uc)) {
|
||||
rb_warnI("multibyte character literal not supported yet; use ?\\%.3o", c);
|
||||
goto ternary;
|
||||
}
|
||||
|
@ -6098,7 +6107,8 @@ parser_yylex(parser)
|
|||
}
|
||||
else {
|
||||
term = nextc();
|
||||
if (ISALNUM(term) || ismbchar(term)) {
|
||||
uc = (unsigned char)c;
|
||||
if (ISALNUM(term) || ismbchar(uc)) {
|
||||
yyerror("unknown type of %string");
|
||||
return 0;
|
||||
}
|
||||
|
@ -6177,7 +6187,8 @@ parser_yylex(parser)
|
|||
switch (c) {
|
||||
case '_': /* $_: last read line string */
|
||||
c = nextc();
|
||||
if (is_identchar(c)) {
|
||||
uc = (unsigned char)c;
|
||||
if (is_identchar(uc)) {
|
||||
tokadd('$');
|
||||
tokadd('_');
|
||||
break;
|
||||
|
@ -6243,7 +6254,8 @@ parser_yylex(parser)
|
|||
return tNTH_REF;
|
||||
|
||||
default:
|
||||
if (!is_identchar(c)) {
|
||||
uc = (unsigned char)c;
|
||||
if (!is_identchar(uc)) {
|
||||
pushback(c);
|
||||
return '$';
|
||||
}
|
||||
|
@ -6268,7 +6280,8 @@ parser_yylex(parser)
|
|||
rb_compile_error(PARSER_ARG "`@@%c' is not allowed as a class variable name", c);
|
||||
}
|
||||
}
|
||||
if (!is_identchar(c)) {
|
||||
uc = (unsigned char)c;
|
||||
if (!is_identchar(uc)) {
|
||||
pushback(c);
|
||||
return '@';
|
||||
}
|
||||
|
@ -6290,7 +6303,8 @@ parser_yylex(parser)
|
|||
break;
|
||||
|
||||
default:
|
||||
if (!is_identchar(c)) {
|
||||
uc = (unsigned char)c;
|
||||
if (!is_identchar(uc)) {
|
||||
rb_compile_error(PARSER_ARG "Invalid char `\\%03o' in expression", c);
|
||||
goto retry;
|
||||
}
|
||||
|
@ -6299,10 +6313,11 @@ parser_yylex(parser)
|
|||
break;
|
||||
}
|
||||
|
||||
uc = (unsigned char)c;
|
||||
do {
|
||||
tokadd(c);
|
||||
if (ismbchar(c)) {
|
||||
int i, len = mbclen(c)-1;
|
||||
if (ismbchar(uc)) {
|
||||
int i, len = mbclen(uc)-1;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
c = nextc();
|
||||
|
@ -6310,7 +6325,8 @@ parser_yylex(parser)
|
|||
}
|
||||
}
|
||||
c = nextc();
|
||||
} while (is_identchar(c));
|
||||
uc = (unsigned char)c;
|
||||
} while (is_identchar(uc));
|
||||
if ((c == '!' || c == '?') && is_identchar(tok()[0]) && !peek('=')) {
|
||||
tokadd(c);
|
||||
}
|
||||
|
|
31
re.c
31
re.c
|
@ -248,11 +248,12 @@ rb_reg_mbclen2(c, re)
|
|||
VALUE re;
|
||||
{
|
||||
int len;
|
||||
unsigned char uc = (unsigned char)c;
|
||||
|
||||
if (!FL_TEST(re, KCODE_FIXED))
|
||||
return mbclen(c);
|
||||
return mbclen(uc);
|
||||
kcode_set_option(re);
|
||||
len = mbclen(c);
|
||||
len = mbclen(uc);
|
||||
kcode_reset_option();
|
||||
return len;
|
||||
}
|
||||
|
@ -1775,8 +1776,8 @@ rb_reg_quote(str)
|
|||
send = s + RSTRING(str)->len;
|
||||
for (; s < send; s++) {
|
||||
c = *s;
|
||||
if (ismbchar(c)) {
|
||||
int n = mbclen(c);
|
||||
if (ismbchar(*s)) {
|
||||
int n = mbclen(*s);
|
||||
|
||||
while (n-- && s < send)
|
||||
s++;
|
||||
|
@ -1804,8 +1805,8 @@ rb_reg_quote(str)
|
|||
|
||||
for (; s < send; s++) {
|
||||
c = *s;
|
||||
if (ismbchar(c)) {
|
||||
int n = mbclen(c);
|
||||
if (ismbchar(*s)) {
|
||||
int n = mbclen(*s);
|
||||
|
||||
while (n-- && s < send)
|
||||
*t++ = *s++;
|
||||
|
@ -2044,21 +2045,23 @@ rb_reg_regsub(str, src, regs)
|
|||
struct re_registers *regs;
|
||||
{
|
||||
VALUE val = 0;
|
||||
char *p, *s, *e, c;
|
||||
char *p, *s, *e;
|
||||
unsigned char uc;
|
||||
int no;
|
||||
|
||||
|
||||
p = s = RSTRING(str)->ptr;
|
||||
e = s + RSTRING(str)->len;
|
||||
|
||||
while (s < e) {
|
||||
char *ss = s;
|
||||
|
||||
c = *s++;
|
||||
if (ismbchar(c)) {
|
||||
s += mbclen(c) - 1;
|
||||
uc = (unsigned char)*s++;
|
||||
if (ismbchar(uc)) {
|
||||
s += mbclen(uc) - 1;
|
||||
continue;
|
||||
}
|
||||
if (c != '\\' || s == e) continue;
|
||||
if (uc != '\\' || s == e) continue;
|
||||
|
||||
if (!val) {
|
||||
val = rb_str_buf_new(ss-p);
|
||||
|
@ -2068,12 +2071,12 @@ rb_reg_regsub(str, src, regs)
|
|||
rb_str_buf_cat(val, p, ss-p);
|
||||
}
|
||||
|
||||
c = *s++;
|
||||
uc = (unsigned char)*s++;
|
||||
p = s;
|
||||
switch (c) {
|
||||
switch (uc) {
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
no = c - '0';
|
||||
no = uc - '0';
|
||||
break;
|
||||
case '&':
|
||||
no = 0;
|
||||
|
|
700
regcomp.c
700
regcomp.c
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
767
regexec.c
767
regexec.c
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
112
regint.h
112
regint.h
|
@ -1,12 +1,33 @@
|
|||
/**********************************************************************
|
||||
|
||||
regint.h - Oniguruma (regular expression library)
|
||||
|
||||
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
|
||||
|
||||
**********************************************************************/
|
||||
#ifndef REGINT_H
|
||||
#define REGINT_H
|
||||
/**********************************************************************
|
||||
regint.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* for debug */
|
||||
/* #define ONIG_DEBUG_PARSE_TREE */
|
||||
|
@ -19,7 +40,8 @@
|
|||
/* #define ONIG_DEBUG_STATISTICS */
|
||||
|
||||
#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
|
||||
defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_STATISTICS)
|
||||
defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \
|
||||
defined(ONIG_DEBUG_STATISTICS)
|
||||
#ifndef ONIG_DEBUG
|
||||
#define ONIG_DEBUG
|
||||
#endif
|
||||
|
@ -36,7 +58,6 @@
|
|||
/* spec. config */
|
||||
#define USE_NAMED_GROUP
|
||||
#define USE_SUBEXP_CALL
|
||||
#define USE_FOLD_MATCH /* ess-tsett etc... */
|
||||
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
|
||||
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
|
||||
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
|
||||
|
@ -51,12 +72,14 @@
|
|||
/* interface to external system */
|
||||
#ifdef NOT_RUBY /* gived from Makefile */
|
||||
#include "config.h"
|
||||
#define USE_CAPTURE_HISTORY
|
||||
#define USE_VARIABLE_META_CHARS
|
||||
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
|
||||
#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
|
||||
#define THREAD_ATOMIC_START /* depend on thread system */
|
||||
#define THREAD_ATOMIC_END /* depend on thread system */
|
||||
#define THREAD_PASS /* depend on thread system */
|
||||
#define CHECK_INTERRUPT /* depend on application */
|
||||
#define xmalloc malloc
|
||||
#define xrealloc realloc
|
||||
#define xfree free
|
||||
|
@ -67,6 +90,14 @@
|
|||
#define THREAD_ATOMIC_START DEFER_INTS
|
||||
#define THREAD_ATOMIC_END ENABLE_INTS
|
||||
#define THREAD_PASS rb_thread_schedule()
|
||||
#define CHECK_INTERRUPT do {\
|
||||
if (rb_trap_pending) {\
|
||||
if (! rb_prohibit_interrupt) {\
|
||||
rb_trap_exec();\
|
||||
}\
|
||||
}\
|
||||
} while (0)
|
||||
|
||||
#define DEFAULT_WARN_FUNCTION rb_warn
|
||||
#define DEFAULT_VERB_WARN_FUNCTION rb_warning
|
||||
|
||||
|
@ -108,7 +139,9 @@
|
|||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#ifndef __BORLANDC__
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
# include <stdio.h>
|
||||
|
@ -291,6 +324,8 @@ typedef unsigned int BitStatusType;
|
|||
/* ignore-case and multibyte status are included in compiled code. */
|
||||
#define IS_DYNAMIC_OPTION(option) 0
|
||||
|
||||
#define REPEAT_INFINITE -1
|
||||
#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
|
||||
|
||||
/* bitset */
|
||||
#define BITS_PER_BYTE 8
|
||||
|
@ -530,11 +565,11 @@ enum OpCode {
|
|||
#define ARG_MEMNUM 4
|
||||
#define ARG_OPTION 5
|
||||
|
||||
typedef short int RelAddrType;
|
||||
typedef short int AbsAddrType;
|
||||
typedef short int LengthType;
|
||||
typedef short int MemNumType;
|
||||
typedef int RelAddrType;
|
||||
typedef int AbsAddrType;
|
||||
typedef int LengthType;
|
||||
typedef int RepeatNumType;
|
||||
typedef short int MemNumType;
|
||||
|
||||
#define SIZE_OPCODE 1
|
||||
#define SIZE_RELADDR sizeof(RelAddrType)
|
||||
|
@ -575,6 +610,7 @@ typedef int RepeatNumType;
|
|||
option = *((OnigOptionType* )(p));\
|
||||
(p) += SIZE_OPTION;\
|
||||
} while(0)
|
||||
|
||||
#else
|
||||
|
||||
#define GET_RELADDR_INC(addr,p) GET_SHORT_INC(addr,p)
|
||||
|
@ -637,23 +673,37 @@ typedef int RepeatNumType;
|
|||
#define SIZE_OP_RETURN SIZE_OPCODE
|
||||
|
||||
|
||||
typedef struct {
|
||||
UChar esc;
|
||||
UChar anychar;
|
||||
UChar anytime;
|
||||
UChar zero_or_one_time;
|
||||
UChar one_or_more_time;
|
||||
UChar anychar_anytime;
|
||||
} OnigMetaCharTableType;
|
||||
#define MC_ESC(enc) (enc)->meta_char_table.esc
|
||||
#define MC_ANYCHAR(enc) (enc)->meta_char_table.anychar
|
||||
#define MC_ANYTIME(enc) (enc)->meta_char_table.anytime
|
||||
#define MC_ZERO_OR_ONE_TIME(enc) (enc)->meta_char_table.zero_or_one_time
|
||||
#define MC_ONE_OR_MORE_TIME(enc) (enc)->meta_char_table.one_or_more_time
|
||||
#define MC_ANYCHAR_ANYTIME(enc) (enc)->meta_char_table.anychar_anytime
|
||||
|
||||
extern OnigMetaCharTableType OnigMetaCharTable;
|
||||
#define SYN_POSIX_COMMON_OP \
|
||||
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
|
||||
ONIG_SYN_OP_DECIMAL_BACKREF | \
|
||||
ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \
|
||||
ONIG_SYN_OP_LINE_ANCHOR | \
|
||||
ONIG_SYN_OP_ESC_CONTROL_CHARS )
|
||||
|
||||
#define MC_ESC OnigMetaCharTable.esc
|
||||
#define MC_ANYCHAR OnigMetaCharTable.anychar
|
||||
#define MC_ANYTIME OnigMetaCharTable.anytime
|
||||
#define MC_ZERO_OR_ONE_TIME OnigMetaCharTable.zero_or_one_time
|
||||
#define MC_ONE_OR_MORE_TIME OnigMetaCharTable.one_or_more_time
|
||||
#define MC_ANYCHAR_ANYTIME OnigMetaCharTable.anychar_anytime
|
||||
#define SYN_GNU_REGEX_OP \
|
||||
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \
|
||||
ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \
|
||||
ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \
|
||||
ONIG_SYN_OP_VBAR_ALT | \
|
||||
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \
|
||||
ONIG_SYN_OP_QMARK_ZERO_ONE | \
|
||||
ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \
|
||||
ONIG_SYN_OP_ESC_W_WORD | \
|
||||
ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \
|
||||
ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \
|
||||
ONIG_SYN_OP_LINE_ANCHOR )
|
||||
|
||||
#define SYN_GNU_REGEX_BV \
|
||||
( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \
|
||||
ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
|
||||
ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
|
||||
|
||||
#define SYN_POSIX_COMMON_OP \
|
||||
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
|
||||
|
@ -691,7 +741,7 @@ typedef struct {
|
|||
|
||||
extern OnigOpInfoType OnigOpInfo[];
|
||||
|
||||
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp));
|
||||
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, OnigEncoding enc));
|
||||
|
||||
#ifdef ONIG_DEBUG_STATISTICS
|
||||
extern void onig_statistics_init P_((void));
|
||||
|
@ -703,9 +753,11 @@ extern char* onig_error_code_to_format P_((int code));
|
|||
extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...));
|
||||
extern UChar* onig_strdup P_((UChar* s, UChar* end));
|
||||
extern int onig_bbuf_init P_((BBuf* buf, int size));
|
||||
extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax));
|
||||
extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax));
|
||||
extern int onig_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigErrorInfo* einfo));
|
||||
extern void onig_chain_reduce P_((regex_t* reg));
|
||||
extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
|
||||
extern void onig_transfer P_((regex_t* to, regex_t* from));
|
||||
extern int onig_is_in_code_range P_((UChar* p, OnigCodePoint code));
|
||||
|
||||
#endif /* REGINT_H */
|
||||
|
|
840
regparse.c
840
regparse.c
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
64
regparse.h
64
regparse.h
|
@ -1,12 +1,33 @@
|
|||
/**********************************************************************
|
||||
|
||||
regparse.h - Oniguruma (regular expression library)
|
||||
|
||||
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
|
||||
|
||||
**********************************************************************/
|
||||
#ifndef REGPARSE_H
|
||||
#define REGPARSE_H
|
||||
/**********************************************************************
|
||||
regparse.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
|
@ -43,7 +64,8 @@
|
|||
#define CTYPE_NOT_WHITE_SPACE (1<<3)
|
||||
#define CTYPE_DIGIT (1<<4)
|
||||
#define CTYPE_NOT_DIGIT (1<<5)
|
||||
|
||||
#define CTYPE_XDIGIT (1<<6)
|
||||
#define CTYPE_NOT_XDIGIT (1<<7)
|
||||
|
||||
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL)
|
||||
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
|
||||
|
@ -52,23 +74,23 @@
|
|||
#define EFFECT_OPTION (1<<1)
|
||||
#define EFFECT_STOP_BACKTRACK (1<<2)
|
||||
|
||||
#define REPEAT_INFINITE -1
|
||||
#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
|
||||
|
||||
#define NODE_STR_MARGIN 16
|
||||
#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
|
||||
#define NODE_BACKREFS_SIZE 7
|
||||
|
||||
#define NSTR_RAW (1<<0) /* by backslashed number */
|
||||
#define NSTR_CASE_AMBIG (1<<1)
|
||||
#define NSTR_AMBIG (1<<1)
|
||||
#define NSTR_AMBIG_REDUCE (1<<2)
|
||||
|
||||
#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
|
||||
#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
|
||||
#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
|
||||
#define NSTRING_SET_CASE_AMBIG(node) (node)->u.str.flag |= NSTR_CASE_AMBIG
|
||||
#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
|
||||
#define NSTRING_SET_AMBIG_REDUCE(node) (node)->u.str.flag |= NSTR_AMBIG_REDUCE
|
||||
#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
|
||||
#define NSTRING_IS_CASE_AMBIG(node) \
|
||||
(((node)->u.str.flag & NSTR_CASE_AMBIG) != 0)
|
||||
#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
|
||||
#define NSTRING_IS_AMBIG_REDUCE(node) \
|
||||
(((node)->u.str.flag & NSTR_AMBIG_REDUCE) != 0)
|
||||
|
||||
#define BACKREFS_P(br) \
|
||||
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
|
||||
|
@ -115,14 +137,13 @@ typedef struct {
|
|||
#define NST_MARK1 (1<<3)
|
||||
#define NST_MARK2 (1<<4)
|
||||
#define NST_MEM_BACKREFED (1<<5)
|
||||
#define NST_SIMPLE_REPEAT (1<<6) /* for stop backtrack optimization */
|
||||
|
||||
#define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
|
||||
#define NST_RECURSION (1<<7)
|
||||
#define NST_CALLED (1<<8)
|
||||
#define NST_ADDR_FIXED (1<<9)
|
||||
#define NST_NAMED_GROUP (1<<10)
|
||||
#define NST_NAME_REF (1<<11)
|
||||
#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in match stack. */
|
||||
#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
|
||||
|
||||
#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f)
|
||||
#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f)
|
||||
|
@ -135,7 +156,8 @@ typedef struct {
|
|||
#define IS_EFFECT_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
|
||||
#define IS_EFFECT_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
|
||||
#define IS_EFFECT_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
|
||||
#define IS_EFFECT_SIMPLE_REPEAT(en) (((en)->state & NST_SIMPLE_REPEAT) != 0)
|
||||
#define IS_EFFECT_STOP_BT_SIMPLE_REPEAT(en) \
|
||||
(((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
|
||||
#define IS_EFFECT_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
|
||||
|
||||
#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
|
||||
|
@ -228,6 +250,7 @@ typedef struct _Node {
|
|||
|
||||
typedef struct {
|
||||
OnigOptionType option;
|
||||
OnigAmbigType ambig_flag;
|
||||
OnigEncoding enc;
|
||||
OnigSyntaxType* syntax;
|
||||
BitStatusType capture_history;
|
||||
|
@ -267,6 +290,9 @@ extern int onig_node_str_cat P_((Node* node, UChar* s, UChar* end));
|
|||
extern void onig_node_free P_((Node* node));
|
||||
extern Node* onig_node_new_effect P_((int type));
|
||||
extern Node* onig_node_new_anchor P_((int type));
|
||||
extern Node* onig_node_new_str P_((UChar* s, UChar* end));
|
||||
extern Node* onig_node_new_list P_((Node* left, Node* right));
|
||||
extern void onig_node_str_clear P_((Node* node));
|
||||
extern int onig_free_node_list();
|
||||
extern int onig_names_free P_((regex_t* reg));
|
||||
extern int onig_parse_make_tree P_((Node** root, UChar* pattern, UChar* end, regex_t* reg, ScanEnv* env));
|
||||
|
|
Загрузка…
Ссылка в новой задаче