* include/ruby/oniguruma.h: upgrade to Oniguruma 5.9.0. Fixes some memory violations.  [ruby-dev:31070]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12841 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
matz 2007-07-23 09:39:30 +00:00
Родитель 46eb6e9428
Коммит e1def8a987
14 изменённых файлов: 1393 добавлений и 1412 удалений

Просмотреть файл

@ -1,3 +1,8 @@
Mon Jul 23 18:37:14 2007 Yukihiro Matsumoto <matz@ruby-lang.org>
* include/ruby/oniguruma.h: upgrade to Oniguruma 5.9.0. fixes
some memory violation. [ruby-dev:31070]
Sun Jul 22 20:09:49 2007 Tadayoshi Funaba <tadf@dotrb.org>
* lib/date/format.rb (Date._parse): now accepts some new

Просмотреть файл

@ -113,7 +113,7 @@ code_to_mbc(OnigCodePoint code, UChar *buf)
#if 1
if (enc_len(ONIG_ENCODING_EUC_JP, buf) != (p - buf))
return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE;
#endif
return p - buf;
}
@ -234,7 +234,7 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype)
ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
if (ctype >= (unsigned int )PropertyListNum)
return ONIGENCERR_TYPE_BUG;
return ONIGENC_ERR_TYPE_BUG;
return onig_is_in_code_range((UChar* )PropertyList[ctype], code);
}
@ -256,7 +256,7 @@ get_ctype_code_range(int ctype, OnigCodePoint* sb_out,
ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
if (ctype >= PropertyListNum)
return ONIGENCERR_TYPE_BUG;
return ONIGENC_ERR_TYPE_BUG;
*ranges = PropertyList[ctype];
return 0;

Просмотреть файл

@ -38,7 +38,7 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 5
#define ONIGURUMA_VERSION_MINOR 7
#define ONIGURUMA_VERSION_MINOR 9
#define ONIGURUMA_VERSION_TEENY 0
#ifdef __cplusplus

1231
regcomp.c

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -169,10 +169,7 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
}
}
const UChar* OnigEncAsciiToLowerCaseTable = (const UChar* )0;
#ifndef USE_EXTERNAL_LOWER_CASE_CONV_TABLE
static const UChar BuiltInAsciiToLowerCaseTable[] = {
const UChar OnigEncAsciiToLowerCaseTable[] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@ -206,7 +203,6 @@ static const UChar BuiltInAsciiToLowerCaseTable[] = {
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
#endif /* not USE_EXTERNAL_LOWER_CASE_CONV_TABLE */
#ifdef USE_UPPER_CASE_TABLE
const UChar OnigEncAsciiToUpperCaseTable[256] = {
@ -355,17 +351,8 @@ const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
extern void
onigenc_set_default_caseconv_table(const UChar* table)
{
if (table == (const UChar* )0) {
#ifndef USE_EXTERNAL_LOWER_CASE_CONV_TABLE
table = BuiltInAsciiToLowerCaseTable;
#else
return ;
#endif
}
if (table != OnigEncAsciiToLowerCaseTable) {
OnigEncAsciiToLowerCaseTable = table;
}
/* nothing */
/* obsoleted. */
}
extern UChar*
@ -732,7 +719,7 @@ onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
#if 1
if (enc_len(enc, buf) != (p - buf))
return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE;
#endif
return p - buf;
}
@ -755,7 +742,7 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
#if 1
if (enc_len(enc, buf) != (p - buf))
return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE;
#endif
return p - buf;
}

Просмотреть файл

@ -4,7 +4,7 @@
regenc.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -56,10 +56,10 @@ typedef struct {
#endif
/* error codes */
#define ONIGENCERR_MEMORY -5
#define ONIGENCERR_TYPE_BUG -6
#define ONIGENCERR_INVALID_WIDE_CHAR_VALUE -400
#define ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE -401
#define ONIGENC_ERR_MEMORY -5
#define ONIGENC_ERR_TYPE_BUG -6
#define ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE -400
#define ONIGENC_ERR_TOO_BIG_WIDE_CHAR_VALUE -401
#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0)
#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
@ -96,10 +96,9 @@ typedef struct {
} PosixBracketEntryType;
/* #define USE_CRNL_AS_LINE_TERMINATOR */
#define USE_UNICODE_PROPERTIES
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
/* following must not use with USE_CRNL_AS_LINE_TERMINATOR */
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */
@ -163,7 +162,7 @@ onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));
extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
ONIG_EXTERN const UChar* OnigEncAsciiToLowerCaseTable;
ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[];
ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[];
ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];

Просмотреть файл

@ -2,7 +2,7 @@
regerror.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -317,7 +317,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
va_list args;
va_init_list(args, fmt);
n = vsnprintf((char* )buf, bufsize, (const char* )fmt, args);
n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args);
va_end(args);
need = (pat_end - pat) * 4 + 4;

477
regexec.c

Разница между файлами не показана из-за своего большого размера Загрузить разницу

181
regint.h
Просмотреть файл

@ -51,7 +51,7 @@
(defined(__ppc__) && defined(__APPLE__)) || \
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD86) || \
defined(__mc68020__)
#define PLATFORM_UNALIGNED_WORD_ACCESS
/* #define PLATFORM_UNALIGNED_WORD_ACCESS */
#endif
/* config */
@ -63,16 +63,13 @@
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
/* #define USE_RECOMPILE_API */
/* treat \r\n as line terminator.
!!! NO SUPPORT !!!
use this configuration on your own responsibility */
/* #define USE_CRNL_AS_LINE_TERMINATOR */
/* #define USE_CRNL_AS_LINE_TERMINATOR */ /* moved to regenc.h. */
/* internal config */
#define USE_RECYCLE_NODE
#define USE_PARSE_TREE_NODE_RECYCLE
#define USE_OP_PUSH_OR_JUMP_EXACT
#define USE_QUANTIFIER_PEEK_NEXT
#define USE_ST_HASH_TABLE
#define USE_QTFR_PEEK_NEXT
#define USE_ST_LIBRARY
#define USE_SHARED_CCLASS_TABLE
#define INIT_MATCH_STACK_SIZE 160
@ -109,10 +106,6 @@
#define CHECK_INTERRUPT_IN_MATCH_AT
#if defined(_WIN32) && !defined(__GNUC__) && !defined(vsnprintf)
#define vsnprintf _vsnprintf
#endif
#ifdef RUBY
#define onig_st_init_table st_init_table
@ -165,12 +158,16 @@
#define xmemset memset
#define xmemcpy memcpy
#define xmemmove memmove
#if defined(_WIN32) && !defined(__GNUC__)
#define xalloca _alloca
#define xvsnprintf _vsnprintf
#else
#define xalloca alloca
#define xvsnprintf vsnprintf
#endif
#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
#define ONIG_STATE_INC(reg) (reg)->state++
#define ONIG_STATE_DEC(reg) (reg)->state--
@ -235,11 +232,26 @@
#define IS_NULL(p) (((void*)(p)) == (void*)0)
#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL
#define CHECK_NULL_RETURN_VAL(p,val) if (IS_NULL(p)) return (val)
#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY
#define NULL_UCHARP ((UChar* )0)
#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
#define WORD_ALIGNMENT_SIZE SIZEOF_INT
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
#define PLATFORM_GET_INC(val,p,type) do{\
val = *(type* )p;\
(p) += sizeof(type);\
} while(0)
#else
#define PLATFORM_GET_INC(val,p,type) do{\
xmemcpy(&val, (p), sizeof(type));\
(p) += sizeof(type);\
} while(0)
/* sizeof(OnigCodePoint) */
#define WORD_ALIGNMENT_SIZE SIZEOF_LONG
#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
(pad_size) = WORD_ALIGNMENT_SIZE \
@ -252,86 +264,6 @@
(addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
} while (0)
#define B_SHIFT 8
#define B_MASK 0xff
#define SERIALIZE_2BYTE_INT(i,p) do {\
*(p) = ((i) >> B_SHIFT) & B_MASK;\
*((p)+1) = (i) & B_MASK;\
} while (0)
#define SERIALIZE_4BYTE_INT(i,p) do {\
*(p) = ((i) >> B_SHIFT*3) & B_MASK;\
*((p)+1) = ((i) >> B_SHIFT*2) & B_MASK;\
*((p)+2) = ((i) >> B_SHIFT ) & B_MASK;\
*((p)+3) = (i) & B_MASK;\
} while (0)
#define SERIALIZE_8BYTE_INT(i,p) do {\
*(p) = ((i) >> B_SHIFT*7) & B_MASK;\
*((p)+1) = ((i) >> B_SHIFT*6) & B_MASK;\
*((p)+2) = ((i) >> B_SHIFT*5) & B_MASK;\
*((p)+3) = ((i) >> B_SHIFT*4) & B_MASK;\
*((p)+4) = ((i) >> B_SHIFT*3) & B_MASK;\
*((p)+5) = ((i) >> B_SHIFT*2) & B_MASK;\
*((p)+6) = ((i) >> B_SHIFT ) & B_MASK;\
*((p)+7) = (i) & B_MASK;\
} while (0)
#define GET_2BYTE_INT_INC(type,i,p) do {\
(i) = (type )(((unsigned int )(*(p)) << B_SHIFT) | (unsigned int )((p)[1]));\
(p) += 2;\
} while (0)
#define GET_4BYTE_INT_INC(type,i,p) do {\
(i) = (type )(((unsigned int )((p)[0]) << B_SHIFT*3) | \
((unsigned int )((p)[1]) << B_SHIFT*2) | \
((unsigned int )((p)[2]) << B_SHIFT ) | \
((unsigned int )((p)[3]) )); \
(p) += 4;\
} while (0)
#define GET_8BYTE_INT_INC(type,i,p) do {\
(i) = (type )(((unsigned long )((p)[0]) << B_SHIFT*7) | \
((unsigned long )((p)[1]) << B_SHIFT*6) | \
((unsigned long )((p)[2]) << B_SHIFT*5) | \
((unsigned long )((p)[3]) << B_SHIFT*4) | \
((unsigned long )((p)[4]) << B_SHIFT*3) | \
((unsigned long )((p)[5]) << B_SHIFT*2) | \
((unsigned long )((p)[6]) << B_SHIFT ) | \
((unsigned long )((p)[7]) )); \
(p) += 8;\
} while (0)
#if SIZEOF_SHORT == 2
#define GET_SHORT_INC(i,p) GET_2BYTE_INT_INC(short,i,p)
#define SERIALIZE_SHORT(i,p) SERIALIZE_2BYTE_INT(i,p)
#elif SIZEOF_SHORT == 4
#define GET_SHORT_INC(i,p) GET_4BYTE_INT_INC(short,i,p)
#define SERIALIZE_SHORT(i,p) SERIALIZE_4BYTE_INT(i,p)
#elif SIZEOF_SHORT == 8
#define GET_SHORT_INC(i,p) GET_8BYTE_INT_INC(short,i,p)
#define SERIALIZE_SHORT(i,p) SERIALIZE_8BYTE_INT(i,p)
#endif
#if SIZEOF_INT == 2
#define GET_INT_INC(i,p) GET_2BYTE_INT_INC(int,i,p)
#define GET_UINT_INC(i,p) GET_2BYTE_INT_INC(unsigned,i,p)
#define SERIALIZE_INT(i,p) SERIALIZE_2BYTE_INT(i,p)
#define SERIALIZE_UINT(i,p) SERIALIZE_2BYTE_INT(i,p)
#elif SIZEOF_INT == 4
#define GET_INT_INC(i,p) GET_4BYTE_INT_INC(int,i,p)
#define GET_UINT_INC(i,p) GET_4BYTE_INT_INC(unsigned,i,p)
#define SERIALIZE_INT(i,p) SERIALIZE_4BYTE_INT(i,p)
#define SERIALIZE_UINT(i,p) SERIALIZE_4BYTE_INT(i,p)
#elif SIZEOF_INT == 8
#define GET_INT_INC(i,p) GET_8BYTE_INT_INC(int,i,p)
#define GET_UINT_INC(i,p) GET_8BYTE_INT_INC(unsigned,i,p)
#define SERIALIZE_INT(i,p) SERIALIZE_8BYTE_INT(i,p)
#define SERIALIZE_UINT(i,p) SERIALIZE_8BYTE_INT(i,p)
#endif
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
/* stack pop level */
@ -383,7 +315,6 @@ typedef unsigned int BitStatusType;
#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
#define IS_POSIXLINE(option) (IS_SINGLELINE(option) && IS_MULTILINE(option))
#define IS_FIND_CONDITION(option) ((option) & \
(ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
@ -597,9 +528,6 @@ enum OpCode {
OP_MEMORY_END,
OP_MEMORY_END_REC, /* push marker to stack */
OP_SET_OPTION_PUSH, /* set option and push recover option */
OP_SET_OPTION, /* set option */
OP_FAIL, /* pop stack and move */
OP_JUMP,
OP_PUSH,
@ -634,7 +562,11 @@ enum OpCode {
OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
OP_STATE_CHECK, /* check only */
OP_STATE_CHECK_ANYCHAR_STAR,
OP_STATE_CHECK_ANYCHAR_ML_STAR
OP_STATE_CHECK_ANYCHAR_ML_STAR,
/* no need: IS_DYNAMIC_OPTION() == 0 */
OP_SET_OPTION_PUSH, /* set option and push recover option */
OP_SET_OPTION /* set option */
};
typedef int RelAddrType;
@ -657,22 +589,6 @@ typedef void* PointerType;
#define SIZE_POINTER sizeof(PointerType)
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
#define PLATFORM_GET_INC(val,p,type) do{\
val = *(type* )p;\
(p) += sizeof(type);\
} while(0)
#else
#define PLATFORM_GET_INC(val,p,type) do{\
xmemcpy(&val, (p), sizeof(type));\
(p) += sizeof(type);\
} while(0)
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType)
#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType)
#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType)
@ -767,21 +683,33 @@ typedef void* PointerType;
ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
#define NCCLASS_FLAGS(cc) ((cc)->flags)
#define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag))
#define NCCLASS_FLAG_CLEAR(cc,flag) (NCCLASS_FLAGS(cc) &= ~(flag))
#define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0)
/* cclass node */
#define CCLASS_FLAG_NOT 1
#define CCLASS_FLAG_SHARE (1<<1)
#define FLAG_NCCLASS_NOT (1<<0)
#define FLAG_NCCLASS_SHARE (1<<1)
#define CCLASS_SET_NOT(cc) (cc)->flags |= CCLASS_FLAG_NOT
#define CCLASS_SET_SHARE(cc) (cc)->flags |= CCLASS_FLAG_SHARE
#define CCLASS_CLEAR_NOT(cc) (cc)->flags &= ~CCLASS_FLAG_NOT
#define IS_CCLASS_NOT(cc) (((cc)->flags & CCLASS_FLAG_NOT) != 0)
#define IS_CCLASS_SHARE(cc) (((cc)->flags & CCLASS_FLAG_SHARE) != 0)
#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT)
#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE)
#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)
#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT)
#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE)
typedef struct {
int flags;
int type;
/* struct _Node* next; */
/* unsigned int flags; */
} NodeBase;
typedef struct {
NodeBase base;
unsigned int flags;
BitSet bs;
BBuf* mbuf; /* multi-byte info or NULL */
BBuf* mbuf; /* multi-byte info or NULL */
} CClassNode;
typedef long OnigStackIndex;
@ -873,6 +801,7 @@ extern void onig_chain_reduce P_((regex_t* reg));
extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
extern void onig_transfer P_((regex_t* to, regex_t* from));
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc));
/* strend hash */
typedef void hash_table_type;

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -32,38 +32,61 @@
#include "regint.h"
/* node type */
#define N_STRING (1<< 0)
#define N_CCLASS (1<< 1)
#define N_CTYPE (1<< 2)
#define N_ANYCHAR (1<< 3)
#define N_BACKREF (1<< 4)
#define N_QUANTIFIER (1<< 5)
#define N_EFFECT (1<< 6)
#define N_ANCHOR (1<< 7)
#define N_LIST (1<< 8)
#define N_ALT (1<< 9)
#define N_CALL (1<<10)
#define NT_STR 0
#define NT_CCLASS 1
#define NT_CTYPE 2
#define NT_CANY 3
#define NT_BREF 4
#define NT_QTFR 5
#define NT_ENCLOSE 6
#define NT_ANCHOR 7
#define NT_LIST 8
#define NT_ALT 9
#define NT_CALL 10
/* node type bit */
#define NTYPE2BIT(type) (1<<(type))
#define BIT_NT_STR NTYPE2BIT(NT_STR)
#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS)
#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE)
#define BIT_NT_CANY NTYPE2BIT(NT_CANY)
#define BIT_NT_BREF NTYPE2BIT(NT_BREF)
#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR)
#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE)
#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR)
#define BIT_NT_LIST NTYPE2BIT(NT_LIST)
#define BIT_NT_ALT NTYPE2BIT(NT_ALT)
#define BIT_NT_CALL NTYPE2BIT(NT_CALL)
#define IS_NODE_TYPE_SIMPLE(type) \
(((type) & (N_STRING | N_CCLASS | N_CTYPE | N_ANYCHAR | N_BACKREF)) != 0)
((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\
BIT_NT_CANY | BIT_NT_BREF)) != 0)
#define NTYPE(node) ((node)->u.base.type)
#define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype)
#define NSTR(node) (&((node)->u.str))
#define NCCLASS(node) (&((node)->u.cclass))
#define NCTYPE(node) (&((node)->u.ctype))
#define NBREF(node) (&((node)->u.bref))
#define NQTFR(node) (&((node)->u.qtfr))
#define NENCLOSE(node) (&((node)->u.enclose))
#define NANCHOR(node) (&((node)->u.anchor))
#define NCONS(node) (&((node)->u.cons))
#define NCALL(node) (&((node)->u.call))
#define NCAR(node) (NCONS(node)->car)
#define NCDR(node) (NCONS(node)->cdr)
#define NTYPE(node) ((node)->type)
#define NCONS(node) ((node)->u.cons)
#define NSTRING(node) ((node)->u.str)
#define NCCLASS(node) ((node)->u.cclass)
#define NCTYPE(node) ((node)->u.ctype)
#define NQUANTIFIER(node) ((node)->u.quant)
#define NANCHOR(node) ((node)->u.anchor)
#define NBACKREF(node) ((node)->u.backref)
#define NEFFECT(node) ((node)->u.effect)
#define NCALL(node) ((node)->u.call)
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
#define EFFECT_MEMORY (1<<0)
#define EFFECT_OPTION (1<<1)
#define EFFECT_STOP_BACKTRACK (1<<2)
#define ENCLOSE_MEMORY (1<<0)
#define ENCLOSE_OPTION (1<<1)
#define ENCLOSE_STOP_BACKTRACK (1<<2)
#define NODE_STR_MARGIN 16
#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
@ -92,30 +115,6 @@
#define NQ_TARGET_IS_EMPTY_MEM 2
#define NQ_TARGET_IS_EMPTY_REC 3
typedef struct {
UChar* s;
UChar* end;
unsigned int flag;
int capa; /* (allocated size - 1) or 0: use buf[] */
UChar buf[NODE_STR_BUF_SIZE];
} StrNode;
typedef struct {
int state;
struct _Node* target;
int lower;
int upper;
int greedy;
int target_empty_info;
struct _Node* head_exact;
struct _Node* next_head_exact;
int is_refered; /* include called node. don't eliminate even if {0} */
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
#endif
} QuantifierNode;
/* status bits */
#define NST_MIN_FIXED (1<<0)
#define NST_MAX_FIXED (1<<1)
@ -133,20 +132,20 @@ typedef struct {
#define NST_NEST_LEVEL (1<<13)
#define NST_BY_NUMBER (1<<14) /* {n,m} */
#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f)
#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f)
#define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f)
#define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f)
#define IS_EFFECT_CALLED(en) (((en)->state & NST_CALLED) != 0)
#define IS_EFFECT_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
#define IS_EFFECT_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
#define IS_EFFECT_MARK1(en) (((en)->state & NST_MARK1) != 0)
#define IS_EFFECT_MARK2(en) (((en)->state & NST_MARK2) != 0)
#define IS_EFFECT_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
#define IS_EFFECT_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
#define IS_EFFECT_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
#define IS_EFFECT_STOP_BT_SIMPLE_REPEAT(en) \
#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0)
#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0)
#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0)
#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
(((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
#define IS_EFFECT_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
@ -156,7 +155,35 @@ typedef struct {
#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
#define CALLNODE_REFNUM_UNDEF -1
typedef struct {
NodeBase base;
UChar* s;
UChar* end;
unsigned int flag;
int capa; /* (allocated size - 1) or 0: use buf[] */
UChar buf[NODE_STR_BUF_SIZE];
} StrNode;
typedef struct {
NodeBase base;
int state;
struct _Node* target;
int lower;
int upper;
int greedy;
int target_empty_info;
struct _Node* head_exact;
struct _Node* next_head_exact;
int is_refered; /* include called node. don't eliminate even if {0} */
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
#endif
} QtfrNode;
typedef struct {
NodeBase base;
int state;
int type;
int regnum;
@ -168,9 +195,7 @@ typedef struct {
OnigDistance max_len; /* max length (byte) */
int char_len; /* character length */
int opt_count; /* referenced count in optimize_node_left() */
} EffectNode;
#define CALLNODE_REFNUM_UNDEF -1
} EncloseNode;
#ifdef USE_SUBEXP_CALL
@ -186,53 +211,63 @@ typedef struct {
} UnsetAddrList;
typedef struct {
NodeBase base;
int state;
int ref_num;
UChar* name;
UChar* name_end;
struct _Node* target; /* EffectNode : EFFECT_MEMORY */
struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */
UnsetAddrList* unset_addr_list;
} CallNode;
#endif
typedef struct {
NodeBase base;
int state;
int back_num;
int back_static[NODE_BACKREFS_SIZE];
int* back_dynamic;
int nest_level;
} BackrefNode;
} BRefNode;
typedef struct {
NodeBase base;
int type;
struct _Node* target;
int char_len;
} AnchorNode;
typedef struct {
NodeBase base;
struct _Node* car;
struct _Node* cdr;
} ConsAltNode;
typedef struct {
NodeBase base;
int ctype;
int not;
} CtypeNode;
typedef struct _Node {
int type;
union {
StrNode str;
CClassNode cclass;
QuantifierNode quant;
EffectNode effect;
NodeBase base;
StrNode str;
CClassNode cclass;
QtfrNode qtfr;
EncloseNode enclose;
BRefNode bref;
AnchorNode anchor;
ConsAltNode cons;
CtypeNode ctype;
#ifdef USE_SUBEXP_CALL
CallNode call;
CallNode call;
#endif
BackrefNode backref;
AnchorNode anchor;
struct {
struct _Node* left;
struct _Node* right;
} cons;
struct {
int ctype;
int not;
} ctype;
} u;
} Node;
#define NULL_NODE ((Node* )0)
#define SCANENV_MEMNODES_SIZE 8
@ -295,7 +330,7 @@ extern void onig_node_conv_to_str_node P_((Node* node, int raw));
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
extern void onig_node_free P_((Node* node));
extern Node* onig_node_new_effect P_((int type));
extern Node* onig_node_new_enclose P_((int type));
extern Node* onig_node_new_anchor P_((int type));
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
extern Node* onig_node_new_list P_((Node* left, Node* right));

4
sjis.c
Просмотреть файл

@ -269,7 +269,7 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype)
ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
if (ctype >= (unsigned int )PropertyListNum)
return ONIGENCERR_TYPE_BUG;
return ONIGENC_ERR_TYPE_BUG;
return onig_is_in_code_range((UChar* )PropertyList[ctype], code);
}
@ -291,7 +291,7 @@ get_ctype_code_range(int ctype, OnigCodePoint* sb_out,
ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
if (ctype >= PropertyListNum)
return ONIGENCERR_TYPE_BUG;
return ONIGENC_ERR_TYPE_BUG;
*ranges = PropertyList[ctype];
return 0;

Просмотреть файл

@ -10756,7 +10756,7 @@ onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
}
if (ctype >= CODE_RANGES_NUM) {
return ONIGENCERR_TYPE_BUG;
return ONIGENC_ERR_TYPE_BUG;
}
if (CodeRangeTableInited == 0) init_code_range_array();
@ -10769,7 +10769,7 @@ extern int
onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[])
{
if (ctype >= CODE_RANGES_NUM) {
return ONIGENCERR_TYPE_BUG;
return ONIGENC_ERR_TYPE_BUG;
}
if (CodeRangeTableInited == 0) init_code_range_array();

10
utf8.c
Просмотреть файл

@ -2,7 +2,7 @@
utf8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -72,7 +72,9 @@ utf8_is_mbc_newline(const UChar* p, const UChar* end)
if (*p == 0x0a) return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
#ifndef USE_CRNL_AS_LINE_TERMINATOR
if (*p == 0x0d) return 1;
#endif
if (p + 1 < end) {
if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */
return 1;
@ -133,7 +135,7 @@ utf8_code_to_mbclen(OnigCodePoint code)
else if (code == INVALID_CODE_FF) return 1;
#endif
else
return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE;
return ONIGENC_ERR_TOO_BIG_WIDE_CHAR_VALUE;
}
#if 0
@ -154,7 +156,7 @@ utf8_code_to_mbc_first(OnigCodePoint code)
else if ((code & 0x80000000) == 0)
return ((code>>30) & 0x01) | 0xfc;
else {
return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE;
return ONIGENC_ERR_TOO_BIG_WIDE_CHAR_VALUE;
}
}
}
@ -209,7 +211,7 @@ utf8_code_to_mbc(OnigCodePoint code, UChar *buf)
}
#endif
else {
return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE;
return ONIGENC_ERR_TOO_BIG_WIDE_CHAR_VALUE;
}
*p++ = UTF8_TRAIL0(code);