diff --git a/ChangeLog b/ChangeLog index 684fec5fe4..f6bde16499 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Sat Jan 29 00:10:33 2005 Kazuo Saito + + * ascii.c, euc_jp.c, hash.c, oniggnu.h, oniguruma.h, regcomp.c, + regenc.c, regenc.h, regerror.c, regexec.c, reggnu.c, regint.h, + regparse.c, regparse.h, sjis.c, st.c, st.h, utf8.c: imported + Oni Guruma 3.5.4. + Fri Jan 28 17:16:55 2005 Tanaka Akira * lib/resolv.rb (Resolv::DNS::Config.parse_resolv_conf): diff --git a/hash.c b/hash.c index 16f6325955..beadac14ee 100644 --- a/hash.c +++ b/hash.c @@ -102,6 +102,8 @@ rb_any_hash(a) static struct st_hash_type objhash = { rb_any_cmp, rb_any_hash, + st_nothing_key_free, + st_nothing_key_clone }; struct foreach_safe_arg { diff --git a/imp.log b/imp.log deleted file mode 100644 index e3828c79a8..0000000000 --- a/imp.log +++ /dev/null @@ -1,8 +0,0 @@ -Vim: Warning: Output is not to a terminal -7[?47h[?1h="/tmp/cvss7mRju" 4L, 229CCVS: ---------------------------------------------------------------------- -CVS: Enter Log. Lines beginning with `CVS:' are removed automatically -CVS: -CVS: ---------------------------------------------------------------------- -~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ :q!-- INSERT --i-- INSERT --mported Oni Guruma 3.5.4."/private/tmp/cvss7mRju" 5L, 256C written - -[?1l>[?47l8 \ No newline at end of file diff --git a/oniguruma.h b/oniguruma.h index c10f3b4d18..95cd109384 100644 --- a/oniguruma.h +++ b/oniguruma.h @@ -4,7 +4,7 @@ oniguruma.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2004 K.Kosako + * Copyright (c) 2002-2005 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -31,8 +31,17 @@ #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 3 -#define ONIGURUMA_VERSION_MINOR 4 -#define ONIGURUMA_VERSION_TEENY 0 +#define ONIGURUMA_VERSION_MINOR 5 +#define ONIGURUMA_VERSION_TEENY 4 + +#ifdef __cplusplus +# ifndef HAVE_PROTOTYPES +# define HAVE_PROTOTYPES 1 +# endif +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif #ifndef P_ #if defined(__STDC__) || defined(_WIN32) @@ -72,12 +81,6 @@ typedef unsigned int OnigDistance; #define ONIG_INFINITE_DISTANCE ~((OnigDistance )0) -typedef struct { - OnigCodePoint from; - OnigCodePoint to; -} OnigCodePointRange; - - /* ambiguous match flag */ #define ONIGENC_AMBIGUOUS_MATCH_NONE 0 #define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE (1<<0) @@ -103,6 +106,11 @@ typedef unsigned int OnigAmbigType; #define ONIGENC_MAX_COMP_AMBIG_CODE_LEN 3 #define ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM 4 +/* code range */ +#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0]) +#define ONIGENC_CODE_RANGE_FROM(range,i) range[((i)*2) + 1] +#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2] + typedef struct { int len; OnigCodePoint code[ONIGENC_MAX_COMP_AMBIG_CODE_LEN]; @@ -152,7 +160,7 @@ typedef struct { int (*get_all_pair_ambig_codes)(OnigAmbigType flag, OnigPairAmbigCodes** acs); int (*get_all_comp_ambig_codes)(OnigAmbigType flag, OnigCompAmbigCodes** acs); int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype); - int (*get_ctype_code_range)(int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]); + int (*get_ctype_code_range)(int ctype, OnigCodePoint* sb_range[], OnigCodePoint* mb_range[]); UChar* (*left_adjust_char_head)(UChar* start, UChar* p); int (*is_allowed_reverse_match)(UChar* p, UChar* end); } OnigEncodingType; @@ -245,7 +253,6 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; #define ONIGENC_CTYPE_ASCII (1<<13) #define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | 
ONIGENC_CTYPE_DIGIT) - #define enc_len(enc,p) ONIGENC_MBC_ENC_LEN(enc,p) #define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF) @@ -275,7 +282,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; onigenc_get_left_adjust_char_head(enc, start, s) #define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, ambig_flag, acs) 0 #define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, ambig_flag, acs) 0 -#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \ +#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \ ONIG_NO_SUPPORT_CONFIG #define ONIGENC_MBC_ENC_LEN(enc,p) m17n_mbclen(enc,(int )(*p)) #define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc) @@ -390,8 +397,8 @@ int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end) #define ONIGENC_IS_CODE_WORD(enc,code) \ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD) -#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \ - (enc)->get_ctype_code_range(ctype,nsb,nmb,sbr,mbr) +#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \ + (enc)->get_ctype_code_range(ctype,sbr,mbr) ONIG_EXTERN UChar* onigenc_step_back P_((OnigEncoding enc, UChar* start, UChar* s, int n)); @@ -600,7 +607,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101 #define ONIGERR_EMPTY_CHAR_CLASS -102 #define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103 -#define ONIGERR_END_PATTERN_AT_BACKSLASH -104 +#define ONIGERR_END_PATTERN_AT_ESCAPE -104 #define ONIGERR_END_PATTERN_AT_META -105 #define ONIGERR_END_PATTERN_AT_CONTROL -106 #define ONIGERR_META_CODE_SYNTAX -108 diff --git a/regcomp.c b/regcomp.c index 7217f71ab8..116bcb7c9b 100644 --- a/regcomp.c +++ b/regcomp.c @@ -2,7 +2,7 @@ regcomp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2004 K.Kosako + * Copyright (c) 2002-2005 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -33,6 +33,21 @@ static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; #endif +static UChar* +k_strdup(UChar* s, UChar* end) +{ + int len = end - s; + + if (len > 0) { + UChar* r = (UChar* )xmalloc(len + 1); + CHECK_NULL_RETURN(r); + xmemcpy(r, s, len); + r[len] = (UChar )0; + return r; + } + else return NULL; +} + /* Caution: node should not be a string node. (s and end member address break) @@ -189,16 +204,14 @@ add_mem_num(regex_t* reg, int num) return 0; } -#if 0 static int -add_repeat_num(regex_t* reg, int num) +add_pointer(regex_t* reg, void* addr) { - RepeatNumType n = (RepeatNumType )num; + PointerType ptr = (PointerType )addr; - BBUF_ADD(reg, &n, SIZE_REPEATNUM); + BBUF_ADD(reg, &ptr, SIZE_POINTER); return 0; } -#endif static int add_option(regex_t* reg, OnigOptionType option) @@ -518,6 +531,11 @@ compile_length_cclass_node(CClassNode* cc, regex_t* reg) { int len; + if (IS_CCLASS_SHARE(cc)) { + len = SIZE_OPCODE + SIZE_POINTER; + return len; + } + if (IS_NULL(cc->mbuf)) { len = SIZE_OPCODE + SIZE_BITSET; } @@ -543,22 +561,34 @@ compile_cclass_node(CClassNode* cc, regex_t* reg) { int r; + if (IS_CCLASS_SHARE(cc)) { + add_opcode(reg, OP_CCLASS_NODE); + r = add_pointer(reg, cc); + return r; + } + if (IS_NULL(cc->mbuf)) { - if (cc->not) add_opcode(reg, OP_CCLASS_NOT); - else add_opcode(reg, OP_CCLASS); + if (IS_CCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_NOT); + else + add_opcode(reg, OP_CCLASS); r = add_bitset(reg, cc->bs); } else { if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { - if (cc->not) add_opcode(reg, OP_CCLASS_MB_NOT); - else add_opcode(reg, OP_CCLASS_MB); + if (IS_CCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_MB_NOT); + else + add_opcode(reg, OP_CCLASS_MB); r = add_multi_byte_cclass(cc->mbuf, reg); } else { - if (cc->not) add_opcode(reg, OP_CCLASS_MIX_NOT); - else add_opcode(reg, OP_CCLASS_MIX); + if (IS_CCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_MIX_NOT); + else 
+ add_opcode(reg, OP_CCLASS_MIX); r = add_bitset(reg, cc->bs); if (r) return r; @@ -631,7 +661,6 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info, else { r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG); } - if (r) return r; r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ return r; @@ -1408,12 +1437,9 @@ compile_tree(Node* node, regex_t* reg) } #ifdef USE_NAMED_GROUP -typedef struct { - int new_val; -} NumMap; static int -noname_disable_map(Node** plink, NumMap* map, int* counter) +noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) { int r = 0; Node* node = *plink; @@ -1467,7 +1493,7 @@ noname_disable_map(Node** plink, NumMap* map, int* counter) } static int -renumber_node_backref(Node* node, NumMap* map) +renumber_node_backref(Node* node, GroupNumRemap* map) { int i, pos, n, old_num; int *backs; @@ -1495,7 +1521,7 @@ renumber_node_backref(Node* node, NumMap* map) } static int -renumber_by_map(Node* node, NumMap* map) +renumber_by_map(Node* node, GroupNumRemap* map) { int r = 0; @@ -1560,9 +1586,9 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) { int r, i, pos, counter; BitStatusType loc; - NumMap* map; + GroupNumRemap* map; - map = (NumMap* )xalloca(sizeof(NumMap) * (env->num_mem + 1)); + map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY); for (i = 1; i <= env->num_mem; i++) { map[i].new_val = 0; @@ -1591,7 +1617,8 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) env->num_mem = env->num_named; reg->num_mem = env->num_named; - return 0; + + return onig_renumber_name_table(reg, map); } #endif /* USE_NAMED_GROUP */ @@ -2092,10 +2119,10 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) found = (BITSET_AT(cc->bs, code) == 0 ? 
0 : 1); } - if (cc->not == 0) - return found; - else + if (IS_CCLASS_NOT(cc)) return !found; + else + return found; } /* x is not included y ==> 1 : 0 */ @@ -2158,7 +2185,7 @@ is_not_included(Node* x, Node* y, regex_t* reg) case N_CTYPE: switch (NCTYPE(y).type) { case CTYPE_WORD: - if (IS_NULL(xc->mbuf) && xc->not == 0) { + if (IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { if (BITSET_AT(xc->bs, i)) { if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0; @@ -2171,7 +2198,7 @@ is_not_included(Node* x, Node* y, regex_t* reg) case CTYPE_NOT_WORD: for (i = 0; i < SINGLE_BYTE_SIZE; i++) { if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) { - if (xc->not == 0) { + if (!IS_CCLASS_NOT(xc)) { if (BITSET_AT(xc->bs, i)) return 0; } @@ -2196,14 +2223,16 @@ is_not_included(Node* x, Node* y, regex_t* reg) for (i = 0; i < SINGLE_BYTE_SIZE; i++) { v = BITSET_AT(xc->bs, i); - if ((v != 0 && xc->not == 0) || (v == 0 && xc->not)) { + if ((v != 0 && !IS_CCLASS_NOT(xc)) || + (v == 0 && IS_CCLASS_NOT(xc))) { v = BITSET_AT(yc->bs, i); - if ((v != 0 && yc->not == 0) || (v == 0 && yc->not)) + if ((v != 0 && !IS_CCLASS_NOT(yc)) || + (v == 0 && IS_CCLASS_NOT(yc))) return 0; } } - if ((IS_NULL(xc->mbuf) && xc->not == 0) || - (IS_NULL(yc->mbuf) && yc->not == 0)) + if ((IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) || + (IS_NULL(yc->mbuf) && !IS_CCLASS_NOT(yc))) return 1; return 0; } @@ -3333,22 +3362,27 @@ typedef struct { OptMapInfo map; /* boundary */ } NodeOptInfo; -static short int ByteValTable[] = { - 14, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, - 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5, - 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 -}; static int -map_position_value(int i) 
+map_position_value(OnigEncoding enc, int i) { - if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0])) - return (int )ByteValTable[i]; + static short int ByteValTable[] = { + 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, + 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5, + 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 + }; + + if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0])) { + if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) + return 20; + else + return (int )ByteValTable[i]; + } else return 4; /* Take it easy. */ } @@ -3634,7 +3668,7 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) } static void -select_opt_exact_info(OptExactInfo* now, OptExactInfo* alt) +select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) { int v1, v2; @@ -3643,8 +3677,8 @@ select_opt_exact_info(OptExactInfo* now, OptExactInfo* alt) if (v1 <= 2 && v2 <= 2) { /* ByteValTable[x] is big value --> low price */ - v2 = map_position_value(now->s[0]); - v1 = map_position_value(alt->s[0]); + v2 = map_position_value(enc, now->s[0]); + v1 = map_position_value(enc, alt->s[0]); if (now->len > 1) v1 += 5; if (alt->len > 1) v2 += 5; @@ -3660,13 +3694,29 @@ select_opt_exact_info(OptExactInfo* now, OptExactInfo* alt) static void clear_opt_map_info(OptMapInfo* map) { - int i; + static OptMapInfo clean_info = { + {0, 0}, {0, 0}, 0, + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + } + }; - clear_mml(&map->mmd); - clear_opt_anc_info(&map->anc); - map->value = 0; - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - map->map[i] = 0; + xmemcpy(map, &clean_info, sizeof(OptMapInfo)); } static void @@ -3676,11 +3726,11 @@ copy_opt_map_info(OptMapInfo* to, OptMapInfo* from) } static void -add_char_opt_map_info(OptMapInfo* map, UChar c) +add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) { if (map->map[c] == 0) { map->map[c] = 1; - map->value += map_position_value(c); + map->value += map_position_value(enc, c); } } @@ -3695,7 +3745,7 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, OnigPairAmbigCodes* pccs; OnigAmbigType amb; - add_char_opt_map_info(map, p[0]); + add_char_opt_map_info(map, p[0], enc); code = ONIGENC_MBC_TO_CODE(enc, p, end); for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) { @@ -3706,7 +3756,7 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, if (pccs[i].from == code) { len = ONIGENC_CODE_TO_MBC(enc, pccs[i].to, buf); if (len < 0) return len; - add_char_opt_map_info(map, buf[0]); + add_char_opt_map_info(map, buf[0], enc); } } @@ -3718,7 +3768,7 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, ccode = ccs[i].items[j].code[0]; len = ONIGENC_CODE_TO_MBC(enc, ccode, buf); if (len < 0) return len; - add_char_opt_map_info(map, buf[0]); + add_char_opt_map_info(map, buf[0], enc); } break; } @@ -3761,7 +3811,7 @@ comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) } static void -alt_merge_opt_map_info(OptMapInfo* to, OptMapInfo* add) 
+alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) { int i, val; @@ -3780,7 +3830,7 @@ alt_merge_opt_map_info(OptMapInfo* to, OptMapInfo* add) to->map[i] = 1; if (to->map[i]) - val += map_position_value(i); + val += map_position_value(enc, i); } to->value = val; @@ -3813,7 +3863,7 @@ copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from) } static void -concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add) +concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) { int exb_reach, exm_reach; OptAncInfo tanc; @@ -3848,8 +3898,8 @@ concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add) clear_opt_exact_info(&add->exb); } } - select_opt_exact_info(&to->exm, &add->exb); - select_opt_exact_info(&to->exm, &add->exm); + select_opt_exact_info(enc, &to->exm, &add->exb); + select_opt_exact_info(enc, &to->exm, &add->exm); if (to->expr.len > 0) { if (add->len.max > 0) { @@ -3857,9 +3907,9 @@ concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add) to->expr.len = add->len.max; if (to->expr.mmd.max == 0) - select_opt_exact_info(&to->exb, &to->expr); + select_opt_exact_info(enc, &to->exb, &to->expr); else - select_opt_exact_info(&to->exm, &to->expr); + select_opt_exact_info(enc, &to->exm, &to->expr); } } else if (add->expr.len > 0) { @@ -3878,7 +3928,7 @@ alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) alt_merge_opt_exact_info(&to->exb, &add->exb, env); alt_merge_opt_exact_info(&to->exm, &add->exm, env); alt_merge_opt_exact_info(&to->expr, &add->expr, env); - alt_merge_opt_map_info (&to->map, &add->map); + alt_merge_opt_map_info(env->enc, &to->map, &add->map); alt_merge_mml(&to->len, &add->len); } @@ -3908,7 +3958,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) r = optimize_node_left(NCONS(nd).left, &nopt, &nenv); if (r == 0) { add_mml(&nenv.mmd, &nopt.len); - concat_left_node_opt_info(opt, &nopt); + concat_left_node_opt_info(env->enc, opt, &nopt); } } while (r == 0 && 
IS_NOT_NULL(nd = NCONS(nd).right)); } @@ -3939,7 +3989,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, NSTRING_IS_RAW(node), env->enc); if (slen > 0) { - add_char_opt_map_info(&opt->map, *(sn->s)); + add_char_opt_map_info(&opt->map, *(sn->s), env->enc); } set_mml(&opt->len, slen, slen); } @@ -3978,7 +4028,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) /* no need to check ignore case. (setted in setup_tree()) */ - if (IS_NOT_NULL(cc->mbuf) || cc->not != 0) { + if (IS_NOT_NULL(cc->mbuf) || IS_CCLASS_NOT(cc)) { OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); @@ -3987,8 +4037,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) else { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { z = BITSET_AT(cc->bs, i); - if ((z && !cc->not) || (!z && cc->not)) { - add_char_opt_map_info(&opt->map, (UChar )i); + if ((z && !IS_CCLASS_NOT(cc)) || (!z && IS_CCLASS_NOT(cc))) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); } } set_mml(&opt->len, 1, 1); @@ -4009,7 +4059,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case CTYPE_NOT_WORD: for (i = 0; i < SINGLE_BYTE_SIZE; i++) { if (! 
ONIGENC_IS_CODE_WORD(env->enc, i)) { - add_char_opt_map_info(&opt->map, (UChar )i); + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); } } break; @@ -4017,7 +4067,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case CTYPE_WORD: for (i = 0; i < SINGLE_BYTE_SIZE; i++) { if (ONIGENC_IS_CODE_WORD(env->enc, i)) { - add_char_opt_map_info(&opt->map, (UChar )i); + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); } } break; @@ -4245,7 +4295,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) else { int allow_reverse; - reg->exact = onig_strdup(e->s, e->s + e->len); + reg->exact = k_strdup(e->s, e->s + e->len); CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY); reg->exact_end = reg->exact + e->len; @@ -4334,7 +4384,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) } if (opt.exb.len > 0 || opt.exm.len > 0) { - select_opt_exact_info(&opt.exb, &opt.exm); + select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); if (opt.map.value > 0 && comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { goto set_map; @@ -4506,7 +4556,7 @@ onig_free_body(regex_t* reg) if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); - if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); + if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); #ifdef USE_NAMED_GROUP onig_names_free(reg); @@ -4579,11 +4629,12 @@ onig_clone(regex_t** to, regex_t* from) int r, size; regex_t* reg; - if (ONIG_STATE(from) == ONIG_STATE_NORMAL) { - from->state++; /* increment as search counter */ - if (IS_NOT_NULL(from->chain)) { +#ifdef USE_MULTI_THREAD_SYSTEM + if (ONIG_STATE(from) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(from); + if (IS_NOT_NULL(from->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { onig_chain_reduce(from); - from->state++; + ONIG_STATE_INC(from); } } else { @@ -4593,19 +4644,20 @@ onig_clone(regex_t** to, regex_t* from) 
return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; THREAD_PASS; } - from->state++; /* increment as search counter */ + ONIG_STATE_INC(from); } +#endif /* USE_MULTI_THREAD_SYSTEM */ r = onig_alloc_init(&reg, ONIG_OPTION_NONE, ONIGENC_AMBIGUOUS_MATCH_DEFAULT, from->enc, ONIG_SYNTAX_DEFAULT); if (r != 0) { - from->state--; + ONIG_STATE_DEC(from); return r; } xmemcpy(reg, from, sizeof(onig_t)); - reg->state = ONIG_STATE_NORMAL; reg->chain = (regex_t* )NULL; + reg->state = ONIG_STATE_NORMAL; if (from->p) { reg->p = (UChar* )xmalloc(reg->alloc); @@ -4638,12 +4690,12 @@ onig_clone(regex_t** to, regex_t* from) reg->name_table = names_clone(from); /* names_clone is not implemented */ #endif - from->state--; + ONIG_STATE_DEC(from); *to = reg; return 0; mem_error: - from->state--; + ONIG_STATE_DEC(from); return ONIGERR_MEMORY; } #endif @@ -4839,6 +4891,7 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, *reg = (regex_t* )xmalloc(sizeof(regex_t)); if (IS_NULL(*reg)) return ONIGERR_MEMORY; + (*reg)->state = ONIG_STATE_MODIFY; if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { option |= syntax->options; @@ -4847,7 +4900,6 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, else option |= syntax->options; - (*reg)->state = ONIG_STATE_NORMAL; (*reg)->enc = enc; (*reg)->options = option; (*reg)->syntax = syntax; @@ -4910,9 +4962,14 @@ onig_init() return 0; } + extern int onig_end() { + extern int onig_free_shared_cclass_table(); + + THREAD_ATOMIC_START; + #ifdef ONIG_DEBUG_STATISTICS onig_print_statistics(stderr); #endif @@ -4921,10 +4978,17 @@ onig_end() onig_free_node_list(); #endif +#ifdef USE_SHARED_CCLASS_TABLE + onig_free_shared_cclass_table(); +#endif + onig_inited = 0; + + THREAD_ATOMIC_END; return 0; } + #ifdef ONIG_DEBUG OnigOpInfoType OnigOpInfo[] = { @@ -4950,6 +5014,7 @@ OnigOpInfoType OnigOpInfo[] = { { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, { 
OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, + { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, { OP_ANYCHAR, "anychar", ARG_NON }, { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, @@ -5203,6 +5268,16 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, fprintf(f, ":%d:%d:%d", n, (int )code, len); break; + case OP_CCLASS_NODE: + { + CClassNode *cc; + + GET_POINTER_INC(cc, bp); + n = bitset_on_num(cc->bs); + fprintf(f, ":%u:%d", (unsigned int )cc, n); + } + break; + case OP_BACKREFN_IC: mem = *((MemNumType* )bp); bp += SIZE_MEMNUM; @@ -5330,7 +5405,7 @@ print_indent_tree(FILE* f, Node* node, int indent) case N_CCLASS: fprintf(f, "<cclass:%x>", (int )node); - if (NCCLASS(node).not) fputs(" not", f); + if (IS_CCLASS_NOT(&NCCLASS(node))) fputs(" not", f); if (NCCLASS(node).mbuf) { BBuf* bbuf = NCCLASS(node).mbuf; for (i = 0; i < bbuf->used; i++) { diff --git a/regexec.c b/regexec.c index 07af4fe104..795a26dd76 100644 --- a/regexec.c +++ b/regexec.c @@ -2,7 +2,7 @@ regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2004 K.Kosako + * Copyright (c) 2002-2005 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -274,7 +274,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) /** stack **/ #define INVALID_STACK_INDEX -1 -typedef int StackIndex; +typedef long StackIndex; typedef struct _StackType { unsigned int type; @@ -986,7 +986,7 @@ trap_ensure(VALUE arg) TrapEnsureArg* ta = (TrapEnsureArg* )arg; if (ta->state == 0) { /* trap_exec() is not normal return */ - ta->reg->state--; + ONIG_STATE_DEC(ta->reg); if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p) xfree(ta->stk_base); @@ -1147,6 +1147,27 @@ onig_is_in_code_range(UChar* p, OnigCodePoint code) return ((low < n && code >= data[low * 2]) ? 
1 : 0); } +static int +code_is_in_cclass_node(void* node, OnigCodePoint code, int enclen) +{ + unsigned int in_cc; + CClassNode* cc = (CClassNode* )node; + + if (enclen == 1) { + in_cc = BITSET_AT(cc->bs, code); + } + else { + UChar* p = ((BBuf* )(cc->mbuf))->p; + in_cc = onig_is_in_code_range(p, code); + } + + if (IS_CCLASS_NOT(cc)) { + return (in_cc ? 0 : 1); + } + else { + return (in_cc ? 1 : 0); + } +} /* matching region of POSIX API */ typedef int regoff_t; @@ -1340,14 +1361,31 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC); { int len; - UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; DATA_ENSURE(1); + ss = s; + sp = p; + + exact1_ic_retry: len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); DATA_ENSURE(0); q = lowbuf; while (len-- > 0) { - if (*p != *q) goto fail; + if (*p != *q) { +#if 1 + if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { + ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; + s = ss; + p = sp; + goto exact1_ic_retry; + } + else + goto fail; +#else + goto fail; +#endif + } p++; q++; } } @@ -1424,7 +1462,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC); { int len; - UChar *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; GET_LENGTH_INC(tlen, p); endp = p + tlen; @@ -1432,11 +1470,28 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, while (p < endp) { sprev = s; DATA_ENSURE(1); + ss = s; + sp = p; + + exactn_ic_retry: len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); DATA_ENSURE(0); q = lowbuf; while (len-- > 0) { - if (*p != *q) goto fail; + if (*p != *q) { +#if 1 + if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { + ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; + s = ss; + p = sp; + goto exactn_ic_retry; + } + else + goto 
fail; +#else + goto fail; +#endif + } p++; q++; } } @@ -1655,6 +1710,24 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, STAT_OP_OUT; break; + case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE); + { + OnigCodePoint code; + void *node; + int mb_len; + UChar *ss; + + DATA_ENSURE(1); + GET_POINTER_INC(node, p); + mb_len = enc_len(encode, s); + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + if (code_is_in_cclass_node(node, code, mb_len) == 0) goto fail; + } + STAT_OP_OUT; + break; + case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR); DATA_ENSURE(1); n = enc_len(encode, s); @@ -2519,13 +2592,26 @@ str_lower_case_match(OnigEncoding enc, int ambig_flag, UChar* t, UChar* tend, UChar* p, UChar* end) { int lowlen; - UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *q, *tsave, *psave, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + tsave = t; + psave = p; + + retry: while (t < tend) { lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf); q = lowbuf; while (lowlen > 0) { - if (*t++ != *q++) return 0; + if (*t++ != *q++) { + if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { + ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; + t = tsave; + p = psave; + goto retry; + } + else + return 0; + } lowlen--; } } @@ -2538,9 +2624,7 @@ slow_search_ic(OnigEncoding enc, int ambig_flag, UChar* target, UChar* target_end, UChar* text, UChar* text_end, UChar* text_range) { - int lowlen; - UChar *t, *p, *s, *end, *z; - UChar lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *s, *end; end = text_end - (target_end - target) + 1; if (end > text_range) @@ -2549,21 +2633,10 @@ slow_search_ic(OnigEncoding enc, int ambig_flag, s = text; while (s < end) { - z = s; - lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s, text_end, lowbuf); - if (*target == *lowbuf) { - p = lowbuf + 1; - t = target + 1; - while (--lowlen > 0) { - if (*p != *t) break; - p++; t++; - } - if (lowlen == 0) { - if (str_lower_case_match(enc, ambig_flag, - t, target_end, s, text_end)) 
- return z; - } - } + if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end)) + return s; + + s += enc_len(enc, s); } return (UChar* )NULL; @@ -2605,9 +2678,7 @@ slow_search_backward_ic(OnigEncoding enc, int ambig_flag, UChar* text, UChar* adjust_text, UChar* text_end, UChar* text_start) { - int len, lowlen; - UChar *t, *p, *s, *z; - UChar lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *s; s = text_end - (target_end - target); if (s > text_start) @@ -2616,24 +2687,11 @@ slow_search_backward_ic(OnigEncoding enc, int ambig_flag, s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); while (s >= text) { - len = enc_len(enc, s); - z = s; - lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s, text_end, lowbuf); - if (*target == *lowbuf) { - p = lowbuf + 1; - t = target + 1; - while (--lowlen > 0) { - if (*p != *t) break; - p++; t++; - } - if (lowlen == 0) { - if (str_lower_case_match(enc, ambig_flag, - t, target_end, s, text_end)) - return z; - } - } + if (str_lower_case_match(enc, ambig_flag, + target, target_end, s, text_end)) + return s; - s = onigenc_get_prev_char_head(enc, adjust_text, z); + s = onigenc_get_prev_char_head(enc, adjust_text, s); } return (UChar* )NULL; @@ -2828,11 +2886,12 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region, UChar *prev; MatchArg msa; - if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - reg->state++; /* increment as search counter */ - if (IS_NOT_NULL(reg->chain)) { +#ifdef USE_MULTI_THREAD_SYSTEM + if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(reg); + if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { onig_chain_reduce(reg); - reg->state++; + ONIG_STATE_INC(reg); } } else { @@ -2842,8 +2901,9 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region, return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; THREAD_PASS; } - reg->state++; /* increment as search counter */ + ONIG_STATE_INC(reg); } +#endif /* USE_MULTI_THREAD_SYSTEM */ 
MATCH_ARG_INIT(msa, option, region, at); @@ -2863,7 +2923,7 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region, } MATCH_ARG_FREE(msa); - reg->state--; /* decrement as search counter */ + ONIG_STATE_DEC(reg); return r; } @@ -3098,11 +3158,12 @@ onig_search(regex_t* reg, UChar* str, UChar* end, UChar *s, *prev; MatchArg msa; - if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - reg->state++; /* increment as search counter */ - if (IS_NOT_NULL(reg->chain)) { +#ifdef USE_MULTI_THREAD_SYSTEM + if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(reg); + if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { onig_chain_reduce(reg); - reg->state++; + ONIG_STATE_INC(reg); } } else { @@ -3112,8 +3173,9 @@ onig_search(regex_t* reg, UChar* str, UChar* end, return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; THREAD_PASS; } - reg->state++; /* increment as search counter */ + ONIG_STATE_INC(reg); } +#endif /* USE_MULTI_THREAD_SYSTEM */ #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n", @@ -3360,7 +3422,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end, finish: MATCH_ARG_FREE(msa); - reg->state--; /* decrement as search counter */ + ONIG_STATE_DEC(reg); /* If result is mismatch and no FIND_NOT_EMPTY option, then the region is not setted in match_at(). 
*/ @@ -3381,7 +3443,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end, mismatch_no_msa: r = ONIG_MISMATCH; finish_no_msa: - reg->state--; /* decrement as search counter */ + ONIG_STATE_DEC(reg); #ifdef ONIG_DEBUG if (r != ONIG_MISMATCH) fprintf(stderr, "onig_search: error %d\n", r); @@ -3389,7 +3451,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end, return r; match: - reg->state--; /* decrement as search counter */ + ONIG_STATE_DEC(reg); MATCH_ARG_FREE(msa); return s - str; } diff --git a/regint.h b/regint.h index e77536c124..4cfd9c9768 100644 --- a/regint.h +++ b/regint.h @@ -4,7 +4,7 @@ regint.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2004 K.Kosako + * Copyright (c) 2002-2005 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -56,6 +56,7 @@ /* config */ /* spec. config */ +/* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */ #define USE_NAMED_GROUP #define USE_SUBEXP_CALL #define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */ @@ -65,6 +66,8 @@ #define USE_RECYCLE_NODE #define USE_OP_PUSH_OR_JUMP_EXACT #define USE_QUALIFIER_PEEK_NEXT +#define USE_ST_HASH_TABLE +#define USE_SHARED_CCLASS_TABLE #define INIT_MATCH_STACK_SIZE 160 #define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ @@ -76,17 +79,21 @@ #define USE_VARIABLE_META_CHARS #define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */ #define USE_POSIX_REGION_OPTION /* needed for POSIX API support */ +/* #define USE_MULTI_THREAD_SYSTEM */ #define THREAD_ATOMIC_START /* depend on thread system */ #define THREAD_ATOMIC_END /* depend on thread system */ #define THREAD_PASS /* depend on thread system */ #define CHECK_INTERRUPT /* depend on application */ #define xmalloc malloc #define xrealloc realloc +#define xcalloc calloc #define xfree free #else #include "ruby.h" #include "version.h" #include 
"rubysig.h" /* for DEFER_INTS, ENABLE_INTS */ + +#define USE_MULTI_THREAD_SYSTEM #define THREAD_ATOMIC_START DEFER_INTS #define THREAD_ATOMIC_END ENABLE_INTS #define THREAD_PASS rb_thread_schedule() @@ -101,17 +108,9 @@ #define DEFAULT_WARN_FUNCTION rb_warn #define DEFAULT_VERB_WARN_FUNCTION rb_warning -#if defined(RUBY_VERSION_MAJOR) -#if RUBY_VERSION_MAJOR > 1 || \ -(RUBY_VERSION_MAJOR == 1 && \ - defined(RUBY_VERSION_MINOR) && RUBY_VERSION_MINOR >= 8) -#define USE_ST_HASH_TABLE -#endif -#endif - #endif /* else NOT_RUBY */ -#define THREAD_PASS_LIMIT_COUNT 10 +#define THREAD_PASS_LIMIT_COUNT 8 #define xmemset memset #define xmemcpy memcpy #define xmemmove memmove @@ -124,6 +123,69 @@ #define xalloca alloca #endif + +#ifdef USE_MULTI_THREAD_SYSTEM +#define ONIG_STATE_INC(reg) (reg)->state++ +#define ONIG_STATE_DEC(reg) (reg)->state-- +#else +#define ONIG_STATE_INC(reg) /* Nothing */ +#define ONIG_STATE_DEC(reg) /* Nothing */ +#endif /* USE_MULTI_THREAD_SYSTEM */ + + +#define onig_st_is_member st_is_member + +#ifdef NOT_RUBY + +#define st_init_table onig_st_init_table +#define st_init_table_with_size onig_st_init_table_with_size +#define st_init_numtable onig_st_init_numtable +#define st_init_numtable_with_size onig_st_init_numtable_with_size +#define st_init_strtable onig_st_init_strtable +#define st_init_strtable_with_size onig_st_init_strtable_with_size +#define st_init_strend_table_with_size onig_st_init_strend_table_with_size +#define st_delete onig_st_delete +#define st_delete_safe onig_st_delete_safe +#define st_insert onig_st_insert +#define st_insert_strend onig_st_insert_strend +#define st_lookup onig_st_lookup +#define st_lookup_strend onig_st_lookup_strend +#define st_foreach onig_st_foreach +#define st_add_direct onig_st_add_direct +#define st_add_direct_strend onig_st_add_direct_strend +#define st_free_table onig_st_free_table +#define st_cleanup_safe onig_st_cleanup_safe +#define st_copy onig_st_copy +#define st_nothing_key_clone 
onig_st_nothing_key_clone +#define st_nothing_key_free onig_st_nothing_key_free + +#else /* NOT_RUBY */ + +#define onig_st_init_table st_init_table +#define onig_st_init_table_with_size st_init_table_with_size +#define onig_st_init_numtable st_init_numtable +#define onig_st_init_numtable_with_size st_init_numtable_with_size +#define onig_st_init_strtable st_init_strtable +#define onig_st_init_strtable_with_size st_init_strtable_with_size +#define onig_st_init_strend_table_with_size st_init_strend_table_with_size +#define onig_st_delete st_delete +#define onig_st_delete_safe st_delete_safe +#define onig_st_insert st_insert +#define onig_st_insert_strend st_insert_strend +#define onig_st_lookup st_lookup +#define onig_st_lookup_strend st_lookup_strend +#define onig_st_foreach st_foreach +#define onig_st_add_direct st_add_direct +#define onig_st_add_direct_strend st_add_direct_strend +#define onig_st_free_table st_free_table +#define onig_st_cleanup_safe st_cleanup_safe +#define onig_st_copy st_copy +#define onig_st_nothing_key_clone st_nothing_key_clone +#define onig_st_nothing_key_free st_nothing_key_free + +#endif /* NOT_RUBY */ + + #ifdef HAVE_STDLIB_H #include #endif @@ -139,9 +201,11 @@ #endif #include +#ifdef HAVE_SYS_TYPES_H #ifndef __BORLANDC__ #include #endif +#endif #ifdef ONIG_DEBUG # include @@ -483,6 +547,7 @@ enum OpCode { OP_CCLASS_NOT, OP_CCLASS_MB_NOT, OP_CCLASS_MIX_NOT, + OP_CCLASS_NODE, /* pointer to CClassNode node */ OP_ANYCHAR, /* "." */ OP_ANYCHAR_ML, /* "." 
multi-line */ @@ -570,6 +635,7 @@ typedef int AbsAddrType; typedef int LengthType; typedef int RepeatNumType; typedef short int MemNumType; +typedef void* PointerType; #define SIZE_OPCODE 1 #define SIZE_RELADDR sizeof(RelAddrType) @@ -579,7 +645,7 @@ typedef short int MemNumType; #define SIZE_REPEATNUM sizeof(RepeatNumType) #define SIZE_OPTION sizeof(OnigOptionType) #define SIZE_CODE_POINT sizeof(OnigCodePoint) - +#define SIZE_POINTER sizeof(PointerType) #ifdef PLATFORM_UNALIGNED_WORD_ACCESS @@ -604,6 +670,7 @@ typedef short int MemNumType; #define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType) #define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType) #define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType) +#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType) /* code point's address must be aligned address. */ #define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p)) @@ -679,6 +746,22 @@ typedef short int MemNumType; ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) +/* cclass node */ +#define FLAG_CCLASS_NOT 1 +#define FLAG_CCLASS_SHARE (1<<1) + +#define CCLASS_SET_NOT(cc) (cc)->flags |= FLAG_CCLASS_NOT +#define CCLASS_CLEAR_NOT(cc) (cc)->flags &= ~FLAG_CCLASS_NOT +#define CCLASS_SET_SHARE(cc) (cc)->flags |= FLAG_CCLASS_SHARE +#define IS_CCLASS_NOT(cc) (((cc)->flags & FLAG_CCLASS_NOT) != 0) +#define IS_CCLASS_SHARE(cc) (((cc)->flags & FLAG_CCLASS_SHARE) != 0) + +typedef struct { + int flags; + BitSet bs; + BBuf* mbuf; /* multi-byte info or NULL */ +} CClassNode; + #ifdef ONIG_DEBUG @@ -700,13 +783,11 @@ extern void onig_print_statistics P_((FILE* f)); extern char* onig_error_code_to_format P_((int code)); extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...)); -extern UChar* onig_strdup P_((UChar* s, UChar* end)); extern int 
onig_bbuf_init P_((BBuf* buf, int size)); extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax)); extern int onig_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigErrorInfo* einfo)); extern void onig_chain_reduce P_((regex_t* reg)); extern void onig_chain_link_add P_((regex_t* to, regex_t* add)); extern void onig_transfer P_((regex_t* to, regex_t* from)); -extern int onig_is_in_code_range P_((UChar* p, OnigCodePoint code)); #endif /* REGINT_H */ diff --git a/regparse.c b/regparse.c index 2d26786771..6fe9044bdd 100644 --- a/regparse.c +++ b/regparse.c @@ -2,7 +2,7 @@ regparse.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2004 K.Kosako + * Copyright (c) 2002-2005 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -219,21 +219,26 @@ k_strcpy(UChar* dest, UChar* src, UChar* end) } } -extern UChar* -onig_strdup(UChar* s, UChar* end) +static UChar* +strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) { - int len = end - s; + int slen, term_len, i; + UChar *r; - if (len > 0) { - UChar* r = (UChar* )xmalloc(len + 1); - CHECK_NULL_RETURN(r); - xmemcpy(r, s, len); - r[len] = (UChar )0; - return r; - } - else return NULL; + slen = end - s; + term_len = ONIGENC_MBC_MINLEN(enc); + + r = (UChar* )xmalloc(slen + term_len); + CHECK_NULL_RETURN(r); + xmemcpy(r, s, slen); + + for (i = 0; i < term_len; i++) + r[slen + i] = (UChar )0; + + return r; } + /* scan pattern methods */ #define PEND_VALUE 0 @@ -298,7 +303,7 @@ typedef struct { #ifdef USE_ST_HASH_TABLE -#include +#include "st.h" typedef st_table NameTable; typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ @@ -335,7 +340,7 @@ onig_print_names(FILE* fp, regex_t* reg) if (IS_NOT_NULL(t)) { fprintf(fp, "name table\n"); - st_foreach(t, 
i_print_name_entry, (HashDataType )fp); + onig_st_foreach(t, i_print_name_entry, (HashDataType )fp); fputs("\n", fp); } return 0; @@ -356,7 +361,7 @@ names_clear(regex_t* reg) NameTable* t = (NameTable* )reg->name_table; if (IS_NOT_NULL(t)) { - st_foreach(t, i_free_name_entry, 0); + onig_st_foreach(t, i_free_name_entry, 0); } return 0; } @@ -371,7 +376,7 @@ onig_names_free(regex_t* reg) if (r) return r; t = (NameTable* )reg->name_table; - if (IS_NOT_NULL(t)) st_free_table(t); + if (IS_NOT_NULL(t)) onig_st_free_table(t); reg->name_table = (void* )NULL; return 0; } @@ -379,33 +384,12 @@ onig_names_free(regex_t* reg) static NameEntry* name_find(regex_t* reg, UChar* name, UChar* name_end) { - int len; - UChar namebuf[NAMEBUF_SIZE_1]; - UChar *key; NameEntry* e; NameTable* t = (NameTable* )reg->name_table; e = (NameEntry* )NULL; if (IS_NOT_NULL(t)) { - if (*name_end == '\0') { - key = name; - } - else { - /* dirty, but st.c API claims NULL terminated key. */ - len = name_end - name; - if (len <= NAMEBUF_SIZE) { - xmemcpy(namebuf, name, len); - namebuf[len] = '\0'; - key = namebuf; - } - else { - key = onig_strdup(name, name_end); - if (IS_NULL(key)) return (NameEntry* )NULL; - } - } - - st_lookup(t, (HashDataType )key, (HashDataType * )&e); - if (key != name && key != namebuf) xfree(key); + onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e))); } return e; } @@ -422,7 +406,8 @@ static int i_names(UChar* key, NameEntry* e, INamesArg* arg) { int r = (*(arg->func))(e->name, - e->name + onigenc_str_bytelen_null(arg->enc, e->name), + /*e->name + onigenc_str_bytelen_null(arg->enc, e->name), */ + e->name + e->name_len, e->back_num, (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), arg->reg, arg->arg); @@ -447,11 +432,40 @@ onig_foreach_name(regex_t* reg, narg.reg = reg; narg.arg = arg; narg.enc = reg->enc; /* should be pattern encoding. 
*/ - st_foreach(t, i_names, (HashDataType )&narg); + onig_st_foreach(t, i_names, (HashDataType )&narg); } return narg.ret; } +static int +i_renumber_name(UChar* key, NameEntry* e, GroupNumRemap* map) +{ + int i; + + if (e->back_num > 1) { + for (i = 0; i < e->back_num; i++) { + e->back_refs[i] = map[e->back_refs[i]].new_val; + } + } + else if (e->back_num == 1) { + e->back_ref1 = map[e->back_ref1].new_val; + } + + return ST_CONTINUE; +} + +extern int +onig_renumber_name_table(regex_t* reg, GroupNumRemap* map) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + onig_st_foreach(t, i_renumber_name, (HashDataType )map); + } + return 0; +} + + extern int onig_number_of_names(regex_t* reg) { @@ -617,14 +631,16 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) if (IS_NULL(e)) { #ifdef USE_ST_HASH_TABLE if (IS_NULL(t)) { - reg->name_table = t = st_init_strtable(); + t = onig_st_init_strend_table_with_size(5); + reg->name_table = (void* )t; } e = (NameEntry* )xmalloc(sizeof(NameEntry)); CHECK_NULL_RETURN_VAL(e, ONIGERR_MEMORY); - e->name = onig_strdup(name, name_end); + e->name = strdup_with_null(reg->enc, name, name_end); if (IS_NULL(e->name)) return ONIGERR_MEMORY; - st_insert(t, (HashDataType )e->name, (HashDataType )e); + onig_st_insert_strend(t, e->name, (e->name + (name_end - name)), + (HashDataType )e); e->name_len = name_end - name; e->back_num = 0; @@ -669,7 +685,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) } e = &(t->e[t->num]); t->num++; - e->name = onig_strdup(name, name_end); + e->name = strdup_with_null(reg->enc, name, name_end); e->name_len = name_end - name; #endif } @@ -886,8 +902,11 @@ onig_node_free(Node* node) #ifdef USE_RECYCLE_NODE { FreeNode* n = (FreeNode* )node; + + THREAD_ATOMIC_START; n->next = FreeNodeList; FreeNodeList = n; + THREAD_ATOMIC_END; } #else xfree(node); @@ -899,8 +918,15 @@ onig_node_free(Node* node) break; case N_CCLASS: - if 
(NCCLASS(node).mbuf) - bbuf_free(NCCLASS(node).mbuf); + { + CClassNode* cc = &(NCCLASS(node)); + + if (IS_CCLASS_SHARE(cc)) + return ; + + if (cc->mbuf) + bbuf_free(cc->mbuf); + } break; case N_QUALIFIER: @@ -927,8 +953,11 @@ onig_node_free(Node* node) #ifdef USE_RECYCLE_NODE { FreeNode* n = (FreeNode* )node; + + THREAD_ATOMIC_START; n->next = FreeNodeList; FreeNodeList = n; + THREAD_ATOMIC_END; } #else xfree(node); @@ -959,8 +988,10 @@ node_new() #ifdef USE_RECYCLE_NODE if (IS_NOT_NULL(FreeNodeList)) { + THREAD_ATOMIC_START; node = (Node* )FreeNodeList; FreeNodeList = FreeNodeList->next; + THREAD_ATOMIC_END; return node; } #endif @@ -974,8 +1005,8 @@ static void initialize_cclass(CClassNode* cc) { BITSET_CLEAR(cc->bs); - cc->not = 0; - cc->mbuf = NULL; + cc->flags = 0; + cc->mbuf = NULL; } static Node* @@ -989,6 +1020,54 @@ node_new_cclass() return node; } +extern Node* +node_new_cclass_by_codepoint_range(int not, + OnigCodePoint sbr[], OnigCodePoint mbr[]) +{ + CClassNode* cc; + int n, i, j; + + Node* node = node_new(); + CHECK_NULL_RETURN(node); + node->type = N_CCLASS; + + cc = &(NCCLASS(node)); + cc->flags = 0; + if (not != 0) CCLASS_SET_NOT(cc); + + BITSET_CLEAR(cc->bs); + if (IS_NOT_NULL(sbr)) { + n = ONIGENC_CODE_RANGE_NUM(sbr); + for (i = 0; i < n; i++) { + for (j = ONIGENC_CODE_RANGE_FROM(sbr, i); + j <= (int )ONIGENC_CODE_RANGE_TO(sbr, i); j++) { + BITSET_SET_BIT(cc->bs, j); + } + } + } + + if (IS_NULL(mbr)) { + is_null: + cc->mbuf = NULL; + } + else { + BBuf* bbuf; + + n = ONIGENC_CODE_RANGE_NUM(mbr); + if (n == 0) goto is_null; + + bbuf = (BBuf* )xmalloc(sizeof(BBuf)); + CHECK_NULL_RETURN_VAL(bbuf, NULL); + bbuf->alloc = n + 1; + bbuf->used = n + 1; + bbuf->p = (UChar* )((void* )mbr); + + cc->mbuf = bbuf; + } + + return node; +} + static Node* node_new_ctype(int type) { @@ -1711,7 +1790,7 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc) BBuf *tbuf; int r; - if (cc->not != 0) { + if (IS_CCLASS_NOT(cc)) { bitset_invert(cc->bs); if (! 
ONIGENC_IS_SINGLEBYTE(enc)) { @@ -1722,7 +1801,7 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc) cc->mbuf = tbuf; } - cc->not = 0; + CCLASS_CLEAR_NOT(cc); } return 0; @@ -1736,10 +1815,10 @@ and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) BitSetRef bsr1, bsr2; BitSet bs1, bs2; - not1 = dest->not; + not1 = IS_CCLASS_NOT(dest); bsr1 = dest->bs; buf1 = dest->mbuf; - not2 = cc->not; + not2 = IS_CCLASS_NOT(cc); bsr2 = cc->bs; buf2 = cc->mbuf; @@ -1794,10 +1873,10 @@ or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) BitSetRef bsr1, bsr2; BitSet bs1, bs2; - not1 = dest->not; + not1 = IS_CCLASS_NOT(dest); bsr1 = dest->bs; buf1 = dest->mbuf; - not2 = cc->not; + not2 = IS_CCLASS_NOT(cc); bsr2 = cc->bs; buf2 = cc->mbuf; @@ -2158,7 +2237,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) UChar* p = *src; PFETCH_READY; - if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH; + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; PFETCH(c); switch (c) { @@ -2468,7 +2547,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) goto end; - if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH; + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; PFETCH(c); tok->escaped = 1; @@ -2576,9 +2655,9 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (p == prev) { /* can't read nothing. 
*/ num = 0; /* but, it's not error */ } - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = (OnigCodePoint )num; + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; } break; @@ -2669,7 +2748,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) PFETCH(c); if (c == MC_ESC(enc)) { - if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH; + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; tok->backp = p; PFETCH(c); @@ -2907,9 +2986,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (p == prev) { /* can't read nothing. */ num = 0; /* but, it's not error */ } - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = (OnigCodePoint )num; + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; } break; @@ -3057,7 +3136,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (num < 0) return num; /* set_raw: */ if (tok->u.c != num) { - tok->type = TK_CODE_POINT; + tok->type = TK_CODE_POINT; tok->u.code = (OnigCodePoint )num; } else { /* string */ @@ -3225,21 +3304,26 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) static int add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc, - int nsb, int nmb, - OnigCodePointRange *sbr, OnigCodePointRange *mbr) + OnigCodePoint sbr[], OnigCodePoint mbr[]) { int i, r; OnigCodePoint j; + int nsb = ONIGENC_CODE_RANGE_NUM(sbr); + int nmb = ONIGENC_CODE_RANGE_NUM(mbr); + if (not == 0) { for (i = 0; i < nsb; i++) { - for (j = sbr[i].from; j <= sbr[i].to; j++) { + for (j = ONIGENC_CODE_RANGE_FROM(sbr, i); + j <= ONIGENC_CODE_RANGE_TO(sbr, i); j++) { BITSET_SET_BIT(cc->bs, j); } } for (i = 0; i < nmb; i++) { - r = add_code_range_to_buf(&(cc->mbuf), mbr[i].from, mbr[i].to); + r = add_code_range_to_buf(&(cc->mbuf), + ONIGENC_CODE_RANGE_FROM(mbr, i), + ONIGENC_CODE_RANGE_TO(mbr, i)); if (r != 0) return r; } } @@ -3248,10 +3332,11 @@ add_ctype_to_cc_by_range(CClassNode* cc, 
int ctype, int not, OnigEncoding enc, if (ONIGENC_MBC_MINLEN(enc) == 1) { for (i = 0; i < nsb; i++) { - for (j = prev; j < sbr[i].from; j++) { + for (j = prev; + j < ONIGENC_CODE_RANGE_FROM(sbr, i); j++) { BITSET_SET_BIT(cc->bs, j); } - prev = sbr[i].to + 1; + prev = ONIGENC_CODE_RANGE_TO(sbr, i) + 1; } if (prev < 0x7f) { for (j = prev; j < 0x7f; j++) { @@ -3263,11 +3348,12 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc, } for (i = 0; i < nmb; i++) { - if (prev < mbr[i].from) { - r = add_code_range_to_buf(&(cc->mbuf), prev, mbr[i].from - 1); + if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) { + r = add_code_range_to_buf(&(cc->mbuf), prev, + ONIGENC_CODE_RANGE_FROM(mbr, i) - 1); if (r != 0) return r; } - prev = mbr[i].to + 1; + prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; } if (prev < 0x7fffffff) { r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff); @@ -3282,14 +3368,12 @@ static int add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) { int c, r; - int nsb, nmb; - OnigCodePointRange *sbr, *mbr; + OnigCodePoint *sbr, *mbr; OnigEncoding enc = env->enc; - r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &nsb, &nmb, &sbr, &mbr); + r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr); if (r == 0) { - return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, - nsb, nmb, sbr, mbr); + return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sbr, mbr); } else if (r != ONIG_NO_SUPPORT_CONFIG) { return r; @@ -3349,8 +3433,8 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) } else { for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) && - ! ONIGENC_IS_CODE_WORD(enc, c)) + if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* 0: invalid code point */ + && ! 
ONIGENC_IS_CODE_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c); } } @@ -3839,7 +3923,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, break; case TK_CODE_POINT: - v = (OnigCodePoint )tok->u.code; + v = tok->u.code; in_israw = 1; val_entry: len = ONIGENC_CODE_TO_MBCLEN(env->enc, v); @@ -4017,8 +4101,11 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, cc = prev_cc; } - cc->not = neg; - if (cc->not != 0 && + if (neg != 0) + CCLASS_SET_NOT(cc); + else + CCLASS_CLEAR_NOT(cc); + if (IS_CCLASS_NOT(cc) && IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) { int is_empty; @@ -4388,7 +4475,7 @@ make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc, for (j = 0; j < ccs[i].n; j++) { ci = &(ccs[i].items[j]); if (ci->len > 1) { /* compound only */ - if (cc->not) clear_not_flag_cclass(cc, enc); + if (IS_CCLASS_NOT(cc)) clear_not_flag_cclass(cc, enc); clen = ci->len; for (k = 0; k < clen; k++) { @@ -4417,6 +4504,98 @@ make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc, return n; } + +#ifdef USE_SHARED_CCLASS_TABLE + +#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8 + +/* for ctype node hash table */ + +typedef struct { + OnigEncoding enc; + int not; + int type; +} type_cclass_key; + +static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y) +{ + if (x->type != y->type) return 1; + if (x->enc != y->enc) return 1; + if (x->not != y->not) return 1; + return 0; +} + +static int type_cclass_hash(type_cclass_key* key) +{ + int i, val; + unsigned char *p; + + val = 0; + + p = (unsigned char* )&(key->enc); + for (i = 0; i < sizeof(OnigEncodingType); i++) { + val = val * 997 + (int )*p++; + } + + p = (unsigned char* )(&key->type); + for (i = 0; i < sizeof(int); i++) { + val = val * 997 + (int )*p++; + } + + val += key->not; + return val + (val >> 5); +} + +static int type_cclass_key_free(st_data_t x) +{ + xfree((void* )x); + return 0; +} + +static st_data_t 
type_cclass_key_clone(st_data_t x) +{ + type_cclass_key* new_key; + type_cclass_key* key = (type_cclass_key* )x; + + new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key)); + *new_key = *key; + return (st_data_t )new_key; +} + +static struct st_hash_type type_type_cclass_hash = { + type_cclass_cmp, + type_cclass_hash, + type_cclass_key_free, + type_cclass_key_clone +}; + +static st_table* OnigTypeCClassTable; + + +static int +i_free_shared_class(type_cclass_key* key, Node* node, void* arg) +{ + if (IS_NOT_NULL(node)) { + CClassNode* cc = &(NCCLASS(node)); + if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf); + xfree(node); + } + return ST_DELETE; +} + +extern int +onig_free_shared_cclass_table() +{ + if (IS_NOT_NULL(OnigTypeCClassTable)) { + onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0); + } + + return 0; +} + +#endif /* USE_SHARED_CCLASS_TABLE */ + + static int parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env) @@ -4561,13 +4740,63 @@ parse_exp(Node** np, OnigToken* tok, int term, CClassNode* cc; int ctype, not; - ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not); +#ifdef USE_SHARED_CCLASS_TABLE + OnigCodePoint *sbr, *mbr; - *np = node_new_cclass(); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); - cc = &(NCCLASS(*np)); - add_ctype_to_cc(cc, ctype, 0, env); - if (not != 0) CCLASS_SET_NOT(cc); + ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not); + r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr); + if (r == 0 && + ONIGENC_CODE_RANGE_NUM(mbr) + >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) { + type_cclass_key key; + type_cclass_key* new_key; + + key.enc = env->enc; + key.not = not; + key.type = ctype; + + THREAD_ATOMIC_START; + + if (IS_NULL(OnigTypeCClassTable)) { + OnigTypeCClassTable + = onig_st_init_table_with_size(&type_type_cclass_hash, 10); + if (IS_NULL(OnigTypeCClassTable)) { + THREAD_ATOMIC_END; + return ONIGERR_MEMORY; + } + } + else { + if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key, +
(st_data_t* )np)) { + THREAD_ATOMIC_END; + break; + } + } + + *np = node_new_cclass_by_codepoint_range(not, sbr, mbr); + if (IS_NULL(*np)) { + THREAD_ATOMIC_END; + return ONIGERR_MEMORY; + } + + CCLASS_SET_SHARE(&(NCCLASS(*np))); + new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key)); + onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key, + (st_data_t )*np); + + THREAD_ATOMIC_END; + } + else { +#endif + ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not); + *np = node_new_cclass(); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + cc = &(NCCLASS(*np)); + add_ctype_to_cc(cc, ctype, 0, env); + if (not != 0) CCLASS_SET_NOT(cc); +#ifdef USE_SHARED_CCLASS_TABLE + } +#endif } break; @@ -4605,7 +4834,8 @@ parse_exp(Node** np, OnigToken* tok, int term, for (i = 0; i < n; i++) { in_cc = onig_is_code_in_cc(env->enc, ccs[i].from, cc); - if ((in_cc != 0 && cc->not == 0) || (in_cc == 0 && cc->not != 0)) { + if ((in_cc != 0 && !IS_CCLASS_NOT(cc)) || + (in_cc == 0 && IS_CCLASS_NOT(cc))) { if (ONIGENC_MBC_MINLEN(env->enc) > 1 || ccs[i].from >= SINGLE_BYTE_SIZE) { /* if (cc->not) clear_not_flag_cclass(cc, env->enc); */ diff --git a/regparse.h b/regparse.h index 5982ec8081..6014b9290b 100644 --- a/regparse.h +++ b/regparse.h @@ -4,7 +4,7 @@ regparse.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2004 K.Kosako + * Copyright (c) 2002-2005 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -95,8 +95,6 @@ #define BACKREFS_P(br) \ (IS_NOT_NULL((br)->back_dynamic) ?
(br)->back_dynamic : (br)->back_static); -#define CCLASS_SET_NOT(cc) (cc)->not = 1 - #define NQ_TARGET_ISNOT_EMPTY 0 #define NQ_TARGET_IS_EMPTY 1 #define NQ_TARGET_IS_EMPTY_MEM 2 @@ -111,11 +109,14 @@ typedef struct { UChar buf[NODE_STR_BUF_SIZE]; } StrNode; +/* move to regint.h */ +#if 0 typedef struct { - int not; + int flags; BitSet bs; BBuf* mbuf; /* multi-byte info or NULL */ } CClassNode; +#endif typedef struct { int state; @@ -280,6 +281,15 @@ typedef struct { #define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0) #define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0) + +#ifdef USE_NAMED_GROUP +typedef struct { + int new_val; +} GroupNumRemap; + +extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map)); +#endif + extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); extern int onig_strncmp P_((UChar* s1, UChar* s2, int n)); extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end)); diff --git a/st.c b/st.c index e4036f1025..cad73288f6 100644 --- a/st.c +++ b/st.c @@ -6,12 +6,29 @@ #include #include #include -#include "st.h" #ifdef _WIN32 #include #endif +#ifdef NOT_RUBY +#include "regint.h" +#else +#ifdef RUBY_PLATFORM +#define xmalloc ruby_xmalloc +#define xcalloc ruby_xcalloc +#define xrealloc ruby_xrealloc +#define xfree ruby_xfree + +void *xmalloc(long); +void *xcalloc(long, long); +void *xrealloc(void *, long); +void xfree(void *); +#endif +#endif + +#include "st.h" + typedef struct st_table_entry st_table_entry; struct st_table_entry { @@ -33,11 +50,14 @@ struct st_table_entry { * allocated initially * */ + static int numcmp(long, long); static int numhash(long); static struct st_hash_type type_numhash = { numcmp, numhash, + st_nothing_key_free, + st_nothing_key_clone }; /* extern int strcmp(const char *, const char *); */ @@ -45,19 +65,21 @@ static int strhash(const char *); static struct st_hash_type type_strhash = { strcmp, strhash, + 
st_nothing_key_free, + st_nothing_key_clone }; -#ifdef RUBY_PLATFORM -#define xmalloc ruby_xmalloc -#define xcalloc ruby_xcalloc -#define xrealloc ruby_xrealloc -#define xfree ruby_xfree +static int strend_cmp(st_strend_key*, st_strend_key*); +static int strend_hash(st_strend_key*); +static int strend_key_free(st_data_t key); +static st_data_t strend_key_clone(st_data_t x); -void *xmalloc(long); -void *xcalloc(long, long); -void *xrealloc(void *, long); -void xfree(void *); -#endif +static struct st_hash_type type_strend_hash = { + strend_cmp, + strend_hash, + strend_key_free, + strend_key_clone +}; static void rehash(st_table *); @@ -125,7 +147,7 @@ new_size(size) int newsize; for (i = 0, newsize = MINSIZE; - i < sizeof(primes)/sizeof(primes[0]); + i < (int )(sizeof(primes)/sizeof(primes[0])); i++, newsize <<= 1) { if (newsize > size) return primes[i]; @@ -206,6 +228,13 @@ st_init_strtable_with_size(size) return st_init_table_with_size(&type_strhash, size); } +st_table* +st_init_strend_table_with_size(size) + int size; +{ + return st_init_table_with_size(&type_strend_hash, size); +} + void st_free_table(table) st_table *table; @@ -267,6 +296,21 @@ st_lookup(table, key, value) } } +int +st_lookup_strend(table, str_key, end_key, value) + st_table *table; + unsigned char* str_key; + unsigned char* end_key; + st_data_t *value; +{ + st_strend_key key; + + key.s = (unsigned char* )str_key; + key.end = (unsigned char* )end_key; + + return st_lookup(table, (st_data_t )(&key), value); +} + #define ADD_DIRECT(table, key, value, hash_val, bin_pos)\ do {\ st_table_entry *entry;\ @@ -307,6 +351,22 @@ st_insert(table, key, value) } } +int +st_insert_strend(table, str_key, end_key, value) + st_table *table; + unsigned char* str_key; + unsigned char* end_key; + st_data_t value; +{ + st_strend_key* key; + + key = alloc(st_strend_key); + key->s = (unsigned char* )str_key; + key->end = (unsigned char* )end_key; + + return st_insert(table, (st_data_t )key, value); +} + void 
st_add_direct(table, key, value) st_table *table; @@ -320,6 +380,21 @@ st_add_direct(table, key, value) ADD_DIRECT(table, key, value, hash_val, bin_pos); } +void +st_add_direct_strend(table, str_key, end_key, value) + st_table *table; + unsigned char* str_key; + unsigned char* end_key; + st_data_t value; +{ + st_strend_key* key; + + key = alloc(st_strend_key); + key->s = (unsigned char* )str_key; + key->end = (unsigned char* )end_key; + st_add_direct(table, (st_data_t )key, value); +} + static void rehash(table) register st_table *table; @@ -379,6 +454,7 @@ st_copy(old_table) return 0; } *entry = *ptr; + entry->key = old_table->type->key_clone(ptr->key); entry->next = new_table->bins[i]; new_table->bins[i] = entry; ptr = ptr->next; @@ -522,6 +598,7 @@ st_foreach(table, func, arg) last->next = ptr->next; } ptr = ptr->next; + table->type->key_free(tmp->key); free(tmp); table->num_entries--; } @@ -581,3 +658,59 @@ numhash(n) { return n; } + +extern int +st_nothing_key_free(st_data_t key) { return 0; } + +extern st_data_t +st_nothing_key_clone(st_data_t x) { return x; } + +static int strend_cmp(st_strend_key* x, st_strend_key* y) +{ + unsigned char *p, *q; + int c; + + if ((x->end - x->s) != (y->end - y->s)) + return 1; + + p = x->s; + q = y->s; + while (p < x->end) { + c = (int )*p - (int )*q; + if (c != 0) return c; + + p++; q++; + } + + return 0; +} + +static int strend_hash(st_strend_key* x) +{ + int val; + unsigned char *p; + + val = 0; + p = x->s; + while (p < x->end) { + val = val * 997 + (int )*p++; + } + + return val + (val >> 5); +} + +static int strend_key_free(st_data_t x) +{ + xfree((void* )x); + return 0; +} + +static st_data_t strend_key_clone(st_data_t x) +{ + st_strend_key* new_key; + st_strend_key* key = (st_strend_key* )x; + + new_key = alloc(st_strend_key); + *new_key = *key; + return (st_data_t )new_key; +} diff --git a/st.h b/st.h index bc12624b90..fffbb32376 100644 --- a/st.h +++ b/st.h @@ -14,6 +14,8 @@ typedef struct st_table st_table; struct 
st_hash_type { int (*compare)(); int (*hash)(); + int (*key_free)(); + st_data_t (*key_clone)(); }; struct st_table { @@ -23,6 +25,11 @@ struct st_table { struct st_table_entry **bins; }; +typedef struct { + unsigned char* s; + unsigned char* end; +} st_strend_key; + #define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0) enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK}; @@ -44,22 +51,27 @@ st_table *st_init_numtable _((void)); st_table *st_init_numtable_with_size _((int)); st_table *st_init_strtable _((void)); st_table *st_init_strtable_with_size _((int)); +st_table *st_init_strend_table_with_size _((int)); int st_delete _((st_table *, st_data_t *, st_data_t *)); int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t)); int st_insert _((st_table *, st_data_t, st_data_t)); +int st_insert_strend _((st_table *, unsigned char*, unsigned char*, st_data_t)); int st_lookup _((st_table *, st_data_t, st_data_t *)); +int st_lookup_strend _((st_table *, unsigned char*, unsigned char*, st_data_t*)); void st_foreach _((st_table *, int (*)(ANYARGS), st_data_t)); void st_add_direct _((st_table *, st_data_t, st_data_t)); +void st_add_direct_strend _((st_table *, unsigned char*, unsigned char*, st_data_t)); void st_free_table _((st_table *)); void st_cleanup_safe _((st_table *, st_data_t)); st_table *st_copy _((st_table *)); +extern st_data_t st_nothing_key_clone _((st_data_t key)); +extern int st_nothing_key_free _((st_data_t key)); + #define ST_NUMCMP ((int (*)()) 0) #define ST_NUMHASH ((int (*)()) -2) #define st_numcmp ST_NUMCMP #define st_numhash ST_NUMHASH -int st_strhash(); - #endif /* ST_INCLUDED */ diff --git a/utf8.c b/utf8.c index 5a777e177c..e7095baa5c 100644 --- a/utf8.c +++ b/utf8.c @@ -2,7 +2,7 @@ utf8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2004 K.Kosako + * Copyright (c) 2002-2004 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -226,9 +226,9 @@ utf8_mbc_to_normalize(OnigAmbigType flag, UChar** pp, UChar* end, UChar* lower) if (*p == 195) { /* 195 == '\303' */ int c = *(p + 1); if (c >= 128) { - if (c <= (unsigned char)'\236' && /* upper */ + if (c <= (UChar )'\236' && /* upper */ (flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) { - if (c != (unsigned char)'\227') { + if (c != (UChar )'\227') { *lower++ = *p; *lower = (UChar )(c + 32); (*pp) += 2; @@ -236,7 +236,7 @@ utf8_mbc_to_normalize(OnigAmbigType flag, UChar** pp, UChar* end, UChar* lower) } } #if 0 - else if (c == '\237' && + else if (c == (UChar )'\237' && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { *lower++ = '\303'; *lower = '\237'; @@ -286,16 +286,16 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, UChar** pp, UChar* end) int c = *(p + 1); if (c >= 128) { if ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) { - if (c <= (unsigned char)'\236') { /* upper */ - if (c == '\227') return FALSE; + if (c <= (UChar )'\236') { /* upper */ + if (c == (UChar )'\227') return FALSE; return TRUE; } - else if (c >= (unsigned char)'\240' && c <= (unsigned char)'\276') { /* lower */ - if (c == '\267') return FALSE; + else if (c >= (UChar )'\240' && c <= (UChar )'\276') { /* lower */ + if (c == (UChar )'\267') return FALSE; return TRUE; } } - else if (c == '\237' && + else if (c == (UChar )'\237' && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { return TRUE; } @@ -306,163 +306,3266 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, UChar** pp, UChar* end) return FALSE; } -static int -utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) { - return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); - } - if ((ctype & ONIGENC_CTYPE_WORD) != 0) { -#ifdef USE_INVALID_CODE_SCHEME - if (code <= VALID_CODE_LIMIT) +static OnigCodePoint EmptyRange[] = { 0 }; + +static OnigCodePoint SBAlnum[] = { + 3, + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x0061, 0x007a 
+}; + +static OnigCodePoint MBAlnum[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 411, +#else + 6, #endif - return TRUE; - } + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0236 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x0357, + 0x035d, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x03fb, + 0x0400, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, 
+ 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be7, 0x0bef, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f29, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 
0x1040, 0x1049, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x1369, 0x1371, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x20d0, 0x20ea, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213d, 0x213f, + 0x2145, 0x2149, + 0x3005, 0x3006, + 0x302a, 0x302f, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 
0x4e00, 0x9fa5, + 0xa000, 0xa48c, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBAlnum */ + +static OnigCodePoint SBAlpha[] = { + 2, + 0x0041, 0x005a, + 0x0061, 0x007a +}; + +static OnigCodePoint MBAlpha[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 394, +#else + 6, +#endif + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 
0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0236 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x0357, + 0x035d, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x03fb, + 0x0400, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09f0, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a70, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 
0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 
0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x180b, 0x180d, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x20d0, 0x20ea, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213d, 0x213f, + 0x2145, 0x2149, + 0x3005, 0x3006, + 0x302a, 0x302f, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fa5, + 0xa000, 0xa48c, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 
0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x10400, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBAlpha */ + +static OnigCodePoint SBBlank[] = { + 2, + 0x0009, 0x0009, + 0x0020, 0x0020 +}; + +static OnigCodePoint MBBlank[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 7, +#else + 1, +#endif + 0x00a0, 0x00a0 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBBlank */ + +static OnigCodePoint SBCntrl[] = { + 2, + 0x0000, 0x001f, + 0x007f, 0x007f +}; + +static OnigCodePoint MBCntrl[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 18, +#else + 2, +#endif + 0x0080, 0x009f, + 0x00ad, 0x00ad +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0600, 0x0603, + 0x06dd, 0x06dd, + 0x070f, 0x070f, + 0x17b4, 
0x17b5, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x2063, + 0x206a, 0x206f, + 0xd800, 0xf8ff, + 0xfeff, 0xfeff, + 0xfff9, 0xfffb, + 0x1d173, 0x1d17a, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBCntrl */ + +static OnigCodePoint SBDigit[] = { + 1, + 0x0030, 0x0039 +}; + +static OnigCodePoint MBDigit[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 22, +#else + 0 +#endif +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be7, 0x0bef, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f29, + 0x1040, 0x1049, + 0x1369, 0x1371, + 0x17e0, 0x17e9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0xff10, 0xff19, + 0x104a0, 0x104a9, + 0x1d7ce, 0x1d7ff +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBDigit */ + +static OnigCodePoint SBGraph[] = { + 1, + 0x0021, 0x007e +}; + +static OnigCodePoint MBGraph[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 404, +#else + 1, +#endif + 0x00a1, 0x0236 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0250, 0x0357, + 0x035d, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03fb, + 0x0400, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060c, 0x0615, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0970, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 
0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be7, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 
0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fcf, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x10fb, 0x10fb, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x1361, 0x137c, + 0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1681, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x19e0, 0x19ff, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 
0x1ffe, + 0x200b, 0x2027, + 0x202a, 0x202e, + 0x2030, 0x2054, + 0x2057, 0x2057, + 0x2060, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x20a0, 0x20b1, + 0x20d0, 0x20ea, + 0x2100, 0x213b, + 0x213d, 0x214b, + 0x2153, 0x2183, + 0x2190, 0x23d0, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x2617, + 0x2619, 0x267d, + 0x2680, 0x2691, + 0x26a0, 0x26a1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27d0, 0x27eb, + 0x27f0, 0x2b0d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3001, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x327d, + 0x327f, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fa5, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xac00, 0xd7a3, + 0xe000, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1013f, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x1039f, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d300, 
0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBGraph */ + +static OnigCodePoint SBLower[] = { + 1, + 0x0061, 0x007a +}; + +static OnigCodePoint MBLower[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 423, +#else + 5, +#endif + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00df, 0x00f6, + 0x00f8, 0x00ff +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0101, 0x0101, + 0x0103, 0x0103, + 0x0105, 0x0105, + 0x0107, 0x0107, + 0x0109, 0x0109, + 0x010b, 0x010b, + 0x010d, 0x010d, + 0x010f, 0x010f, + 0x0111, 0x0111, + 0x0113, 0x0113, + 0x0115, 0x0115, + 0x0117, 0x0117, + 0x0119, 0x0119, + 0x011b, 0x011b, + 0x011d, 0x011d, + 0x011f, 0x011f, + 0x0121, 0x0121, + 0x0123, 0x0123, + 0x0125, 0x0125, + 0x0127, 0x0127, + 0x0129, 0x0129, + 0x012b, 0x012b, + 0x012d, 0x012d, + 0x012f, 0x012f, + 0x0131, 0x0131, + 0x0133, 0x0133, + 0x0135, 0x0135, + 0x0137, 0x0138, + 0x013a, 0x013a, + 0x013c, 0x013c, + 0x013e, 0x013e, + 0x0140, 0x0140, + 0x0142, 0x0142, + 0x0144, 0x0144, + 0x0146, 0x0146, + 0x0148, 0x0149, + 0x014b, 0x014b, + 0x014d, 0x014d, + 0x014f, 0x014f, + 0x0151, 0x0151, + 0x0153, 0x0153, + 0x0155, 0x0155, + 0x0157, 0x0157, + 0x0159, 0x0159, + 0x015b, 0x015b, + 0x015d, 0x015d, + 0x015f, 0x015f, + 0x0161, 0x0161, + 0x0163, 0x0163, + 0x0165, 0x0165, + 0x0167, 0x0167, + 0x0169, 0x0169, + 0x016b, 0x016b, + 0x016d, 0x016d, + 0x016f, 0x016f, + 0x0171, 0x0171, + 0x0173, 0x0173, + 0x0175, 0x0175, + 0x0177, 0x0177, + 
0x017a, 0x017a, + 0x017c, 0x017c, + 0x017e, 0x0180, + 0x0183, 0x0183, + 0x0185, 0x0185, + 0x0188, 0x0188, + 0x018c, 0x018d, + 0x0192, 0x0192, + 0x0195, 0x0195, + 0x0199, 0x019b, + 0x019e, 0x019e, + 0x01a1, 0x01a1, + 0x01a3, 0x01a3, + 0x01a5, 0x01a5, + 0x01a8, 0x01a8, + 0x01aa, 0x01ab, + 0x01ad, 0x01ad, + 0x01b0, 0x01b0, + 0x01b4, 0x01b4, + 0x01b6, 0x01b6, + 0x01b9, 0x01ba, + 0x01bd, 0x01bf, + 0x01c6, 0x01c6, + 0x01c9, 0x01c9, + 0x01cc, 0x01cc, + 0x01ce, 0x01ce, + 0x01d0, 0x01d0, + 0x01d2, 0x01d2, + 0x01d4, 0x01d4, + 0x01d6, 0x01d6, + 0x01d8, 0x01d8, + 0x01da, 0x01da, + 0x01dc, 0x01dd, + 0x01df, 0x01df, + 0x01e1, 0x01e1, + 0x01e3, 0x01e3, + 0x01e5, 0x01e5, + 0x01e7, 0x01e7, + 0x01e9, 0x01e9, + 0x01eb, 0x01eb, + 0x01ed, 0x01ed, + 0x01ef, 0x01f0, + 0x01f3, 0x01f3, + 0x01f5, 0x01f5, + 0x01f9, 0x01f9, + 0x01fb, 0x01fb, + 0x01fd, 0x01fd, + 0x01ff, 0x01ff, + 0x0201, 0x0201, + 0x0203, 0x0203, + 0x0205, 0x0205, + 0x0207, 0x0207, + 0x0209, 0x0209, + 0x020b, 0x020b, + 0x020d, 0x020d, + 0x020f, 0x020f, + 0x0211, 0x0211, + 0x0213, 0x0213, + 0x0215, 0x0215, + 0x0217, 0x0217, + 0x0219, 0x0219, + 0x021b, 0x021b, + 0x021d, 0x021d, + 0x021f, 0x021f, + 0x0221, 0x0221, + 0x0223, 0x0223, + 0x0225, 0x0225, + 0x0227, 0x0227, + 0x0229, 0x0229, + 0x022b, 0x022b, + 0x022d, 0x022d, + 0x022f, 0x022f, + 0x0231, 0x0231, + 0x0233, 0x0236, + 0x0250, 0x02af, + 0x0390, 0x0390, + 0x03ac, 0x03ce, + 0x03d0, 0x03d1, + 0x03d5, 0x03d7, + 0x03d9, 0x03d9, + 0x03db, 0x03db, + 0x03dd, 0x03dd, + 0x03df, 0x03df, + 0x03e1, 0x03e1, + 0x03e3, 0x03e3, + 0x03e5, 0x03e5, + 0x03e7, 0x03e7, + 0x03e9, 0x03e9, + 0x03eb, 0x03eb, + 0x03ed, 0x03ed, + 0x03ef, 0x03f3, + 0x03f5, 0x03f5, + 0x03f8, 0x03f8, + 0x03fb, 0x03fb, + 0x0430, 0x045f, + 0x0461, 0x0461, + 0x0463, 0x0463, + 0x0465, 0x0465, + 0x0467, 0x0467, + 0x0469, 0x0469, + 0x046b, 0x046b, + 0x046d, 0x046d, + 0x046f, 0x046f, + 0x0471, 0x0471, + 0x0473, 0x0473, + 0x0475, 0x0475, + 0x0477, 0x0477, + 0x0479, 0x0479, + 0x047b, 0x047b, + 0x047d, 0x047d, + 0x047f, 0x047f, + 
0x0481, 0x0481, + 0x048b, 0x048b, + 0x048d, 0x048d, + 0x048f, 0x048f, + 0x0491, 0x0491, + 0x0493, 0x0493, + 0x0495, 0x0495, + 0x0497, 0x0497, + 0x0499, 0x0499, + 0x049b, 0x049b, + 0x049d, 0x049d, + 0x049f, 0x049f, + 0x04a1, 0x04a1, + 0x04a3, 0x04a3, + 0x04a5, 0x04a5, + 0x04a7, 0x04a7, + 0x04a9, 0x04a9, + 0x04ab, 0x04ab, + 0x04ad, 0x04ad, + 0x04af, 0x04af, + 0x04b1, 0x04b1, + 0x04b3, 0x04b3, + 0x04b5, 0x04b5, + 0x04b7, 0x04b7, + 0x04b9, 0x04b9, + 0x04bb, 0x04bb, + 0x04bd, 0x04bd, + 0x04bf, 0x04bf, + 0x04c2, 0x04c2, + 0x04c4, 0x04c4, + 0x04c6, 0x04c6, + 0x04c8, 0x04c8, + 0x04ca, 0x04ca, + 0x04cc, 0x04cc, + 0x04ce, 0x04ce, + 0x04d1, 0x04d1, + 0x04d3, 0x04d3, + 0x04d5, 0x04d5, + 0x04d7, 0x04d7, + 0x04d9, 0x04d9, + 0x04db, 0x04db, + 0x04dd, 0x04dd, + 0x04df, 0x04df, + 0x04e1, 0x04e1, + 0x04e3, 0x04e3, + 0x04e5, 0x04e5, + 0x04e7, 0x04e7, + 0x04e9, 0x04e9, + 0x04eb, 0x04eb, + 0x04ed, 0x04ed, + 0x04ef, 0x04ef, + 0x04f1, 0x04f1, + 0x04f3, 0x04f3, + 0x04f5, 0x04f5, + 0x04f9, 0x04f9, + 0x0501, 0x0501, + 0x0503, 0x0503, + 0x0505, 0x0505, + 0x0507, 0x0507, + 0x0509, 0x0509, + 0x050b, 0x050b, + 0x050d, 0x050d, + 0x050f, 0x050f, + 0x0561, 0x0587, + 0x1d00, 0x1d2b, + 0x1d62, 0x1d6b, + 0x1e01, 0x1e01, + 0x1e03, 0x1e03, + 0x1e05, 0x1e05, + 0x1e07, 0x1e07, + 0x1e09, 0x1e09, + 0x1e0b, 0x1e0b, + 0x1e0d, 0x1e0d, + 0x1e0f, 0x1e0f, + 0x1e11, 0x1e11, + 0x1e13, 0x1e13, + 0x1e15, 0x1e15, + 0x1e17, 0x1e17, + 0x1e19, 0x1e19, + 0x1e1b, 0x1e1b, + 0x1e1d, 0x1e1d, + 0x1e1f, 0x1e1f, + 0x1e21, 0x1e21, + 0x1e23, 0x1e23, + 0x1e25, 0x1e25, + 0x1e27, 0x1e27, + 0x1e29, 0x1e29, + 0x1e2b, 0x1e2b, + 0x1e2d, 0x1e2d, + 0x1e2f, 0x1e2f, + 0x1e31, 0x1e31, + 0x1e33, 0x1e33, + 0x1e35, 0x1e35, + 0x1e37, 0x1e37, + 0x1e39, 0x1e39, + 0x1e3b, 0x1e3b, + 0x1e3d, 0x1e3d, + 0x1e3f, 0x1e3f, + 0x1e41, 0x1e41, + 0x1e43, 0x1e43, + 0x1e45, 0x1e45, + 0x1e47, 0x1e47, + 0x1e49, 0x1e49, + 0x1e4b, 0x1e4b, + 0x1e4d, 0x1e4d, + 0x1e4f, 0x1e4f, + 0x1e51, 0x1e51, + 0x1e53, 0x1e53, + 0x1e55, 0x1e55, + 0x1e57, 0x1e57, + 0x1e59, 0x1e59, + 
0x1e5b, 0x1e5b, + 0x1e5d, 0x1e5d, + 0x1e5f, 0x1e5f, + 0x1e61, 0x1e61, + 0x1e63, 0x1e63, + 0x1e65, 0x1e65, + 0x1e67, 0x1e67, + 0x1e69, 0x1e69, + 0x1e6b, 0x1e6b, + 0x1e6d, 0x1e6d, + 0x1e6f, 0x1e6f, + 0x1e71, 0x1e71, + 0x1e73, 0x1e73, + 0x1e75, 0x1e75, + 0x1e77, 0x1e77, + 0x1e79, 0x1e79, + 0x1e7b, 0x1e7b, + 0x1e7d, 0x1e7d, + 0x1e7f, 0x1e7f, + 0x1e81, 0x1e81, + 0x1e83, 0x1e83, + 0x1e85, 0x1e85, + 0x1e87, 0x1e87, + 0x1e89, 0x1e89, + 0x1e8b, 0x1e8b, + 0x1e8d, 0x1e8d, + 0x1e8f, 0x1e8f, + 0x1e91, 0x1e91, + 0x1e93, 0x1e93, + 0x1e95, 0x1e9b, + 0x1ea1, 0x1ea1, + 0x1ea3, 0x1ea3, + 0x1ea5, 0x1ea5, + 0x1ea7, 0x1ea7, + 0x1ea9, 0x1ea9, + 0x1eab, 0x1eab, + 0x1ead, 0x1ead, + 0x1eaf, 0x1eaf, + 0x1eb1, 0x1eb1, + 0x1eb3, 0x1eb3, + 0x1eb5, 0x1eb5, + 0x1eb7, 0x1eb7, + 0x1eb9, 0x1eb9, + 0x1ebb, 0x1ebb, + 0x1ebd, 0x1ebd, + 0x1ebf, 0x1ebf, + 0x1ec1, 0x1ec1, + 0x1ec3, 0x1ec3, + 0x1ec5, 0x1ec5, + 0x1ec7, 0x1ec7, + 0x1ec9, 0x1ec9, + 0x1ecb, 0x1ecb, + 0x1ecd, 0x1ecd, + 0x1ecf, 0x1ecf, + 0x1ed1, 0x1ed1, + 0x1ed3, 0x1ed3, + 0x1ed5, 0x1ed5, + 0x1ed7, 0x1ed7, + 0x1ed9, 0x1ed9, + 0x1edb, 0x1edb, + 0x1edd, 0x1edd, + 0x1edf, 0x1edf, + 0x1ee1, 0x1ee1, + 0x1ee3, 0x1ee3, + 0x1ee5, 0x1ee5, + 0x1ee7, 0x1ee7, + 0x1ee9, 0x1ee9, + 0x1eeb, 0x1eeb, + 0x1eed, 0x1eed, + 0x1eef, 0x1eef, + 0x1ef1, 0x1ef1, + 0x1ef3, 0x1ef3, + 0x1ef5, 0x1ef5, + 0x1ef7, 0x1ef7, + 0x1ef9, 0x1ef9, + 0x1f00, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 0x1f57, + 0x1f60, 0x1f67, + 0x1f70, 0x1f7d, + 0x1f80, 0x1f87, + 0x1f90, 0x1f97, + 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x210a, 0x210a, + 0x210e, 0x210f, + 0x2113, 0x2113, + 0x212f, 0x212f, + 0x2134, 0x2134, + 0x2139, 0x2139, + 0x213d, 0x213d, + 0x2146, 0x2149, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, + 0x1d41a, 0x1d433, 
+ 0x1d44e, 0x1d454, + 0x1d456, 0x1d467, + 0x1d482, 0x1d49b, + 0x1d4b6, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d4cf, + 0x1d4ea, 0x1d503, + 0x1d51e, 0x1d537, + 0x1d552, 0x1d56b, + 0x1d586, 0x1d59f, + 0x1d5ba, 0x1d5d3, + 0x1d5ee, 0x1d607, + 0x1d622, 0x1d63b, + 0x1d656, 0x1d66f, + 0x1d68a, 0x1d6a3, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6e1, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d71b, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d755, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d78f, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9 +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBLower */ + +static OnigCodePoint SBPrint[] = { + 2, + 0x0009, 0x000d, + 0x0020, 0x007e +}; + +static OnigCodePoint MBPrint[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 403, +#else + 2, +#endif + 0x0085, 0x0085, + 0x00a0, 0x0236 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0250, 0x0357, + 0x035d, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03fb, + 0x0400, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060c, 0x0615, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0970, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, 
+ 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be7, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 
0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fcf, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x10fb, 0x10fb, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x1361, 0x137c, + 0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1680, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x19e0, 0x19ff, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2000, 0x2054, + 0x2057, 0x2057, + 0x205f, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x20a0, 0x20b1, + 0x20d0, 0x20ea, + 0x2100, 0x213b, + 0x213d, 0x214b, + 0x2153, 0x2183, + 0x2190, 0x23d0, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x2617, + 0x2619, 0x267d, + 0x2680, 0x2691, + 0x26a0, 0x26a1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 
0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27d0, 0x27eb, + 0x27f0, 0x2b0d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3000, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x327d, + 0x327f, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fa5, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xac00, 0xd7a3, + 0xe000, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1013f, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x1039f, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 
0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBPrint */ + +static OnigCodePoint SBPunct[] = { + 9, + 0x0021, 0x0023, + 0x0025, 0x002a, + 0x002c, 0x002f, + 0x003a, 0x003b, + 0x003f, 0x0040, + 0x005b, 0x005d, + 0x005f, 0x005f, + 0x007b, 0x007b, + 0x007d, 0x007d +}; /* end of SBPunct */ + +static OnigCodePoint MBPunct[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 77, +#else + 5, +#endif + 0x00a1, 0x00a1, + 0x00ab, 0x00ab, + 0x00b7, 0x00b7, + 0x00bb, 0x00bb, + 0x00bf, 0x00bf +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x055a, 0x055f, + 0x0589, 0x058a, + 0x05be, 0x05be, + 0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05f3, 0x05f4, + 0x060c, 0x060d, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x066a, 0x066d, + 0x06d4, 0x06d4, + 0x0700, 0x070d, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0df4, 0x0df4, + 0x0e4f, 0x0e4f, + 0x0e5a, 0x0e5b, + 0x0f04, 0x0f12, + 0x0f3a, 0x0f3d, + 0x0f85, 0x0f85, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1361, 0x1368, + 0x166d, 0x166e, + 0x169b, 0x169c, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x180a, + 0x1944, 0x1945, + 0x2010, 0x2027, + 0x2030, 0x2043, + 0x2045, 0x2051, + 0x2053, 0x2054, + 0x2057, 0x2057, + 0x207d, 0x207e, + 0x208d, 0x208e, + 0x2329, 0x232a, + 0x23b4, 0x23b6, + 0x2768, 0x2775, + 0x27e6, 0x27eb, + 0x2983, 0x2998, + 0x29d8, 0x29db, + 0x29fc, 0x29fd, + 0x3001, 0x3003, + 0x3008, 0x3011, + 0x3014, 0x301f, + 0x3030, 0x3030, + 0x303d, 0x303d, + 0x30a0, 0x30a0, + 0x30fb, 0x30fb, + 0xfd3e, 0xfd3f, + 0xfe30, 0xfe52, + 0xfe54, 0xfe61, + 0xfe63, 0xfe63, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff0a, + 0xff0c, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3b, 0xff3d, + 0xff3f, 0xff3f, + 0xff5b, 0xff5b, + 0xff5d, 0xff5d, + 0xff5f, 0xff65, + 0x10100, 0x10101, + 0x1039f, 
0x1039f +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBPunct */ + +static OnigCodePoint SBSpace[] = { + 2, + 0x0009, 0x000d, + 0x0020, 0x0020 +}; + +static OnigCodePoint MBSpace[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 9, +#else + 2, +#endif + 0x0085, 0x0085, + 0x00a0, 0x00a0 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBSpace */ + +static OnigCodePoint SBUpper[] = { + 1, + 0x0041, 0x005a +}; + +static OnigCodePoint MBUpper[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 420, +#else + 2, +#endif + 0x00c0, 0x00d6, + 0x00d8, 0x00de +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0100, 0x0100, + 0x0102, 0x0102, + 0x0104, 0x0104, + 0x0106, 0x0106, + 0x0108, 0x0108, + 0x010a, 0x010a, + 0x010c, 0x010c, + 0x010e, 0x010e, + 0x0110, 0x0110, + 0x0112, 0x0112, + 0x0114, 0x0114, + 0x0116, 0x0116, + 0x0118, 0x0118, + 0x011a, 0x011a, + 0x011c, 0x011c, + 0x011e, 0x011e, + 0x0120, 0x0120, + 0x0122, 0x0122, + 0x0124, 0x0124, + 0x0126, 0x0126, + 0x0128, 0x0128, + 0x012a, 0x012a, + 0x012c, 0x012c, + 0x012e, 0x012e, + 0x0130, 0x0130, + 0x0132, 0x0132, + 0x0134, 0x0134, + 0x0136, 0x0136, + 0x0139, 0x0139, + 0x013b, 0x013b, + 0x013d, 0x013d, + 0x013f, 0x013f, + 0x0141, 0x0141, + 0x0143, 0x0143, + 0x0145, 0x0145, + 0x0147, 0x0147, + 0x014a, 0x014a, + 0x014c, 0x014c, + 0x014e, 0x014e, + 0x0150, 0x0150, + 0x0152, 0x0152, + 0x0154, 0x0154, + 0x0156, 0x0156, + 0x0158, 0x0158, + 0x015a, 0x015a, + 0x015c, 0x015c, + 0x015e, 0x015e, + 0x0160, 0x0160, + 0x0162, 0x0162, + 0x0164, 0x0164, + 0x0166, 0x0166, + 0x0168, 0x0168, + 0x016a, 0x016a, + 0x016c, 0x016c, + 0x016e, 0x016e, + 0x0170, 0x0170, + 0x0172, 0x0172, + 0x0174, 0x0174, + 0x0176, 0x0176, + 0x0178, 0x0179, + 0x017b, 0x017b, + 0x017d, 0x017d, + 0x0181, 0x0182, + 0x0184, 0x0184, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 
0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a2, 0x01a2, + 0x01a4, 0x01a4, + 0x01a6, 0x01a7, + 0x01a9, 0x01a9, + 0x01ac, 0x01ac, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b5, 0x01b5, + 0x01b7, 0x01b8, + 0x01bc, 0x01bc, + 0x01c4, 0x01c4, + 0x01c7, 0x01c7, + 0x01ca, 0x01ca, + 0x01cd, 0x01cd, + 0x01cf, 0x01cf, + 0x01d1, 0x01d1, + 0x01d3, 0x01d3, + 0x01d5, 0x01d5, + 0x01d7, 0x01d7, + 0x01d9, 0x01d9, + 0x01db, 0x01db, + 0x01de, 0x01de, + 0x01e0, 0x01e0, + 0x01e2, 0x01e2, + 0x01e4, 0x01e4, + 0x01e6, 0x01e6, + 0x01e8, 0x01e8, + 0x01ea, 0x01ea, + 0x01ec, 0x01ec, + 0x01ee, 0x01ee, + 0x01f1, 0x01f1, + 0x01f4, 0x01f4, + 0x01f6, 0x01f8, + 0x01fa, 0x01fa, + 0x01fc, 0x01fc, + 0x01fe, 0x01fe, + 0x0200, 0x0200, + 0x0202, 0x0202, + 0x0204, 0x0204, + 0x0206, 0x0206, + 0x0208, 0x0208, + 0x020a, 0x020a, + 0x020c, 0x020c, + 0x020e, 0x020e, + 0x0210, 0x0210, + 0x0212, 0x0212, + 0x0214, 0x0214, + 0x0216, 0x0216, + 0x0218, 0x0218, + 0x021a, 0x021a, + 0x021c, 0x021c, + 0x021e, 0x021e, + 0x0220, 0x0220, + 0x0222, 0x0222, + 0x0224, 0x0224, + 0x0226, 0x0226, + 0x0228, 0x0228, + 0x022a, 0x022a, + 0x022c, 0x022c, + 0x022e, 0x022e, + 0x0230, 0x0230, + 0x0232, 0x0232, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03d2, 0x03d4, + 0x03d8, 0x03d8, + 0x03da, 0x03da, + 0x03dc, 0x03dc, + 0x03de, 0x03de, + 0x03e0, 0x03e0, + 0x03e2, 0x03e2, + 0x03e4, 0x03e4, + 0x03e6, 0x03e6, + 0x03e8, 0x03e8, + 0x03ea, 0x03ea, + 0x03ec, 0x03ec, + 0x03ee, 0x03ee, + 0x03f4, 0x03f4, + 0x03f7, 0x03f7, + 0x03f9, 0x03fa, + 0x0400, 0x042f, + 0x0460, 0x0460, + 0x0462, 0x0462, + 0x0464, 0x0464, + 0x0466, 0x0466, + 0x0468, 0x0468, + 0x046a, 0x046a, + 0x046c, 0x046c, + 0x046e, 0x046e, + 0x0470, 0x0470, + 0x0472, 0x0472, + 0x0474, 0x0474, + 0x0476, 0x0476, + 0x0478, 0x0478, + 0x047a, 0x047a, + 0x047c, 0x047c, + 0x047e, 0x047e, + 0x0480, 0x0480, + 0x048a, 0x048a, + 0x048c, 0x048c, + 0x048e, 0x048e, + 0x0490, 0x0490, + 0x0492, 0x0492, + 0x0494, 0x0494, + 
0x0496, 0x0496, + 0x0498, 0x0498, + 0x049a, 0x049a, + 0x049c, 0x049c, + 0x049e, 0x049e, + 0x04a0, 0x04a0, + 0x04a2, 0x04a2, + 0x04a4, 0x04a4, + 0x04a6, 0x04a6, + 0x04a8, 0x04a8, + 0x04aa, 0x04aa, + 0x04ac, 0x04ac, + 0x04ae, 0x04ae, + 0x04b0, 0x04b0, + 0x04b2, 0x04b2, + 0x04b4, 0x04b4, + 0x04b6, 0x04b6, + 0x04b8, 0x04b8, + 0x04ba, 0x04ba, + 0x04bc, 0x04bc, + 0x04be, 0x04be, + 0x04c0, 0x04c1, + 0x04c3, 0x04c3, + 0x04c5, 0x04c5, + 0x04c7, 0x04c7, + 0x04c9, 0x04c9, + 0x04cb, 0x04cb, + 0x04cd, 0x04cd, + 0x04d0, 0x04d0, + 0x04d2, 0x04d2, + 0x04d4, 0x04d4, + 0x04d6, 0x04d6, + 0x04d8, 0x04d8, + 0x04da, 0x04da, + 0x04dc, 0x04dc, + 0x04de, 0x04de, + 0x04e0, 0x04e0, + 0x04e2, 0x04e2, + 0x04e4, 0x04e4, + 0x04e6, 0x04e6, + 0x04e8, 0x04e8, + 0x04ea, 0x04ea, + 0x04ec, 0x04ec, + 0x04ee, 0x04ee, + 0x04f0, 0x04f0, + 0x04f2, 0x04f2, + 0x04f4, 0x04f4, + 0x04f8, 0x04f8, + 0x0500, 0x0500, + 0x0502, 0x0502, + 0x0504, 0x0504, + 0x0506, 0x0506, + 0x0508, 0x0508, + 0x050a, 0x050a, + 0x050c, 0x050c, + 0x050e, 0x050e, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1e00, 0x1e00, + 0x1e02, 0x1e02, + 0x1e04, 0x1e04, + 0x1e06, 0x1e06, + 0x1e08, 0x1e08, + 0x1e0a, 0x1e0a, + 0x1e0c, 0x1e0c, + 0x1e0e, 0x1e0e, + 0x1e10, 0x1e10, + 0x1e12, 0x1e12, + 0x1e14, 0x1e14, + 0x1e16, 0x1e16, + 0x1e18, 0x1e18, + 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, + 0x1e1e, 0x1e1e, + 0x1e20, 0x1e20, + 0x1e22, 0x1e22, + 0x1e24, 0x1e24, + 0x1e26, 0x1e26, + 0x1e28, 0x1e28, + 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, + 0x1e2e, 0x1e2e, + 0x1e30, 0x1e30, + 0x1e32, 0x1e32, + 0x1e34, 0x1e34, + 0x1e36, 0x1e36, + 0x1e38, 0x1e38, + 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, + 0x1e3e, 0x1e3e, + 0x1e40, 0x1e40, + 0x1e42, 0x1e42, + 0x1e44, 0x1e44, + 0x1e46, 0x1e46, + 0x1e48, 0x1e48, + 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, + 0x1e4e, 0x1e4e, + 0x1e50, 0x1e50, + 0x1e52, 0x1e52, + 0x1e54, 0x1e54, + 0x1e56, 0x1e56, + 0x1e58, 0x1e58, + 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, + 0x1e5e, 0x1e5e, + 0x1e60, 0x1e60, + 0x1e62, 0x1e62, + 0x1e64, 0x1e64, + 0x1e66, 0x1e66, + 0x1e68, 0x1e68, + 
0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, + 0x1e6e, 0x1e6e, + 0x1e70, 0x1e70, + 0x1e72, 0x1e72, + 0x1e74, 0x1e74, + 0x1e76, 0x1e76, + 0x1e78, 0x1e78, + 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, + 0x1e7e, 0x1e7e, + 0x1e80, 0x1e80, + 0x1e82, 0x1e82, + 0x1e84, 0x1e84, + 0x1e86, 0x1e86, + 0x1e88, 0x1e88, + 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, + 0x1e8e, 0x1e8e, + 0x1e90, 0x1e90, + 0x1e92, 0x1e92, + 0x1e94, 0x1e94, + 0x1ea0, 0x1ea0, + 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, + 0x1ea6, 0x1ea6, + 0x1ea8, 0x1ea8, + 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, + 0x1eae, 0x1eae, + 0x1eb0, 0x1eb0, + 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, + 0x1eb6, 0x1eb6, + 0x1eb8, 0x1eb8, + 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, + 0x1ebe, 0x1ebe, + 0x1ec0, 0x1ec0, + 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, + 0x1ec6, 0x1ec6, + 0x1ec8, 0x1ec8, + 0x1eca, 0x1eca, + 0x1ecc, 0x1ecc, + 0x1ece, 0x1ece, + 0x1ed0, 0x1ed0, + 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, + 0x1ed6, 0x1ed6, + 0x1ed8, 0x1ed8, + 0x1eda, 0x1eda, + 0x1edc, 0x1edc, + 0x1ede, 0x1ede, + 0x1ee0, 0x1ee0, + 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, + 0x1ee6, 0x1ee6, + 0x1ee8, 0x1ee8, + 0x1eea, 0x1eea, + 0x1eec, 0x1eec, + 0x1eee, 0x1eee, + 0x1ef0, 0x1ef0, + 0x1ef2, 0x1ef2, + 0x1ef4, 0x1ef4, + 0x1ef6, 0x1ef6, + 0x1ef8, 0x1ef8, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f5f, + 0x1f68, 0x1f6f, + 0x1fb8, 0x1fbb, + 0x1fc8, 0x1fcb, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210b, 0x210d, + 0x2110, 0x2112, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x2130, 0x2131, + 0x2133, 0x2133, + 0x213e, 0x213f, + 0x2145, 0x2145, + 0xff21, 0xff3a, + 0x10400, 0x10427, + 0x1d400, 0x1d419, + 0x1d434, 0x1d44d, + 0x1d468, 0x1d481, + 0x1d49c, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b5, + 0x1d4d0, 0x1d4e9, + 0x1d504, 0x1d505, + 0x1d507, 
0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d538, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d56c, 0x1d585, + 0x1d5a0, 0x1d5b9, + 0x1d5d4, 0x1d5ed, + 0x1d608, 0x1d621, + 0x1d63c, 0x1d655, + 0x1d670, 0x1d689, + 0x1d6a8, 0x1d6c0, + 0x1d6e2, 0x1d6fa, + 0x1d71c, 0x1d734, + 0x1d756, 0x1d76e, + 0x1d790, 0x1d7a8 +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBUpper */ + +static OnigCodePoint SBXDigit[] = { + 3, + 0x0030, 0x0039, + 0x0041, 0x0046, + 0x0061, 0x0066 +}; + +static OnigCodePoint SBASCII[] = { + 1, + 0x0000, 0x007f +}; + +static OnigCodePoint SBWord[] = { + 4, + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x005f, 0x005f, + 0x0061, 0x007a +}; + +static OnigCodePoint MBWord[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 432, +#else + 8, +#endif + 0x00aa, 0x00aa, + 0x00b2, 0x00b3, + 0x00b5, 0x00b5, + 0x00b9, 0x00ba, + 0x00bc, 0x00be, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, +#ifndef USE_UNICODE_FULL_RANGE_CTYPE + 0x00f8, 0x7fffffff +#else /* not USE_UNICODE_FULL_RANGE_CTYPE */ + 0x00f8, 0x0236, + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x0357, + 0x035d, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x03fb, + 0x0400, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 
0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x09f4, 0x09f9, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be7, 0x0bf2, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 
0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f33, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1049, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x1369, 0x137c, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 
0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x203f, 0x2040, + 0x2054, 0x2054, + 0x2070, 0x2071, + 0x2074, 0x2079, + 0x207f, 0x2089, + 0x20d0, 0x20ea, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213d, 0x213f, + 0x2145, 0x2149, + 0x2153, 0x2183, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x3005, 0x3007, + 0x3021, 0x302f, + 0x3031, 0x3035, + 0x3038, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3192, 0x3195, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3220, 0x3229, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0x3400, 0x4db5, + 0x4e00, 0x9fa5, + 0xa000, 0xa48c, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe33, 0xfe34, + 0xfe4d, 0xfe4f, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff3f, 0xff3f, + 0xff41, 0xff5a, + 0xff65, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10107, 0x10133, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 
0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBWord */ - return FALSE; -} static int -utf8_get_ctype_code_range(int ctype, int* nsb, int* nmb, - OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]) +utf8_get_ctype_code_range(int ctype, + OnigCodePoint* sbr[], OnigCodePoint* mbr[]) { #define CR_SET(sbl,mbl) do { \ - *nsb = sizeof(sbl) / sizeof(OnigCodePointRange); \ - *nmb = sizeof(mbl) / sizeof(OnigCodePointRange); \ *sbr = sbl; \ *mbr = mbl; \ } while (0) #define CR_SB_SET(sbl) do { \ - *nsb = sizeof(sbl) / sizeof(OnigCodePointRange); \ - *nmb = 0; \ *sbr = sbl; \ + *mbr = EmptyRange; \ } while (0) - static OnigCodePointRange SBAlpha[] = { - { 0x41, 0x5a }, { 0x61, 0x7a } - }; - - static OnigCodePointRange MBAlpha[] = { - { 0xaa, 0xaa }, { 0xb5, 0xb5 }, - { 0xba, 0xba }, { 0xc0, 0xd6 }, - { 0xd8, 0xf6 }, { 0xf8, 0x220 } - }; - - static OnigCodePointRange SBBlank[] = { - { 0x09, 0x09 }, { 0x20, 0x20 } - }; - - static OnigCodePointRange MBBlank[] = { - { 0xa0, 0xa0 } - }; - - static OnigCodePointRange SBCntrl[] = { - { 0x00, 0x1f }, { 0x7f, 0x7f } - }; - - static OnigCodePointRange MBCntrl[] = { - { 0x80, 0x9f } - }; - - static 
OnigCodePointRange SBDigit[] = { - { 0x30, 0x39 } - }; - - static OnigCodePointRange SBGraph[] = { - { 0x21, 0x7e } - }; - - static OnigCodePointRange MBGraph[] = { - { 0xa1, 0x220 } - }; - - static OnigCodePointRange SBLower[] = { - { 0x61, 0x7a } - }; - - static OnigCodePointRange MBLower[] = { - { 0xaa, 0xaa }, { 0xb5, 0xb5 }, - { 0xba, 0xba }, { 0xdf, 0xf6 }, - { 0xf8, 0xff } - }; - - static OnigCodePointRange SBPrint[] = { - { 0x20, 0x7e } - }; - - static OnigCodePointRange MBPrint[] = { - { 0xa0, 0x220 } - }; - - static OnigCodePointRange SBPunct[] = { - { 0x21, 0x23 }, { 0x25, 0x2a }, - { 0x2c, 0x2f }, { 0x3a, 0x3b }, - { 0x3f, 0x40 }, { 0x5b, 0x5d }, - { 0x5f, 0x5f }, { 0x7b, 0x7b }, - { 0x7d, 0x7d } - }; - - static OnigCodePointRange MBPunct[] = { - { 0xa1, 0xa1 }, { 0xab, 0xab }, - { 0xad, 0xad }, { 0xb7, 0xb7 }, - { 0xbb, 0xbb }, { 0xbf, 0xbf } - }; - - static OnigCodePointRange SBSpace[] = { - { 0x09, 0x0d }, { 0x20, 0x20 } - }; - - static OnigCodePointRange MBSpace[] = { - { 0xa0, 0xa0 } - }; - - static OnigCodePointRange SBUpper[] = { - { 0x41, 0x5a } - }; - - static OnigCodePointRange MBUpper[] = { - { 0xc0, 0xd6 }, { 0xd8, 0xde } - }; - - static OnigCodePointRange SBXDigit[] = { - { 0x30, 0x39 }, { 0x41, 0x46 }, - { 0x61, 0x66 } - }; - - static OnigCodePointRange SBWord[] = { - { 0x30, 0x39 }, { 0x41, 0x5a }, - { 0x5f, 0x5f }, { 0x61, 0x7a } - }; - - static OnigCodePointRange MBWord[] = { - { 0xaa, 0xaa }, { 0xb2, 0xb3 }, - { 0xb5, 0xb5 }, { 0xb9, 0xba }, - { 0xbc, 0xbe }, { 0xc0, 0xd6 }, - { 0xd8, 0xf6 }, -#if 0 - { 0xf8, 0x220 } -#else - { 0xf8, 0x7fffffff } /* all multibyte code as word */ -#endif - }; - - static OnigCodePointRange SBAscii[] = { - { 0x00, 0x7f } - }; - - static OnigCodePointRange SBAlnum[] = { - { 0x30, 0x39 }, { 0x41, 0x5a }, - { 0x61, 0x7a } - }; - - static OnigCodePointRange MBAlnum[] = { - { 0xaa, 0xaa }, { 0xb5, 0xb5 }, - { 0xba, 0xba }, { 0xc0, 0xd6 }, - { 0xd8, 0xf6 }, { 0xf8, 0x220 } - }; - switch (ctype) { case 
ONIGENC_CTYPE_ALPHA: CR_SET(SBAlpha, MBAlpha); @@ -474,7 +3577,7 @@ utf8_get_ctype_code_range(int ctype, int* nsb, int* nmb, CR_SET(SBCntrl, MBCntrl); break; case ONIGENC_CTYPE_DIGIT: - CR_SB_SET(SBDigit); + CR_SET(SBDigit, MBDigit); break; case ONIGENC_CTYPE_GRAPH: CR_SET(SBGraph, MBGraph); @@ -501,7 +3604,7 @@ utf8_get_ctype_code_range(int ctype, int* nsb, int* nmb, CR_SET(SBWord, MBWord); break; case ONIGENC_CTYPE_ASCII: - CR_SB_SET(SBAscii); + CR_SB_SET(SBASCII); break; case ONIGENC_CTYPE_ALNUM: CR_SET(SBAlnum, MBAlnum); @@ -515,6 +3618,83 @@ utf8_get_ctype_code_range(int ctype, int* nsb, int* nmb, return 0; } +static int +utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + OnigCodePoint *range; +#endif + + if (code < 256) { + return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); + } + +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + + switch (ctype) { + case ONIGENC_CTYPE_ALPHA: + range = MBAlpha; + break; + case ONIGENC_CTYPE_BLANK: + range = MBBlank; + break; + case ONIGENC_CTYPE_CNTRL: + range = MBCntrl; + break; + case ONIGENC_CTYPE_DIGIT: + range = MBDigit; + break; + case ONIGENC_CTYPE_GRAPH: + range = MBGraph; + break; + case ONIGENC_CTYPE_LOWER: + range = MBLower; + break; + case ONIGENC_CTYPE_PRINT: + range = MBPrint; + break; + case ONIGENC_CTYPE_PUNCT: + range = MBPunct; + break; + case ONIGENC_CTYPE_SPACE: + range = MBSpace; + break; + case ONIGENC_CTYPE_UPPER: + range = MBUpper; + break; + case ONIGENC_CTYPE_XDIGIT: + return FALSE; + break; + case ONIGENC_CTYPE_WORD: + range = MBWord; + break; + case ONIGENC_CTYPE_ASCII: + return FALSE; + break; + case ONIGENC_CTYPE_ALNUM: + range = MBAlnum; + break; + + default: + return ONIGENCERR_TYPE_BUG; + break; + } + + return onig_is_in_code_range((UChar* )range, code); + +#else + + if ((ctype & ONIGENC_CTYPE_WORD) != 0) { +#ifdef USE_INVALID_CODE_SCHEME + if (code <= VALID_CODE_LIMIT) +#endif + return TRUE; + } +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ + + return 
FALSE; +} + static UChar* utf8_left_adjust_char_head(UChar* start, UChar* s) {