From 78dbaa16481c6ccda491d40695abe3d1c1d8c9b1 Mon Sep 17 00:00:00 2001 From: naruse Date: Fri, 1 Mar 2013 16:36:37 +0000 Subject: [PATCH] * Merge Onigmo 0fe387da2fee089254f6b04990541c731a26757f v5.13.3 [Bug#7972] [Bug#7974] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@39547 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 5 +++ enc/shift_jis.c | 2 +- include/ruby/oniguruma.h | 2 +- regcomp.c | 33 ++++++++++--------- regexec.c | 4 +-- regint.h | 10 +++--- regparse.c | 71 ++++++++++++++++++++++------------------ tool/enc-unicode.rb | 9 ++++- 8 files changed, 80 insertions(+), 56 deletions(-) diff --git a/ChangeLog b/ChangeLog index 329a78be99..8b38879b0e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Sat Mar 2 01:33:17 2013 NARUSE, Yui + + * Merge Onigmo 0fe387da2fee089254f6b04990541c731a26757f + v5.13.3 [Bug#7972] [Bug#7974] + Fri Mar 1 11:09:06 2013 Eric Hodel * lib/fileutils.rb: Revert r34669 which altered the way diff --git a/enc/shift_jis.c b/enc/shift_jis.c index e2bcaec189..5f5a802874 100644 --- a/enc/shift_jis.c +++ b/enc/shift_jis.c @@ -231,7 +231,7 @@ code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) } else if (code <= 0xffff) { int low = code & 0xff; - if (low < 0x40 || low == 0x7f || 0xfc < low) + if (! SJIS_ISMB_TRAIL(low)) return ONIGERR_INVALID_CODE_POINT_VALUE; return 2; } diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h index c01b6f7bb4..e3763adc59 100644 --- a/include/ruby/oniguruma.h +++ b/include/ruby/oniguruma.h @@ -40,7 +40,7 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 5 #define ONIGURUMA_VERSION_MINOR 13 -#define ONIGURUMA_VERSION_TEENY 1 +#define ONIGURUMA_VERSION_TEENY 3 #ifdef __cplusplus # ifndef HAVE_PROTOTYPES diff --git a/regcomp.c b/regcomp.c index 3b69786a94..ff74e292ed 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3,7 +3,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2008 K.Kosako - * Copyright (c) 2011-2012 K.Takata + * Copyright (c) 2011-2013 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -118,7 +118,7 @@ static int bitset_is_empty(BitSetRef bs) { int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { + for (i = 0; i < BITSET_SIZE; i++) { if (bs[i] != 0) return 0; } return 1; @@ -4311,7 +4311,7 @@ typedef struct { OptAncInfo anc; int reach_end; - int ignore_case; + int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */ int len; UChar s[OPT_EXACT_MAXLEN]; } OptExactInfo; @@ -4548,7 +4548,7 @@ clear_opt_exact_info(OptExactInfo* ex) clear_mml(&ex->mmd); clear_opt_anc_info(&ex->anc); ex->reach_end = 0; - ex->ignore_case = 0; + ex->ignore_case = -1; /* unset */ ex->len = 0; ex->s[0] = '\0'; } @@ -4566,11 +4566,10 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) UChar *p, *end; OptAncInfo tanc; - if (! to->ignore_case && add->ignore_case) { - if (to->len >= add->len) return ; /* avoid */ - - to->ignore_case = 1; - } + if (to->ignore_case < 0) + to->ignore_case = add->ignore_case; + else if (to->ignore_case != add->ignore_case) + return ; /* avoid */ p = add->s; end = p + add->len; @@ -4636,7 +4635,10 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) to->reach_end = 0; } to->len = i; - to->ignore_case |= add->ignore_case; + if (to->ignore_case < 0) + to->ignore_case = add->ignore_case; + else if (add->ignore_case >= 0) + to->ignore_case |= add->ignore_case; alt_merge_opt_anc_info(&to->anc, &add->anc); if (! to->reach_end) to->anc.right_anchor = 0; @@ -4666,8 +4668,8 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) if (alt->len > 1) v2 += 5; } - if (now->ignore_case == 0) v1 *= 2; - if (alt->ignore_case == 0) v2 *= 2; + if (now->ignore_case <= 0) v1 *= 2; + if (alt->ignore_case <= 0) v2 *= 2; if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) copy_opt_exact_info(now, alt); @@ -4765,7 +4767,7 @@ comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) if (m->value <= 0) return -1; - ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); + ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2); vm = COMP_EM_BASE * 5 * 2 / m->value; return comp_distance_value(&e->mmd, &m->mmd, ve, vm); } @@ -4947,7 +4949,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) if (! NSTRING_IS_AMBIG(node)) { concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - NSTRING_IS_RAW(node), env->enc); + is_raw, env->enc); + opt->exb.ignore_case = 0; if (slen > 0) { add_char_opt_map_info(&opt->map, *(sn->s), env->enc); } @@ -5260,7 +5263,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) allow_reverse = ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); - if (e->ignore_case) { + if (e->ignore_case > 0) { if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { r = set_bm_skip(reg->exact, reg->exact_end, reg, reg->map, &(reg->int_map), 1); diff --git a/regexec.c b/regexec.c index 743b2f3cf8..553186135b 100644 --- a/regexec.c +++ b/regexec.c @@ -2559,7 +2559,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, (int )mem, (intptr_t )s, s); #endif if (isnull == -1) goto fail; - goto null_check_found; + goto null_check_found; } } MOP_OUT; @@ -2585,7 +2585,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, (int )mem, (intptr_t )s, s); #endif if (isnull == -1) goto fail; - goto null_check_found; + goto null_check_found; } else { STACK_PUSH_NULL_CHECK_END(mem); diff --git a/regint.h b/regint.h index 1a2519bc1b..3b55d8e9f9 100644 --- a/regint.h +++ b/regint.h @@ -390,7 +390,7 @@ typedef unsigned int BitStatusType; /* bitset */ #define BITS_PER_BYTE 8 #define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE) -#define BITS_IN_ROOM ((int)sizeof(Bits) * BITS_PER_BYTE) +#define BITS_IN_ROOM ((int )sizeof(Bits) * BITS_PER_BYTE) #define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM) #ifdef PLATFORM_UNALIGNED_WORD_ACCESS @@ -405,11 +405,11 @@ typedef Bits* BitSetRef; #define BITSET_CLEAR(bs) do {\ int i;\ - for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \ + for (i = 0; i < BITSET_SIZE; i++) { (bs)[i] = 0; } \ } while (0) -#define BS_ROOM(bs,pos) (bs)[(int)(pos) / BITS_IN_ROOM] -#define BS_BIT(pos) (1 << ((int)(pos) % BITS_IN_ROOM)) +#define BS_ROOM(bs,pos) (bs)[(int )(pos) / BITS_IN_ROOM] +#define BS_BIT(pos) (1 << ((int )(pos) % BITS_IN_ROOM)) #define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos)) #define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos) @@ -457,7 +457,7 @@ typedef struct _BBuf { #define BBUF_WRITE1(buf,pos,byte) do{\ int used = (pos) + 1;\ if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ - (buf)->p[(pos)] = (byte);\ + (buf)->p[(pos)] = (UChar )(byte);\ if ((buf)->used < (unsigned int )used) (buf)->used = used;\ } while (0) diff --git a/regparse.c b/regparse.c index 74863829bc..82af0a8fc3 100644 --- a/regparse.c +++ b/regparse.c @@ -3,7 +3,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2008 K.Kosako - * Copyright (c) 2011-2012 K.Takata + * Copyright (c) 2011-2013 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -156,7 +156,7 @@ bbuf_clone(BBuf** rto, BBuf* from) #define BITSET_IS_EMPTY(bs,empty) do {\ int i;\ empty = 1;\ - for (i = 0; i < (int )BITSET_SIZE; i++) {\ + for (i = 0; i < BITSET_SIZE; i++) {\ if ((bs)[i] != 0) {\ empty = 0; break;\ }\ @@ -185,35 +185,35 @@ static void bitset_invert(BitSetRef bs) { int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); } + for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~(bs[i]); } } static void bitset_invert_to(BitSetRef from, BitSetRef to) { int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); } + for (i = 0; i < BITSET_SIZE; i++) { to[i] = ~(from[i]); } } static void bitset_and(BitSetRef dest, BitSetRef bs) { int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; } + for (i = 0; i < BITSET_SIZE; i++) { dest[i] &= bs[i]; } } static void bitset_or(BitSetRef dest, BitSetRef bs) { int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; } + for (i = 0; i < BITSET_SIZE; i++) { dest[i] |= bs[i]; } } static void bitset_copy(BitSetRef dest, BitSetRef bs) { int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; } + for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; } } extern int @@ -425,9 +425,6 @@ typedef struct { typedef st_table NameTable; typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ -#define NAMEBUF_SIZE 24 -#define NAMEBUF_SIZE_1 25 - #ifdef ONIG_DEBUG static int i_print_name_entry(UChar* key, NameEntry* e, void* arg) @@ -589,7 +586,7 @@ onig_number_of_names(regex_t* reg) NameTable* t = (NameTable* )reg->name_table; if (IS_NOT_NULL(t)) - return (int)t->num_entries; + return (int )t->num_entries; else return 0; } @@ -2627,7 +2624,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, name_end = p; PFETCH(c); if (c == end_code || c == ')' || c == '+' || c == '-') { - if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; break; } @@ -2752,7 +2749,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, name_end = p; PFETCH(c); if (c == end_code || c == ')') { - if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; break; } @@ -4124,24 +4121,36 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, int char_prop, ScanEnv* env) r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges); if (r == 0) { - r = add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges); - if ((r == 0) && ascii_range) { - if (not != 0) { - r = add_code_range_to_buf0(&(cc->mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE); - } - else { - CClassNode ccascii; - initialize_cclass(&ccascii); - if (ONIGENC_MBC_MINLEN(env->enc) > 1) { - add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F); + if (ascii_range) { + CClassNode ccwork; + initialize_cclass(&ccwork); + r = add_ctype_to_cc_by_range(&ccwork, ctype, not, env, sb_out, + ranges); + if (r == 0) { + if (not) { + r = add_code_range_to_buf0(&(ccwork.mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE); } else { - bitset_set_range(env, ccascii.bs, 0x00, 0x7F); + CClassNode ccascii; + initialize_cclass(&ccascii); + if (ONIGENC_MBC_MINLEN(env->enc) > 1) { + add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F); + } + else { + bitset_set_range(env, ccascii.bs, 0x00, 0x7F); + } + r = and_cclass(&ccwork, &ccascii, env); + if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf); } - r = and_cclass(cc, &ccascii, env); - if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf); + if (r == 0) { + r = or_cclass(cc, &ccwork, env); + } + if (IS_NOT_NULL(ccwork.mbuf)) bbuf_free(ccwork.mbuf); } } + else { + r = add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges); + } return r; } else if (r != ONIG_NO_SUPPORT_CONFIG) { @@ -4562,7 +4571,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, UChar* psave = p; int i, base = tok->base; - buf[0] = tok->u.c; + buf[0] = (UChar )tok->u.c; for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { r = fetch_token_in_cc(tok, &p, end, env); if (r < 0) goto err; @@ -4570,7 +4579,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, fetched = 1; break; } - buf[i] = tok->u.c; + buf[i] = (UChar )tok->u.c; } if (i < ONIGENC_MBC_MINLEN(env->enc)) { @@ -4706,7 +4715,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { CC_ESC_WARN(env, (UChar* )"-"); - goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */ + goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */ } r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; goto err; @@ -5684,7 +5693,7 @@ countbits(unsigned int bits) static int is_onechar_cclass(CClassNode* cc, OnigCodePoint* code) { - const OnigCodePoint not_found = (OnigCodePoint)-1; + const OnigCodePoint not_found = ONIG_LAST_CODE_POINT; OnigCodePoint c = not_found; int i; BBuf *bbuf = cc->mbuf; @@ -5710,7 +5719,7 @@ is_onechar_cclass(CClassNode* cc, OnigCodePoint* code) } /* check bitset */ - for (i = 0; i < (int )BITSET_SIZE; i++) { + for (i = 0; i < BITSET_SIZE; i++) { Bits b1 = cc->bs[i]; if (b1 != 0) { if (((b1 & (b1 - 1)) == 0) && (c == not_found)) { diff --git a/tool/enc-unicode.rb b/tool/enc-unicode.rb index 7584f6e030..255d9c5175 100755 --- a/tool/enc-unicode.rb +++ b/tool/enc-unicode.rb @@ -225,13 +225,20 @@ def parse_block(data) blocks << constname end +# shim for Ruby 1.8 +unless {}.respond_to?(:key) + class Hash + alias key index + end +end + $const_cache = {} # make_const(property, pairs, name): Prints a 'static const' structure for a # given property, group of paired codepoints, and a human-friendly name for # the group def make_const(prop, data, name) puts "\n/* '#{prop}': #{name} */" - if origprop = $const_cache.index(data) # don't use Hash#key because it is 1.9 feature + if origprop = $const_cache.key(data) puts "#define CR_#{prop} CR_#{origprop}" else $const_cache[prop] = data