* reg*.c: Merge Onigmo 5.15.0 38a870960aa7370051a3544

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@47598 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2014-09-15 16:18:41 +00:00
Родитель d198d64e04
Коммит d2a5354255
33 изменённых файлов: 390 добавлений и 236 удалений

Просмотреть файл

@ -1,3 +1,7 @@
Tue Sep 16 01:06:40 2014 NARUSE, Yui <naruse@ruby-lang.org>
* reg*.c: Merge Onigmo 5.15.0 38a870960aa7370051a3544
Mon Sep 15 16:21:10 2014 Eric Wong <e@80x24.org>
* io.c (struct io_advise_struct): 32 => 24 bytes on 64-bit

Просмотреть файл

@ -293,7 +293,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
numberof(CaseFoldMap), CaseFoldMap, 0,
flag, f, arg);
}
@ -504,7 +504,8 @@ static int
property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
{
UChar *s = p, *e = end;
const struct enc_property *prop = onig_jis_property((const char *)s, (unsigned int)(e-s));
const struct enc_property *prop =
onig_jis_property((const char* )s, (unsigned int )(e - s));
if (!prop) {
return onigenc_minimum_property_name_to_ctype(enc, s, e);

Просмотреть файл

@ -29,8 +29,6 @@
#include "regenc.h"
#define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
((EncISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)

Просмотреть файл

@ -208,7 +208,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
numberof(CaseFoldMap), CaseFoldMap, 1,
flag, f, arg);
}
@ -219,7 +219,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
numberof(CaseFoldMap), CaseFoldMap, 1,
flag, p, end, items);
}

Просмотреть файл

@ -197,7 +197,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
numberof(CaseFoldMap), CaseFoldMap, 1,
flag, f, arg);
}
@ -208,7 +208,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
numberof(CaseFoldMap), CaseFoldMap, 1,
flag, p, end, items);
}

Просмотреть файл

@ -210,7 +210,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
numberof(CaseFoldMap), CaseFoldMap, 1,
flag, f, arg);
}
@ -221,7 +221,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
numberof(CaseFoldMap), CaseFoldMap, 1,
flag, p, end, items);
}

Просмотреть файл

@ -204,7 +204,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
numberof(CaseFoldMap), CaseFoldMap, 1,
flag, f, arg);
}
@ -215,7 +215,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
numberof(CaseFoldMap), CaseFoldMap, 1,
flag, p, end, items);
}

Просмотреть файл

@ -206,7 +206,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
numberof(CaseFoldMap), CaseFoldMap, 1,
flag, f, arg);
}
@ -217,7 +217,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
numberof(CaseFoldMap), CaseFoldMap, 1,
flag, p, end, items);
}

Просмотреть файл

@ -29,8 +29,6 @@
#include "regenc.h"
#define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
#define ENC_ISO_8859_2_TO_LOWER_CASE(c) EncISO_8859_2_ToLowerCaseTable[c]
#define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \
((EncISO_8859_2_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)

Просмотреть файл

@ -204,7 +204,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
numberof(CaseFoldMap), CaseFoldMap, 1,
flag, f, arg);
}
@ -215,7 +215,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
numberof(CaseFoldMap), CaseFoldMap, 1,
flag, p, end, items);
}

Просмотреть файл

@ -206,7 +206,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
numberof(CaseFoldMap), CaseFoldMap, 1,
flag, f, arg);
}
@ -217,7 +217,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
numberof(CaseFoldMap), CaseFoldMap, 1,
flag, p, end, items);
}

Просмотреть файл

@ -194,7 +194,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
numberof(CaseFoldMap), CaseFoldMap, 0,
flag, f, arg);
}
@ -205,7 +205,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
numberof(CaseFoldMap), CaseFoldMap, 0,
flag, p, end, items);
}

Просмотреть файл

@ -190,7 +190,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
numberof(CaseFoldMap), CaseFoldMap, 0,
flag, f, arg);
}
@ -201,7 +201,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
numberof(CaseFoldMap), CaseFoldMap, 0,
flag, p, end, items);
}

Просмотреть файл

@ -197,7 +197,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
numberof(CaseFoldMap), CaseFoldMap, 1,
flag, f, arg);
}
@ -208,7 +208,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
numberof(CaseFoldMap), CaseFoldMap, 1,
flag, p, end, items);
}

Просмотреть файл

@ -183,7 +183,7 @@ koi8_r_apply_all_case_fold(OnigCaseFoldType flag,
void* arg, OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
numberof(CaseFoldMap), CaseFoldMap, 0,
flag, f, arg);
}
@ -193,7 +193,7 @@ koi8_r_get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
numberof(CaseFoldMap), CaseFoldMap, 0,
flag, p, end, items);
}

Просмотреть файл

@ -187,7 +187,7 @@ koi8_u_apply_all_case_fold(OnigCaseFoldType flag,
void* arg, OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
numberof(CaseFoldMap), CaseFoldMap, 0,
flag, f, arg);
}
@ -197,7 +197,7 @@ koi8_u_get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
numberof(CaseFoldMap), CaseFoldMap, 0,
flag, p, end, items);
}

Просмотреть файл

@ -278,7 +278,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
numberof(CaseFoldMap), CaseFoldMap, 0,
flag, f, arg);
}
@ -493,7 +493,8 @@ static int
property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
{
UChar *s = p, *e = end;
const struct enc_property *prop = onig_jis_property((const char *)s, (unsigned int)(e-s));
const struct enc_property *prop =
onig_jis_property((const char* )s, (unsigned int )(e - s));
if (!prop) {
return onigenc_minimum_property_name_to_ctype(enc, s, e);

Просмотреть файл

@ -141,7 +141,6 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)
#include "enc/unicode/name2ctype.h"
#define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
#define CODE_RANGES_NUM numberof(CodeRanges)
extern int

Просмотреть файл

@ -1,5 +1,5 @@
#include "regenc.h"
/* dummy for unsupported, stateful encoding */
#define ENC_DUMMY_UNICODE(name) ENC_REPLICATE(name, name "BE")
#define ENC_DUMMY_UNICODE(name) ENC_DUMMY(name)
ENC_DUMMY_UNICODE("UTF-16");
ENC_DUMMY_UNICODE("UTF-32");

Просмотреть файл

@ -29,10 +29,6 @@
#include "regenc.h"
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
#if 0
static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

Просмотреть файл

@ -29,10 +29,6 @@
#include "regenc.h"
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
#if 0
static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

Просмотреть файл

@ -167,7 +167,7 @@ cp1251_apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
numberof(CaseFoldMap), CaseFoldMap, 0,
flag, f, arg);
}
@ -176,7 +176,7 @@ cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
numberof(CaseFoldMap), CaseFoldMap, 0,
flag, p, end, items);
}

Просмотреть файл

@ -39,8 +39,8 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 5
#define ONIGURUMA_VERSION_MINOR 14
#define ONIGURUMA_VERSION_TEENY 1
#define ONIGURUMA_VERSION_MINOR 15
#define ONIGURUMA_VERSION_TEENY 0
#ifdef __cplusplus
# ifndef HAVE_PROTOTYPES

123
regcomp.c
Просмотреть файл

@ -330,9 +330,10 @@ static int compile_tree(Node* node, regex_t* reg);
(op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
static int
select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
{
int op;
OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
if (ignore_case) {
switch (str_len) {
@ -434,11 +435,11 @@ compile_tree_n_times(Node* node, int n, regex_t* reg)
}
static int
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
regex_t* reg ARG_UNUSED, int ignore_case)
{
int len;
int op = select_str_opcode(mb_len, str_len, ignore_case);
int op = select_str_opcode(mb_len, byte_len, ignore_case);
len = SIZE_OPCODE;
@ -446,15 +447,15 @@ add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
if (IS_NEED_STR_LEN_OP_EXACT(op))
len += SIZE_LENGTH;
len += mb_len * (int )str_len;
len += (int )byte_len;
return len;
}
static int
add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
regex_t* reg, int ignore_case)
{
int op = select_str_opcode(mb_len, str_len, ignore_case);
int op = select_str_opcode(mb_len, byte_len, ignore_case);
add_opcode(reg, op);
if (op == OP_EXACTMBN)
@ -462,12 +463,12 @@ add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
if (IS_NEED_STR_LEN_OP_EXACT(op)) {
if (op == OP_EXACTN_IC)
add_length(reg, mb_len * str_len);
add_length(reg, byte_len);
else
add_length(reg, str_len);
add_length(reg, byte_len / mb_len);
}
add_bytes(reg, s, mb_len * str_len);
add_bytes(reg, s, byte_len);
return 0;
}
@ -475,7 +476,7 @@ add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
static int
compile_length_string_node(Node* node, regex_t* reg)
{
int rlen, r, len, prev_len, slen, ambig;
int rlen, r, len, prev_len, blen, ambig;
OnigEncoding enc = reg->enc;
UChar *p, *prev;
StrNode* sn;
@ -489,24 +490,24 @@ compile_length_string_node(Node* node, regex_t* reg)
p = prev = sn->s;
prev_len = enclen(enc, p, sn->end);
p += prev_len;
slen = 1;
blen = prev_len;
rlen = 0;
for (; p < sn->end; ) {
len = enclen(enc, p, sn->end);
if (len == prev_len) {
slen++;
if (len == prev_len || ambig) {
blen += len;
}
else {
r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
rlen += r;
prev = p;
slen = 1;
blen = len;
prev_len = len;
}
p += len;
}
r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
rlen += r;
return rlen;
}
@ -523,7 +524,7 @@ compile_length_string_raw_node(StrNode* sn, regex_t* reg)
static int
compile_string_node(Node* node, regex_t* reg)
{
int r, len, prev_len, slen, ambig;
int r, len, prev_len, blen, ambig;
OnigEncoding enc = reg->enc;
UChar *p, *prev, *end;
StrNode* sn;
@ -538,25 +539,25 @@ compile_string_node(Node* node, regex_t* reg)
p = prev = sn->s;
prev_len = enclen(enc, p, end);
p += prev_len;
slen = 1;
blen = prev_len;
for (; p < end; ) {
len = enclen(enc, p, end);
if (len == prev_len) {
slen++;
if (len == prev_len || ambig) {
blen += len;
}
else {
r = add_compile_string(prev, prev_len, slen, reg, ambig);
r = add_compile_string(prev, prev_len, blen, reg, ambig);
if (r) return r;
prev = p;
slen = 1;
blen = len;
prev_len = len;
}
p += len;
}
return add_compile_string(prev, prev_len, slen, reg, ambig);
return add_compile_string(prev, prev_len, blen, reg, ambig);
}
static int
@ -2591,6 +2592,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
return 0;
}
else {
if (IS_NOT_NULL(xc->mbuf)) return 0;
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
int is_word;
if (NCTYPE(y)->ascii_range)
@ -3311,7 +3313,7 @@ next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
qn->next_head_exact = n;
}
#endif
/* automatic possessivation a*b ==> (?>a*)b */
/* automatic possessification a*b ==> (?>a*)b */
if (qn->lower <= 1) {
int ttype = NTYPE(qn->target);
if (IS_NODE_TYPE_SIMPLE(ttype)) {
@ -3432,27 +3434,40 @@ expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
return 0;
}
/*
 * Scan the first item_num entries of items[] and report whether any of
 * them has a byte_len different from slen or a code_len other than 1.
 *
 * Returns 1 as soon as such an entry is found, and 0 when every scanned
 * entry has byte_len == slen and code_len == 1 (which is also the result
 * when item_num is 0 or negative, since nothing is scanned).
 */
static int
is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
			  int slen)
{
  int idx = 0;

  while (idx < item_num) {
    /* Either mismatch alone is enough to answer 1. */
    if (items[idx].byte_len != slen || items[idx].code_len != 1)
      return 1;
    idx++;
  }

  return 0;
}
static int
expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
UChar *p, int slen, UChar *end,
regex_t* reg, Node **rnode)
{
int r, i, j, len, varlen, varclen;
int r, i, j, len, varlen;
Node *anode, *var_anode, *snode, *xnode, *an;
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
*rnode = var_anode = NULL_NODE;
varlen = 0;
varclen = 0;
for (i = 0; i < item_num; i++) {
if (items[i].byte_len != slen) {
varlen = 1;
break;
}
if (items[i].code_len != 1) {
varclen |= 1;
}
}
if (varlen != 0) {
@ -3537,8 +3552,6 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
}
}
if (varclen && !varlen)
return 2;
return varlen;
mem_err2:
@ -3582,7 +3595,8 @@ expand_case_fold_string(Node* node, regex_t* reg)
len = enclen(reg->enc, p, end);
if (n == 0) {
varlen = is_case_fold_variable_len(n, items, len);
if (n == 0 || varlen == 0) {
if (IS_NULL(snode)) {
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
top_root = root = onig_node_list_add(NULL_NODE, prev_node);
@ -3607,11 +3621,14 @@ expand_case_fold_string(Node* node, regex_t* reg)
}
else {
alt_num *= (n + 1);
if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) {
varlen = 1; /* Assume that expanded strings are variable length. */
break;
}
if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
if (IS_NOT_NULL(snode)) {
r = update_string_node_case_fold(reg, snode);
if (r == 0) {
NSTRING_SET_AMBIG(snode);
}
}
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
top_root = root = onig_node_list_add(NULL_NODE, prev_node);
if (IS_NULL(root)) {
@ -3622,7 +3639,6 @@ expand_case_fold_string(Node* node, regex_t* reg)
r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
if (r < 0) goto mem_err;
if (r > 0) varlen = 1;
if (r == 1) {
if (IS_NULL(root)) {
top_root = prev_node;
@ -3636,7 +3652,7 @@ expand_case_fold_string(Node* node, regex_t* reg)
root = NCAR(prev_node);
}
else { /* r == 0 || r == 2 */
else { /* r == 0 */
if (IS_NOT_NULL(root)) {
if (IS_NULL(onig_node_list_add(root, prev_node))) {
onig_node_free(prev_node);
@ -3650,6 +3666,12 @@ expand_case_fold_string(Node* node, regex_t* reg)
p += len;
}
if (IS_NOT_NULL(snode)) {
r = update_string_node_case_fold(reg, snode);
if (r == 0) {
NSTRING_SET_AMBIG(snode);
}
}
if (p < end) {
Node *srem;
@ -3679,20 +3701,9 @@ expand_case_fold_string(Node* node, regex_t* reg)
/* ending */
top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
if (!varlen) {
/* When all expanded strings are same length, case-insensitive
BM search will be used. */
r = update_string_node_case_fold(reg, node);
if (r == 0) {
NSTRING_SET_AMBIG(node);
}
}
else {
swap_node(node, top_root);
r = 0;
}
onig_node_free(top_root);
return r;
return 0;
mem_err:
r = ONIGERR_MEMORY;
@ -4367,7 +4378,7 @@ map_position_value(OnigEncoding enc, int i)
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
};
if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
if (i < numberof(ByteValTable)) {
if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
return 20;
else
@ -4399,7 +4410,7 @@ distance_value(MinMaxLen* mm)
if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
d = mm->max - mm->min;
if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
if (d < numberof(dist_vals))
/* return dist_vals[d] * 16 / (mm->min + 12); */
return (int )dist_vals[d];
else
@ -4507,6 +4518,9 @@ concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
if (right_len == 0) {
to->right_anchor |= left->right_anchor;
}
else {
to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
}
}
static int
@ -5081,6 +5095,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case ANCHOR_SEMI_END_BUF:
case ANCHOR_END_LINE:
case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
break;
@ -5103,7 +5118,6 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
}
break;
case ANCHOR_PREC_READ_NOT:
case ANCHOR_LOOK_BEHIND_NOT:
break;
}
@ -5369,7 +5383,8 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
ANCHOR_LOOK_BEHIND);
reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
ANCHOR_PREC_READ_NOT);
if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
reg->anchor_dmin = opt.len.min;

Просмотреть файл

@ -414,9 +414,7 @@ onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
OnigCodePoint code;
int i, r;
for (i = 0;
i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
i++) {
for (i = 0; i < numberof(OnigAsciiLowerMap); i++) {
code = OnigAsciiLowerMap[i].to;
r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
if (r != 0) return r;
@ -431,8 +429,8 @@ onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
extern int
onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[],
OnigEncoding enc ARG_UNUSED)
const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)
{
if (0x41 <= *p && *p <= 0x5a) {
items[0].byte_len = 1;
@ -570,8 +568,9 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
extern int
onigenc_not_support_get_ctype_code_range(OnigCtype ctype,
OnigCodePoint* sb_out, const OnigCodePoint* ranges[],
onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
OnigCodePoint* sb_out ARG_UNUSED,
const OnigCodePoint* ranges[] ARG_UNUSED,
OnigEncoding enc)
{
return ONIG_NO_SUPPORT_CONFIG;
@ -639,22 +638,25 @@ onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc
}
extern UChar*
onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s,
const UChar* end,
onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
const UChar* s,
const UChar* end ARG_UNUSED,
OnigEncoding enc ARG_UNUSED)
{
return (UChar* )s;
}
extern int
onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
const UChar* end ARG_UNUSED,
OnigEncoding enc ARG_UNUSED)
{
return TRUE;
}
extern int
onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
const UChar* end ARG_UNUSED,
OnigEncoding enc ARG_UNUSED)
{
return FALSE;
@ -716,7 +718,7 @@ onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
#if 0
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
const UChar** pp ARG_UNUSED, const UChar* end ARG_UNUSED)
const UChar** pp, const UChar* end ARG_UNUSED)
{
const UChar* p = *pp;
@ -791,27 +793,27 @@ extern int
onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
{
static const PosixBracketEntryType PBS[] = {
PosixBracketEntryInit("Alnum", ONIGENC_CTYPE_ALNUM),
PosixBracketEntryInit("Alpha", ONIGENC_CTYPE_ALPHA),
PosixBracketEntryInit("Blank", ONIGENC_CTYPE_BLANK),
PosixBracketEntryInit("Cntrl", ONIGENC_CTYPE_CNTRL),
PosixBracketEntryInit("Digit", ONIGENC_CTYPE_DIGIT),
PosixBracketEntryInit("Graph", ONIGENC_CTYPE_GRAPH),
PosixBracketEntryInit("Lower", ONIGENC_CTYPE_LOWER),
PosixBracketEntryInit("Print", ONIGENC_CTYPE_PRINT),
PosixBracketEntryInit("Punct", ONIGENC_CTYPE_PUNCT),
PosixBracketEntryInit("Space", ONIGENC_CTYPE_SPACE),
PosixBracketEntryInit("Upper", ONIGENC_CTYPE_UPPER),
PosixBracketEntryInit("XDigit", ONIGENC_CTYPE_XDIGIT),
PosixBracketEntryInit("ASCII", ONIGENC_CTYPE_ASCII),
PosixBracketEntryInit("Word", ONIGENC_CTYPE_WORD),
POSIX_BRACKET_ENTRY_INIT("Alnum", ONIGENC_CTYPE_ALNUM),
POSIX_BRACKET_ENTRY_INIT("Alpha", ONIGENC_CTYPE_ALPHA),
POSIX_BRACKET_ENTRY_INIT("Blank", ONIGENC_CTYPE_BLANK),
POSIX_BRACKET_ENTRY_INIT("Cntrl", ONIGENC_CTYPE_CNTRL),
POSIX_BRACKET_ENTRY_INIT("Digit", ONIGENC_CTYPE_DIGIT),
POSIX_BRACKET_ENTRY_INIT("Graph", ONIGENC_CTYPE_GRAPH),
POSIX_BRACKET_ENTRY_INIT("Lower", ONIGENC_CTYPE_LOWER),
POSIX_BRACKET_ENTRY_INIT("Print", ONIGENC_CTYPE_PRINT),
POSIX_BRACKET_ENTRY_INIT("Punct", ONIGENC_CTYPE_PUNCT),
POSIX_BRACKET_ENTRY_INIT("Space", ONIGENC_CTYPE_SPACE),
POSIX_BRACKET_ENTRY_INIT("Upper", ONIGENC_CTYPE_UPPER),
POSIX_BRACKET_ENTRY_INIT("XDigit", ONIGENC_CTYPE_XDIGIT),
POSIX_BRACKET_ENTRY_INIT("ASCII", ONIGENC_CTYPE_ASCII),
POSIX_BRACKET_ENTRY_INIT("Word", ONIGENC_CTYPE_WORD),
};
const PosixBracketEntryType *pb, *pbe;
const PosixBracketEntryType *pb;
int len;
len = onigenc_strlen(enc, p, end);
for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) {
for (pb = PBS; pb < PBS + numberof(PBS); pb++) {
if (len == pb->len &&
onigenc_with_ascii_strnicmp(enc, p, end, pb->name, pb->len) == 0)
return pb->ctype;

Просмотреть файл

@ -29,15 +29,18 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef REGINT_H
#ifndef RUBY_EXTERN
#include "ruby/config.h"
#include "ruby/defines.h"
#endif
#endif
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
#undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
#endif
#include "ruby/oniguruma.h"
RUBY_SYMBOL_EXPORT_BEGIN
@ -104,7 +107,13 @@ typedef struct {
short int len;
} PosixBracketEntryType;
#define PosixBracketEntryInit(name, ctype) {(const UChar *)name, ctype, (short int)(sizeof(name) - 1)}
#define POSIX_BRACKET_ENTRY_INIT(name, ctype) \
{(const UChar* )(name), (ctype), (short int )(sizeof(name) - 1)}
#ifndef numberof
#define numberof(array) (int )(sizeof(array) / sizeof((array)[0]))
#endif
#define USE_CRNL_AS_LINE_TERMINATOR
#define USE_UNICODE_PROPERTIES
@ -159,6 +168,7 @@ ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, O
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
OnigEncISO_8859_1_ToLowerCaseTable[c]

Просмотреть файл

@ -1406,7 +1406,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (s) {
UChar *q, *bp, buf[50];
int len;
fprintf(stderr, "%4d> \"", (*p == OP_FINISH) ? -1 : (int )(s - str));
fprintf(stderr, "%4"PRIdPTR"> \"", (*p == OP_FINISH) ? (ptrdiff_t )-1 : s - str);
bp = buf;
if (*p != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */
for (i = 0, q = s; i < 7 && q < end; i++) {
@ -1419,6 +1419,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
*bp = 0;
fputs((char* )buf, stderr);
for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
fprintf(stderr, "%4"PRIdPTR":", (p == FinishCode) ? (ptrdiff_t )-1 : p - reg->p);
onig_print_compiled_byte_code(stderr, p, p + strlen((char *)p), NULL, encode);
fprintf(stderr, "\n");
}
@ -4183,7 +4184,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
prev = s;
s += enclen(reg->enc, s, end);
if ((reg->anchor & ANCHOR_LOOK_BEHIND) == 0) {
if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
&& s < range) {
prev = s;

Просмотреть файл

@ -193,6 +193,8 @@ extern pthread_mutex_t gOnigMutex;
#define USE_UPPER_CASE_TABLE
#else
#define CHECK_INTERRUPT_IN_MATCH_AT
#define st_init_table onig_st_init_table
#define st_init_table_with_size onig_st_init_table_with_size
#define st_init_numtable onig_st_init_numtable
@ -213,8 +215,6 @@ extern pthread_mutex_t gOnigMutex;
/* */
#define onig_st_is_member st_is_member
#define CHECK_INTERRUPT_IN_MATCH_AT
#endif
#define STATE_CHECK_STRING_THRESHOLD_LEN 7
@ -913,9 +913,7 @@ typedef struct {
extern OnigOpInfoType OnigOpInfo[];
#ifdef ONIG_DEBUG
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc));
#endif
#ifdef ONIG_DEBUG_STATISTICS
extern void onig_statistics_init P_((void));

Просмотреть файл

@ -4153,17 +4153,15 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
}
static int
add_ctype_to_cc(CClassNode* cc, int ctype, int not, int char_prop, ScanEnv* env)
add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* env)
{
int maxcode, ascii_range;
int maxcode;
int c, r;
const OnigCodePoint *ranges;
OnigCodePoint sb_out;
OnigEncoding enc = env->enc;
OnigOptionType option = env->option;
ascii_range = IS_ASCII_RANGE(option) && (char_prop == 0);
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
if (r == 0) {
if (ascii_range) {
@ -4280,31 +4278,32 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, int char_prop, ScanEnv* env)
}
static int
parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
parse_posix_bracket(CClassNode* cc, CClassNode* asc_cc,
UChar** src, UChar* end, ScanEnv* env)
{
#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
#define POSIX_BRACKET_NAME_MIN_LEN 4
static const PosixBracketEntryType PBS[] = {
{ (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
{ (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
{ (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
{ (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
{ (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
{ (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
{ (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
{ (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
{ (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
{ (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
{ (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
{ (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
{ (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
{ (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },
{ (UChar* )NULL, -1, 0 }
POSIX_BRACKET_ENTRY_INIT("alnum", ONIGENC_CTYPE_ALNUM),
POSIX_BRACKET_ENTRY_INIT("alpha", ONIGENC_CTYPE_ALPHA),
POSIX_BRACKET_ENTRY_INIT("blank", ONIGENC_CTYPE_BLANK),
POSIX_BRACKET_ENTRY_INIT("cntrl", ONIGENC_CTYPE_CNTRL),
POSIX_BRACKET_ENTRY_INIT("digit", ONIGENC_CTYPE_DIGIT),
POSIX_BRACKET_ENTRY_INIT("graph", ONIGENC_CTYPE_GRAPH),
POSIX_BRACKET_ENTRY_INIT("lower", ONIGENC_CTYPE_LOWER),
POSIX_BRACKET_ENTRY_INIT("print", ONIGENC_CTYPE_PRINT),
POSIX_BRACKET_ENTRY_INIT("punct", ONIGENC_CTYPE_PUNCT),
POSIX_BRACKET_ENTRY_INIT("space", ONIGENC_CTYPE_SPACE),
POSIX_BRACKET_ENTRY_INIT("upper", ONIGENC_CTYPE_UPPER),
POSIX_BRACKET_ENTRY_INIT("xdigit", ONIGENC_CTYPE_XDIGIT),
POSIX_BRACKET_ENTRY_INIT("ascii", ONIGENC_CTYPE_ASCII),
POSIX_BRACKET_ENTRY_INIT("word", ONIGENC_CTYPE_WORD),
};
const PosixBracketEntryType *pb;
int not, i, r;
int ascii_range;
OnigCodePoint c;
OnigEncoding enc = env->enc;
UChar *p = *src;
@ -4319,17 +4318,25 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
goto not_posix_bracket;
for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
ascii_range = IS_ASCII_RANGE(env->option) &&
! IS_POSIX_BRACKET_ALL_RANGE(env->option);
for (pb = PBS; pb < PBS + numberof(PBS); pb++) {
if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
p = (UChar* )onigenc_step(enc, p, end, pb->len);
if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
r = add_ctype_to_cc(cc, pb->ctype, not,
IS_POSIX_BRACKET_ALL_RANGE(env->option),
env);
r = add_ctype_to_cc(cc, pb->ctype, not, ascii_range, env);
if (r != 0) return r;
if (IS_NOT_NULL(asc_cc)) {
if (pb->ctype != ONIGENC_CTYPE_WORD &&
pb->ctype != ONIGENC_CTYPE_ASCII &&
!ascii_range)
r = add_ctype_to_cc(asc_cc, pb->ctype, not, ascii_range, env);
if (r != 0) return r;
}
PINC_S; PINC_S;
*src = p;
return 0;
@ -4386,6 +4393,8 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
return r;
}
static int cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env);
static int
parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
ScanEnv* env)
@ -4399,11 +4408,15 @@ parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
*np = node_new_cclass();
CHECK_NULL_RETURN_MEMERR(*np);
cc = NCCLASS(*np);
r = add_ctype_to_cc(cc, ctype, 0, 1, env);
r = add_ctype_to_cc(cc, ctype, 0, 0, env);
if (r != 0) return r;
if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
return 0;
if (IS_IGNORECASE(env->option)) {
if (ctype != ONIGENC_CTYPE_ASCII)
r = cclass_case_fold(np, cc, cc, env);
}
return r;
}
@ -4421,7 +4434,8 @@ enum CCVALTYPE {
};
static int
next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
next_state_class(CClassNode* cc, CClassNode* asc_cc,
OnigCodePoint* vs, enum CCVALTYPE* type,
enum CCSTATE* state, ScanEnv* env)
{
int r;
@ -4430,11 +4444,18 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
if (*state == CCS_VALUE && *type != CCV_CLASS) {
if (*type == CCV_SB)
if (*type == CCV_SB) {
BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
if (IS_NOT_NULL(asc_cc))
BITSET_SET_BIT(asc_cc->bs, (int )(*vs));
}
else if (*type == CCV_CODE_POINT) {
r = add_code_range(&(cc->mbuf), env, *vs, *vs);
if (r < 0) return r;
if (IS_NOT_NULL(asc_cc)) {
r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0);
if (r < 0) return r;
}
}
}
@ -4444,7 +4465,8 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
}
static int
next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
next_state_val(CClassNode* cc, CClassNode* asc_cc,
OnigCodePoint *vs, OnigCodePoint v,
int* vs_israw, int v_israw,
enum CCVALTYPE intype, enum CCVALTYPE* type,
enum CCSTATE* state, ScanEnv* env)
@ -4453,11 +4475,18 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
switch (*state) {
case CCS_VALUE:
if (*type == CCV_SB)
if (*type == CCV_SB) {
BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
if (IS_NOT_NULL(asc_cc))
BITSET_SET_BIT(asc_cc->bs, (int )(*vs));
}
else if (*type == CCV_CODE_POINT) {
r = add_code_range(&(cc->mbuf), env, *vs, *vs);
if (r < 0) return r;
if (IS_NOT_NULL(asc_cc)) {
r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0);
if (r < 0) return r;
}
}
break;
@ -4474,10 +4503,16 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
}
bitset_set_range(env, cc->bs, (int )*vs, (int )v);
if (IS_NOT_NULL(asc_cc))
bitset_set_range(env, asc_cc->bs, (int )*vs, (int )v);
}
else {
r = add_code_range(&(cc->mbuf), env, *vs, v);
if (r < 0) return r;
if (IS_NOT_NULL(asc_cc)) {
r = add_code_range0(&(asc_cc->mbuf), env, *vs, v, 0);
if (r < 0) return r;
}
}
}
else {
@ -4493,6 +4528,11 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
if (r < 0) return r;
if (IS_NOT_NULL(asc_cc)) {
bitset_set_range(env, asc_cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
r = add_code_range0(&(asc_cc->mbuf), env, (OnigCodePoint )*vs, v, 0);
if (r < 0) return r;
}
#if 0
}
else
@ -4542,22 +4582,24 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
}
static int
parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* end,
ScanEnv* env)
{
int r, neg, len, fetched, and_start;
OnigCodePoint v, vs;
UChar *p;
Node* node;
Node* asc_node;
CClassNode *cc, *prev_cc;
CClassNode work_cc;
CClassNode *asc_cc, *asc_prev_cc;
CClassNode work_cc, asc_work_cc;
enum CCSTATE state;
enum CCVALTYPE val_type, in_type;
int val_israw, in_israw;
prev_cc = (CClassNode* )NULL;
*np = NULL_NODE;
prev_cc = asc_prev_cc = (CClassNode* )NULL;
*np = *asc_np = NULL_NODE;
r = fetch_token_in_cc(tok, src, end, env);
if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
neg = 1;
@ -4581,6 +4623,16 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
CHECK_NULL_RETURN_MEMERR(node);
cc = NCCLASS(node);
if (IS_IGNORECASE(env->option)) {
*asc_np = asc_node = node_new_cclass();
CHECK_NULL_RETURN_MEMERR(asc_node);
asc_cc = NCCLASS(asc_node);
}
else {
asc_node = NULL_NODE;
asc_cc = NULL;
}
and_start = 0;
state = CCS_START;
p = *src;
@ -4671,13 +4723,13 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
}
in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
val_entry2:
r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
r = next_state_val(cc, asc_cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
&state, env);
if (r != 0) goto err;
break;
case TK_POSIX_BRACKET_OPEN:
r = parse_posix_bracket(cc, &p, end, env);
r = parse_posix_bracket(cc, asc_cc, &p, end, env);
if (r < 0) goto err;
if (r == 1) { /* is not POSIX bracket */
CC_ESC_WARN(env, (UChar* )"[");
@ -4690,11 +4742,18 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
break;
case TK_CHAR_TYPE:
r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, 0, env);
r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not,
IS_ASCII_RANGE(env->option), env);
if (r != 0) return r;
if (IS_NOT_NULL(asc_cc)) {
if (tok->u.prop.ctype != ONIGENC_CTYPE_WORD)
r = add_ctype_to_cc(asc_cc, tok->u.prop.ctype, tok->u.prop.not,
IS_ASCII_RANGE(env->option), env);
if (r != 0) return r;
}
next_class:
r = next_state_class(cc, &vs, &val_type, &state, env);
r = next_state_class(cc, asc_cc, &vs, &val_type, &state, env);
if (r != 0) goto err;
break;
@ -4704,8 +4763,13 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
ctype = fetch_char_property_to_ctype(&p, end, env);
if (ctype < 0) return ctype;
r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 1, env);
r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 0, env);
if (r != 0) return r;
if (IS_NOT_NULL(asc_cc)) {
if (ctype != ONIGENC_CTYPE_ASCII)
r = add_ctype_to_cc(asc_cc, ctype, tok->u.prop.not, 0, env);
if (r != 0) return r;
}
goto next_class;
}
break;
@ -4766,15 +4830,20 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
case TK_CC_CC_OPEN: /* [ */
{
Node *anode;
Node *anode, *aasc_node;
CClassNode* acc;
r = parse_char_class(&anode, tok, &p, end, env);
r = parse_char_class(&anode, &aasc_node, tok, &p, end, env);
if (r == 0) {
acc = NCCLASS(anode);
r = or_cclass(cc, acc, env);
}
if (r == 0 && IS_NOT_NULL(aasc_node)) {
acc = NCCLASS(aasc_node);
r = or_cclass(asc_cc, acc, env);
}
onig_node_free(anode);
onig_node_free(aasc_node);
if (r != 0) goto err;
}
break;
@ -4782,7 +4851,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
case TK_CC_AND: /* && */
{
if (state == CCS_VALUE) {
r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
&val_type, &state, env);
if (r != 0) goto err;
}
@ -4794,12 +4863,23 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
r = and_cclass(prev_cc, cc, env);
if (r != 0) goto err;
bbuf_free(cc->mbuf);
if (IS_NOT_NULL(asc_cc)) {
r = and_cclass(asc_prev_cc, asc_cc, env);
if (r != 0) goto err;
bbuf_free(asc_cc->mbuf);
}
}
else {
prev_cc = cc;
cc = &work_cc;
if (IS_NOT_NULL(asc_cc)) {
asc_prev_cc = asc_cc;
asc_cc = &asc_work_cc;
}
}
initialize_cclass(cc);
if (IS_NOT_NULL(asc_cc))
initialize_cclass(asc_cc);
}
break;
@ -4822,7 +4902,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
}
if (state == CCS_VALUE) {
r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
&val_type, &state, env);
if (r != 0) goto err;
}
@ -4832,12 +4912,24 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
if (r != 0) goto err;
bbuf_free(cc->mbuf);
cc = prev_cc;
if (IS_NOT_NULL(asc_cc)) {
r = and_cclass(asc_prev_cc, asc_cc, env);
if (r != 0) goto err;
bbuf_free(asc_cc->mbuf);
asc_cc = asc_prev_cc;
}
}
if (neg != 0)
if (neg != 0) {
NCCLASS_SET_NOT(cc);
else
if (IS_NOT_NULL(asc_cc))
NCCLASS_SET_NOT(asc_cc);
}
else {
NCCLASS_CLEAR_NOT(cc);
if (IS_NOT_NULL(asc_cc))
NCCLASS_CLEAR_NOT(asc_cc);
}
if (IS_NCCLASS_NOT(cc) &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
int is_empty;
@ -4865,6 +4957,8 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
err:
if (cc != NCCLASS(*np))
bbuf_free(cc->mbuf);
if (IS_NOT_NULL(asc_cc) && (asc_cc != NCCLASS(*asc_np)))
bbuf_free(asc_cc->mbuf);
return r;
}
@ -5489,6 +5583,7 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
/* Argument bundle handed (as void*) to i_apply_case_fold() through
 * ONIGENC_APPLY_ALL_CASE_FOLD(). */
typedef struct {
/* Scan environment; the callback reads env->enc from it. */
ScanEnv* env;
/* Character class being expanded: single-code fold targets are added to
 * its bitset or multi-byte range buffer. */
CClassNode* cc;
/* Companion class consulted when deciding whether a fold may be added.
 * May be NULL, in which case the callback sets its add_flag to 0. */
CClassNode* asc_cc;
/* Set to NULL_NODE before the walk by cclass_case_fold(); if non-NULL
 * afterwards, it is joined with the class node via onig_node_new_alt(). */
Node* alt_root;
/* Initialized to &alt_root by cclass_case_fold().
 * NOTE(review): presumably the append position for further alternatives
 * -- confirm against the multi-code branch of i_apply_case_fold(). */
Node** ptail;
} IApplyCaseFoldArg;
@ -5500,18 +5595,34 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
IApplyCaseFoldArg* iarg;
ScanEnv* env;
CClassNode* cc;
CClassNode* asc_cc;
BitSetRef bs;
int add_flag;
iarg = (IApplyCaseFoldArg* )arg;
env = iarg->env;
cc = iarg->cc;
asc_cc = iarg->asc_cc;
bs = cc->bs;
if (IS_NULL(asc_cc)) {
add_flag = 0;
}
else if (ONIGENC_IS_ASCII_CODE(from) == ONIGENC_IS_ASCII_CODE(*to)) {
add_flag = 1;
}
else {
add_flag = onig_is_code_in_cc(env->enc, from, asc_cc);
if (IS_NCCLASS_NOT(asc_cc))
add_flag = !add_flag;
}
if (to_len == 1) {
int is_in = onig_is_code_in_cc(env->enc, from, cc);
#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
(is_in == 0 && IS_NCCLASS_NOT(cc))) {
if (add_flag) {
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
add_code_range0(&(cc->mbuf), env, *to, *to, 0);
}
@ -5519,8 +5630,10 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
BITSET_SET_BIT(bs, *to);
}
}
}
#else
if (is_in != 0) {
if (add_flag) {
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
add_code_range0(&(cc->mbuf), env, *to, *to, 0);
@ -5529,10 +5642,12 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
if (IS_NCCLASS_NOT(cc)) {
BITSET_CLEAR_BIT(bs, *to);
}
else
else {
BITSET_SET_BIT(bs, *to);
}
}
}
}
#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
}
else {
@ -5573,6 +5688,35 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
return 0;
}
/*
 * Apply every case-fold mapping of the environment's encoding to a
 * character class.
 *
 *   np      in/out: node holding the class; replaced by an alternation node
 *           when the fold walk produced extra alternatives.
 *   cc      class that receives the folded code points.
 *   asc_cc  companion class passed through to i_apply_case_fold().
 *   env     scan environment (supplies enc and case_fold_flag).
 *
 * Returns 0 on success, the non-zero status of the fold walk on failure,
 * or ONIGERR_MEMORY when the alternation node cannot be allocated.  On
 * either failure path the accumulated alternatives are freed; *np is left
 * untouched.
 */
static int
cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env)
{
  IApplyCaseFoldArg fold_arg;
  Node* alt;
  int status;

  fold_arg.env      = env;
  fold_arg.cc       = cc;
  fold_arg.asc_cc   = asc_cc;
  fold_arg.alt_root = NULL_NODE;
  fold_arg.ptail    = &(fold_arg.alt_root);

  status = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
                                       i_apply_case_fold, &fold_arg);
  if (status != 0) {
    /* Discard whatever the callback built before it failed. */
    onig_node_free(fold_arg.alt_root);
    return status;
  }

  /* No extra alternatives were collected: the class node stands alone. */
  if (IS_NULL(fold_arg.alt_root))
    return 0;

  alt = onig_node_new_alt(*np, fold_arg.alt_root);
  if (IS_NULL(alt)) {
    onig_node_free(fold_arg.alt_root);
    return ONIGERR_MEMORY;
  }

  *np = alt;
  return 0;
}
static int
node_linebreak(Node** np, ScanEnv* env)
{
@ -5658,7 +5802,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
np1 = node_new_cclass();
if (IS_NULL(np1)) goto err;
cc1 = NCCLASS(np1);
r = add_ctype_to_cc(cc1, ctype, 0, 1, env);
r = add_ctype_to_cc(cc1, ctype, 0, 0, env);
if (r != 0) goto err;
NCCLASS_SET_NOT(cc1);
@ -5666,7 +5810,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
np2 = node_new_cclass();
if (IS_NULL(np2)) goto err;
cc2 = NCCLASS(np2);
r = add_ctype_to_cc(cc2, ctype, 0, 1, env);
r = add_ctype_to_cc(cc2, ctype, 0, 0, env);
if (r != 0) goto err;
qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
@ -6013,7 +6157,8 @@ parse_exp(Node** np, OnigToken* tok, int term,
*np = node_new_cclass();
CHECK_NULL_RETURN_MEMERR(*np);
cc = NCCLASS(*np);
r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0, 0, env);
r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0,
IS_ASCII_RANGE(env->option), env);
if (r != 0) return r;
if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
#ifdef USE_SHARED_CCLASS_TABLE
@ -6036,15 +6181,20 @@ parse_exp(Node** np, OnigToken* tok, int term,
case TK_CC_OPEN:
{
Node *asc_node;
CClassNode* cc;
OnigCodePoint code;
r = parse_char_class(np, tok, src, end, env);
if (r != 0) return r;
r = parse_char_class(np, &asc_node, tok, src, end, env);
if (r != 0) {
onig_node_free(asc_node);
return r;
}
cc = NCCLASS(*np);
if (is_onechar_cclass(cc, &code)) {
onig_node_free(*np);
onig_node_free(asc_node);
*np = node_new_empty();
CHECK_NULL_RETURN_MEMERR(*np);
r = node_str_cat_codepoint(*np, env->enc, code);
@ -6052,28 +6202,13 @@ parse_exp(Node** np, OnigToken* tok, int term,
goto string_loop;
}
if (IS_IGNORECASE(env->option)) {
IApplyCaseFoldArg iarg;
iarg.env = env;
iarg.cc = cc;
iarg.alt_root = NULL_NODE;
iarg.ptail = &(iarg.alt_root);
r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
i_apply_case_fold, &iarg);
r = cclass_case_fold(np, cc, NCCLASS(asc_node), env);
if (r != 0) {
onig_node_free(iarg.alt_root);
onig_node_free(asc_node);
return r;
}
if (IS_NOT_NULL(iarg.alt_root)) {
Node* work = onig_node_new_alt(*np, iarg.alt_root);
if (IS_NULL(work)) {
onig_node_free(iarg.alt_root);
return ONIGERR_MEMORY;
}
*np = work;
}
}
onig_node_free(asc_node);
}
break;

Просмотреть файл

@ -193,8 +193,8 @@ typedef struct {
int type;
int regnum;
OnigOptionType option;
struct _Node* target;
AbsAddrType call_addr;
struct _Node* target;
/* for multiple call reference */
OnigDistance min_len; /* min length (byte) */
OnigDistance max_len; /* max length (byte) */
@ -296,10 +296,10 @@ typedef struct {
UChar* error;
UChar* error_end;
regex_t* reg; /* for reg->names only */
int num_call;
#ifdef USE_SUBEXP_CALL
UnsetAddrList* unset_addr_list;
#endif
int num_call;
int num_mem;
#ifdef USE_NAMED_GROUP
int num_named;