git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10782 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
kosako 2006-08-27 12:58:22 +00:00
Родитель 0384b2330c
Коммит 0046f355c1
8 изменённых файлов: 833 добавлений и 135 удалений

Просмотреть файл

@ -1,3 +1,17 @@
Sun Aug 27 21:41:23 2006 K.Kosako <sndgk393 AT ybb.ne.jp>
* oniguruma.h: Version 4.4.0
* regint.h: ditto.
* regparse.h: ditto.
* regexec.c: ditto.
* regcomp.c: ditto.
* regparse.c: ditto.
Sat Aug 26 08:03:03 2006 Tadayoshi Funaba <tadf@dotrb.org> Sat Aug 26 08:03:03 2006 Tadayoshi Funaba <tadf@dotrb.org>
* lib/date.rb, lib/date/format.rb: updated based on date2 3.8.2. * lib/date.rb, lib/date/format.rb: updated based on date2 3.8.2.

Просмотреть файл

@ -35,8 +35,8 @@ extern "C" {
#define ONIGURUMA #define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 4 #define ONIGURUMA_VERSION_MAJOR 4
#define ONIGURUMA_VERSION_MINOR 2 #define ONIGURUMA_VERSION_MINOR 4
#define ONIGURUMA_VERSION_TEENY 2 #define ONIGURUMA_VERSION_TEENY 0
#ifdef __cplusplus #ifdef __cplusplus
# ifndef HAVE_PROTOTYPES # ifndef HAVE_PROTOTYPES
@ -742,6 +742,7 @@ typedef struct re_pattern_buffer {
int num_mem; /* used memory(...) num counted from 1 */ int num_mem; /* used memory(...) num counted from 1 */
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
int num_null_check; /* OP_NULL_CHECK_START/END id counter */ int num_null_check; /* OP_NULL_CHECK_START/END id counter */
int num_comb_exp_check; /* combination explosion check */
int num_call; /* number of subexp call */ int num_call; /* number of subexp call */
unsigned int capture_history; /* (?@...) flag (1-31) */ unsigned int capture_history; /* (?@...) flag (1-31) */
unsigned int bt_mem_start; /* need backtrack flag */ unsigned int bt_mem_start; /* need backtrack flag */

479
regcomp.c
Просмотреть файл

@ -186,6 +186,17 @@ add_opcode(regex_t* reg, int opcode)
return 0; return 0;
} }
#ifdef USE_COMBINATION_EXPLOSION_CHECK
/* Append a state-check number operand to the compiled pattern buffer.
 *
 * `num` is narrowed to StateCheckNumType and SIZE_STATE_CHECK_NUM bytes of
 * it are handed to BBUF_ADD for `reg`.  Returns 0 after BBUF_ADD completes.
 */
static int
add_state_check_num(regex_t* reg, int num)
{
  StateCheckNumType check_num;

  check_num = (StateCheckNumType )num;
  BBUF_ADD(reg, &check_num, SIZE_STATE_CHECK_NUM);
  return 0;
}
#endif
static int static int
add_rel_addr(regex_t* reg, int addr) add_rel_addr(regex_t* reg, int addr)
{ {
@ -644,7 +655,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
} }
p[id].lower = lower; p[id].lower = lower;
p[id].upper = upper; p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
return 0; return 0;
} }
@ -684,7 +695,254 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
return r; return r;
} }
/* Return 1 when `qn` is a greedy, unbounded repeat whose target is an
 * N_ANYCHAR node; return 0 otherwise.
 */
static int
is_anychar_star_qualifier(QualifierNode* qn)
{
  if (!qn->greedy) return 0;
  if (!IS_REPEAT_INFINITE(qn->upper)) return 0;

  return (NTYPE(qn->target) == N_ANYCHAR) ? 1 : 0;
}
#define QUALIFIER_EXPAND_LIMIT_SIZE 50 #define QUALIFIER_EXPAND_LIMIT_SIZE 50
/* True when the local variable `ckn` of the enclosing function is positive.
   The macro takes no argument: it reads `ckn` from the scope where it is
   expanded, so that local must exist and keep this exact name. */
#define CKN_ON (ckn > 0)
#ifdef USE_COMBINATION_EXPLOSION_CHECK
/* Compute the number of bytecode bytes needed for the qualifier node `qn`
 * (state-check variant).
 *
 * Returns the byte count, or the negative value reported by
 * compile_length_tree() for the repeated target.
 */
static int
compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
{
  int ckn;           /* name is fixed: CKN_ON reads `ckn` */
  int check_bytes;   /* extra operand bytes when a state check is attached */
  int body_len;      /* target length including optional null-check ops */
  int total;
  int is_infinite = IS_REPEAT_INFINITE(qn->upper);
  int empty_info = qn->target_empty_info;
  int tlen = compile_length_tree(qn->target, reg);

  if (tlen < 0) return tlen;

  ckn = 0;
  if (reg->num_comb_exp_check > 0)
    ckn = qn->comb_exp_check_num;

  check_bytes = 0;
  if (CKN_ON)
    check_bytes = SIZE_STATE_CHECK_NUM;

  /* anychar repeat */
  if (NTYPE(qn->target) == N_ANYCHAR && qn->greedy && is_infinite) {
    int star_len = SIZE_OP_ANYCHAR_STAR;

    if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
      star_len = SIZE_OP_ANYCHAR_STAR_PEEK_NEXT;

    return star_len + tlen * qn->lower + check_bytes;
  }

  body_len = tlen;
  if (empty_info != 0)
    body_len = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);

  if (is_infinite && qn->lower <= 1) {
    if (qn->greedy) {
      /* a leading jump is counted only when lower == 1 */
      total = (qn->lower == 1) ? SIZE_OP_JUMP : 0;
      total += SIZE_OP_PUSH + check_bytes + body_len + SIZE_OP_JUMP;
    }
    else {
      /* a leading jump is counted only when lower == 0 */
      total = (qn->lower == 0) ? SIZE_OP_JUMP : 0;
      total += body_len + SIZE_OP_PUSH + check_bytes;
    }
    return total;
  }

  if (qn->upper == 0) {
    /* /(?<n>..){0}/ : counted as a jump plus the target when is_refered */
    if (qn->is_refered != 0)
      total = SIZE_OP_JUMP + tlen;
    else
      total = 0;
    return total;
  }

  if (qn->upper == 1 && qn->greedy) {
    if (qn->lower != 0) {
      total = tlen;
    }
    else if (CKN_ON) {
      total = SIZE_OP_STATE_CHECK_PUSH + tlen;
    }
    else {
      total = SIZE_OP_PUSH + tlen;
    }
    return total;
  }

  if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
    total = SIZE_OP_PUSH + check_bytes + SIZE_OP_JUMP + tlen;
    return total;
  }

  /* general counted repeat */
  total = SIZE_OP_REPEAT_INC
        + body_len + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
  if (CKN_ON)
    total += SIZE_OP_STATE_CHECK;

  return total;
}
/* Emit bytecode for the qualifier (repeat) node `qn` into `reg`
 * (state-check variant).
 *
 * `ckn` is the node's comb_exp_check_num when the pattern has any
 * combination-explosion checks, otherwise 0; CKN_ON tests it.  When CKN_ON
 * holds, the state-check opcodes are emitted in place of the plain ones and
 * are followed by the check number.
 *
 * Returns 0 on success, or the first non-zero value returned by a helper
 * (including a negative length from compile_length_tree()).
 */
static int
compile_qualifier_node(QualifierNode* qn, regex_t* reg)
{
  int r, mod_tlen;
  int ckn;   /* name is fixed: CKN_ON reads `ckn` */
  int infinite = IS_REPEAT_INFINITE(qn->upper);
  int empty_info = qn->target_empty_info;
  int tlen = compile_length_tree(qn->target, reg);

  if (tlen < 0) return tlen;

  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);

  if (is_anychar_star_qualifier(qn)) {
    /* mandatory leading copies of the target, then a single star opcode */
    r = compile_tree_n_times(qn->target, qn->lower, reg);
    if (r) return r;
    if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
      /* The peek-next form is used only without a state check, so no
         check number is emitted on this path. */
      if (IS_MULTILINE(reg->options))
        r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
      else
        r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
      if (r) return r;

      return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
    }
    else {
      if (IS_MULTILINE(reg->options)) {
        r = add_opcode(reg, (CKN_ON ?
                             OP_STATE_CHECK_ANYCHAR_ML_STAR
                             : OP_ANYCHAR_ML_STAR));
      }
      else {
        r = add_opcode(reg, (CKN_ON ?
                             OP_STATE_CHECK_ANYCHAR_STAR
                             : OP_ANYCHAR_STAR));
      }
      if (r) return r;
      if (CKN_ON)
        r = add_state_check_num(reg, ckn);

      return r;
    }
  }

  /* target length including the null-check ops, when they are needed */
  if (empty_info != 0)
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
  else
    mod_tlen = tlen;

  if (infinite && qn->lower <= 1) {
    if (qn->greedy) {
      if (qn->lower == 1) {
        /* jump over the push so the target is entered once first */
        r = add_opcode_rel_addr(reg, OP_JUMP,
                        (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
        if (r) return r;
      }

      if (CKN_ON) {
        r = add_opcode(reg, OP_STATE_CHECK_PUSH);
        if (r) return r;
        r = add_state_check_num(reg, ckn);
        if (r) return r;
        r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
      }
      else {
        r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
      }
      if (r) return r;
      r = compile_tree_empty_check(qn->target, reg, empty_info);
      if (r) return r;
      /* jump back to the push that precedes the target */
      r = add_opcode_rel_addr(reg, OP_JUMP,
              -(mod_tlen + (int )SIZE_OP_JUMP
                + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
    }
    else {
      if (qn->lower == 0) {
        /* skip the target on entry; the trailing push offers it later */
        r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
        if (r) return r;
      }
      r = compile_tree_empty_check(qn->target, reg, empty_info);
      if (r) return r;
      if (CKN_ON) {
        r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
        if (r) return r;
        r = add_state_check_num(reg, ckn);
        if (r) return r;
        r = add_rel_addr(reg,
                 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
      }
      else
        r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
    }
  }
  else if (qn->upper == 0) {
    if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
      /* the target is emitted but jumped over */
      r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
      if (r) return r;
      r = compile_tree(qn->target, reg);
    }
    else
      r = 0;
  }
  else if (qn->upper == 1 && qn->greedy) {
    if (qn->lower == 0) {
      if (CKN_ON) {
        r = add_opcode(reg, OP_STATE_CHECK_PUSH);
        if (r) return r;
        r = add_state_check_num(reg, ckn);
        if (r) return r;
        r = add_rel_addr(reg, tlen);
      }
      else {
        r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
      }
      if (r) return r;
    }

    r = compile_tree(qn->target, reg);
  }
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
    if (CKN_ON) {
      r = add_opcode(reg, OP_STATE_CHECK_PUSH);
      if (r) return r;
      r = add_state_check_num(reg, ckn);
      if (r) return r;
      r = add_rel_addr(reg, SIZE_OP_JUMP);
    }
    else {
      r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
    }

    if (r) return r;
    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
    if (r) return r;
    r = compile_tree(qn->target, reg);
  }
  else {
    /* general counted repeat, followed by a state check when enabled */
    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
    if (CKN_ON) {
      if (r) return r;
      r = add_opcode(reg, OP_STATE_CHECK);
      if (r) return r;
      r = add_state_check_num(reg, ckn);
    }
  }
  return r;
}
#else /* USE_COMBINATION_EXPLOSION_CHECK */
static int static int
compile_length_qualifier_node(QualifierNode* qn, regex_t* reg) compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
@ -751,16 +1009,6 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
return len; return len;
} }
static int
is_anychar_star_qualifier(QualifierNode* qn)
{
if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
NTYPE(qn->target) == N_ANYCHAR)
return 1;
else
return 0;
}
static int static int
compile_qualifier_node(QualifierNode* qn, regex_t* reg) compile_qualifier_node(QualifierNode* qn, regex_t* reg)
{ {
@ -887,6 +1135,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
} }
return r; return r;
} }
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
static int static int
compile_length_option_node(EffectNode* node, regex_t* reg) compile_length_option_node(EffectNode* node, regex_t* reg)
@ -1276,7 +1525,7 @@ compile_length_tree(Node* node, regex_t* reg)
else else
#endif #endif
if (br->back_num == 1) { if (br->back_num == 1) {
r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 3) r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
} }
else { else {
@ -1414,7 +1663,6 @@ compile_tree(Node* node, regex_t* reg)
switch (n) { switch (n) {
case 1: r = add_opcode(reg, OP_BACKREF1); break; case 1: r = add_opcode(reg, OP_BACKREF1); break;
case 2: r = add_opcode(reg, OP_BACKREF2); break; case 2: r = add_opcode(reg, OP_BACKREF2); break;
case 3: r = add_opcode(reg, OP_BACKREF3); break;
default: default:
r = add_opcode(reg, OP_BACKREFN); r = add_opcode(reg, OP_BACKREFN);
if (r) return r; if (r) return r;
@ -1435,7 +1683,9 @@ compile_tree(Node* node, regex_t* reg)
} }
if (r) return r; if (r) return r;
#ifdef USE_BACKREF_AT_LEVEL
add_bacref_mems: add_bacref_mems:
#endif
r = add_length(reg, br->back_num); r = add_length(reg, br->back_num);
if (r) return r; if (r) return r;
p = BACKREFS_P(br); p = BACKREFS_P(br);
@ -3040,6 +3290,146 @@ divide_ambig_string_node(Node* node, regex_t* reg)
return 0; return 0;
} }
#ifdef USE_COMBINATION_EXPLOSION_CHECK
#define CEC_THRES_NUM_BIG_REPEAT 512
#define CEC_INFINITE_NUM 0x7fffffff
#define CEC_IN_INFINITE_REPEAT (1<<0)
#define CEC_IN_FINITE_REPEAT (1<<1)
#define CEC_CONT_BIG_REPEAT (1<<2)
/* Walk the parse tree rooted at `node` and decide which qualifier nodes get
 * a combination-explosion state check.
 *
 * `state` is a set of CEC_* bits describing the repeats enclosing `node`.
 * For each N_QUALIFIER node the function writes qn->comb_exp_check_num:
 * -1 when the node sits inside a finite repeat, or a fresh number taken from
 * env->num_comb_exp_check when the enclosing state and the node's own repeat
 * range call for a check.  It may also lower qn->upper for a counted repeat
 * around a captured unbounded repeat (see the comment in the body), and it
 * records env->curr_max_regnum, env->comb_exp_max_regnum and
 * env->has_recursion along the way.
 *
 * Returns the CEC_* state bits accumulated for `node`.
 */
static int
setup_comb_exp_check(Node* node, int state, ScanEnv* env)
{
  int type;
  int r = state;

  type = NTYPE(node);
  switch (type) {
  case N_LIST:
    {
      /* each element receives the state returned by the previous one */
      do {
        r = setup_comb_exp_check(NCONS(node).left, r, env);
      } while (r >= 0 && IS_NOT_NULL(node = NCONS(node).right));
    }
    break;

  case N_ALT:
    {
      int ret;
      /* every branch starts from the incoming state; results are OR-ed */
      do {
        ret = setup_comb_exp_check(NCONS(node).left, state, env);
        r |= ret;
      } while (ret >= 0 && IS_NOT_NULL(node = NCONS(node).right));
    }
    break;

  case N_QUALIFIER:
    {
      int child_state = state;
      int add_state = 0;
      QualifierNode* qn = &(NQUALIFIER(node));
      Node* target = qn->target;
      int var_num;

      if (! IS_REPEAT_INFINITE(qn->upper)) {
        if (qn->upper > 1) {
          /* {0,1}, {1,1} are allowed */
          child_state |= CEC_IN_FINITE_REPEAT;

          /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
          if (env->backrefed_mem == 0) {
            if (NTYPE(qn->target) == N_EFFECT) {
              EffectNode* en = &(NEFFECT(qn->target));
              if (en->type == EFFECT_MEMORY) {
                if (NTYPE(en->target) == N_QUALIFIER) {
                  QualifierNode* q = &(NQUALIFIER(en->target));
                  if (IS_REPEAT_INFINITE(q->upper)
                      && q->greedy == qn->greedy) {
                    qn->upper = (qn->lower == 0 ? 1 : qn->lower);
                    if (qn->upper == 1)
                      child_state = state;
                  }
                }
              }
            }
          }
        }
      }

      if (state & CEC_IN_FINITE_REPEAT) {
        qn->comb_exp_check_num = -1;
      }
      else {
        if (IS_REPEAT_INFINITE(qn->upper)) {
          var_num = CEC_INFINITE_NUM;
          child_state |= CEC_IN_INFINITE_REPEAT;
        }
        else {
          var_num = qn->upper - qn->lower;
        }

        if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
          add_state |= CEC_CONT_BIG_REPEAT;

        if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
            ((state & CEC_CONT_BIG_REPEAT) != 0 &&
             var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
          /* number the node only once */
          if (qn->comb_exp_check_num == 0) {
            env->num_comb_exp_check++;
            qn->comb_exp_check_num = env->num_comb_exp_check;
            if (env->curr_max_regnum > env->comb_exp_max_regnum)
              env->comb_exp_max_regnum = env->curr_max_regnum;
          }
        }
      }

      r = setup_comb_exp_check(target, child_state, env);
      r |= add_state;
    }
    break;

  case N_EFFECT:
    {
      EffectNode* en = &(NEFFECT(node));

      switch (en->type) {
      case EFFECT_MEMORY:
        {
          /* track the highest capture group number seen so far */
          if (env->curr_max_regnum < en->regnum)
            env->curr_max_regnum = en->regnum;

          r = setup_comb_exp_check(en->target, state, env);
        }
        break;

      default:
        r = setup_comb_exp_check(en->target, state, env);
        break;
      }
    }
    break;

#ifdef USE_SUBEXP_CALL
  case N_CALL:
    /* a recursive call is only flagged; its target is not descended into */
    if (IS_CALL_RECURSION(&(NCALL(node))))
      env->has_recursion = 1;
    else
      r = setup_comb_exp_check(NCALL(node).target, state, env);
    break;
#endif

  default:
    break;
  }

  return r;
}
#endif
#define IN_ALT (1<<0) #define IN_ALT (1<<0)
#define IN_NOT (1<<1) #define IN_NOT (1<<1)
#define IN_REPEAT (1<<2) #define IN_REPEAT (1<<2)
@ -4746,6 +5136,9 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->num_null_check = 0; reg->num_null_check = 0;
reg->repeat_range_alloc = 0; reg->repeat_range_alloc = 0;
reg->repeat_range = (OnigRepeatRange* )NULL; reg->repeat_range = (OnigRepeatRange* )NULL;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
reg->num_comb_exp_check = 0;
#endif
r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
if (r != 0) goto err; if (r != 0) goto err;
@ -4799,6 +5192,33 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->bt_mem_end |= reg->capture_history; reg->bt_mem_end |= reg->capture_history;
} }
#ifdef USE_COMBINATION_EXPLOSION_CHECK
if (scan_env.backrefed_mem == 0
#ifdef USE_SUBEXP_CALL
|| scan_env.num_call == 0
#endif
) {
setup_comb_exp_check(root, 0, &scan_env);
#ifdef USE_SUBEXP_CALL
if (scan_env.has_recursion != 0) {
scan_env.num_comb_exp_check = 0;
}
else
#endif
if (scan_env.comb_exp_max_regnum > 0) {
int i;
for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
scan_env.num_comb_exp_check = 0;
break;
}
}
}
}
reg->num_comb_exp_check = scan_env.num_comb_exp_check;
#endif
clear_optimize_info(reg); clear_optimize_info(reg);
#ifndef ONIG_DONT_OPTIMIZE #ifndef ONIG_DONT_OPTIMIZE
r = set_optimize_info_from_tree(root, reg, &scan_env); r = set_optimize_info_from_tree(root, reg, &scan_env);
@ -5008,6 +5428,16 @@ onig_end()
#ifdef ONIG_DEBUG #ifdef ONIG_DEBUG
/* arguments type: operand kind stored as the third field of each
   OnigOpInfo[] entry and examined by onig_print_compiled_byte_code(). */
#define ARG_SPECIAL -1 /* NOTE(review): appears to mark opcodes whose operands are printed by opcode-specific code - confirm */
#define ARG_NON 0
#define ARG_RELADDR 1
#define ARG_ABSADDR 2
#define ARG_LENGTH 3
#define ARG_MEMNUM 4
#define ARG_OPTION 5
#define ARG_STATE_CHECK 6 /* operand is one StateCheckNumType value */
OnigOpInfoType OnigOpInfo[] = { OnigOpInfoType OnigOpInfo[] = {
{ OP_FINISH, "finish", ARG_NON }, { OP_FINISH, "finish", ARG_NON },
{ OP_END, "end", ARG_NON }, { OP_END, "end", ARG_NON },
@ -5054,7 +5484,6 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_BEGIN_POSITION, "begin-position", ARG_NON }, { OP_BEGIN_POSITION, "begin-position", ARG_NON },
{ OP_BACKREF1, "backref1", ARG_NON }, { OP_BACKREF1, "backref1", ARG_NON },
{ OP_BACKREF2, "backref2", ARG_NON }, { OP_BACKREF2, "backref2", ARG_NON },
{ OP_BACKREF3, "backref3", ARG_NON },
{ OP_BACKREFN, "backrefn", ARG_MEMNUM }, { OP_BACKREFN, "backrefn", ARG_MEMNUM },
{ OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
{ OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
@ -5095,6 +5524,12 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
{ OP_CALL, "call", ARG_ABSADDR }, { OP_CALL, "call", ARG_ABSADDR },
{ OP_RETURN, "return", ARG_NON }, { OP_RETURN, "return", ARG_NON },
{ OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
{ OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
{ OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
{ OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
{ OP_STATE_CHECK_ANYCHAR_ML_STAR,
"state-check-anychar-ml*", ARG_STATE_CHECK },
{ -1, "", ARG_NON } { -1, "", ARG_NON }
}; };
@ -5153,6 +5588,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
RelAddrType addr; RelAddrType addr;
LengthType len; LengthType len;
MemNumType mem; MemNumType mem;
StateCheckNumType scn;
OnigCodePoint code; OnigCodePoint code;
UChar *q; UChar *q;
@ -5187,6 +5623,12 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
fprintf(f, ":%d", option); fprintf(f, ":%d", option);
} }
break; break;
case ARG_STATE_CHECK:
scn = *((StateCheckNumType* )bp);
bp += SIZE_STATE_CHECK_NUM;
fprintf(f, ":%d", scn);
break;
} }
} }
else { else {
@ -5364,6 +5806,15 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
fprintf(f, ":%d:(%d)", len, addr); fprintf(f, ":%d:(%d)", len, addr);
break; break;
case OP_STATE_CHECK_PUSH:
case OP_STATE_CHECK_PUSH_OR_JUMP:
scn = *((StateCheckNumType* )bp);
bp += SIZE_STATE_CHECK_NUM;
addr = *((RelAddrType* )bp);
bp += SIZE_RELADDR;
fprintf(f, ":%d:(%d)", scn, addr);
break;
default: default:
fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
*--bp); *--bp);

249
regexec.c
Просмотреть файл

@ -306,6 +306,9 @@ typedef struct _StackType {
UChar *pcode; /* byte code position */ UChar *pcode; /* byte code position */
UChar *pstr; /* string position */ UChar *pstr; /* string position */
UChar *pstr_prev; /* previous char position of pstr */ UChar *pstr_prev; /* previous char position of pstr */
#ifdef USE_COMBINATION_EXPLOSION_CHECK
unsigned int state_check;
#endif
} state; } state;
struct { struct {
int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
@ -339,29 +342,28 @@ typedef struct _StackType {
/* stack type */ /* stack type */
/* used by normal-POP */ /* used by normal-POP */
#define STK_ALT 0x0001 #define STK_ALT 0x0001
#define STK_LOOK_BEHIND_NOT 0x0003 #define STK_LOOK_BEHIND_NOT 0x0002
#define STK_POS_NOT 0x0005 #define STK_POS_NOT 0x0003
/* avoided by normal-POP, but value should be small */
#define STK_NULL_CHECK_START 0x0100
/* handled by normal-POP */ /* handled by normal-POP */
#define STK_MEM_START 0x0200 #define STK_MEM_START 0x0100
#define STK_MEM_END 0x0300 #define STK_MEM_END 0x8200
#define STK_REPEAT_INC 0x0400 #define STK_REPEAT_INC 0x0300
#define STK_STATE_CHECK_MARK 0x1000
/* avoided by normal-POP */ /* avoided by normal-POP */
#define STK_NULL_CHECK_START 0x3000
#define STK_NULL_CHECK_END 0x5000 /* for recursive call */
#define STK_MEM_END_MARK 0x8400
#define STK_POS 0x0500 /* used when POP-POS */ #define STK_POS 0x0500 /* used when POP-POS */
#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
#define STK_REPEAT 0x0700 #define STK_REPEAT 0x0700
#define STK_CALL_FRAME 0x0800 #define STK_CALL_FRAME 0x0800
#define STK_RETURN 0x0900 #define STK_RETURN 0x0900
#define STK_MEM_END_MARK 0x0a00 #define STK_VOID 0x0a00 /* for fill a blank */
#define STK_VOID 0x0b00 /* for fill a blank */
#define STK_NULL_CHECK_END 0x0c00 /* for recursive call */
/* stack type check mask */ /* stack type check mask */
#define STK_MASK_POP_USED 0x00ff #define STK_MASK_POP_USED 0x00ff
#define IS_TO_VOID_TARGET(stk) \ #define STK_MASK_TO_VOID_TARGET 0x10ff
(((stk)->type & STK_MASK_POP_USED) || \ #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
(stk)->type == STK_NULL_CHECK_START || (stk)->type == STK_NULL_CHECK_END)
typedef struct { typedef struct {
void* stack_p; void* stack_p;
@ -369,6 +371,10 @@ typedef struct {
OnigOptionType options; OnigOptionType options;
OnigRegion* region; OnigRegion* region;
const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
#ifdef USE_COMBINATION_EXPLOSION_CHECK
void* state_check_buff;
int state_check_buff_size;
#endif
} MatchArg; } MatchArg;
#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
@ -378,7 +384,36 @@ typedef struct {
(msa).start = (arg_start);\ (msa).start = (arg_start);\
} while (0) } while (0)
#ifdef USE_COMBINATION_EXPLOSION_CHECK
#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
/* Set up the state-check bit buffer of `msa` for one match/search.
 *
 * msa       : MatchArg lvalue; state_check_buff and state_check_buff_size
 *             are always written, so MATCH_ARG_FREE never reads an
 *             uninitialised size.
 * str_len   : length of the subject string.
 * state_num : number of state checks in the pattern.
 *
 * A buffer of ((str_len + 1) * state_num) bits, rounded up to whole bytes,
 * is obtained only when state_num > 0, str_len reaches
 * STATE_CHECK_STRING_THRESHOLD_LEN and the byte size lies in
 * (0, STATE_CHECK_BUFF_MAX_SIZE).  Sizes of at least
 * STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE come from xmalloc, smaller ones
 * from xalloca.  The buffer is zero-filled.  In every other case, including
 * an allocator that returns a null pointer, the buffer pointer is null and
 * the recorded size is 0.
 */
#define STATE_CHECK_BUFF_INIT(msa, str_len, state_num) do { \
  (msa).state_check_buff = (void* )0;\
  (msa).state_check_buff_size = 0;\
  if ((state_num) > 0 && (str_len) >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
    int size = ((int )((str_len) + 1) * (state_num) + 7) / 8;\
    if (size > 0 && size < STATE_CHECK_BUFF_MAX_SIZE) {\
      if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
        (msa).state_check_buff = (void* )xmalloc(size);\
      else \
        (msa).state_check_buff = (void* )xalloca(size);\
      if ((msa).state_check_buff != (void* )0) {\
        xmemset((msa).state_check_buff, 0, (size_t )size);\
        (msa).state_check_buff_size = size;\
      }\
    }\
  }\
} while (0)
/* Release the resources held by `msa` after a match/search.
 *
 * Frees msa.stack_p when it is non-null.  The state-check buffer is freed
 * only when it is non-null and its recorded size is at least
 * STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE, since smaller buffers are obtained
 * with xalloca in STATE_CHECK_BUFF_INIT.  The pointer is tested first so
 * the size field is not read when no buffer was set up.
 *
 * The definition deliberately has no trailing semicolon, so that
 * `if (c) MATCH_ARG_FREE(msa); else ...` parses as a single statement.
 */
#define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
  if ((msa).state_check_buff != (void* )0 && \
      (msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
    xfree((msa).state_check_buff);\
  }\
} while (0)
#else
#define STATE_CHECK_BUFF_INIT(msa, str_len, state_num)
#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
#endif
#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
@ -472,6 +507,73 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
#define STACK_AT(index) (stk_base + (index)) #define STACK_AT(index) (stk_base + (index))
#define GET_STACK_INDEX(stk) ((stk) - stk_base) #define GET_STACK_INDEX(stk) ((stk) - stk_base)
/* Push a stack entry that carries only a type tag: reserve one slot with
   STACK_ENSURE(1), store `stack_type` in stk->type and advance with
   STACK_INC.  No other field of the entry is written.  Uses `stk` from the
   enclosing scope. */
#define STACK_PUSH_TYPE(stack_type) do {\
STACK_ENSURE(1);\
stk->type = (stack_type);\
STACK_INC;\
} while(0)
/* Non-zero when the type of entry `stk` has any bit of
   STK_MASK_TO_VOID_TARGET set. */
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
#ifdef USE_COMBINATION_EXPLOSION_CHECK
/* Bit index in the state-check buffer for string position `s` and check
   number `snum`: the offset of `s` from `str` times num_comb_exp_check,
   plus (snum - 1).  Reads `str` and `num_comb_exp_check` from the enclosing
   scope. */
#define STATE_CHECK_POS(s,snum) \
(((s) - str) * num_comb_exp_check + ((snum) - 1))
/* Store in `v` the state-check bit for check number `snum` at the current
   string position, or 0 when state_check_buff is NULL.  The position is the
   variable `s` of the enclosing scope, not a macro argument.  `v` receives
   the masked byte value, so a set bit yields a non-zero value that is not
   necessarily 1. */
#define STATE_CHECK_VAL(v,snum) do {\
if (state_check_buff != NULL) {\
int x = STATE_CHECK_POS(s,snum);\
(v) = state_check_buff[x/8] & (1<<(x%8));\
}\
else (v) = 0;\
} while(0)
/* Expands to an `else if` arm, so it must directly follow an `if` statement.
   When entry `stk` has type STK_STATE_CHECK_MARK, the bit for its saved
   string position (u.state.pstr) and check number (u.state.state_check) is
   set in state_check_buff.  The buffer is not tested for NULL here. */
#define ELSE_IF_STATE_CHECK_MARK(stk) \
else if ((stk)->type == STK_STATE_CHECK_MARK) { \
int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
state_check_buff[x/8] |= (1<<(x%8)); \
}
/* Push a state entry of type `stack_type` (state-check build): reserve one
   slot, record the bytecode position `pat`, the string position `s` and the
   previous-character position `sprev`, and set state_check to 0 so the
   entry carries no check number. */
#define STACK_PUSH(stack_type,pat,s,sprev) do {\
STACK_ENSURE(1);\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
stk->u.state.pstr = (s);\
stk->u.state.pstr_prev = (sprev);\
stk->u.state.state_check = 0;\
STACK_INC;\
} while(0)
/* Like STACK_PUSH but without the STACK_ENSURE call, so the caller must
   already have reserved the slot.  Only type, pcode and state_check (set
   to 0) are written; pstr and pstr_prev are left untouched. */
#define STACK_PUSH_ENSURED(stack_type,pat) do {\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
stk->u.state.state_check = 0;\
STACK_INC;\
} while(0)
/* Push a STK_ALT entry that also remembers check number `snum`.  The number
   is stored only when state_check_buff is non-NULL; otherwise state_check
   is 0, the same value a plain STACK_PUSH stores. */
#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
STACK_ENSURE(1);\
stk->type = STK_ALT;\
stk->u.state.pcode = (pat);\
stk->u.state.pstr = (s);\
stk->u.state.pstr_prev = (sprev);\
stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
STACK_INC;\
} while(0)
/* Push a STK_STATE_CHECK_MARK entry holding string position `s` and check
   number `snum`.  Nothing is pushed when state_check_buff is NULL.  The
   pcode and pstr_prev fields of the entry are not written. */
#define STACK_PUSH_STATE_CHECK(s,snum) do {\
if (state_check_buff != NULL) {\
STACK_ENSURE(1);\
stk->type = STK_STATE_CHECK_MARK;\
stk->u.state.pstr = (s);\
stk->u.state.state_check = (snum);\
STACK_INC;\
}\
} while(0)
#else /* USE_COMBINATION_EXPLOSION_CHECK */
#define ELSE_IF_STATE_CHECK_MARK(stk)
#define STACK_PUSH(stack_type,pat,s,sprev) do {\ #define STACK_PUSH(stack_type,pat,s,sprev) do {\
STACK_ENSURE(1);\ STACK_ENSURE(1);\
stk->type = (stack_type);\ stk->type = (stack_type);\
@ -486,12 +588,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
stk->u.state.pcode = (pat);\ stk->u.state.pcode = (pat);\
STACK_INC;\ STACK_INC;\
} while(0) } while(0)
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
#define STACK_PUSH_TYPE(stack_type) do {\
STACK_ENSURE(1);\
stk->type = (stack_type);\
STACK_INC;\
} while(0)
#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev) #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
@ -551,7 +648,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
k = stk;\ k = stk;\
while (k > stk_base) {\ while (k > stk_base) {\
k--;\ k--;\
if ((k->type == STK_MEM_END_MARK || k->type == STK_MEM_END) \ if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
&& k->u.mem.num == (mnum)) {\ && k->u.mem.num == (mnum)) {\
level++;\ level++;\
}\ }\
@ -631,6 +728,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
stk--;\ stk--;\
STACK_BASE_CHECK(stk, "STACK_POP"); \ STACK_BASE_CHECK(stk, "STACK_POP"); \
if ((stk->type & STK_MASK_POP_USED) != 0) break;\ if ((stk->type & STK_MASK_POP_USED) != 0) break;\
ELSE_IF_STATE_CHECK_MARK(stk);\
}\ }\
break;\ break;\
case STACK_POP_LEVEL_MEM_START:\ case STACK_POP_LEVEL_MEM_START:\
@ -642,6 +740,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
}\ }\
ELSE_IF_STATE_CHECK_MARK(stk);\
}\ }\
break;\ break;\
default:\ default:\
@ -660,6 +759,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
}\ }\
ELSE_IF_STATE_CHECK_MARK(stk);\
}\ }\
break;\ break;\
}\ }\
@ -681,6 +781,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
}\ }\
ELSE_IF_STATE_CHECK_MARK(stk);\
}\ }\
} while(0) } while(0)
@ -700,6 +801,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
}\ }\
ELSE_IF_STATE_CHECK_MARK(stk);\
}\ }\
} while(0) } while(0)
@ -947,6 +1049,7 @@ static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
is_fail = 0; \ is_fail = 0; \
} while(0) } while(0)
#define ON_STR_BEGIN(s) ((s) == str) #define ON_STR_BEGIN(s) ((s) == str)
#define ON_STR_END(s) ((s) == end) #define ON_STR_END(s) ((s) == end)
#define IS_EMPTY_STR (str == end) #define IS_EMPTY_STR (str == end)
@ -1314,6 +1417,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
StackIndex si; StackIndex si;
StackIndex *repeat_stk; StackIndex *repeat_stk;
StackIndex *mem_start_stk, *mem_end_stk; StackIndex *mem_start_stk, *mem_end_stk;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int scv;
unsigned char* state_check_buff = msa->state_check_buff;
int num_comb_exp_check = reg->num_comb_exp_check;
#endif
n = reg->num_repeat + reg->num_mem * 2; n = reg->num_repeat + reg->num_mem * 2;
STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
@ -1924,6 +2032,47 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
STAT_OP_OUT; STAT_OP_OUT;
break; break;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
case OP_STATE_CHECK_ANYCHAR_STAR: STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
GET_STATE_CHECK_NUM_INC(mem, p);
while (s < end) {
STATE_CHECK_VAL(scv, mem);
if (scv) goto fail;
STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
n = enc_len(encode, s);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
sprev = s;
s += n;
}
STAT_OP_OUT;
break;
case OP_STATE_CHECK_ANYCHAR_ML_STAR:
STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
GET_STATE_CHECK_NUM_INC(mem, p);
while (s < end) {
STATE_CHECK_VAL(scv, mem);
if (scv) goto fail;
STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
n = enc_len(encode, s);
if (n > 1) {
DATA_ENSURE(n);
sprev = s;
s += n;
}
else {
sprev = s;
s++;
}
}
STAT_OP_OUT;
break;
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
case OP_WORD: STAT_OP_IN(OP_WORD); case OP_WORD: STAT_OP_IN(OP_WORD);
DATA_ENSURE(1); DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_WORD(encode, s, end)) if (! ONIGENC_IS_MBC_WORD(encode, s, end))
@ -2154,11 +2303,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
goto backref; goto backref;
break; break;
case OP_BACKREF3: STAT_OP_IN(OP_BACKREF3);
mem = 3;
goto backref;
break;
case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN); case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN);
GET_MEMNUM_INC(mem, p); GET_MEMNUM_INC(mem, p);
backref: backref:
@ -2451,6 +2595,43 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
continue; continue;
break; break;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
case OP_STATE_CHECK_PUSH: STAT_OP_IN(OP_STATE_CHECK_PUSH);
GET_STATE_CHECK_NUM_INC(mem, p);
STATE_CHECK_VAL(scv, mem);
if (scv) goto fail;
GET_RELADDR_INC(addr, p);
STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
STAT_OP_OUT;
continue;
break;
case OP_STATE_CHECK_PUSH_OR_JUMP: STAT_OP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
GET_STATE_CHECK_NUM_INC(mem, p);
GET_RELADDR_INC(addr, p);
STATE_CHECK_VAL(scv, mem);
if (scv) {
p += addr;
}
else {
STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
}
STAT_OP_OUT;
continue;
break;
case OP_STATE_CHECK: STAT_OP_IN(OP_STATE_CHECK);
GET_STATE_CHECK_NUM_INC(mem, p);
STATE_CHECK_VAL(scv, mem);
if (scv) goto fail;
STACK_PUSH_STATE_CHECK(s, mem);
STAT_OP_OUT;
continue;
break;
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
case OP_POP: STAT_OP_IN(OP_POP); case OP_POP: STAT_OP_IN(OP_POP);
STACK_POP_ONE; STACK_POP_ONE;
STAT_OP_OUT; STAT_OP_OUT;
@ -2525,7 +2706,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
repeat_inc: repeat_inc:
stkp->u.repeat.count++; stkp->u.repeat.count++;
if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
/* end of repeat. Nothing to do. */ /* end of repeat. Nothing to do. */
} }
else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
@ -2555,8 +2736,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
repeat_inc_ng: repeat_inc_ng:
stkp->u.repeat.count++; stkp->u.repeat.count++;
if (stkp->u.repeat.count < reg->repeat_range[mem].upper || if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
IS_REPEAT_INFINITE(reg->repeat_range[mem].upper)) {
if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
UChar* pcode = stkp->u.repeat.pcode; UChar* pcode = stkp->u.repeat.pcode;
@ -2685,6 +2865,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
p = stk->u.state.pcode; p = stk->u.state.pcode;
s = stk->u.state.pstr; s = stk->u.state.pstr;
sprev = stk->u.state.pstr_prev; sprev = stk->u.state.pstr_prev;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
if (stk->u.state.state_check != 0) {
stk->type = STK_STATE_CHECK_MARK;
stk++;
}
#endif
STAT_OP_OUT; STAT_OP_OUT;
continue; continue;
break; break;
@ -3073,6 +3261,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
MATCH_ARG_INIT(msa, option, region, at); MATCH_ARG_INIT(msa, option, region, at);
STATE_CHECK_BUFF_INIT(msa, end - str, reg->num_comb_exp_check);
if (region if (region
#ifdef USE_POSIX_REGION_OPTION #ifdef USE_POSIX_REGION_OPTION
@ -3475,6 +3664,9 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
prev = (UChar* )NULL; prev = (UChar* )NULL;
MATCH_ARG_INIT(msa, option, region, start); MATCH_ARG_INIT(msa, option, region, start);
#ifdef USE_COMBINATION_EXPLOSION_CHECK
msa.state_check_buff = (void* )0;
#endif
MATCH_AND_RETURN_CHECK; MATCH_AND_RETURN_CHECK;
goto mismatch; goto mismatch;
} }
@ -3487,6 +3679,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
#endif #endif
MATCH_ARG_INIT(msa, option, region, orig_start); MATCH_ARG_INIT(msa, option, region, orig_start);
STATE_CHECK_BUFF_INIT(msa, end - str, reg->num_comb_exp_check);
s = (UChar* )start; s = (UChar* )start;
if (range > start) { /* forward search */ if (range > start) { /* forward search */

Просмотреть файл

@ -59,7 +59,6 @@
/* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */ /* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */
#define USE_NAMED_GROUP #define USE_NAMED_GROUP
#define USE_SUBEXP_CALL #define USE_SUBEXP_CALL
/* #define USE_BACKREF_AT_LEVEL */
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */ #define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
@ -87,6 +86,7 @@
#define USE_VARIABLE_META_CHARS #define USE_VARIABLE_META_CHARS
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */ #define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */ #define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
/* #define USE_MULTI_THREAD_SYSTEM */ /* #define USE_MULTI_THREAD_SYSTEM */
#define THREAD_ATOMIC_START /* depend on thread system */ #define THREAD_ATOMIC_START /* depend on thread system */
#define THREAD_ATOMIC_END /* depend on thread system */ #define THREAD_ATOMIC_END /* depend on thread system */
@ -101,7 +101,9 @@
#include "version.h" #include "version.h"
#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */ #include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */
#define USE_COMBINATION_EXPLOSION_CHECK /* (X*)* */
#define USE_MULTI_THREAD_SYSTEM #define USE_MULTI_THREAD_SYSTEM
#define THREAD_ATOMIC_START DEFER_INTS #define THREAD_ATOMIC_START DEFER_INTS
#define THREAD_ATOMIC_END ENABLE_INTS #define THREAD_ATOMIC_END ENABLE_INTS
#define THREAD_PASS rb_thread_schedule() #define THREAD_PASS rb_thread_schedule()
@ -118,6 +120,9 @@
#endif /* else NOT_RUBY */ #endif /* else NOT_RUBY */
#define STATE_CHECK_STRING_THRESHOLD_LEN 7
#define STATE_CHECK_BUFF_MAX_SIZE 0x08000000
#define THREAD_PASS_LIMIT_COUNT 8 #define THREAD_PASS_LIMIT_COUNT 8
#define xmemset memset #define xmemset memset
#define xmemcpy memcpy #define xmemcpy memcpy
@ -595,7 +600,6 @@ enum OpCode {
OP_BACKREF1, OP_BACKREF1,
OP_BACKREF2, OP_BACKREF2,
OP_BACKREF3,
OP_BACKREFN, OP_BACKREFN,
OP_BACKREFN_IC, OP_BACKREFN_IC,
OP_BACKREF_MULTI, OP_BACKREF_MULTI,
@ -640,23 +644,21 @@ enum OpCode {
OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */ OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
OP_CALL, /* \g<name> */ OP_CALL, /* \g<name> */
OP_RETURN OP_RETURN,
};
/* arguments type */ OP_STATE_CHECK_PUSH, /* combination explosion check and push */
#define ARG_SPECIAL -1 OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
#define ARG_NON 0 OP_STATE_CHECK, /* check only */
#define ARG_RELADDR 1 OP_STATE_CHECK_ANYCHAR_STAR,
#define ARG_ABSADDR 2 OP_STATE_CHECK_ANYCHAR_ML_STAR
#define ARG_LENGTH 3 };
#define ARG_MEMNUM 4
#define ARG_OPTION 5
typedef int RelAddrType; typedef int RelAddrType;
typedef int AbsAddrType; typedef int AbsAddrType;
typedef int LengthType; typedef int LengthType;
typedef int RepeatNumType; typedef int RepeatNumType;
typedef short int MemNumType; typedef short int MemNumType;
typedef short int StateCheckNumType;
typedef void* PointerType; typedef void* PointerType;
#define SIZE_OPCODE 1 #define SIZE_OPCODE 1
@ -664,6 +666,7 @@ typedef void* PointerType;
#define SIZE_ABSADDR sizeof(AbsAddrType) #define SIZE_ABSADDR sizeof(AbsAddrType)
#define SIZE_LENGTH sizeof(LengthType) #define SIZE_LENGTH sizeof(LengthType)
#define SIZE_MEMNUM sizeof(MemNumType) #define SIZE_MEMNUM sizeof(MemNumType)
#define SIZE_STATE_CHECK_NUM sizeof(StateCheckNumType)
#define SIZE_REPEATNUM sizeof(RepeatNumType) #define SIZE_REPEATNUM sizeof(RepeatNumType)
#define SIZE_OPTION sizeof(OnigOptionType) #define SIZE_OPTION sizeof(OnigOptionType)
#define SIZE_CODE_POINT sizeof(OnigCodePoint) #define SIZE_CODE_POINT sizeof(OnigCodePoint)
@ -693,6 +696,7 @@ typedef void* PointerType;
#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType) #define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType) #define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType) #define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType)
/* code point's address must be aligned address. */ /* code point's address must be aligned address. */
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p)) #define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
@ -735,6 +739,12 @@ typedef void* PointerType;
#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR) #define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
#define SIZE_OP_RETURN SIZE_OPCODE #define SIZE_OP_RETURN SIZE_OPCODE
#ifdef USE_COMBINATION_EXPLOSION_CHECK
#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
#endif
#define MC_ESC(enc) (enc)->meta_char_table.esc #define MC_ESC(enc) (enc)->meta_char_table.esc
#define MC_ANYCHAR(enc) (enc)->meta_char_table.anychar #define MC_ANYCHAR(enc) (enc)->meta_char_table.anychar

Просмотреть файл

@ -940,6 +940,13 @@ scan_env_clear(ScanEnv* env)
for (i = 0; i < SCANENV_MEMNODES_SIZE; i++) for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
env->mem_nodes_static[i] = NULL_NODE; env->mem_nodes_static[i] = NULL_NODE;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
env->num_comb_exp_check = 0;
env->comb_exp_max_regnum = 0;
env->curr_max_regnum = 0;
env->has_recursion = 0;
#endif
} }
static int static int
@ -1321,11 +1328,17 @@ node_new_qualifier(int lower, int upper, int by_number)
NQUALIFIER(node).lower = lower; NQUALIFIER(node).lower = lower;
NQUALIFIER(node).upper = upper; NQUALIFIER(node).upper = upper;
NQUALIFIER(node).greedy = 1; NQUALIFIER(node).greedy = 1;
NQUALIFIER(node).by_number = by_number;
NQUALIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY; NQUALIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
NQUALIFIER(node).head_exact = NULL_NODE; NQUALIFIER(node).head_exact = NULL_NODE;
NQUALIFIER(node).next_head_exact = NULL_NODE; NQUALIFIER(node).next_head_exact = NULL_NODE;
NQUALIFIER(node).is_refered = 0; NQUALIFIER(node).is_refered = 0;
if (by_number != 0)
NQUALIFIER(node).state |= NST_BY_NUMBER;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
NQUALIFIER(node).comb_exp_check_num = 0;
#endif
return node; return node;
} }
@ -2140,7 +2153,7 @@ enum ReduceType {
RQ_AQ, /* to '*?' */ RQ_AQ, /* to '*?' */
RQ_QQ, /* to '??' */ RQ_QQ, /* to '??' */
RQ_P_QQ, /* to '+)??' */ RQ_P_QQ, /* to '+)??' */
RQ_PQ_Q, /* to '+?)?' */ RQ_PQ_Q /* to '+?)?' */
}; };
static enum ReduceType ReduceTypeTable[6][6] = { static enum ReduceType ReduceTypeTable[6][6] = {
@ -4633,16 +4646,14 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
{ /* check redundant double repeat. */ { /* check redundant double repeat. */
/* verbose warn (?:.?)? etc... but not warn (.?)? etc... */ /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
QualifierNode* qnt = &(NQUALIFIER(target)); QualifierNode* qnt = &(NQUALIFIER(target));
int nestq_num = popular_qualifier_num(qn);
int targetq_num = popular_qualifier_num(qnt);
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
if (qn->by_number == 0 && qnt->by_number == 0 && if (!IS_QUALIFIER_BY_NUMBER(qn) && !IS_QUALIFIER_BY_NUMBER(qnt) &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
int nestq_num, targetq_num;
UChar buf[WARN_BUFSIZE]; UChar buf[WARN_BUFSIZE];
nestq_num = popular_qualifier_num(qn);
targetq_num = popular_qualifier_num(qnt);
switch(ReduceTypeTable[targetq_num][nestq_num]) { switch(ReduceTypeTable[targetq_num][nestq_num]) {
case RQ_ASIS: case RQ_ASIS:
break; break;
@ -4673,10 +4684,18 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
warn_exit: warn_exit:
#endif #endif
if (popular_qualifier_num(qnt) >= 0 && popular_qualifier_num(qn) >= 0) { if (targetq_num >= 0) {
if (nestq_num >= 0) {
onig_reduce_nested_qualifier(qnode, target); onig_reduce_nested_qualifier(qnode, target);
goto q_exit; goto q_exit;
} }
else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
/* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
qn->upper = (qn->lower == 0 ? 1 : qn->lower);
}
}
}
} }
break; break;

Просмотреть файл

@ -124,11 +124,13 @@ typedef struct {
int lower; int lower;
int upper; int upper;
int greedy; int greedy;
int by_number; /* {n,m} */
int target_empty_info; int target_empty_info;
struct _Node* head_exact; struct _Node* head_exact;
struct _Node* next_head_exact; struct _Node* next_head_exact;
int is_refered; /* include called node. don't eliminate even if {0} */ int is_refered; /* include called node. don't eliminate even if {0} */
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
#endif
} QualifierNode; } QualifierNode;
/* status bits */ /* status bits */
@ -146,6 +148,7 @@ typedef struct {
#define NST_NAME_REF (1<<11) #define NST_NAME_REF (1<<11)
#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */ #define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
#define NST_NEST_LEVEL (1<<13) #define NST_NEST_LEVEL (1<<13)
#define NST_BY_NUMBER (1<<14) /* {n,m} */
#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f) #define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f)
#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f) #define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f)
@ -168,6 +171,7 @@ typedef struct {
#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0) #define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0) #define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
#define IS_QUALIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) #define IS_QUALIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
#define IS_QUALIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
typedef struct { typedef struct {
int state; int state;
@ -277,6 +281,12 @@ typedef struct {
int mem_alloc; int mem_alloc;
Node* mem_nodes_static[SCANENV_MEMNODES_SIZE]; Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
Node** mem_nodes_dynamic; Node** mem_nodes_dynamic;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int num_comb_exp_check;
int comb_exp_max_regnum;
int curr_max_regnum;
int has_recursion;
#endif
} ScanEnv; } ScanEnv;

Просмотреть файл

@ -1,14 +1,14 @@
#define RUBY_VERSION "1.9.0" #define RUBY_VERSION "1.9.0"
#define RUBY_RELEASE_DATE "2006-08-26" #define RUBY_RELEASE_DATE "2006-08-27"
#define RUBY_VERSION_CODE 190 #define RUBY_VERSION_CODE 190
#define RUBY_RELEASE_CODE 20060826 #define RUBY_RELEASE_CODE 20060827
#define RUBY_VERSION_MAJOR 1 #define RUBY_VERSION_MAJOR 1
#define RUBY_VERSION_MINOR 9 #define RUBY_VERSION_MINOR 9
#define RUBY_VERSION_TEENY 0 #define RUBY_VERSION_TEENY 0
#define RUBY_RELEASE_YEAR 2006 #define RUBY_RELEASE_YEAR 2006
#define RUBY_RELEASE_MONTH 8 #define RUBY_RELEASE_MONTH 8
#define RUBY_RELEASE_DAY 26 #define RUBY_RELEASE_DAY 27
RUBY_EXTERN const char ruby_version[]; RUBY_EXTERN const char ruby_version[];
RUBY_EXTERN const char ruby_release_date[]; RUBY_EXTERN const char ruby_release_date[];