From b82a05989e388a5d9500051ce91fd47f02aacbf9 Mon Sep 17 00:00:00 2001 From: akr Date: Fri, 21 Dec 2007 16:39:36 +0000 Subject: [PATCH] * re.c (ARG_ENCODING_NONE): defined for /.../n option. (REG_ENCODING_NONE): ditto. (rb_char_to_option_kcode): return ARG_ENCODING_NONE for n. (rb_reg_prepare_re): warn /ascii/n =~ "non-ascii". (rb_reg_initialize): set REG_ENCODING_NONE from ARG_ENCODING_NONE. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14438 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- .gdbinit | 3 +++ ChangeLog | 8 ++++++++ re.c | 27 +++++++++++++++++++++------ 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/.gdbinit b/.gdbinit index e5ebb03c16..dee9fd87e8 100644 --- a/.gdbinit +++ b/.gdbinit @@ -91,6 +91,9 @@ define rp output ((struct RRegexp*)$arg0)->str set print address on printf " len:%d ", ((struct RRegexp*)$arg0)->len + if $flags & RUBY_FL_USER6 + printf "(none) " + end if $flags & RUBY_FL_USER5 printf "(literal) " end diff --git a/ChangeLog b/ChangeLog index 1e6e991c0e..ff81be4fac 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +Sat Dec 22 01:35:41 2007 Tanaka Akira + + * re.c (ARG_ENCODING_NONE): defined for /.../n option. + (REG_ENCODING_NONE): ditto. + (rb_char_to_option_kcode): return ARG_ENCODING_NONE for n. + (rb_reg_prepare_re): warn /ascii/n =~ "non-ascii". + (rb_reg_initialize): set REG_ENCODING_NONE from ARG_ENCODING_NONE. + Sat Dec 22 01:23:10 2007 Shugo Maeda * test/json/test_json_addition.rb (test_core): do not use Time.now diff --git a/re.c b/re.c index 16c9b55d44..e6b2e50ac6 100644 --- a/re.c +++ b/re.c @@ -133,12 +133,14 @@ rb_memsearch(const void *x0, long m, const void *y0, long n) } #define REG_LITERAL FL_USER5 +#define REG_ENCODING_NONE FL_USER6 #define KCODE_FIXED FL_USER4 #define ARG_REG_OPTION_MASK \ (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND) #define ARG_ENCODING_FIXED 16 +#define ARG_ENCODING_NONE 32 #define ARG_KCODE_NONE 0 #define ARG_KCODE_EUC 1 @@ -186,8 +188,8 @@ rb_char_to_option_kcode(int c, int *option, int *kcode) switch (c) { case 'n': - *kcode = ARG_KCODE_NONE; - break; + *kcode = -1; + return (*option = ARG_ENCODING_NONE); case 'e': *kcode = ARG_KCODE_EUC; break; @@ -946,9 +948,16 @@ rb_reg_prepare_re(VALUE re, VALUE str) rb_raise(rb_eArgError, "fixed character encoding regexp with incompatible string (encoding: %s)", rb_enc_name(rb_enc_get(str))); } } - else if ((enc = rb_enc_get(str)) != 0 && + else { + if ((enc = rb_enc_get(str)) != 0 && RREGEXP(re)->ptr->enc != enc) { - need_recompile = 1; + need_recompile = 1; + } + if ((RBASIC(re)->flags & REG_ENCODING_NONE) && + rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { + rb_warn("none encoding regexp with non ASCII string (string encoding: %s)", + rb_enc_name(rb_enc_get(str))); + } } if (need_recompile) { @@ -1971,7 +1980,8 @@ rb_reg_initialize(VALUE obj, const char *s, int len, rb_encoding *enc, return -1; if (fixed_enc) { - if (fixed_enc != enc && (options & ARG_ENCODING_FIXED)) { + if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) || + (fixed_enc != d_enc && (options & ARG_ENCODING_NONE))) { strcpy(err, "incompatible character encoding"); return -1; } @@ -1983,11 +1993,15 @@ rb_reg_initialize(VALUE obj, const char *s, int len, rb_encoding *enc, else if (!(options & ARG_ENCODING_FIXED)) { enc = d_enc; } - + rb_enc_associate((VALUE)re, enc); if ((options & ARG_ENCODING_FIXED) || fixed_enc) { re->basic.flags |= KCODE_FIXED; } + if (options & ARG_ENCODING_NONE) { + re->basic.flags |= REG_ENCODING_NONE; + } + re->ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc, options & ARG_REG_OPTION_MASK, err); if (!re->ptr) return -1; @@ -2536,6 +2550,7 @@ rb_reg_options(VALUE re) rb_reg_check(re); options = RREGEXP(re)->ptr->options & ARG_REG_OPTION_MASK; if (RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED; + if (RBASIC(re)->flags & REG_ENCODING_NONE) options |= ARG_ENCODING_NONE; return options; }