string.c: search by rb_str_index

* re.c (match_regexp): set regexp for MatchData from string.
* re.c (rb_backref_set_string): create MatchData from string and
  set backref.
* string.c (rb_pat_search, rb_str_sub, rb_str_sub_bang, str_gsub),
  (scan_once, rb_str_scan, rb_str_partition): use rb_str_index
  instead of rb_reg_search() when pattern is a String.  based on
  the patch by Sam Rawlins <sam.rawlins@gmail.com> [Fixes GH-579]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@45451 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
nobu 2014-03-27 09:58:12 +00:00
Родитель 227a5a2aae
Коммит 5752b61d86
5 изменённых файлов: 169 добавлений и 57 удалений

Просмотреть файл

@ -1,3 +1,15 @@
Thu Mar 27 18:58:10 2014 Nobuyoshi Nakada <nobu@ruby-lang.org>
* re.c (match_regexp): set regexp for MatchData from string.
* re.c (rb_backref_set_string): create MatchData from string and
set backref.
* string.c (rb_pat_search, rb_str_sub, rb_str_sub_bang, str_gsub),
(scan_once, rb_str_scan, rb_str_partition): use rb_str_index
instead of rb_reg_search() when pattern is a String. based on
the patch by Sam Rawlins <sam.rawlins@gmail.com> [Fixes GH-579]
Thu Mar 27 11:58:55 2014 NARUSE, Yui <naruse@ruby-lang.org>
* addr2line.c (fill_lines): check shdr[i].sh_type because even if

Просмотреть файл

@ -821,6 +821,7 @@ VALUE rb_rational_reciprocal(VALUE x);
VALUE rb_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline);
VALUE rb_reg_check_preprocess(VALUE);
long rb_reg_search0(VALUE, VALUE, long, int, int);
void rb_backref_set_string(VALUE string, long pos, long len);
/* signal.c */
int rb_get_next_signal(void);

38
re.c
Просмотреть файл

@ -1017,8 +1017,15 @@ match_init_copy(VALUE obj, VALUE orig)
static VALUE
match_regexp(VALUE match)
{
VALUE regexp;
match_check(match);
return RMATCH(match)->regexp;
regexp = RMATCH(match)->regexp;
if (NIL_P(regexp)) {
VALUE str = rb_reg_nth_match(0, match);
regexp = rb_reg_regcomp(rb_reg_quote(str));
RMATCH(match)->regexp = regexp;
}
return regexp;
}
/*
@ -1216,6 +1223,31 @@ rb_match_busy(VALUE match)
FL_SET(match, MATCH_BUSY);
}
static void
match_set_string(VALUE m, VALUE string, long pos, long len)
{
struct RMatch *match = (struct RMatch *)m;
struct rmatch *rmatch = match->rmatch;
match->str = string;
match->regexp = Qnil;
onig_region_resize(&rmatch->regs, 1);
rmatch->regs.beg[0] = pos;
rmatch->regs.end[0] = pos + len;
rmatch->char_offset_updated = 0;
}
void
rb_backref_set_string(VALUE string, long pos, long len)
{
VALUE match = rb_backref_get();
if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) {
match = match_alloc(rb_cMatch);
}
match_set_string(match, string, pos, len);
rb_backref_set(match);
}
/*
* call-seq:
* rxp.fixed_encoding? -> true or false
@ -1909,6 +1941,10 @@ match_inspect(VALUE match)
if (regexp == 0) {
return rb_sprintf("#<%"PRIsVALUE":%p>", cname, (void*)match);
}
else if (NIL_P(regexp)) {
return rb_sprintf("#<%"PRIsVALUE": %"PRIsVALUE">",
cname, rb_reg_nth_match(0, match));
}
names = ALLOCA_N(struct backref_name_tag, num_regs);
MEMZERO(names, struct backref_name_tag, num_regs);

151
string.c
Просмотреть файл

@ -2906,7 +2906,7 @@ rb_str_match(VALUE x, VALUE y)
}
static VALUE get_pat(VALUE, int);
static VALUE get_pat(VALUE);
/*
@ -2946,7 +2946,7 @@ rb_str_match_m(int argc, VALUE *argv, VALUE str)
rb_check_arity(argc, 1, 2);
re = argv[0];
argv[0] = str;
result = rb_funcall2(get_pat(re, 0), rb_intern("match"), argc, argv);
result = rb_funcall2(get_pat(re), rb_intern("match"), argc, argv);
if (!NIL_P(result) && rb_block_given_p()) {
return rb_yield(result);
}
@ -3837,11 +3837,12 @@ rb_str_slice_bang(int argc, VALUE *argv, VALUE str)
}
static VALUE
get_pat(VALUE pat, int quote)
get_pat(VALUE pat)
{
VALUE val;
switch (TYPE(pat)) {
if (SPECIAL_CONST_P(pat)) goto to_string;
switch (BUILTIN_TYPE(pat)) {
case T_REGEXP:
return pat;
@ -3849,6 +3850,7 @@ get_pat(VALUE pat, int quote)
break;
default:
to_string:
val = rb_check_string_type(pat);
if (NIL_P(val)) {
Check_Type(pat, T_REGEXP);
@ -3856,11 +3858,50 @@ get_pat(VALUE pat, int quote)
pat = val;
}
if (quote) {
pat = rb_reg_quote(pat);
return rb_reg_regcomp(pat);
}
return rb_reg_regcomp(pat);
static VALUE
get_pat_quoted(VALUE pat, int check)
{
VALUE val;
if (SPECIAL_CONST_P(pat)) goto to_string;
switch (BUILTIN_TYPE(pat)) {
case T_REGEXP:
return pat;
case T_STRING:
break;
default:
to_string:
val = rb_check_string_type(pat);
if (NIL_P(val)) {
Check_Type(pat, T_REGEXP);
}
pat = val;
}
if (check && is_broken_string(pat)) {
rb_raise(rb_eTypeError, "%"PRIsVALUE, rb_reg_new_str(pat, 0));
}
return pat;
}
static long
rb_pat_search(VALUE pat, VALUE str, long pos, int set_backref_str)
{
if (BUILTIN_TYPE(pat) == T_STRING) {
pos = rb_str_index(str, pat, pos);
if (pos >= 0 && set_backref_str) {
str = rb_str_new_frozen(str);
rb_backref_set_string(str, pos, RSTRING_LEN(pat));
}
return pos;
}
else {
return rb_reg_search0(pat, str, pos, 0, set_backref_str);
}
}
@ -3883,6 +3924,7 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
int tainted = 0;
long plen;
int min_arity = rb_block_given_p() ? 1 : 2;
long beg;
rb_check_arity(argc, min_arity, 2);
if (argc == 1) {
@ -3897,23 +3939,38 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
if (OBJ_TAINTED(repl)) tainted = 1;
}
pat = get_pat(argv[0], 1);
pat = get_pat_quoted(argv[0], 1);
str_modifiable(str);
if (rb_reg_search(pat, str, 0, 0) >= 0) {
beg = rb_pat_search(pat, str, 0, 1);
if (beg >= 0) {
rb_encoding *enc;
int cr = ENC_CODERANGE(str);
VALUE match = rb_backref_get();
struct re_registers *regs = RMATCH_REGS(match);
long beg0 = BEG(0);
long end0 = END(0);
long beg0, end0;
VALUE match, match0;
struct re_registers *regs;
char *p, *rp;
long len, rlen;
if (RB_TYPE_P(pat, T_STRING)) {
beg0 = beg;
end0 = beg0 + RSTRING_LEN(pat);
match0 = pat;
}
else {
match = rb_backref_get();
regs = RMATCH_REGS(match);
beg0 = BEG(0);
end0 = END(0);
if (!iter && NIL_P(hash)) repl = rb_reg_regsub(repl, str, regs, pat);
if (iter) match0 = rb_reg_nth_match(0, match);
}
if (iter || !NIL_P(hash)) {
p = RSTRING_PTR(str); len = RSTRING_LEN(str);
if (iter) {
repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
repl = rb_obj_as_string(rb_yield(match0));
}
else {
repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
@ -3922,9 +3979,7 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
str_mod_check(str, p, len);
rb_check_frozen(str);
}
else {
repl = rb_reg_regsub(repl, str, regs, pat);
}
enc = rb_enc_compatible(str, repl);
if (!enc) {
rb_encoding *str_enc = STR_ENC_GET(str);
@ -4021,7 +4076,7 @@ rb_str_sub(int argc, VALUE *argv, VALUE str)
static VALUE
str_gsub(int argc, VALUE *argv, VALUE str, int bang)
{
VALUE pat, val, repl, match, dest, hash = Qnil;
VALUE pat, val, repl, match, match0, dest, hash = Qnil;
struct re_registers *regs;
long beg, n;
long beg0, end0;
@ -4049,9 +4104,9 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang)
rb_check_arity(argc, 1, 2);
}
pat = get_pat(argv[0], 1);
pat = get_pat_quoted(argv[0], 1);
need_backref = iter || !NIL_P(hash);
beg = rb_reg_search0(pat, str, 0, 0, need_backref);
beg = rb_pat_search(pat, str, 0, need_backref);
if (beg < 0) {
if (bang) return Qnil; /* no match, no substitution */
return rb_str_dup(str);
@ -4070,16 +4125,28 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang)
do {
n++;
if (RB_TYPE_P(pat, T_STRING)) {
beg0 = beg;
end0 = beg0 + RSTRING_LEN(pat);
if (!need_backref) val = repl;
match0 = pat;
}
else {
match = rb_backref_get();
regs = RMATCH_REGS(match);
beg0 = BEG(0);
end0 = END(0);
if (!need_backref) val = rb_reg_regsub(repl, str, regs, pat);
if (iter) match0 = rb_reg_nth_match(0, match);
}
if (need_backref) {
if (iter) {
val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
val = rb_obj_as_string(rb_yield(match0));
}
else {
val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
val = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
val = rb_obj_as_string(val);
}
str_mod_check(str, sp, slen);
@ -4087,9 +4154,6 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang)
rb_raise(rb_eRuntimeError, "block should not cheat");
}
}
else {
val = rb_reg_regsub(repl, str, regs, pat);
}
if (OBJ_TAINTED(val)) tainted = 1;
@ -4114,12 +4178,12 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang)
}
cp = RSTRING_PTR(str) + offset;
if (offset > RSTRING_LEN(str)) break;
beg = rb_reg_search0(pat, str, offset, 0, need_backref);
beg = rb_pat_search(pat, str, offset, need_backref);
} while (beg >= 0);
if (RSTRING_LEN(str) > offset) {
rb_enc_str_buf_cat(dest, cp, RSTRING_LEN(str) - offset, str_enc);
}
rb_reg_search(pat, str, last, 0);
rb_pat_search(pat, str, last, 1);
if (bang) {
rb_str_shared_replace(str, dest);
}
@ -6118,7 +6182,8 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
}
else {
fs_set:
if (RB_TYPE_P(spat, T_STRING)) {
spat = get_pat_quoted(spat, 1);
if (BUILTIN_TYPE(spat) == T_STRING) {
rb_encoding *enc2 = STR_ENC_GET(spat);
split_type = string;
@ -6141,7 +6206,6 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
}
}
else {
spat = get_pat(spat, 1);
split_type = regexp;
}
}
@ -7143,7 +7207,7 @@ scan_once(VALUE str, VALUE pat, long *start)
struct re_registers *regs;
int i;
if (rb_reg_search(pat, str, *start, 0) >= 0) {
if (rb_pat_search(pat, str, *start, 1) >= 0) {
match = rb_backref_get();
regs = RMATCH_REGS(match);
if (BEG(0) == END(0)) {
@ -7213,7 +7277,8 @@ rb_str_scan(VALUE str, VALUE pat)
long last = -1, prev = 0;
char *p = RSTRING_PTR(str); long len = RSTRING_LEN(str);
pat = get_pat(pat, 1);
pat = get_pat_quoted(pat, 1);
mustnot_broken(str);
if (!rb_block_given_p()) {
VALUE ary = rb_ary_new();
@ -7222,7 +7287,7 @@ rb_str_scan(VALUE str, VALUE pat)
prev = start;
rb_ary_push(ary, result);
}
if (last >= 0) rb_reg_search(pat, str, last, 0);
if (last >= 0) rb_pat_search(pat, str, last, 1);
return ary;
}
@ -7232,7 +7297,7 @@ rb_str_scan(VALUE str, VALUE pat)
rb_yield(result);
str_mod_check(str, p, len);
}
if (last >= 0) rb_reg_search(pat, str, last, 0);
if (last >= 0) rb_pat_search(pat, str, last, 1);
return str;
}
@ -7619,31 +7684,21 @@ static VALUE
rb_str_partition(VALUE str, VALUE sep)
{
long pos;
int regex = FALSE;
sep = get_pat_quoted(sep, 0);
if (RB_TYPE_P(sep, T_REGEXP)) {
pos = rb_reg_search(sep, str, 0, 0);
regex = TRUE;
}
else {
VALUE tmp;
tmp = rb_check_string_type(sep);
if (NIL_P(tmp)) {
rb_raise(rb_eTypeError, "type mismatch: %s given",
rb_obj_classname(sep));
}
sep = tmp;
pos = rb_str_index(str, sep, 0);
}
if (pos < 0) {
failed:
return rb_ary_new3(3, str, str_new_empty(str), str_new_empty(str));
}
if (regex) {
sep = rb_str_subpat(str, sep, INT2FIX(0));
if (pos == 0 && RSTRING_LEN(sep) == 0) goto failed;
}
else {
pos = rb_str_index(str, sep, 0);
if (pos < 0) goto failed;
}
return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
sep,
rb_str_subseq(str, pos+RSTRING_LEN(sep),

Просмотреть файл

@ -831,6 +831,8 @@ class TestString < Test::Unit::TestCase
c.force_encoding Encoding::US_ASCII
assert_equal Encoding::UTF_8, a.gsub(/world/, c).encoding
assert_equal S("a\u{e9}apos&lt;"), S("a\u{e9}'&lt;").gsub("'", "apos")
end
def test_gsub!
@ -1454,6 +1456,12 @@ class TestString < Test::Unit::TestCase
o = Object.new
def o.to_s; self; end
assert_match(/^foo#<Object:0x.*>baz$/, "foobarbaz".sub("bar") { o })
assert_equal(S("Abc"), S("abc").sub("a", "A"))
m = nil
assert_equal(S("Abc"), S("abc").sub("a") {m = $~; "A"})
assert_equal(S("a"), m[0])
assert_equal(/a/, m.regexp)
end
def test_sub!