* ext/strscan/strscan.c: new method StringScanner#beginning_of_line? (alias #bol?)
* ext/strscan/strscan.c: new method StringScanner#concat and #<<.
* ext/strscan/strscan.c: StringScanner#new(str) neither duplicates nor freezes STR (allowing destructive modification).
* test/strscan/test_stringscanner.rb: test new methods above.
* test/strscan/test_stringscanner.rb: test destructive string modification.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5201 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
aamine 2003-12-16 15:18:11 +00:00
Родитель ff9f067f89
Коммит 6ef31af2d1
3 изменённых файлов: 173 добавлений и 41 удалений

Просмотреть файл

@ -1,3 +1,18 @@
Wed Dec 17 00:16:14 2003 Minero Aoki <aamine@loveruby.net>
* ext/strscan/strscan.c: new method
StringScanner#beginning_of_line? (alias #bol?)
* ext/strscan/strscan.c: new method StringScanner#concat and #<<.
* ext/strscan/strscan.c: StringScanner#new(str) neither duplicates
nor freezes STR (allowing destructive modification).
* test/strscan/test_stringscanner.rb: test new methods above.
* test/strscan/test_stringscanner.rb: test destructive string
modification.
Tue Dec 16 21:20:47 2003 Tanaka Akira <akr@m17n.org>
* lib/pp.rb: don't use local variable `pp'.

Просмотреть файл

@ -52,10 +52,10 @@ struct strscanner
#define MATCHED(s) (s)->flags |= FLAG_MATCHED
#define CLEAR_MATCH_STATUS(s) (s)->flags &= ~FLAG_MATCHED
#define S_PTR(s) (RSTRING((s)->str)->ptr)
#define S_PBEG(s) (RSTRING((s)->str)->ptr)
#define S_LEN(s) (RSTRING((s)->str)->len)
#define S_END(s) (S_PTR(s) + S_LEN(s))
#define CURPTR(s) (S_PTR(s) + (s)->curr)
#define S_PEND(s) (S_PBEG(s) + S_LEN(s))
#define CURPTR(s) (S_PBEG(s) + (s)->curr)
#define S_RESTLEN(s) (S_LEN(s) - (s)->curr)
/*
 * True when the scan position of scanner S is at or past the end of its
 * string.  The length is read through the macro argument itself, so the
 * result does not depend on the caller having a variable named `p'.
 */
#define EOS_P(s) ((s)->curr >= RSTRING((s)->str)->len)
@ -82,6 +82,7 @@ static VALUE strscan_s_mustc _((VALUE self));
static VALUE strscan_terminate _((VALUE self));
static VALUE strscan_get_string _((VALUE self));
static VALUE strscan_set_string _((VALUE self, VALUE str));
static VALUE strscan_concat _((VALUE self, VALUE str));
static VALUE strscan_get_pos _((VALUE self));
static VALUE strscan_set_pos _((VALUE self, VALUE pos));
static VALUE strscan_do_scan _((VALUE self, VALUE regex,
@ -102,6 +103,7 @@ static VALUE strscan_getch _((VALUE self));
static VALUE strscan_get_byte _((VALUE self));
static VALUE strscan_peek _((VALUE self, VALUE len));
static VALUE strscan_unscan _((VALUE self));
static VALUE strscan_bol_p _((VALUE self));
static VALUE strscan_eos_p _((VALUE self));
static VALUE strscan_rest_p _((VALUE self));
static VALUE strscan_matched_p _((VALUE self));
@ -114,8 +116,8 @@ static VALUE strscan_rest _((VALUE self));
static VALUE strscan_rest_size _((VALUE self));
static VALUE strscan_inspect _((VALUE self));
static char* inspect_before _((struct strscanner *p, char *buf));
static char* inspect_after _((struct strscanner *p, char *buf));
static VALUE inspect1 _((struct strscanner *p));
static VALUE inspect2 _((struct strscanner *p));
/* =======================================================================
Utils
@ -135,7 +137,10 @@ extract_range(p, beg_i, end_i)
struct strscanner *p;
long beg_i, end_i;
{
return infect(rb_str_new(S_PTR(p) + beg_i, end_i - beg_i), p);
if (beg_i > S_LEN(p)) return Qnil;
if (end_i > S_LEN(p))
end_i = S_LEN(p);
return infect(rb_str_new(S_PBEG(p) + beg_i, end_i - beg_i), p);
}
static VALUE
@ -143,7 +148,10 @@ extract_beg_len(p, beg_i, len)
struct strscanner *p;
long beg_i, len;
{
return infect(rb_str_new(S_PTR(p) + beg_i, len), p);
if (beg_i > S_LEN(p)) return Qnil;
if (beg_i + len > S_LEN(p))
len = S_LEN(p) - beg_i;
return infect(rb_str_new(S_PBEG(p) + beg_i, len), p);
}
@ -192,11 +200,9 @@ strscan_initialize(argc, argv, self)
VALUE str, need_dup;
Data_Get_Struct(self, struct strscanner, p);
if (rb_scan_args(argc, argv, "11", &str, &need_dup) == 1)
need_dup = Qtrue;
rb_scan_args(argc, argv, "11", &str, &need_dup);
StringValue(str);
p->str = RTEST(need_dup) ? rb_str_dup(str) : str;
rb_obj_freeze(p->str);
p->str = str;
return self;
}
@ -262,6 +268,18 @@ strscan_set_string(self, str)
return str;
}
/*
 * Appends STR to the string held by the scanner and returns the scanner
 * itself.  Registered in Init_strscan as both "concat" and "<<".
 *
 * STR is passed through StringValue() before it is appended.  The scanner's
 * string object is extended in place via rb_str_append(); the scan position
 * is not touched here.
 */
static VALUE
strscan_concat(self, str)
    VALUE self, str;
{
    struct strscanner *scanner;

    GET_SCANNER(self, scanner);
    StringValue(str);
    rb_str_append(scanner->str, str);

    return self;
}
static VALUE
strscan_get_pos(self)
VALUE self;
@ -304,6 +322,9 @@ strscan_do_scan(self, regex, succptr, getstr, headonly)
GET_SCANNER(self, p);
CLEAR_MATCH_STATUS(p);
if (EOS_P(p)) {
return Qnil;
}
strscan_prepare_re(regex);
if (headonly) {
ret = re_match(RREGEXP(regex)->ptr,
@ -485,7 +506,6 @@ strscan_peek(self, vlen)
return extract_beg_len(p, p->curr, len);
}
static VALUE
strscan_unscan(self)
VALUE self;
@ -501,6 +521,17 @@ strscan_unscan(self)
return self;
}
/*
 * Tells whether the scan position is at the beginning of a line.
 * Registered in Init_strscan as "beginning_of_line?" and "bol?".
 *
 * Returns Qnil when the scan position lies beyond the end of the string,
 * Qtrue when the position is 0 or the byte just before it is '\n', and
 * Qfalse otherwise.
 */
static VALUE
strscan_bol_p(self)
    VALUE self;
{
    struct strscanner *p;

    GET_SCANNER(self, p);
    /*
     * The string is no longer frozen, so it may have been shortened after
     * scanning and curr can exceed its length.  Compare offsets instead of
     * pointers: forming a pointer more than one past the end of the buffer
     * is undefined behaviour.
     */
    if (p->curr > S_LEN(p)) return Qnil;
    if (p->curr == 0) return Qtrue;
    return (*(CURPTR(p) - 1) == '\n') ? Qtrue : Qfalse;
}
static VALUE
strscan_eos_p(self)
@ -652,9 +683,9 @@ strscan_inspect(self)
{
struct strscanner *p;
char buf[BUFSIZE];
char buf_before[16];
char buf_after[16];
long len;
VALUE result;
VALUE a, b;
Data_Get_Struct(self, struct strscanner, p);
if (NIL_P(p->str)) {
@ -667,24 +698,33 @@ strscan_inspect(self)
rb_class2name(CLASS_OF(self)));
return infect(rb_str_new(buf, len), p);
}
len = snprintf(buf, BUFSIZE, "#<%s %ld/%ld %s@%s>",
if (p->curr == 0) {
b = inspect2(p);
len = snprintf(buf, BUFSIZE, "#<%s %ld/%ld @ %s>",
rb_class2name(CLASS_OF(self)),
p->curr, S_LEN(p),
RSTRING(b)->ptr);
return infect(rb_str_new(buf, len), p);
}
a = inspect1(p);
b = inspect2(p);
len = snprintf(buf, BUFSIZE, "#<%s %ld/%ld %s @ %s>",
rb_class2name(CLASS_OF(self)),
p->curr, S_LEN(p),
inspect_before(p, buf_before),
inspect_after(p, buf_after));
RSTRING(a)->ptr,
RSTRING(b)->ptr);
return infect(rb_str_new(buf, len), p);
}
static char*
inspect_before(p, buf)
static VALUE
inspect1(p)
struct strscanner *p;
char *buf;
{
char buf[BUFSIZE];
char *bp = buf;
long len;
if (p->curr == 0) return "";
*bp++ = '"';
if (p->curr == 0) return rb_str_new2("");
if (p->curr > INSPECT_LENGTH) {
strcpy(bp, "..."); bp += 3;
len = INSPECT_LENGTH;
@ -693,22 +733,18 @@ inspect_before(p, buf)
len = p->curr;
}
memcpy(bp, CURPTR(p) - len, len); bp += len;
*bp++ = '"';
*bp++ = ' ';
*bp++ = '\0';
return buf;
return rb_str_dump(rb_str_new(buf, bp - buf));
}
static char*
inspect_after(p, buf)
static VALUE
inspect2(p)
struct strscanner *p;
char *buf;
{
char buf[BUFSIZE];
char *bp = buf;
long len;
*bp++ = ' ';
*bp++ = '"';
if (EOS_P(p)) return rb_str_new2("");
len = S_LEN(p) - p->curr;
if (len > INSPECT_LENGTH) {
len = INSPECT_LENGTH;
@ -718,9 +754,7 @@ inspect_after(p, buf)
else {
memcpy(bp, CURPTR(p), len); bp += len;
}
*bp++ = '"';
*bp++ = '\0';
return buf;
return rb_str_dump(rb_str_new(buf, bp - buf));
}
/* =======================================================================
@ -756,6 +790,8 @@ Init_strscan()
rb_define_method(StringScanner, "clear", strscan_terminate, 0);
rb_define_method(StringScanner, "string", strscan_get_string, 0);
rb_define_method(StringScanner, "string=", strscan_set_string, 1);
rb_define_method(StringScanner, "concat", strscan_concat, 1);
rb_define_method(StringScanner, "<<", strscan_concat, 1);
rb_define_method(StringScanner, "pos", strscan_get_pos, 0);
rb_define_method(StringScanner, "pos=", strscan_set_pos, 1);
rb_define_method(StringScanner, "pointer", strscan_get_pos, 0);
@ -781,6 +817,8 @@ Init_strscan()
rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
rb_define_method(StringScanner, "bol?", strscan_bol_p, 0);
rb_define_method(StringScanner, "eos?", strscan_eos_p, 0);
rb_define_method(StringScanner, "empty?", strscan_eos_p, 0);
rb_define_method(StringScanner, "rest?", strscan_rest_p, 0);

Просмотреть файл

@ -5,14 +5,11 @@
require 'strscan'
require 'test/unit'
class TestStringScanner < Test::Unit::TestCase
def test_s_new
s = StringScanner.new('test string')
assert_instance_of StringScanner, s
assert_equal false, s.eos?
assert_equal true, s.string.frozen?
assert_equal false, s.tainted?
str = 'test string'
@ -21,7 +18,6 @@ class TestStringScanner < Test::Unit::TestCase
assert_instance_of StringScanner, s
assert_equal false, s.eos?
assert_same str, s.string
assert_equal true, s.string.frozen?
assert_equal true, s.string.tainted?
str = 'test string'
@ -30,8 +26,8 @@ class TestStringScanner < Test::Unit::TestCase
assert_equal true, s.string.tainted?
end
if VERSION >= '1.7.0'
UNINIT_ERROR = ArgumentError
def test_s_allocate
s = StringScanner.allocate
assert_equal '#<StringScanner (uninitialized)>', s.inspect.sub(/StringScanner_C/, 'StringScanner')
@ -42,7 +38,6 @@ if VERSION >= '1.7.0'
assert_nothing_raised(UNINIT_ERROR) { s.eos? }
assert_equal false, s.eos?
end
end
def test_s_mustc
assert_nothing_raised(NotImplementedError) {
@ -70,6 +65,9 @@ end
s.get_byte
assert_equal '#<StringScanner 1/11 "t" @ "est s...">', s.inspect.sub(/StringScanner_C/, 'StringScanner')
assert_equal true, s.inspect.tainted?
s = StringScanner.new("\n")
assert_equal '#<StringScanner 0/1 @ "\n">', s.inspect
end
def test_eos?
@ -85,6 +83,51 @@ end
assert_equal true, s.eos?
s.scan(/\w+/)
assert_equal true, s.eos?
s = StringScanner.new('test')
s.scan(/te/)
s.string.replace ''
assert_equal true, s.eos?
end
# Walks a scanner over a multi-line string and checks #bol? after each step:
# true at offset 0 and right after a "\n" (including the "\n" of a "\r\n"),
# false everywhere else, and false again once a newline match is unscanned.
def test_bol?
  scanner = StringScanner.new("a\nbbb\n\ncccc\nddd\r\neee")

  # Asking twice checks that the query itself does not move the scanner.
  2.times { assert_equal(true, scanner.bol?) }

  scanner.scan(/a/)
  2.times { assert_equal(false, scanner.bol?) }

  scanner.scan(/\n/)
  assert_equal(true, scanner.bol?)

  # Consume "bbb" one byte at a time; none of these positions starts a line.
  3.times do
    scanner.scan(/b/)
    assert_equal(false, scanner.bol?)
  end

  scanner.scan(/\n/)
  assert_equal(true, scanner.bol?)

  # Undoing the newline match puts the scanner back in the middle of a line.
  scanner.unscan
  assert_equal(false, scanner.bol?)

  # Two consecutive newlines: the position after the empty line starts a line.
  scanner.scan(/\n/)
  scanner.scan(/\n/)
  assert_equal(true, scanner.bol?)

  scanner.scan(/c+\n/)
  assert_equal(true, scanner.bol?)

  scanner.scan(/d+\r\n/)
  assert_equal(true, scanner.bol?)

  scanner.scan(/e+/)
  assert_equal(false, scanner.bol?)
end
# Checks the #string reader and #string= writer: the reader returns the
# current string, and assigning a new string after a scan leaves pos at 0.
def test_string
  scanner = StringScanner.new('test')
  assert_equal('test', scanner.string)

  scanner.string = 'a'
  assert_equal('a', scanner.string)

  # Advance past "a" first so the final pos check is meaningful.
  scanner.scan(/a/)
  scanner.string = 'b'
  assert_equal(0, scanner.pos)
end
def test_pos
@ -98,6 +141,19 @@ end
assert_equal 11, s.pos
end
# Checks that #concat makes more input available to a scanner that had
# already reached the end of its string, and that this works repeatedly.
def test_concat
  scanner = StringScanner.new('a')
  scanner.scan(/a/)

  # Each round: append one character, confirm the scanner is no longer at
  # the end, consume the new character, and confirm the end is reached again.
  [['b', /b/], ['c', /c/]].each do |appended, pattern|
    scanner.concat(appended)
    assert_equal(false, scanner.eos?)
    assert_equal(appended, scanner.scan(pattern))
    assert_equal(true, scanner.eos?)
  end
end
def test_scan
s = StringScanner.new('stra strb strc', true)
tmp = s.scan(/\w+/)
@ -139,6 +195,15 @@ end
assert_nil s.scan(/\w+/)
assert_nil s.scan(/\w+/)
s = StringScanner.new('test')
s.scan(/te/)
# This assumes that #string does not duplicate the string,
# but that is an implementation-specific detail.
# DO NOT RELY ON THIS FEATURE.
s.string.replace ''
# unspecified: assert_equal 2, s.pos
assert_equal nil, s.scan(/test/)
end
def test_skip
@ -151,6 +216,11 @@ end
assert_nil s.skip(/\w+/)
assert_nil s.skip(/\s+/)
assert_equal true, s.eos?
s = StringScanner.new('test')
s.scan(/te/)
s.string.replace ''
assert_equal nil, s.skip(/./)
end
def test_getch
@ -175,6 +245,11 @@ end
assert_equal "\244\242", s.getch
assert_nil s.getch
$KCODE = 'NONE'
s = StringScanner.new('test')
s.scan(/te/)
s.string.replace ''
assert_equal nil, s.getch
end
def test_get_byte
@ -201,6 +276,11 @@ end
assert_equal "\242", s.get_byte
assert_nil s.get_byte
$KCODE = 'NONE'
s = StringScanner.new('test')
s.scan(/te/)
s.string.replace ''
assert_equal nil, s.get_byte
end
def test_matched
@ -392,5 +472,4 @@ end
s.reset
assert_equal 0, s.pos
end
end