Add MatchData#bytebegin and MatchData#byteend

These methods return the byte-based offset of the beginning or end of the specified match.

[Feature #20576]
This commit is contained in:
Shugo Maeda 2024-06-12 11:35:53 +09:00 коммит произвёл Shugo Maeda
Родитель a887b41875
Коммит e048a073a3
4 изменённых файлов: 120 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,30 @@
Returns the offset (in bytes) of the beginning of the specified match,
or +nil+ if the specified group did not participate in the match.
When non-negative integer argument +n+ is given,
returns the offset of the beginning of the <tt>n</tt>th match:
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
# => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
m[0] # => "HX1138"
m.bytebegin(0) # => 1
m[3] # => "113"
m.bytebegin(3) # => 3
m = /(т)(е)(с)/.match('тест')
# => #<MatchData "тес" 1:"т" 2:"е" 3:"с">
m[0] # => "тес"
m.bytebegin(0) # => 0
m[3] # => "с"
m.bytebegin(3) # => 4
When string or symbol argument +name+ is given,
returns the offset of the beginning for the named match:
m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
# => #<MatchData "hog" foo:"h" bar:"g">
m[:foo] # => "h"
m.bytebegin('foo') # => 0
m[:bar] # => "g"
m.bytebegin(:bar) # => 2
Related: MatchData#byteend, MatchData#byteoffset.

Просмотреть файл

@ -0,0 +1,30 @@
Returns the offset (in bytes) of the end of the specified match,
or +nil+ if the specified group did not participate in the match.
When non-negative integer argument +n+ is given,
returns the offset of the end of the <tt>n</tt>th match:
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
# => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
m[0] # => "HX1138"
m.byteend(0) # => 7
m[3] # => "113"
m.byteend(3) # => 6
m = /(т)(е)(с)/.match('тест')
# => #<MatchData "тес" 1:"т" 2:"е" 3:"с">
m[0] # => "тес"
m.byteend(0) # => 6
m[3] # => "с"
m.byteend(3) # => 6
When string or symbol argument +name+ is given,
returns the offset of the end for the named match:
m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
# => #<MatchData "hog" foo:"h" bar:"g">
m[:foo] # => "h"
m.byteend('foo') # => 1
m[:bar] # => "g"
m.byteend(:bar) # => 3
Related: MatchData#bytebegin, MatchData#byteoffset.

50
re.c
Просмотреть файл

@ -1296,6 +1296,54 @@ match_byteoffset(VALUE match, VALUE n)
}
/*
 *  call-seq:
 *    bytebegin(n) -> integer or nil
 *    bytebegin(name) -> integer or nil
 *
 *  :include: doc/matchdata/bytebegin.rdoc
 *
 */

static VALUE
match_bytebegin(VALUE match, VALUE n)
{
    /* Turn the integer index or group name into a backreference number. */
    int i = match_backref_number(match, n);
    /* NOTE(review): BEG() presumably reads the local named `regs`, as in
     * the sibling accessors of this file -- keep the variable name. */
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);
    backref_number_check(regs, i);

    /* A negative begin offset means the group did not participate in the
     * match; report that as nil instead of a bogus offset. */
    return BEG(i) < 0 ? Qnil : LONG2NUM(BEG(i));
}
/*
 *  call-seq:
 *    byteend(n) -> integer or nil
 *    byteend(name) -> integer or nil
 *
 *  :include: doc/matchdata/byteend.rdoc
 *
 */

static VALUE
match_byteend(VALUE match, VALUE n)
{
    /* Turn the integer index or group name into a backreference number. */
    int i = match_backref_number(match, n);
    /* NOTE(review): BEG()/END() presumably read the local named `regs`, as
     * in the sibling accessors of this file -- keep the variable name. */
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);
    backref_number_check(regs, i);

    /* Participation is decided by the begin offset, exactly as in
     * match_bytebegin, so both methods return nil for the same groups. */
    return BEG(i) < 0 ? Qnil : LONG2NUM(END(i));
}
/*
* call-seq:
* begin(n) -> integer
@ -4842,6 +4890,8 @@ Init_Regexp(void)
rb_define_method(rb_cMatch, "length", match_size, 0);
rb_define_method(rb_cMatch, "offset", match_offset, 1);
rb_define_method(rb_cMatch, "byteoffset", match_byteoffset, 1);
rb_define_method(rb_cMatch, "bytebegin", match_bytebegin, 1);
rb_define_method(rb_cMatch, "byteend", match_byteend, 1);
rb_define_method(rb_cMatch, "begin", match_begin, 1);
rb_define_method(rb_cMatch, "end", match_end, 1);
rb_define_method(rb_cMatch, "match", match_nth, 1);

Просмотреть файл

@ -559,16 +559,26 @@ class TestRegexp < Test::Unit::TestCase
assert_raise(IndexError) { m.byteoffset(2) }
assert_raise(IndexError) { m.begin(2) }
assert_raise(IndexError) { m.end(2) }
assert_raise(IndexError) { m.bytebegin(2) }
assert_raise(IndexError) { m.byteend(2) }
m = /(?<x>q..)?/.match("foobarbaz")
assert_equal([nil, nil], m.byteoffset("x"))
assert_equal(nil, m.begin("x"))
assert_equal(nil, m.end("x"))
assert_equal(nil, m.bytebegin("x"))
assert_equal(nil, m.byteend("x"))
m = /\A\u3042(.)(.)?(.)\z/.match("\u3042\u3043\u3044")
assert_equal([3, 6], m.byteoffset(1))
assert_equal(3, m.bytebegin(1))
assert_equal(6, m.byteend(1))
assert_equal([nil, nil], m.byteoffset(2))
assert_equal(nil, m.bytebegin(2))
assert_equal(nil, m.byteend(2))
assert_equal([6, 9], m.byteoffset(3))
assert_equal(6, m.bytebegin(3))
assert_equal(9, m.byteend(3))
end
def test_match_to_s