зеркало из https://github.com/github/ruby.git
pack.c: add an offset argument to unpack and unpack1
[Feature #18254] This is useful to avoid repeatedly copying strings when parsing binary formats.
This commit is contained in:
Родитель
717ab0bb2e
Коммит
e5319dc985
19
pack.c
19
pack.c
|
@ -944,7 +944,7 @@ hex2num(char c)
|
||||||
#define UNPACK_1 2
|
#define UNPACK_1 2
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
pack_unpack_internal(VALUE str, VALUE fmt, int mode)
|
pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset)
|
||||||
{
|
{
|
||||||
#define hexdigits ruby_hexdigits
|
#define hexdigits ruby_hexdigits
|
||||||
char *s, *send;
|
char *s, *send;
|
||||||
|
@ -973,8 +973,15 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode)
|
||||||
|
|
||||||
StringValue(str);
|
StringValue(str);
|
||||||
StringValue(fmt);
|
StringValue(fmt);
|
||||||
|
|
||||||
|
if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative");
|
||||||
|
len = RSTRING_LEN(str);
|
||||||
|
if (offset > len) rb_raise(rb_eArgError, "offset outside of string");
|
||||||
|
|
||||||
s = RSTRING_PTR(str);
|
s = RSTRING_PTR(str);
|
||||||
send = s + RSTRING_LEN(str);
|
send = s + len;
|
||||||
|
s += offset;
|
||||||
|
|
||||||
p = RSTRING_PTR(fmt);
|
p = RSTRING_PTR(fmt);
|
||||||
pend = p + RSTRING_LEN(fmt);
|
pend = p + RSTRING_LEN(fmt);
|
||||||
|
|
||||||
|
@ -1614,16 +1621,16 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode)
|
||||||
}
|
}
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt)
|
pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
|
||||||
{
|
{
|
||||||
int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
|
int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
|
||||||
return pack_unpack_internal(str, fmt, mode);
|
return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset));
|
||||||
}
|
}
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt)
|
pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
|
||||||
{
|
{
|
||||||
return pack_unpack_internal(str, fmt, UNPACK_1);
|
return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset));
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
31
pack.rb
31
pack.rb
|
@ -148,10 +148,11 @@ end
|
||||||
class String
|
class String
|
||||||
# call-seq:
|
# call-seq:
|
||||||
# str.unpack(format) -> anArray
|
# str.unpack(format) -> anArray
|
||||||
|
# str.unpack(format, offset: anInteger) -> anArray
|
||||||
#
|
#
|
||||||
# Decodes <i>str</i> (which may contain binary data) according to the
|
# Decodes <i>str</i> (which may contain binary data) according to the
|
||||||
# format string, returning an array of each value extracted. The
|
# format string, returning an array of each value extracted.
|
||||||
# format string consists of a sequence of single-character directives,
|
# The format string consists of a sequence of single-character directives,
|
||||||
# summarized in the table at the end of this entry.
|
# summarized in the table at the end of this entry.
|
||||||
# Each directive may be followed
|
# Each directive may be followed
|
||||||
# by a number, indicating the number of times to repeat with this
|
# by a number, indicating the number of times to repeat with this
|
||||||
|
@ -161,7 +162,15 @@ class String
|
||||||
# exclamation mark (``<code>!</code>'') to use the underlying
|
# exclamation mark (``<code>!</code>'') to use the underlying
|
||||||
# platform's native size for the specified type; otherwise, it uses a
|
# platform's native size for the specified type; otherwise, it uses a
|
||||||
# platform-independent consistent size. Spaces are ignored in the
|
# platform-independent consistent size. Spaces are ignored in the
|
||||||
# format string. See also String#unpack1, Array#pack.
|
# format string.
|
||||||
|
#
|
||||||
|
# The keyword <i>offset</i> can be given to start the decoding after skipping
|
||||||
|
# the specified number of bytes:
|
||||||
|
# "abc".unpack("C*") # => [97, 98, 99]
|
||||||
|
# "abc".unpack("C*", offset: 2) # => [99]
|
||||||
|
# "abc".unpack("C*", offset: 4) # => offset outside of string (ArgumentError)
|
||||||
|
#
|
||||||
|
# See also String#unpack1, Array#pack.
|
||||||
#
|
#
|
||||||
# "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
|
# "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
|
||||||
# "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
|
# "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
|
||||||
|
@ -263,15 +272,23 @@ class String
|
||||||
# * J, J! j, and j! are available since Ruby 2.3.
|
# * J, J! j, and j! are available since Ruby 2.3.
|
||||||
# * Q_, Q!, q_, and q! are available since Ruby 2.1.
|
# * Q_, Q!, q_, and q! are available since Ruby 2.1.
|
||||||
# * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
|
# * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
|
||||||
def unpack(fmt)
|
def unpack(fmt, offset: 0)
|
||||||
Primitive.pack_unpack(fmt)
|
Primitive.pack_unpack(fmt, offset)
|
||||||
end
|
end
|
||||||
|
|
||||||
# call-seq:
|
# call-seq:
|
||||||
# str.unpack1(format) -> obj
|
# str.unpack1(format) -> obj
|
||||||
|
# str.unpack1(format, offset: anInteger) -> obj
|
||||||
#
|
#
|
||||||
# Decodes <i>str</i> (which may contain binary data) according to the
|
# Decodes <i>str</i> (which may contain binary data) according to the
|
||||||
# format string, returning the first value extracted.
|
# format string, returning the first value extracted.
|
||||||
|
#
|
||||||
|
# The keyword <i>offset</i> can be given to start the decoding after skipping
|
||||||
|
# the specified number of bytes:
|
||||||
|
# "abc".unpack1("C*") # => 97
|
||||||
|
# "abc".unpack1("C*", offset: 2) # => 99
|
||||||
|
# "abc".unpack1("C*", offset: 4) # => offset outside of string (ArgumentError)
|
||||||
|
#
|
||||||
# See also String#unpack, Array#pack.
|
# See also String#unpack, Array#pack.
|
||||||
#
|
#
|
||||||
# Contrast with String#unpack:
|
# Contrast with String#unpack:
|
||||||
|
@ -287,7 +304,7 @@ class String
|
||||||
#
|
#
|
||||||
# Thus unpack1 is convenient, makes clear the intention and signals
|
# Thus unpack1 is convenient, makes clear the intention and signals
|
||||||
# the expected return value to those reading the code.
|
# the expected return value to those reading the code.
|
||||||
def unpack1(fmt)
|
def unpack1(fmt, offset: 0)
|
||||||
Primitive.pack_unpack1(fmt)
|
Primitive.pack_unpack1(fmt, offset)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -16,6 +16,12 @@ describe :string_unpack_basic, shared: true do
|
||||||
it "raises a TypeError when passed an Integer" do
|
it "raises a TypeError when passed an Integer" do
|
||||||
-> { "abc".unpack(1) }.should raise_error(TypeError)
|
-> { "abc".unpack(1) }.should raise_error(TypeError)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
ruby_version_is "3.1" do
|
||||||
|
it "starts unpacking from the given offset" do
|
||||||
|
"abc".unpack("CC", offset: 1).should == [98, 99]
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe :string_unpack_no_platform, shared: true do
|
describe :string_unpack_no_platform, shared: true do
|
||||||
|
@ -26,4 +32,18 @@ describe :string_unpack_no_platform, shared: true do
|
||||||
it "raises an ArgumentError when the format modifier is '!'" do
|
it "raises an ArgumentError when the format modifier is '!'" do
|
||||||
-> { "abcdefgh".unpack(unpack_format("!")) }.should raise_error(ArgumentError)
|
-> { "abcdefgh".unpack(unpack_format("!")) }.should raise_error(ArgumentError)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
ruby_version_is "3.1" do
|
||||||
|
it "raises an ArgumentError when the offset is negative" do
|
||||||
|
-> { "a".unpack("C", offset: -1) }.should raise_error(ArgumentError)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "returns nil if the offset is at the end of the string" do
|
||||||
|
"a".unpack("C", offset: 1).should == [nil]
|
||||||
|
end
|
||||||
|
|
||||||
|
it "raises an ArgumentError when the offset is larger than the string" do
|
||||||
|
-> { "a".unpack("C", offset: 2) }.should raise_error(ArgumentError)
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -7,4 +7,24 @@ describe "String#unpack1" do
|
||||||
"aG9nZWZ1Z2E=".unpack1("m").should == "hogefuga"
|
"aG9nZWZ1Z2E=".unpack1("m").should == "hogefuga"
|
||||||
"A".unpack1("B*").should == "01000001"
|
"A".unpack1("B*").should == "01000001"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
ruby_version_is "3.1" do
|
||||||
|
it "starts unpacking from the given offset" do
|
||||||
|
"ZZABCD".unpack1('x3C', offset: 2).should == "ABCD".unpack('x3C')[0]
|
||||||
|
"ZZZZaG9nZWZ1Z2E=".unpack1("m", offset: 4).should == "hogefuga"
|
||||||
|
"ZA".unpack1("B*", offset: 1).should == "01000001"
|
||||||
|
end
|
||||||
|
|
||||||
|
it "raises an ArgumentError when the offset is negative" do
|
||||||
|
-> { "a".unpack1("C", offset: -1) }.should raise_error(ArgumentError)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "returns nil if the offset is at the end of the string" do
|
||||||
|
"a".unpack1("C", offset: 1).should == nil
|
||||||
|
end
|
||||||
|
|
||||||
|
it "raises an ArgumentError when the offset is larger than the string" do
|
||||||
|
-> { "a".unpack1("C", offset: 2) }.should raise_error(ArgumentError)
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -869,4 +869,30 @@ EXPECTED
|
||||||
assert_equal "hogefuga", "aG9nZWZ1Z2E=".unpack1("m")
|
assert_equal "hogefuga", "aG9nZWZ1Z2E=".unpack1("m")
|
||||||
assert_equal "01000001", "A".unpack1("B*")
|
assert_equal "01000001", "A".unpack1("B*")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_unpack1_offset
|
||||||
|
assert_equal 65, "ZA".unpack1("C", offset: 1)
|
||||||
|
assert_equal "01000001", "YZA".unpack1("B*", offset: 2)
|
||||||
|
assert_nil "abc".unpack1("C", offset: 3)
|
||||||
|
assert_raise_with_message(ArgumentError, /offset can't be negative/) {
|
||||||
|
"a".unpack1("C", offset: -1)
|
||||||
|
}
|
||||||
|
assert_raise_with_message(ArgumentError, /offset outside of string/) {
|
||||||
|
"a".unpack1("C", offset: 2)
|
||||||
|
}
|
||||||
|
assert_nil "a".unpack1("C", offset: 1)
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_unpack_offset
|
||||||
|
assert_equal [65], "ZA".unpack("C", offset: 1)
|
||||||
|
assert_equal ["01000001"], "YZA".unpack("B*", offset: 2)
|
||||||
|
assert_equal [nil, nil, nil], "abc".unpack("CCC", offset: 3)
|
||||||
|
assert_raise_with_message(ArgumentError, /offset can't be negative/) {
|
||||||
|
"a".unpack("C", offset: -1)
|
||||||
|
}
|
||||||
|
assert_raise_with_message(ArgumentError, /offset outside of string/) {
|
||||||
|
"a".unpack("C", offset: 2)
|
||||||
|
}
|
||||||
|
assert_equal [nil], "a".unpack("C", offset: 1)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
Загрузка…
Ссылка в новой задаче