pack.c: add an offset argument to unpack and unpack1

[Feature #18254]

This is useful to avoid repeatedly copying strings when parsing binary formats.
This commit is contained in:
Jean Boussier 2021-10-18 16:23:54 +02:00
Родитель 717ab0bb2e
Коммит e5319dc985
5 изменённых файлов: 103 добавлений и 13 удалений

19
pack.c
Просмотреть файл

@ -944,7 +944,7 @@ hex2num(char c)
#define UNPACK_1 2 #define UNPACK_1 2
static VALUE static VALUE
pack_unpack_internal(VALUE str, VALUE fmt, int mode) pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset)
{ {
#define hexdigits ruby_hexdigits #define hexdigits ruby_hexdigits
char *s, *send; char *s, *send;
@ -973,8 +973,15 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode)
StringValue(str); StringValue(str);
StringValue(fmt); StringValue(fmt);
if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative");
len = RSTRING_LEN(str);
if (offset > len) rb_raise(rb_eArgError, "offset outside of string");
s = RSTRING_PTR(str); s = RSTRING_PTR(str);
send = s + RSTRING_LEN(str); send = s + len;
s += offset;
p = RSTRING_PTR(fmt); p = RSTRING_PTR(fmt);
pend = p + RSTRING_LEN(fmt); pend = p + RSTRING_LEN(fmt);
@ -1614,16 +1621,16 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode)
} }
static VALUE static VALUE
pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt) pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
{ {
int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY; int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
return pack_unpack_internal(str, fmt, mode); return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset));
} }
static VALUE static VALUE
pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt) pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
{ {
return pack_unpack_internal(str, fmt, UNPACK_1); return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset));
} }
int int

31
pack.rb
Просмотреть файл

@ -148,10 +148,11 @@ end
class String class String
# call-seq: # call-seq:
# str.unpack(format) -> anArray # str.unpack(format) -> anArray
# str.unpack(format, offset: anInteger) -> anArray
# #
# Decodes <i>str</i> (which may contain binary data) according to the # Decodes <i>str</i> (which may contain binary data) according to the
# format string, returning an array of each value extracted. The # format string, returning an array of each value extracted.
# format string consists of a sequence of single-character directives, # The format string consists of a sequence of single-character directives,
# summarized in the table at the end of this entry. # summarized in the table at the end of this entry.
# Each directive may be followed # Each directive may be followed
# by a number, indicating the number of times to repeat with this # by a number, indicating the number of times to repeat with this
@ -161,7 +162,15 @@ class String
# exclamation mark (``<code>!</code>'') to use the underlying # exclamation mark (``<code>!</code>'') to use the underlying
# platform's native size for the specified type; otherwise, it uses a # platform's native size for the specified type; otherwise, it uses a
# platform-independent consistent size. Spaces are ignored in the # platform-independent consistent size. Spaces are ignored in the
# format string. See also String#unpack1, Array#pack. # format string.
#
# The keyword <i>offset</i> can be given to start the decoding after skipping
# the specified amount of bytes:
# "abc".unpack("C*") # => [97, 98, 99]
# "abc".unpack("C*", offset: 2) # => [99]
# "abc".unpack("C*", offset: 4) # => offset outside of string (ArgumentError)
#
# See also String#unpack1, Array#pack.
# #
# "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "] # "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
# "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"] # "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
@ -263,15 +272,23 @@ class String
# * J, J! j, and j! are available since Ruby 2.3. # * J, J! j, and j! are available since Ruby 2.3.
# * Q_, Q!, q_, and q! are available since Ruby 2.1. # * Q_, Q!, q_, and q! are available since Ruby 2.1.
# * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3. # * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
def unpack(fmt) def unpack(fmt, offset: 0)
Primitive.pack_unpack(fmt) Primitive.pack_unpack(fmt, offset)
end end
# call-seq: # call-seq:
# str.unpack1(format) -> obj # str.unpack1(format) -> obj
# str.unpack1(format, offset: anInteger) -> obj
# #
# Decodes <i>str</i> (which may contain binary data) according to the # Decodes <i>str</i> (which may contain binary data) according to the
# format string, returning the first value extracted. # format string, returning the first value extracted.
#
# The keyword <i>offset</i> can be given to start the decoding after skipping
# the specified amount of bytes:
# "abc".unpack1("C*") # => 97
# "abc".unpack1("C*", offset: 2) # => 99
# "abc".unpack1("C*", offset: 4) # => offset outside of string (ArgumentError)
#
# See also String#unpack, Array#pack. # See also String#unpack, Array#pack.
# #
# Contrast with String#unpack: # Contrast with String#unpack:
@ -287,7 +304,7 @@ class String
# #
# Thus unpack1 is convenient, makes clear the intention and signals # Thus unpack1 is convenient, makes clear the intention and signals
# the expected return value to those reading the code. # the expected return value to those reading the code.
def unpack1(fmt) def unpack1(fmt, offset: 0)
Primitive.pack_unpack1(fmt) Primitive.pack_unpack1(fmt, offset)
end end
end end

Просмотреть файл

@ -16,6 +16,12 @@ describe :string_unpack_basic, shared: true do
it "raises a TypeError when passed an Integer" do it "raises a TypeError when passed an Integer" do
-> { "abc".unpack(1) }.should raise_error(TypeError) -> { "abc".unpack(1) }.should raise_error(TypeError)
end end
ruby_version_is "3.1" do
it "starts unpacking from the given offset" do
"abc".unpack("CC", offset: 1).should == [98, 99]
end
end
end end
describe :string_unpack_no_platform, shared: true do describe :string_unpack_no_platform, shared: true do
@ -26,4 +32,18 @@ describe :string_unpack_no_platform, shared: true do
it "raises an ArgumentError when the format modifier is '!'" do it "raises an ArgumentError when the format modifier is '!'" do
-> { "abcdefgh".unpack(unpack_format("!")) }.should raise_error(ArgumentError) -> { "abcdefgh".unpack(unpack_format("!")) }.should raise_error(ArgumentError)
end end
ruby_version_is "3.1" do
it "raises an ArgumentError when the offset is negative" do
-> { "a".unpack("C", offset: -1) }.should raise_error(ArgumentError)
end
it "returns nil if the offset is at the end of the string" do
"a".unpack("C", offset: 1).should == [nil]
end
it "raises an ArgumentError when the offset is larger than the string" do
-> { "a".unpack("C", offset: 2) }.should raise_error(ArgumentError)
end
end
end end

Просмотреть файл

@ -7,4 +7,24 @@ describe "String#unpack1" do
"aG9nZWZ1Z2E=".unpack1("m").should == "hogefuga" "aG9nZWZ1Z2E=".unpack1("m").should == "hogefuga"
"A".unpack1("B*").should == "01000001" "A".unpack1("B*").should == "01000001"
end end
ruby_version_is "3.1" do
it "starts unpacking from the given offset" do
"ZZABCD".unpack1('x3C', offset: 2).should == "ABCD".unpack('x3C')[0]
"ZZZZaG9nZWZ1Z2E=".unpack1("m", offset: 4).should == "hogefuga"
"ZA".unpack1("B*", offset: 1).should == "01000001"
end
it "raises an ArgumentError when the offset is negative" do
-> { "a".unpack1("C", offset: -1) }.should raise_error(ArgumentError)
end
it "returns nil if the offset is at the end of the string" do
"a".unpack1("C", offset: 1).should == nil
end
it "raises an ArgumentError when the offset is larger than the string" do
-> { "a".unpack1("C", offset: 2) }.should raise_error(ArgumentError)
end
end
end end

Просмотреть файл

@ -869,4 +869,30 @@ EXPECTED
assert_equal "hogefuga", "aG9nZWZ1Z2E=".unpack1("m") assert_equal "hogefuga", "aG9nZWZ1Z2E=".unpack1("m")
assert_equal "01000001", "A".unpack1("B*") assert_equal "01000001", "A".unpack1("B*")
end end
def test_unpack1_offset
assert_equal 65, "ZA".unpack1("C", offset: 1)
assert_equal "01000001", "YZA".unpack1("B*", offset: 2)
assert_nil "abc".unpack1("C", offset: 3)
assert_raise_with_message(ArgumentError, /offset can't be negative/) {
"a".unpack1("C", offset: -1)
}
assert_raise_with_message(ArgumentError, /offset outside of string/) {
"a".unpack1("C", offset: 2)
}
assert_nil "a".unpack1("C", offset: 1)
end
def test_unpack_offset
assert_equal [65], "ZA".unpack("C", offset: 1)
assert_equal ["01000001"], "YZA".unpack("B*", offset: 2)
assert_equal [nil, nil, nil], "abc".unpack("CCC", offset: 3)
assert_raise_with_message(ArgumentError, /offset can't be negative/) {
"a".unpack("C", offset: -1)
}
assert_raise_with_message(ArgumentError, /offset outside of string/) {
"a".unpack("C", offset: 2)
}
assert_equal [nil], "a".unpack("C", offset: 1)
end
end end