зеркало из https://github.com/github/ruby.git
Implement String#append_as_bytes(String | Integer, ...)
[Feature #20594] A handy method to construct a string out of multiple chunks. Contrary to `String#concat`, it doesn't do any encoding negotiation, and simply appends the content as bytes regardless of whether this results in a broken string or not. It's the caller's responsibility to check for `String#valid_encoding?` in cases where it's needed. When passed integers, only the lower byte is considered, like in `String#setbyte`.
This commit is contained in:
Родитель
966901b39d
Коммит
16f241f0aa
|
@ -0,0 +1,46 @@
|
|||
require_relative '../../spec_helper'
|
||||
|
||||
# Specs for String#append_as_bytes: raw byte concatenation that performs no
# encoding negotiation and never changes the receiver's encoding.
describe "String#append_as_bytes" do
  ruby_version_is "3.4" do
    it "doesn't allow to mutate frozen strings" do
      str = "hello".freeze
      -> { str.append_as_bytes("\xE2\x82") }.should raise_error(FrozenError)
    end

    it "allows creating broken strings" do
      # "\xE2\x82" is only the first two bytes of the three-byte sequence for
      # "€", so the string is invalid until the final byte "\xAC" is appended.
      str = +"hello"
      str.append_as_bytes("\xE2\x82")
      str.valid_encoding?.should == false

      str.append_as_bytes("\xAC")
      str.valid_encoding?.should == true

      # Three single bytes appended to a UTF-32LE string: the encoding is
      # kept and the result is reported as invalid.
      str = "abc".encode(Encoding::UTF_32LE)
      str.append_as_bytes("def")
      str.encoding.should == Encoding::UTF_32LE
      str.valid_encoding?.should == false
    end

    it "never changes the receiver encoding" do
      str = "".b
      str.append_as_bytes("€")
      str.encoding.should == Encoding::BINARY
    end

    it "accepts variadic String or Integer arguments" do
      # Integers are appended as single bytes: 12 is "\f" and 43 is "+".
      str = "hello".b
      str.append_as_bytes("\xE2\x82", 12, 43, "\xAC")
      str.encoding.should == Encoding::BINARY
      str.should == "hello\xE2\x82\f+\xAC".b
    end

    it "only accepts strings or integers, and doesn't attempt to cast with #to_str or #to_int" do
      to_str = mock("to_str")
      to_str.should_not_receive(:to_str)
      to_str.should_not_receive(:to_int)

      str = +"hello"
      -> { str.append_as_bytes(to_str) }.should raise_error(TypeError, "wrong argument type MockObject (expected String or Integer)")
    end
  end
end
|
165
string.c
165
string.c
|
@ -3308,6 +3308,32 @@ rb_str_resize(VALUE str, long len)
|
|||
return str;
|
||||
}
|
||||
|
||||
static void
|
||||
str_ensure_available_capa(VALUE str, long len)
|
||||
{
|
||||
str_modify_keep_cr(str);
|
||||
|
||||
const int termlen = TERM_LEN(str);
|
||||
long olen = RSTRING_LEN(str);
|
||||
|
||||
if (RB_UNLIKELY(olen > LONG_MAX - len)) {
|
||||
rb_raise(rb_eArgError, "string sizes too big");
|
||||
}
|
||||
|
||||
long total = olen + len;
|
||||
long capa = str_capacity(str, termlen);
|
||||
|
||||
if (capa < total) {
|
||||
if (total >= LONG_MAX / 2) {
|
||||
capa = total;
|
||||
}
|
||||
while (total > capa) {
|
||||
capa = 2 * capa + termlen; /* == 2*(capa+termlen)-termlen */
|
||||
}
|
||||
RESIZE_CAPA_TERM(str, capa, termlen);
|
||||
}
|
||||
}
|
||||
|
||||
static VALUE
|
||||
str_buf_cat4(VALUE str, const char *ptr, long len, bool keep_cr)
|
||||
{
|
||||
|
@ -3662,6 +3688,144 @@ rb_str_concat_multi(int argc, VALUE *argv, VALUE str)
|
|||
return str;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* append_as_bytes(*objects) -> string
|
||||
*
|
||||
* Concatenates each object in +objects+ into +self+ without any encoding
|
||||
* validation or conversion and returns +self+:
|
||||
*
|
||||
* s = 'foo'
|
||||
* s.append_as_bytes(" \xE2\x82") # => "foo \xE2\x82"
|
||||
* s.valid_encoding? # => false
|
||||
* s.append_as_bytes("\xAC 12")
|
||||
* s.valid_encoding? # => true
|
||||
*
|
||||
* For each given object +object+ that is an Integer,
|
||||
* the value is considered a Byte. If the Integer is bigger
|
||||
* than one byte, only the lower byte is considered, similar to String#setbyte:
|
||||
*
|
||||
* s = ""
|
||||
* s.append_as_bytes(0, 257) # => "\u0000\u0001"
|
||||
*
|
||||
* Related: String#<<, String#concat, which do an encoding aware concatenation.
|
||||
*/
|
||||
|
||||
VALUE
|
||||
rb_str_append_as_bytes(int argc, VALUE *argv, VALUE str)
|
||||
{
|
||||
long needed_capacity = 0;
|
||||
volatile VALUE t0;
|
||||
enum ruby_value_type *types = ALLOCV_N(enum ruby_value_type, t0, argc);
|
||||
|
||||
for (int index = 0; index < argc; index++) {
|
||||
VALUE obj = argv[index];
|
||||
enum ruby_value_type type = types[index] = rb_type(obj);
|
||||
switch (type) {
|
||||
case T_FIXNUM:
|
||||
case T_BIGNUM:
|
||||
needed_capacity++;
|
||||
break;
|
||||
case T_STRING:
|
||||
needed_capacity += RSTRING_LEN(obj);
|
||||
break;
|
||||
default:
|
||||
rb_raise(
|
||||
rb_eTypeError,
|
||||
"wrong argument type %"PRIsVALUE" (expected String or Integer)",
|
||||
rb_obj_class(obj)
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
str_ensure_available_capa(str, needed_capacity);
|
||||
char *sptr = RSTRING_END(str);
|
||||
|
||||
for (int index = 0; index < argc; index++) {
|
||||
VALUE obj = argv[index];
|
||||
enum ruby_value_type type = types[index];
|
||||
switch (type) {
|
||||
case T_FIXNUM:
|
||||
case T_BIGNUM: {
|
||||
argv[index] = obj = rb_int_and(obj, INT2FIX(0xff));
|
||||
char byte = (char)(NUM2INT(obj) & 0xFF);
|
||||
*sptr = byte;
|
||||
sptr++;
|
||||
break;
|
||||
}
|
||||
case T_STRING: {
|
||||
const char *ptr;
|
||||
long len;
|
||||
RSTRING_GETMEM(obj, ptr, len);
|
||||
memcpy(sptr, ptr, len);
|
||||
sptr += len;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE;
|
||||
RUBY_ASSERT("append_as_bytes arguments should have been validated");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
|
||||
TERM_FILL(sptr, TERM_LEN(str)); /* sentinel */
|
||||
|
||||
int cr = ENC_CODERANGE(str);
|
||||
switch (cr) {
|
||||
case ENC_CODERANGE_7BIT: {
|
||||
for (int index = 0; index < argc; index++) {
|
||||
VALUE obj = argv[index];
|
||||
enum ruby_value_type type = types[index];
|
||||
switch (type) {
|
||||
case T_FIXNUM:
|
||||
case T_BIGNUM: {
|
||||
if (!ISASCII(NUM2INT(obj))) {
|
||||
goto clear_cr;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case T_STRING: {
|
||||
if (ENC_CODERANGE(obj) != ENC_CODERANGE_7BIT) {
|
||||
goto clear_cr;
|
||||
}
|
||||
}
|
||||
default:
|
||||
UNREACHABLE;
|
||||
RUBY_ASSERT("append_as_bytes arguments should have been validated");
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ENC_CODERANGE_VALID:
|
||||
if (ENCODING_GET_INLINED(str) == ENCINDEX_ASCII_8BIT) {
|
||||
goto keep_cr;
|
||||
}
|
||||
else {
|
||||
goto clear_cr;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
goto clear_cr;
|
||||
break;
|
||||
}
|
||||
|
||||
RB_GC_GUARD(t0);
|
||||
|
||||
clear_cr:
|
||||
// If no fast path was hit, we clear the coderange.
|
||||
// append_as_bytes is predominently meant to be used in
|
||||
// buffering situation, hence it's likely the coderange
|
||||
// will never be scanned, so it's not worth spending time
|
||||
// precomputing the coderange except for simple and common
|
||||
// situations.
|
||||
ENC_CODERANGE_CLEAR(str);
|
||||
keep_cr:
|
||||
return str;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* string << object -> string
|
||||
|
@ -12433,6 +12597,7 @@ Init_String(void)
|
|||
rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
|
||||
rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0);
|
||||
rb_define_method(rb_cString, "concat", rb_str_concat_multi, -1);
|
||||
rb_define_method(rb_cString, "append_as_bytes", rb_str_append_as_bytes, -1);
|
||||
rb_define_method(rb_cString, "<<", rb_str_concat, 1);
|
||||
rb_define_method(rb_cString, "prepend", rb_str_prepend_multi, -1);
|
||||
rb_define_method(rb_cString, "crypt", rb_str_crypt, 1);
|
||||
|
|
|
@ -3630,6 +3630,55 @@ CODE
|
|||
assert_bytesplice_raise(ArgumentError, S("hello"), 0..-1, "bye", 0, 3)
|
||||
end
|
||||
|
||||
# Appending to a BINARY buffer keeps it BINARY, both for ASCII input and
# for multibyte UTF-8 input.
def test_append_bytes_into_binary
  buffer = S("".b)
  assert_equal(Encoding::BINARY, buffer.encoding)

  # ASCII-only chunk
  buffer.append_as_bytes(S("hello"))
  assert_equal("hello".b, buffer)
  assert_equal(Encoding::BINARY, buffer.encoding)

  # Multibyte UTF-8 chunk: compared against the binary form of the text
  buffer.append_as_bytes(S("こんにちは"))
  assert_equal(S("helloこんにちは".b), buffer)
  assert_equal(Encoding::BINARY, buffer.encoding)
end
|
||||
|
||||
# Appending to a UTF-8 buffer never changes its encoding; validity follows
# whatever bytes have been appended so far.
def test_append_bytes_into_utf8
  buffer = S("")
  assert_equal(Encoding::UTF_8, buffer.encoding)

  # ASCII chunk: still ASCII-only and valid
  buffer.append_as_bytes(S("hello"))
  assert_equal(S("hello"), buffer)
  assert_equal(Encoding::UTF_8, buffer.encoding)
  assert_predicate(buffer, :ascii_only?)
  assert_predicate(buffer, :valid_encoding?)

  # Multibyte chunk: valid, but no longer ASCII-only
  buffer.append_as_bytes(S("こんにちは"))
  assert_equal(S("helloこんにちは"), buffer)
  assert_equal(Encoding::UTF_8, buffer.encoding)
  refute_predicate(buffer, :ascii_only?)
  assert_predicate(buffer, :valid_encoding?)

  # First two bytes of "€" only: the buffer is now invalid
  buffer.append_as_bytes(S("\xE2\x82".b))
  assert_equal(S("helloこんにちは\xE2\x82"), buffer)
  assert_equal(Encoding::UTF_8, buffer.encoding)
  refute_predicate(buffer, :valid_encoding?)

  # The final byte completes "€" and the buffer is valid again
  buffer.append_as_bytes(S("\xAC".b))
  assert_equal(S("helloこんにちは€"), buffer)
  assert_equal(Encoding::UTF_8, buffer.encoding)
  assert_predicate(buffer, :valid_encoding?)
end
|
||||
|
||||
# Three single bytes appended to a UTF-32LE buffer: the encoding is kept
# and the buffer is no longer valid.
def test_append_bytes_into_utf32
  buffer = S("abc".encode(Encoding::UTF_32LE))
  assert_equal(Encoding::UTF_32LE, buffer.encoding)

  buffer.append_as_bytes("def")
  assert_equal(Encoding::UTF_32LE, buffer.encoding)
  refute_predicate(buffer, :valid_encoding?)
end
|
||||
|
||||
def test_chilled_string
|
||||
chilled_string = eval('"chilled"')
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче