Make String.new size pools aware.

If the required capacity would fit in an embded string,
returns one.

This can reduce malloc churn for code that use string buffers.
This commit is contained in:
Jean Boussier 2023-11-02 11:02:43 +01:00 коммит произвёл Jean Boussier
Родитель ee7bf4643d
Коммит ac8ec004e5
2 изменённых файлов: 94 добавлений и 1 удалений

Просмотреть файл

@ -1874,6 +1874,98 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
return str;
}
/* :nodoc: */
static VALUE
rb_str_s_new(int argc, VALUE *argv, VALUE klass)
{
if (klass != rb_cString) {
return rb_class_new_instance_pass_kw(argc, argv, klass);
}
static ID keyword_ids[2];
VALUE orig, opt, encoding = Qnil, capacity = Qnil;
VALUE kwargs[2];
rb_encoding *enc = NULL;
int n = rb_scan_args(argc, argv, "01:", &orig, &opt);
if (NIL_P(opt)) {
return rb_class_new_instance_pass_kw(argc, argv, klass);
}
keyword_ids[0] = rb_id_encoding();
CONST_ID(keyword_ids[1], "capacity");
rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs);
encoding = kwargs[0];
capacity = kwargs[1];
int termlen = 1;
if (n == 1) {
orig = StringValue(orig);
}
else {
orig = Qnil;
}
if (UNDEF_P(encoding)) {
if (!NIL_P(orig)) {
encoding = rb_obj_encoding(orig);
}
}
if (!UNDEF_P(encoding)) {
enc = rb_to_encoding(encoding);
termlen = rb_enc_mbminlen(enc);
}
// If capacity is nil, we're basically just duping `orig`.
if (UNDEF_P(capacity)) {
if (NIL_P(orig)) {
VALUE empty_str = str_new(klass, "", 0);
if (enc) {
rb_enc_associate(empty_str, enc);
}
return empty_str;
}
VALUE copy = str_duplicate(klass, orig);
rb_enc_associate(copy, enc);
ENC_CODERANGE_CLEAR(copy);
return copy;
}
long capa = 0;
capa = NUM2LONG(capacity);
if (capa < 0) {
capa = 0;
}
if (!NIL_P(orig)) {
long orig_capa = rb_str_capacity(orig);
if (orig_capa > capa) {
capa = orig_capa;
}
}
long fake_len = capa - termlen;
if (fake_len < 0) {
fake_len = 0;
}
VALUE str = str_new0(klass, NULL, fake_len, termlen);
STR_SET_LEN(str, 0);
TERM_FILL(RSTRING_PTR(str), termlen);
if (enc) {
rb_enc_associate(str, enc);
}
if (!NIL_P(orig)) {
rb_str_buf_append(str, orig);
}
return str;
}
#ifdef NONASCII_MASK
#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
@ -11931,6 +12023,7 @@ Init_String(void)
st_foreach(rb_vm_fstring_table(), fstring_set_class_i, rb_cString);
rb_include_module(rb_cString, rb_mComparable);
rb_define_alloc_func(rb_cString, empty_str_alloc);
rb_define_singleton_method(rb_cString, "new", rb_str_s_new, -1);
rb_define_singleton_method(rb_cString, "try_convert", rb_str_s_try_convert, 1);
rb_define_method(rb_cString, "initialize", rb_str_init, -1);
rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1);

Просмотреть файл

@ -23,7 +23,7 @@ class Test_StringCapacity < Test::Unit::TestCase
def test_s_new_capacity
assert_equal("", String.new(capacity: 1000))
assert_equal(String, String.new(capacity: 1000).class)
assert_equal(10000, capa(String.new(capacity: 10000)))
assert_equal(10_000 - 1, capa(String.new(capacity: 10_000))) # Real capa doesn't account for termlen
assert_equal("", String.new(capacity: -1000))
assert_equal(capa(String.new(capacity: -10000)), capa(String.new(capacity: -1000)))