Add a fast-path implementation for appending single-byte values to binary strings.

Co-authored-by: Aaron Patterson <tenderlove@ruby-lang.org>
This commit is contained in:
Kevin Menard 2024-06-10 17:36:52 -04:00 коммит произвёл Aaron Patterson
Родитель b73dd8f6d0
Коммит 27e13fbc58
1 изменённых файлов: 57 добавлений и 1 удалений

Просмотреть файл

@ -3346,6 +3346,58 @@ rb_str_cat_cstr(VALUE str, const char *ptr)
return rb_str_buf_cat(str, ptr, strlen(ptr));
}
/*
 * Append a single byte to a binary string, writing straight into the existing
 * buffer when it already has room and deferring to str_buf_cat() otherwise.
 *
 * str  - string to append to; asserted to carry the ASCII-8BIT encoding index.
 * byte - the value to append.
 *
 * Raises ArgumentError ("string sizes too big") when the current length is
 * already LONG_MAX, i.e. one more byte could not be represented.
 */
static void
rb_str_buf_cat_byte(VALUE str, unsigned char byte)
{
    RUBY_ASSERT(RB_ENCODING_GET_INLINED(str) == ENCINDEX_ASCII_8BIT);

    /* A shared buffer is visible to other strings, so detach from it before writing. */
    if (UNLIKELY(!str_independent(str))) {
        str_make_independent(str);
    }

    const int termlen = 1;
    char *buf;
    long old_len = -1;
    RSTRING_GETMEM(str, buf, old_len);

    /* Refuse the append if the new length would not fit in a long. */
    if (UNLIKELY(old_len >= LONG_MAX)) {
        rb_raise(rb_eArgError, "string sizes too big");
    }
    const long new_len = old_len + 1;

    /* Snapshot the code range first: the modifications below might clear it. */
    const int old_cr = ENC_CODERANGE(str);

    if (LIKELY(new_len <= str_capacity(str, termlen))) {
        /* Fast path: spare capacity exists, so store the byte, bump the length and re-terminate. */
        buf[old_len] = byte;
        STR_SET_LEN(str, new_len);
        TERM_FILL(buf + new_len, termlen);
    }
    else {
        /* Slow path: no room left, so let the general concatenation routine handle it. */
        const char appended = (char)byte;
        str_buf_cat(str, &appended, 1);
    }

    /*
     * The resulting code range can be derived from the appended byte alone
     * only when the string was previously 7-bit or was empty. In every other
     * case the existing bytes would have to be scanned, so the code range is
     * left untouched. A binary string can never be "broken", so that state
     * needs no handling here.
     */
    if (old_cr != ENC_CODERANGE_7BIT && old_len != 0) {
        return;
    }
    ENC_CODERANGE_SET(str, ISASCII(byte) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID);
}
// Each invocation below pairs a function prototype with a target function name and the argument list to hand to it.
// NOTE(review): RUBY_ALIAS_FUNCTION is defined outside this view; presumably it defines the prototyped function as an
// alias/forwarder for the named target (rb_str_buf_cat -> rb_str_cat, rb_str_buf_cat2 and rb_str_cat2 ->
// rb_str_cat_cstr) -- confirm against the macro definition.
RUBY_ALIAS_FUNCTION(rb_str_buf_cat(VALUE str, const char *ptr, long len), rb_str_cat, (str, ptr, len))
RUBY_ALIAS_FUNCTION(rb_str_buf_cat2(VALUE str, const char *ptr), rb_str_cat_cstr, (str, ptr))
RUBY_ALIAS_FUNCTION(rb_str_cat2(VALUE str, const char *ptr), rb_str_cat_cstr, (str, ptr))
@ -3634,7 +3686,11 @@ rb_str_concat(VALUE str1, VALUE str2)
}
encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
if (encidx >= 0) {
if (encidx == ENCINDEX_ASCII_8BIT) {
rb_str_buf_cat_byte(str1, (unsigned char)code);
}
else if (encidx >= 0) {
char buf[1];
buf[0] = (char)code;
rb_str_cat(str1, buf, 1);