Unify length field for embedded and heap strings (#7908)

* Unify length field for embedded and heap strings

The length field has the same type and position in RString for both
embedded and heap-allocated strings, so we can unify it.

* Remove RSTRING_EMBED_LEN
This commit is contained in:
Peter Zhu 2023-06-06 10:19:20 -04:00 коммит произвёл GitHub
Родитель fae2f80d06
Коммит 7577c101ed
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
9 изменённых файлов: 69 добавлений и 119 удалений

Просмотреть файл

@ -61,13 +61,12 @@ bug_str_unterminated_substring(VALUE str, VALUE vbeg, VALUE vlen)
if (RSTRING_LEN(str) < beg) rb_raise(rb_eIndexError, "beg: %ld", beg);
if (RSTRING_LEN(str) < beg + len) rb_raise(rb_eIndexError, "end: %ld", beg + len);
str = rb_str_new_shared(str);
RSTRING(str)->len = len;
if (STR_EMBED_P(str)) {
RSTRING(str)->as.embed.len = (short)len;
memmove(RSTRING(str)->as.embed.ary, RSTRING(str)->as.embed.ary + beg, len);
}
else {
RSTRING(str)->as.heap.ptr += beg;
RSTRING(str)->as.heap.len = len;
}
return str;
}
@ -114,7 +113,7 @@ bug_str_s_cstr_noembed(VALUE self, VALUE str)
RBASIC(str2)->flags &= ~(STR_SHARED | FL_USER5 | FL_USER6);
RSTRING(str2)->as.heap.aux.capa = capacity;
RSTRING(str2)->as.heap.ptr = buf;
RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
RSTRING(str2)->len = RSTRING_LEN(str);
TERM_FILL(RSTRING_END(str2), TERM_LEN(str));
return str2;
}

Просмотреть файл

@ -43,7 +43,6 @@
/** @cond INTERNAL_MACRO */
#define RSTRING_NOEMBED RSTRING_NOEMBED
#define RSTRING_FSTR RSTRING_FSTR
#define RSTRING_EMBED_LEN RSTRING_EMBED_LEN
#define RSTRING_LEN RSTRING_LEN
#define RSTRING_LENINT RSTRING_LENINT
#define RSTRING_PTR RSTRING_PTR
@ -199,6 +198,13 @@ struct RString {
/** Basic part, including flags and class. */
struct RBasic basic;
/**
* Length of the string, not including terminating NUL character.
*
* @note This is in bytes.
*/
long len;
/** String's specific fields. */
union {
@ -207,14 +213,6 @@ struct RString {
* pattern.
*/
struct {
/**
* Length of the string, not including terminating NUL character.
*
* @note This is in bytes.
*/
long len;
/**
* Pointer to the contents of the string. In the old days each
* string had dedicated memory regions. That is no longer true
@ -245,7 +243,6 @@ struct RString {
/** Embedded contents. */
struct {
long len;
/* This is a length 1 array because:
* 1. GCC has a bug that does not optimize C flexible array members
* (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452)
@ -364,24 +361,12 @@ RBIMPL_ATTR_ARTIFICIAL()
*
* @param[in] str String in question.
* @return Its length, in bytes.
* @pre `str` must be an instance of ::RString, and must has its
* ::RSTRING_NOEMBED flag off.
*
* @internal
*
* This was a macro before. It was inevitable to be public, since macros are
* global constructs. But should it be forever? Now that it is a function,
* @shyouhei thinks it could just be eliminated, hidden into implementation
* details.
* @pre `str` must be an instance of ::RString.
*/
static inline long
RSTRING_EMBED_LEN(VALUE str)
RSTRING_LEN(VALUE str)
{
RBIMPL_ASSERT_TYPE(str, RUBY_T_STRING);
RBIMPL_ASSERT_OR_ASSUME(! RB_FL_ANY_RAW(str, RSTRING_NOEMBED));
long f = RSTRING(str)->as.embed.len;
return f;
return RSTRING(str)->len;
}
RBIMPL_WARNING_PUSH()
@ -411,7 +396,7 @@ rbimpl_rstring_getmem(VALUE str)
else {
/* Expecting compilers to optimize this on-stack struct away. */
struct RString retval;
retval.as.heap.len = RSTRING_EMBED_LEN(str);
retval.len = RSTRING_LEN(str);
retval.as.heap.ptr = RSTRING(str)->as.embed.ary;
return retval;
}
@ -419,21 +404,6 @@ rbimpl_rstring_getmem(VALUE str)
RBIMPL_WARNING_POP()
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
RBIMPL_ATTR_ARTIFICIAL()
/**
* Queries the length of the string.
*
* @param[in] str String in question.
* @return Its length, in bytes.
* @pre `str` must be an instance of ::RString.
*/
static inline long
RSTRING_LEN(VALUE str)
{
return rbimpl_rstring_getmem(str).as.heap.len;
}
RBIMPL_ATTR_ARTIFICIAL()
/**
* Queries the contents pointer of the string.
@ -482,7 +452,7 @@ RSTRING_END(VALUE str)
rb_debug_rstring_null_ptr("RSTRING_END");
}
return &buf.as.heap.ptr[buf.as.heap.len];
return &buf.as.heap.ptr[buf.len];
}
RBIMPL_ATTR_ARTIFICIAL()
@ -516,7 +486,7 @@ RSTRING_LENINT(VALUE str)
__extension__ ({ \
struct RString rbimpl_str = rbimpl_rstring_getmem(str); \
(ptrvar) = rbimpl_str.as.heap.ptr; \
(lenvar) = rbimpl_str.as.heap.len; \
(lenvar) = rbimpl_str.len; \
})
#else
# define RSTRING_GETMEM(str, ptrvar, lenvar) \

Просмотреть файл

@ -2994,15 +2994,12 @@ module RubyVM::RJIT
# @param ctx [RubyVM::RJIT::Context]
# @param asm [RubyVM::RJIT::Assembler]
def jit_rb_str_empty_p(jit, ctx, asm, argc, known_recv_class)
# Assume same offset to len embedded or not so we can use one code path to read the length
#assert_equal(C.RString.offsetof(:as, :heap, :len), C.RString.offsetof(:as, :embed, :len))
recv_opnd = ctx.stack_pop(1)
out_opnd = ctx.stack_push(Type::UnknownImm)
asm.comment('get string length')
asm.mov(:rax, recv_opnd)
str_len_opnd = [:rax, C.RString.offsetof(:as, :heap, :len)]
str_len_opnd = [:rax, C.RString.offsetof(:len)]
asm.cmp(str_len_opnd, 0)
asm.mov(:rax, Qfalse)

Просмотреть файл

@ -857,11 +857,11 @@ module RubyVM::RJIT # :nodoc: all
@RString ||= CType::Struct.new(
"RString", Primitive.cexpr!("SIZEOF(struct RString)"),
basic: [self.RBasic, Primitive.cexpr!("OFFSETOF((*((struct RString *)NULL)), basic)")],
len: [CType::Immediate.parse("long"), Primitive.cexpr!("OFFSETOF((*((struct RString *)NULL)), len)")],
as: [CType::Union.new(
"", Primitive.cexpr!("SIZEOF(((struct RString *)NULL)->as)"),
heap: CType::Struct.new(
"", Primitive.cexpr!("SIZEOF(((struct RString *)NULL)->as.heap)"),
len: [CType::Immediate.parse("long"), Primitive.cexpr!("OFFSETOF(((struct RString *)NULL)->as.heap, len)")],
ptr: [CType::Pointer.new { CType::Immediate.parse("char") }, Primitive.cexpr!("OFFSETOF(((struct RString *)NULL)->as.heap, ptr)")],
aux: [CType::Union.new(
"", Primitive.cexpr!("SIZEOF(((struct RString *)NULL)->as.heap.aux)"),
@ -871,7 +871,6 @@ module RubyVM::RJIT # :nodoc: all
),
embed: CType::Struct.new(
"", Primitive.cexpr!("SIZEOF(((struct RString *)NULL)->as.embed)"),
len: [CType::Immediate.parse("long"), Primitive.cexpr!("OFFSETOF(((struct RString *)NULL)->as.embed, len)")],
ary: [CType::Pointer.new { CType::Immediate.parse("char") }, Primitive.cexpr!("OFFSETOF(((struct RString *)NULL)->as.embed, ary)")],
),
), Primitive.cexpr!("OFFSETOF((*((struct RString *)NULL)), as)")],

104
string.c
Просмотреть файл

@ -110,18 +110,9 @@ VALUE rb_cSymbol;
FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
} while (0)
#define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE))
# define STR_SET_EMBED_LEN(str, n) do { \
assert(str_embed_capa(str) > (n));\
RSTRING(str)->as.embed.len = (n);\
} while (0)
#define STR_SET_LEN(str, n) do { \
if (STR_EMBED_P(str)) {\
STR_SET_EMBED_LEN((str), (n));\
}\
else {\
RSTRING(str)->as.heap.len = (n);\
}\
RSTRING(str)->len = (n); \
} while (0)
static inline bool
@ -158,7 +149,7 @@ str_enc_fastpath(VALUE str)
const long tlen = RSTRING_LEN(str);\
memcpy(tmp, RSTRING_PTR(str), tlen);\
RSTRING(str)->as.heap.ptr = tmp;\
RSTRING(str)->as.heap.len = tlen;\
RSTRING(str)->len = tlen;\
STR_SET_NOEMBED(str);\
RSTRING(str)->as.heap.aux.capa = (capacity);\
}\
@ -222,7 +213,7 @@ rb_str_size_as_embedded(VALUE str)
{
size_t real_size;
if (STR_EMBED_P(str)) {
real_size = rb_str_embed_size(RSTRING(str)->as.embed.len) + TERM_LEN(str);
real_size = rb_str_embed_size(RSTRING(str)->len) + TERM_LEN(str);
}
/* if the string is not currently embedded, but it can be embedded, how
* much space would it require */
@ -275,10 +266,10 @@ rb_str_make_embedded(VALUE str)
RUBY_ASSERT(!STR_EMBED_P(str));
char *buf = RSTRING(str)->as.heap.ptr;
long len = RSTRING(str)->as.heap.len;
long len = RSTRING(str)->len;
STR_SET_EMBED(str);
STR_SET_EMBED_LEN(str, len);
STR_SET_LEN(str, len);
if (len > 0) {
memcpy(RSTRING_PTR(str), buf, len);
@ -382,13 +373,13 @@ fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data, int exist
else {
if (FL_TEST_RAW(str, STR_FAKESTR)) {
if (arg->copy) {
VALUE new_str = str_new(rb_cString, RSTRING(str)->as.heap.ptr, RSTRING(str)->as.heap.len);
VALUE new_str = str_new(rb_cString, RSTRING(str)->as.heap.ptr, RSTRING(str)->len);
rb_enc_copy(new_str, str);
str = new_str;
}
else {
str = str_new_static(rb_cString, RSTRING(str)->as.heap.ptr,
RSTRING(str)->as.heap.len,
RSTRING(str)->len,
ENCODING_GET(str));
}
OBJ_FREEZE_RAW(str);
@ -486,7 +477,7 @@ setup_fake_str(struct RString *fake_str, const char *name, long len, int encidx)
ENCODING_SET_INLINED((VALUE)fake_str, encidx);
RBASIC_SET_CLASS_RAW((VALUE)fake_str, rb_cString);
fake_str->as.heap.len = len;
fake_str->len = len;
fake_str->as.heap.ptr = (char *)name;
fake_str->as.heap.aux.capa = len;
return (VALUE)fake_str;
@ -832,7 +823,7 @@ str_capacity(VALUE str, const int termlen)
return str_embed_capa(str) - termlen;
}
else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) {
return RSTRING(str)->as.heap.len;
return RSTRING(str)->len;
}
else {
return RSTRING(str)->as.heap.aux.capa;
@ -1012,7 +1003,7 @@ str_new_static(VALUE klass, const char *ptr, long len, int encindex)
else {
RUBY_DTRACE_CREATE_HOOK(STRING, len);
str = str_alloc_heap(klass);
RSTRING(str)->as.heap.len = len;
RSTRING(str)->len = len;
RSTRING(str)->as.heap.ptr = (char *)ptr;
RSTRING(str)->as.heap.aux.capa = len;
RBASIC(str)->flags |= STR_NOFREE;
@ -1296,7 +1287,6 @@ str_replace_shared_without_enc(VALUE str2, VALUE str)
char *ptr2 = RSTRING(str2)->as.embed.ary;
STR_SET_EMBED(str2);
memcpy(ptr2, RSTRING_PTR(str), len);
STR_SET_EMBED_LEN(str2, len);
TERM_FILL(ptr2+len, termlen);
}
else {
@ -1320,10 +1310,12 @@ str_replace_shared_without_enc(VALUE str2, VALUE str)
}
}
FL_SET(str2, STR_NOEMBED);
RSTRING(str2)->as.heap.len = len;
RSTRING(str2)->as.heap.ptr = ptr;
STR_SET_SHARED(str2, root);
}
STR_SET_LEN(str2, len);
return str2;
}
@ -1383,7 +1375,7 @@ rb_str_tmp_frozen_release(VALUE orig, VALUE tmp)
if (shared == tmp && !FL_TEST_RAW(tmp, STR_BORROWED)) {
assert(RSTRING(orig)->as.heap.ptr == RSTRING(tmp)->as.heap.ptr);
assert(RSTRING(orig)->as.heap.len == RSTRING(tmp)->as.heap.len);
assert(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
/* Unshare orig since the root (tmp) only has this one child. */
FL_UNSET_RAW(orig, STR_SHARED);
@ -1393,7 +1385,7 @@ rb_str_tmp_frozen_release(VALUE orig, VALUE tmp)
/* Make tmp embedded and empty so it is safe for sweeping. */
STR_SET_EMBED(tmp);
STR_SET_EMBED_LEN(tmp, 0);
STR_SET_LEN(tmp, 0);
}
}
}
@ -1411,7 +1403,7 @@ heap_str_make_shared(VALUE klass, VALUE orig)
assert(!STR_SHARED_P(orig));
VALUE str = str_alloc_heap(klass);
RSTRING(str)->as.heap.len = RSTRING_LEN(orig);
STR_SET_LEN(str, RSTRING_LEN(orig));
RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
RSTRING(str)->as.heap.aux.capa = RSTRING(orig)->as.heap.aux.capa;
RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE;
@ -1438,7 +1430,7 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding)
if (FL_TEST_RAW(orig, STR_SHARED)) {
VALUE shared = RSTRING(orig)->as.heap.aux.shared;
long ofs = RSTRING(orig)->as.heap.ptr - RSTRING_PTR(shared);
long rest = RSTRING_LEN(shared) - ofs - RSTRING(orig)->as.heap.len;
long rest = RSTRING_LEN(shared) - ofs - RSTRING_LEN(orig);
assert(ofs >= 0);
assert(rest >= 0);
assert(ofs + rest <= RSTRING_LEN(shared));
@ -1450,7 +1442,7 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding)
str = str_new_shared(klass, shared);
assert(!STR_EMBED_P(str));
RSTRING(str)->as.heap.ptr += ofs;
RSTRING(str)->as.heap.len -= ofs + rest;
STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
}
else {
if (RBASIC_CLASS(shared) == 0)
@ -1462,7 +1454,7 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding)
str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
STR_SET_EMBED(str);
memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
STR_SET_EMBED_LEN(str, RSTRING_LEN(orig));
STR_SET_LEN(str, RSTRING_LEN(orig));
TERM_FILL(RSTRING_END(str), TERM_LEN(orig));
}
else {
@ -1591,23 +1583,24 @@ str_shared_replace(VALUE str, VALUE str2)
str_discard(str);
termlen = rb_enc_mbminlen(enc);
STR_SET_LEN(str, RSTRING_LEN(str2));
if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
STR_SET_EMBED(str);
memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (size_t)RSTRING_LEN(str2) + termlen);
STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
rb_enc_associate(str, enc);
ENC_CODERANGE_SET(str, cr);
}
else {
if (STR_EMBED_P(str2)) {
assert(!FL_TEST(str2, STR_SHARED));
long len = RSTRING(str2)->as.embed.len;
long len = RSTRING_LEN(str2);
assert(len + termlen <= str_embed_capa(str2));
char *new_ptr = ALLOC_N(char, len + termlen);
memcpy(new_ptr, RSTRING(str2)->as.embed.ary, len + termlen);
RSTRING(str2)->as.heap.ptr = new_ptr;
RSTRING(str2)->as.heap.len = len;
STR_SET_LEN(str2, len);
RSTRING(str2)->as.heap.aux.capa = len;
STR_SET_NOEMBED(str2);
}
@ -1615,7 +1608,6 @@ str_shared_replace(VALUE str, VALUE str2)
STR_SET_NOEMBED(str);
FL_UNSET(str, STR_SHARED);
RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
RSTRING(str)->as.heap.len = RSTRING_LEN(str2);
if (FL_TEST(str2, STR_SHARED)) {
VALUE shared = RSTRING(str2)->as.heap.aux.shared;
@ -1628,7 +1620,7 @@ str_shared_replace(VALUE str, VALUE str2)
/* abandon str2 */
STR_SET_EMBED(str2);
RSTRING_PTR(str2)[0] = 0;
STR_SET_EMBED_LEN(str2, 0);
STR_SET_LEN(str2, 0);
rb_enc_associate(str, enc);
ENC_CODERANGE_SET(str, cr);
}
@ -1664,7 +1656,7 @@ str_replace(VALUE str, VALUE str2)
VALUE shared = RSTRING(str2)->as.heap.aux.shared;
assert(OBJ_FROZEN(shared));
STR_SET_NOEMBED(str);
RSTRING(str)->as.heap.len = len;
STR_SET_LEN(str, len);
RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
STR_SET_SHARED(str, shared);
rb_enc_cr_str_exact_copy(str, str2);
@ -1708,11 +1700,10 @@ str_duplicate_setup(VALUE klass, VALUE str, VALUE dup)
VALUE flags = FL_TEST_RAW(str, flag_mask);
int encidx = 0;
if (STR_EMBED_P(str)) {
long len = RSTRING_EMBED_LEN(str);
long len = RSTRING_LEN(str);
assert(STR_EMBED_P(dup));
assert(str_embed_capa(dup) >= len + 1);
STR_SET_EMBED_LEN(dup, len);
MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary, char, len + 1);
}
else {
@ -1727,13 +1718,14 @@ str_duplicate_setup(VALUE klass, VALUE str, VALUE dup)
assert(!STR_SHARED_P(root));
assert(RB_OBJ_FROZEN_RAW(root));
RSTRING(dup)->as.heap.len = RSTRING_LEN(str);
RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
FL_SET(root, STR_SHARED_ROOT);
RB_OBJ_WRITE(dup, &RSTRING(dup)->as.heap.aux.shared, root);
flags |= RSTRING_NOEMBED | STR_SHARED;
}
STR_SET_LEN(dup, RSTRING_LEN(str));
if ((flags & ENCODING_MASK) == (ENCODING_INLINE_MAX<<ENCODING_SHIFT)) {
encidx = rb_enc_get_index(str);
flags &= ~ENCODING_MASK;
@ -1751,7 +1743,7 @@ ec_str_duplicate(struct rb_execution_context_struct *ec, VALUE klass, VALUE str)
dup = ec_str_alloc_heap(ec, klass);
}
else {
dup = ec_str_alloc_embed(ec, klass, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
dup = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
}
return str_duplicate_setup(klass, str, dup);
@ -1765,7 +1757,7 @@ str_duplicate(VALUE klass, VALUE str)
dup = str_alloc_heap(klass);
}
else {
dup = str_alloc_embed(klass, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
}
return str_duplicate_setup(klass, str, dup);
@ -1841,14 +1833,14 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
str_modifiable(str);
if (STR_EMBED_P(str)) { /* make noembed always */
char *new_ptr = ALLOC_N(char, (size_t)capa + termlen);
assert(RSTRING(str)->as.embed.len + 1 <= str_embed_capa(str));
memcpy(new_ptr, RSTRING(str)->as.embed.ary, RSTRING(str)->as.embed.len + 1);
assert(RSTRING_LEN(str) + 1 <= str_embed_capa(str));
memcpy(new_ptr, RSTRING(str)->as.embed.ary, RSTRING_LEN(str) + 1);
RSTRING(str)->as.heap.ptr = new_ptr;
}
else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) {
const size_t size = (size_t)capa + termlen;
const char *const old_ptr = RSTRING_PTR(str);
const size_t osize = RSTRING(str)->as.heap.len + TERM_LEN(str);
const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
char *new_ptr = ALLOC_N(char, (size_t)capa + termlen);
memcpy(new_ptr, old_ptr, osize < size ? osize : size);
FL_UNSET_RAW(str, STR_SHARED|STR_NOFREE);
@ -1858,7 +1850,7 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char,
(size_t)capa + termlen, STR_HEAP_SIZE(str));
}
RSTRING(str)->as.heap.len = len;
STR_SET_LEN(str, len);
TERM_FILL(&RSTRING(str)->as.heap.ptr[len], termlen);
if (n == 1) {
memcpy(RSTRING(str)->as.heap.ptr, RSTRING_PTR(orig), len);
@ -2350,7 +2342,7 @@ str_make_independent_expand(VALUE str, long len, long expand, const int termlen)
STR_SET_EMBED(str);
memcpy(RSTRING(str)->as.embed.ary, ptr, len);
TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
STR_SET_EMBED_LEN(str, len);
STR_SET_LEN(str, len);
return;
}
@ -2366,7 +2358,7 @@ str_make_independent_expand(VALUE str, long len, long expand, const int termlen)
FL_UNSET(str, STR_SHARED|STR_NOFREE);
TERM_FILL(ptr + len, termlen);
RSTRING(str)->as.heap.ptr = ptr;
RSTRING(str)->as.heap.len = len;
STR_SET_LEN(str, len);
RSTRING(str)->as.heap.aux.capa = capa;
}
@ -2418,7 +2410,7 @@ str_discard(VALUE str)
if (!STR_EMBED_P(str) && !FL_TEST(str, STR_SHARED|STR_NOFREE)) {
ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
RSTRING(str)->as.heap.ptr = 0;
RSTRING(str)->as.heap.len = 0;
STR_SET_LEN(str, 0);
}
}
@ -2751,8 +2743,8 @@ str_subseq(VALUE str, long beg, long len)
str2 = str_new_shared(rb_cString, str);
ENC_CODERANGE_CLEAR(str2);
RSTRING(str2)->as.heap.ptr += beg;
if (RSTRING(str2)->as.heap.len > len) {
RSTRING(str2)->as.heap.len = len;
if (RSTRING_LEN(str2) > len) {
STR_SET_LEN(str2, len);
}
}
@ -3001,7 +2993,7 @@ rb_str_resize(VALUE str, long len)
if (STR_EMBED_P(str)) {
if (len == slen) return str;
if (str_embed_capa(str) >= len + termlen) {
STR_SET_EMBED_LEN(str, len);
STR_SET_LEN(str, len);
TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
return str;
}
@ -3013,7 +3005,7 @@ rb_str_resize(VALUE str, long len)
if (slen > len) slen = len;
if (slen > 0) MEMCPY(RSTRING(str)->as.embed.ary, ptr, char, slen);
TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
STR_SET_EMBED_LEN(str, len);
STR_SET_LEN(str, len);
if (independent) ruby_xfree(ptr);
return str;
}
@ -3028,7 +3020,7 @@ rb_str_resize(VALUE str, long len)
RSTRING(str)->as.heap.aux.capa = len;
}
else if (len == slen) return str;
RSTRING(str)->as.heap.len = len;
STR_SET_LEN(str, len);
TERM_FILL(RSTRING(str)->as.heap.ptr + len, termlen); /* sentinel */
}
return str;
@ -5158,7 +5150,6 @@ rb_str_drop_bytes(VALUE str, long len)
char *oldptr = ptr;
int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
STR_SET_EMBED(str);
STR_SET_EMBED_LEN(str, nlen);
ptr = RSTRING(str)->as.embed.ary;
memmove(ptr, oldptr + len, nlen);
if (fl == STR_NOEMBED) xfree(oldptr);
@ -5170,8 +5161,9 @@ rb_str_drop_bytes(VALUE str, long len)
OBJ_FREEZE(shared);
}
ptr = RSTRING(str)->as.heap.ptr += len;
RSTRING(str)->as.heap.len = nlen;
}
STR_SET_LEN(str, nlen);
ptr[nlen] = 0;
ENC_CODERANGE_CLEAR(str);
return str;
@ -5946,7 +5938,7 @@ rb_str_clear(VALUE str)
{
str_discard(str);
STR_SET_EMBED(str);
STR_SET_EMBED_LEN(str, 0);
STR_SET_LEN(str, 0);
RSTRING_PTR(str)[0] = 0;
if (rb_enc_asciicompat(STR_ENC_GET(str)))
ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
@ -7926,7 +7918,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
}
TERM_FILL((char *)t, termlen);
RSTRING(str)->as.heap.ptr = (char *)buf;
RSTRING(str)->as.heap.len = t - buf;
STR_SET_LEN(str, t - buf);
STR_SET_NOEMBED(str);
RSTRING(str)->as.heap.aux.capa = max;
}
@ -8002,7 +7994,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
}
TERM_FILL((char *)t, termlen);
RSTRING(str)->as.heap.ptr = (char *)buf;
RSTRING(str)->as.heap.len = t - buf;
STR_SET_LEN(str, t - buf);
STR_SET_NOEMBED(str);
RSTRING(str)->as.heap.aux.capa = max;
}
@ -10722,7 +10714,7 @@ rb_str_b(VALUE str)
str2 = str_alloc_heap(rb_cString);
}
else {
str2 = str_alloc_embed(rb_cString, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
str2 = str_alloc_embed(rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
}
str_replace_shared_without_enc(str2, str);

Просмотреть файл

@ -66,7 +66,7 @@ class Test_StringCapacity < Test::Unit::TestCase
end
def embed_header_size
2 * RbConfig::SIZEOF['void*'] + RbConfig::SIZEOF['long']
3 * RbConfig::SIZEOF['void*']
end
def max_embed_len

3
yjit.c
Просмотреть файл

@ -40,8 +40,7 @@
// Field offsets for the RString struct
enum rstring_offsets {
RUBY_OFFSET_RSTRING_AS_HEAP_LEN = offsetof(struct RString, as.heap.len),
RUBY_OFFSET_RSTRING_EMBED_LEN = offsetof(struct RString, as.embed.len),
RUBY_OFFSET_RSTRING_LEN = offsetof(struct RString, len)
};
// We need size_t to have a known size to simplify code generation and FFI.

Просмотреть файл

@ -4422,18 +4422,13 @@ fn jit_rb_str_empty_p(
_argc: i32,
_known_recv_class: *const VALUE,
) -> bool {
const _: () = assert!(
RUBY_OFFSET_RSTRING_AS_HEAP_LEN == RUBY_OFFSET_RSTRING_EMBED_LEN,
"same offset to len embedded or not so we can use one code path to read the length",
);
let recv_opnd = asm.stack_pop(1);
asm.comment("get string length");
let str_len_opnd = Opnd::mem(
std::os::raw::c_long::BITS as u8,
asm.load(recv_opnd),
RUBY_OFFSET_RSTRING_AS_HEAP_LEN as i32,
RUBY_OFFSET_RSTRING_LEN as i32,
);
asm.cmp(str_len_opnd, Opnd::UImm(0));

Просмотреть файл

@ -1063,8 +1063,7 @@ pub type ruby_vminsn_type = u32;
pub type rb_iseq_callback = ::std::option::Option<
unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void),
>;
pub const RUBY_OFFSET_RSTRING_AS_HEAP_LEN: rstring_offsets = 16;
pub const RUBY_OFFSET_RSTRING_EMBED_LEN: rstring_offsets = 16;
pub const RUBY_OFFSET_RSTRING_LEN: rstring_offsets = 16;
pub type rstring_offsets = u32;
pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword;
extern "C" {