[Feature #18239] Implement VWA for strings

This commit adds support for embedded strings with variable capacity and
uses Variable Width Allocation to allocate strings.
This commit is contained in:
Peter Zhu 2021-08-26 10:06:32 -04:00
Родитель 6374be5a81
Коммит a5b6598192
16 изменённых файлов: 453 добавлений и 172 удалений

Просмотреть файл

@ -56,7 +56,9 @@ const union {
enum ruby_robject_consts robject_consts;
enum ruby_rmodule_flags rmodule_flags;
enum ruby_rstring_flags rstring_flags;
#if !USE_RVARGC
enum ruby_rstring_consts rstring_consts;
#endif
enum ruby_rarray_flags rarray_flags;
enum ruby_rarray_consts rarray_consts;
enum {

Просмотреть файл

@ -4,10 +4,11 @@
static VALUE
bug_str_capacity(VALUE klass, VALUE str)
{
return
STR_EMBED_P(str) ? INT2FIX(RSTRING_EMBED_LEN_MAX) : \
STR_SHARED_P(str) ? INT2FIX(0) : \
LONG2FIX(RSTRING(str)->as.heap.aux.capa);
if (!STR_EMBED_P(str) && STR_SHARED_P(str)) {
return INT2FIX(0);
}
return LONG2FIX(rb_str_capacity(str));
}
void

Просмотреть файл

@ -62,9 +62,13 @@ bug_str_unterminated_substring(VALUE str, VALUE vbeg, VALUE vlen)
if (RSTRING_LEN(str) < beg + len) rb_raise(rb_eIndexError, "end: %ld", beg + len);
str = rb_str_new_shared(str);
if (STR_EMBED_P(str)) {
#if USE_RVARGC
RSTRING(str)->as.embed.len = (short)len;
#else
RSTRING(str)->basic.flags &= ~RSTRING_EMBED_LEN_MASK;
RSTRING(str)->basic.flags |= len << RSTRING_EMBED_LEN_SHIFT;
memmove(RSTRING(str)->as.ary, RSTRING(str)->as.ary + beg, len);
#endif
memmove(RSTRING(str)->as.embed.ary, RSTRING(str)->as.embed.ary + beg, len);
}
else {
RSTRING(str)->as.heap.ptr += beg;
@ -112,7 +116,11 @@ bug_str_s_cstr_noembed(VALUE self, VALUE str)
Check_Type(str, T_STRING);
FL_SET((str2), STR_NOEMBED);
memcpy(buf, RSTRING_PTR(str), capacity);
#if USE_RVARGC
RBASIC(str2)->flags &= ~(STR_SHARED | FL_USER5 | FL_USER6);
#else
RBASIC(str2)->flags &= ~RSTRING_EMBED_LEN_MASK;
#endif
RSTRING(str2)->as.heap.aux.capa = capacity;
RSTRING(str2)->as.heap.ptr = buf;
RSTRING(str2)->as.heap.len = RSTRING_LEN(str);

147
gc.c
Просмотреть файл

@ -888,6 +888,7 @@ static const bool USE_MMAP_ALIGNED_ALLOC = false;
#endif
struct heap_page {
short slot_size;
short total_slots;
short free_slots;
short pinned_slots;
@ -1849,7 +1850,7 @@ heap_page_add_freeobj(rb_objspace_t *objspace, struct heap_page *page, VALUE obj
if (RGENGC_CHECK_MODE &&
/* obj should belong to page */
!(&page->start[0] <= (RVALUE *)obj &&
(uintptr_t)obj < ((uintptr_t)page->start + (page->total_slots * page->size_pool->slot_size)) &&
(uintptr_t)obj < ((uintptr_t)page->start + (page->total_slots * page->slot_size)) &&
obj % sizeof(RVALUE) == 0)) {
rb_bug("heap_page_add_freeobj: %p is not rvalue.", (void *)p);
}
@ -1938,7 +1939,7 @@ heap_pages_free_unused_pages(rb_objspace_t *objspace)
}
struct heap_page *hipage = heap_pages_sorted[heap_allocated_pages - 1];
uintptr_t himem = (uintptr_t)hipage->start + (hipage->total_slots * hipage->size_pool->slot_size);
uintptr_t himem = (uintptr_t)hipage->start + (hipage->total_slots * hipage->slot_size);
GC_ASSERT(himem <= (uintptr_t)heap_pages_himem);
heap_pages_himem = (RVALUE *)himem;
@ -2034,6 +2035,7 @@ heap_page_allocate(rb_objspace_t *objspace, rb_size_pool_t *size_pool)
page->start = (RVALUE *)start;
page->total_slots = limit;
page->slot_size = size_pool->slot_size;
page->size_pool = size_pool;
page_body->header.page = page;
@ -2091,7 +2093,6 @@ heap_add_page(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *hea
{
/* Adding to eden heap during incremental sweeping is forbidden */
GC_ASSERT(!(heap == SIZE_POOL_EDEN_HEAP(size_pool) && heap->sweeping_page));
GC_ASSERT(page->size_pool == size_pool);
page->flags.in_tomb = (heap == SIZE_POOL_TOMB_HEAP(size_pool));
list_add_tail(&heap->pages, &page->page_node);
heap->total_pages++;
@ -2324,18 +2325,37 @@ static inline void heap_add_freepage(rb_heap_t *heap, struct heap_page *page);
static struct heap_page *heap_next_freepage(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap);
static inline void ractor_set_cache(rb_ractor_t *cr, struct heap_page *page);
#if USE_RVARGC
void *
rb_gc_rvargc_object_data(VALUE obj)
size_t
rb_gc_obj_slot_size(VALUE obj)
{
return (void *)(obj + sizeof(RVALUE));
return GET_HEAP_PAGE(obj)->slot_size;
}
/* Byte size of a single slot in size pool `pool_id`: sizeof(RVALUE) scaled
 * by 2**pool_id.  When RGENGC_CHECK_MODE is enabled the computed value is
 * cross-checked against the slot_size stored in size_pools[pool_id]. */
static inline size_t
size_pool_slot_size(char pool_id)
{
    GC_ASSERT(pool_id < SIZE_POOL_COUNT);

    const size_t bytes = sizeof(RVALUE) * (1 << pool_id);

#if RGENGC_CHECK_MODE
    rb_objspace_t *objspace = &rb_objspace;
    GC_ASSERT(size_pools[pool_id].slot_size == bytes);
#endif

    return bytes;
}
/* Returns true when `size` bytes fit into a slot of the last (index
 * SIZE_POOL_COUNT - 1) size pool, false otherwise. */
bool
rb_gc_size_allocatable_p(size_t size)
{
    const size_t last_pool_slot = size_pool_slot_size(SIZE_POOL_COUNT - 1);
    return size <= last_pool_slot;
}
static inline VALUE
ractor_cached_free_region(rb_objspace_t *objspace, rb_ractor_t *cr, size_t size)
{
if (size != sizeof(RVALUE)) {
if (size > sizeof(RVALUE)) {
return Qfalse;
}
@ -2409,6 +2429,25 @@ newobj_fill(VALUE obj, VALUE v1, VALUE v2, VALUE v3)
}
#if USE_RVARGC
/* Picks the size pool used for an allocation of `size` bytes.
 *
 * The request is rounded up to a whole number of RVALUEs and the pool index
 * is ceil(log2()) of that count.  Calls rb_bug() when the index is not below
 * SIZE_POOL_COUNT. */
static inline rb_size_pool_t *
size_pool_for_size(rb_objspace_t *objspace, size_t size)
{
    const size_t rvalues_needed = CEILDIV(size, sizeof(RVALUE));

    /* idx is ceil(log2(rvalues_needed)) */
    const size_t idx = 64 - nlz_int64(rvalues_needed - 1);
    if (idx >= SIZE_POOL_COUNT) {
        rb_bug("size_pool_for_size: allocation size too large");
    }

    rb_size_pool_t *pool = &size_pools[idx];

    /* The chosen pool must fit the request, and the next smaller pool
     * (when there is one) must not. */
    GC_ASSERT(pool->slot_size >= (short)size);
    GC_ASSERT(idx == 0 || size_pools[idx - 1].slot_size < (short)size);

    return pool;
}
static inline VALUE
heap_get_freeobj(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap)
{
@ -2430,25 +2469,6 @@ heap_get_freeobj(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *
return (VALUE)p;
}
static inline rb_size_pool_t *
size_pool_for_size(rb_objspace_t *objspace, size_t size)
{
size_t slot_count = CEILDIV(size, sizeof(RVALUE));
/* size_pool_idx is ceil(log2(slot_count)) */
size_t size_pool_idx = 64 - nlz_int64(slot_count - 1);
GC_ASSERT(size_pool_idx > 0);
if (size_pool_idx >= SIZE_POOL_COUNT) {
rb_bug("size_pool_for_size: allocation size too large");
}
rb_size_pool_t *size_pool = &size_pools[size_pool_idx];
GC_ASSERT(size_pool->slot_size >= (short)size);
GC_ASSERT(size_pools[size_pool_idx - 1].slot_size < (short)size);
return size_pool;
}
#endif
ALWAYS_INLINE(static VALUE newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, int wb_protected, size_t alloc_size));
@ -2574,7 +2594,6 @@ VALUE
rb_wb_unprotected_newobj_of(VALUE klass, VALUE flags, size_t size)
{
GC_ASSERT((flags & FL_WB_PROTECTED) == 0);
size = size + sizeof(RVALUE);
return newobj_of(klass, flags, 0, 0, 0, FALSE, size);
}
@ -2582,7 +2601,6 @@ VALUE
rb_wb_protected_newobj_of(VALUE klass, VALUE flags, size_t size)
{
GC_ASSERT((flags & FL_WB_PROTECTED) == 0);
size = size + sizeof(RVALUE);
return newobj_of(klass, flags, 0, 0, 0, TRUE, size);
}
@ -2590,7 +2608,6 @@ VALUE
rb_ec_wb_protected_newobj_of(rb_execution_context_t *ec, VALUE klass, VALUE flags, size_t size)
{
GC_ASSERT((flags & FL_WB_PROTECTED) == 0);
size = size + sizeof(RVALUE);
return newobj_of_cr(rb_ec_ractor_ptr(ec), klass, flags, 0, 0, 0, TRUE, size);
}
@ -2830,14 +2847,14 @@ is_pointer_to_heap(rb_objspace_t *objspace, void *ptr)
mid = (lo + hi) / 2;
page = heap_pages_sorted[mid];
if (page->start <= p) {
if ((uintptr_t)p < ((uintptr_t)page->start + (page->total_slots * page->size_pool->slot_size))) {
if ((uintptr_t)p < ((uintptr_t)page->start + (page->total_slots * page->slot_size))) {
RB_DEBUG_COUNTER_INC(gc_isptr_maybe);
if (page->flags.in_tomb) {
return FALSE;
}
else {
if ((NUM_IN_PAGE(p) * sizeof(RVALUE)) % page->size_pool->slot_size != 0) return FALSE;
if ((NUM_IN_PAGE(p) * sizeof(RVALUE)) % page->slot_size != 0) return FALSE;
return TRUE;
}
@ -4183,7 +4200,7 @@ rb_objspace_call_finalizer(rb_objspace_t *objspace)
/* run data/file object's finalizers */
for (i = 0; i < heap_allocated_pages; i++) {
struct heap_page *page = heap_pages_sorted[i];
short stride = page->size_pool->slot_size;
short stride = page->slot_size;
uintptr_t p = (uintptr_t)page->start;
uintptr_t pend = p + page->total_slots * stride;
@ -4780,13 +4797,13 @@ count_objects(int argc, VALUE *argv, VALUE os)
for (i = 0; i < heap_allocated_pages; i++) {
struct heap_page *page = heap_pages_sorted[i];
short stride = page->size_pool->slot_size;
short stride = page->slot_size;
uintptr_t p = (uintptr_t)page->start;
uintptr_t pend = p + page->total_slots * stride;
for (;p < pend; p += stride) {
VALUE vp = (VALUE)p;
GC_ASSERT((NUM_IN_PAGE(vp) * sizeof(RVALUE)) % page->size_pool->slot_size == 0);
GC_ASSERT((NUM_IN_PAGE(vp) * sizeof(RVALUE)) % page->slot_size == 0);
void *poisoned = asan_poisoned_object_p(vp);
asan_unpoison_object(vp, false);
@ -4916,7 +4933,7 @@ try_move_in_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *pa
from_freelist = true;
}
gc_move(objspace, (VALUE)p, dest, page->size_pool->slot_size);
gc_move(objspace, (VALUE)p, dest, page->slot_size);
gc_pin(objspace, (VALUE)p);
heap->compact_cursor_index = (RVALUE *)p;
if (from_freelist) {
@ -5216,7 +5233,7 @@ gc_fill_swept_page_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p,
struct heap_page * sweep_page = ctx->page;
if (bitset) {
short slot_size = sweep_page->size_pool->slot_size;
short slot_size = sweep_page->slot_size;
short slot_bits = slot_size / sizeof(RVALUE);
do {
@ -5307,7 +5324,7 @@ static inline void
gc_plane_sweep(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct gc_sweep_context *ctx)
{
struct heap_page * sweep_page = ctx->page;
short slot_size = sweep_page->size_pool->slot_size;
short slot_size = sweep_page->slot_size;
short slot_bits = slot_size / sizeof(RVALUE);
GC_ASSERT(slot_bits > 0);
@ -5385,7 +5402,6 @@ static inline void
gc_page_sweep(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap, struct gc_sweep_context *ctx)
{
struct heap_page *sweep_page = ctx->page;
GC_ASSERT(sweep_page->size_pool == size_pool);
int i;
@ -5603,7 +5619,23 @@ gc_sweep_finish_size_pool(rb_objspace_t *objspace, rb_size_pool_t *size_pool)
size_t min_free_slots = (size_t)(total_slots * gc_params.heap_free_slots_min_ratio);
if (swept_slots < min_free_slots) {
if (is_full_marking(objspace)) {
bool grow_heap = is_full_marking(objspace);
if (!is_full_marking(objspace)) {
/* The heap is a growth heap if it freed more slots than had empty slots. */
bool is_growth_heap = size_pool->empty_slots == 0 ||
size_pool->freed_slots > size_pool->empty_slots;
if (objspace->profile.count - objspace->rgengc.last_major_gc < RVALUE_OLD_AGE) {
grow_heap = TRUE;
}
else if (is_growth_heap) { /* Only growth heaps are allowed to start a major GC. */
objspace->rgengc.need_major_gc |= GPR_FLAG_MAJOR_BY_NOFREE;
size_pool->force_major_gc_count++;
}
}
if (grow_heap) {
size_t extend_page_count = heap_extend_pages(objspace, swept_slots, total_slots, total_pages);
if (extend_page_count > size_pool->allocatable_pages) {
@ -5612,18 +5644,6 @@ gc_sweep_finish_size_pool(rb_objspace_t *objspace, rb_size_pool_t *size_pool)
heap_increment(objspace, size_pool, SIZE_POOL_EDEN_HEAP(size_pool));
}
else {
/* The heap is a growth heap if it freed more slots than had empty slots. */
bool is_growth_heap = size_pool->empty_slots == 0 ||
size_pool->freed_slots > size_pool->empty_slots;
/* Only growth heaps are allowed to start a major GC. */
if (is_growth_heap &&
objspace->profile.count - objspace->rgengc.last_major_gc >= RVALUE_OLD_AGE) {
objspace->rgengc.need_major_gc |= GPR_FLAG_MAJOR_BY_NOFREE;
size_pool->force_major_gc_count++;
}
}
}
}
#endif
@ -5660,6 +5680,7 @@ gc_sweep_finish(rb_objspace_t *objspace)
else {
eden_heap->free_pages = eden_heap->pooled_pages;
}
eden_heap->pooled_pages = NULL;
objspace->rincgc.pooled_slots = 0;
}
#endif
@ -5701,8 +5722,6 @@ gc_sweep_step(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *hea
#endif
do {
GC_ASSERT(sweep_page->size_pool == size_pool);
RUBY_DEBUG_LOG("sweep_page:%p", (void *)sweep_page);
struct gc_sweep_context ctx = {
@ -5831,7 +5850,7 @@ invalidate_moved_plane(rb_objspace_t *objspace, struct heap_page *page, uintptr_
bool from_freelist = FL_TEST_RAW(forwarding_object, FL_FROM_FREELIST);
object = rb_gc_location(forwarding_object);
gc_move(objspace, object, forwarding_object, page->size_pool->slot_size);
gc_move(objspace, object, forwarding_object, page->slot_size);
/* forwarding_object is now our actual object, and "object"
* is the free slot for the original page */
struct heap_page *orig_page = GET_HEAP_PAGE(object);
@ -7654,7 +7673,7 @@ gc_verify_heap_page(rb_objspace_t *objspace, struct heap_page *page, VALUE obj)
int remembered_old_objects = 0;
int free_objects = 0;
int zombie_objects = 0;
int stride = page->size_pool->slot_size / sizeof(RVALUE);
int stride = page->slot_size / sizeof(RVALUE);
for (i=0; i<page->total_slots; i+=stride) {
VALUE val = (VALUE)&page->start[i];
@ -7776,7 +7795,7 @@ gc_verify_internal_consistency_(rb_objspace_t *objspace)
/* check relations */
for (size_t i = 0; i < heap_allocated_pages; i++) {
struct heap_page *page = heap_pages_sorted[i];
short slot_size = page->size_pool->slot_size;
short slot_size = page->slot_size;
uintptr_t start = (uintptr_t)page->start;
uintptr_t end = start + page->total_slots * slot_size;
@ -10019,7 +10038,19 @@ gc_update_object_references(rb_objspace_t *objspace, VALUE obj)
case T_STRING:
if (STR_SHARED_P(obj)) {
#if USE_RVARGC
VALUE orig_shared = any->as.string.as.heap.aux.shared;
#endif
UPDATE_IF_MOVED(objspace, any->as.string.as.heap.aux.shared);
#if USE_RVARGC
VALUE shared = any->as.string.as.heap.aux.shared;
if (STR_EMBED_P(shared)) {
size_t offset = (size_t)any->as.string.as.heap.ptr - (size_t)RSTRING(orig_shared)->as.embed.ary;
GC_ASSERT(any->as.string.as.heap.ptr >= RSTRING(orig_shared)->as.embed.ary);
GC_ASSERT(offset <= (size_t)RSTRING(shared)->as.embed.len);
any->as.string.as.heap.ptr = RSTRING(shared)->as.embed.ary + offset;
}
#endif
}
break;
@ -13561,6 +13592,8 @@ Init_GC(void)
rb_hash_aset(gc_constants, ID2SYM(rb_intern("HEAP_PAGE_BITMAP_SIZE")), SIZET2NUM(HEAP_PAGE_BITMAP_SIZE));
rb_hash_aset(gc_constants, ID2SYM(rb_intern("HEAP_PAGE_BITMAP_PLANES")), SIZET2NUM(HEAP_PAGE_BITMAP_PLANES));
rb_hash_aset(gc_constants, ID2SYM(rb_intern("HEAP_PAGE_SIZE")), SIZET2NUM(HEAP_PAGE_SIZE));
rb_hash_aset(gc_constants, ID2SYM(rb_intern("SIZE_POOL_COUNT")), LONG2FIX(SIZE_POOL_COUNT));
rb_hash_aset(gc_constants, ID2SYM(rb_intern("RVARGC_MAX_ALLOCATE_SIZE")), LONG2FIX(size_pool_slot_size(SIZE_POOL_COUNT - 1)));
OBJ_FREEZE(gc_constants);
/* internal constants */
rb_define_const(rb_mGC, "INTERNAL_CONSTANTS", gc_constants);

10
gc.rb
Просмотреть файл

@ -256,6 +256,16 @@ module GC
def self.verify_compaction_references(toward: nil, double_heap: false)
Primitive.gc_verify_compaction_references(double_heap, toward == :empty)
end
# :nodoc:
# call-seq:
#   GC.using_rvargc? -> true or false
#
# Returns +true+ when the experimental Variable Width Allocation feature is
# in use, +false+ otherwise. The check is whether
# GC::INTERNAL_CONSTANTS[:SIZE_POOL_COUNT] is greater than 1.
def self.using_rvargc?
GC::INTERNAL_CONSTANTS[:SIZE_POOL_COUNT] > 1
end
end
module ObjectSpace

Просмотреть файл

@ -146,4 +146,8 @@
# undef RBIMPL_TEST3
#endif /* HAVE_VA_ARGS_MACRO */
#ifndef USE_RVARGC
# define USE_RVARGC 0
#endif
#endif /* RBIMPL_CONFIG_H */

Просмотреть файл

@ -42,9 +42,11 @@
/** @cond INTERNAL_MACRO */
#define RSTRING_NOEMBED RSTRING_NOEMBED
#if !USE_RVARGC
#define RSTRING_EMBED_LEN_MASK RSTRING_EMBED_LEN_MASK
#define RSTRING_EMBED_LEN_SHIFT RSTRING_EMBED_LEN_SHIFT
#define RSTRING_EMBED_LEN_MAX RSTRING_EMBED_LEN_MAX
#endif
#define RSTRING_FSTR RSTRING_FSTR
#define RSTRING_EMBED_LEN RSTRING_EMBED_LEN
#define RSTRING_LEN RSTRING_LEN
@ -160,6 +162,7 @@ enum ruby_rstring_flags {
*/
RSTRING_NOEMBED = RUBY_FL_USER1,
#if !USE_RVARGC
/**
* When a string employs embedded strategy (see ::RSTRING_NOEMBED), these
* bits are used to store the number of bytes actually filled into
@ -172,6 +175,7 @@ enum ruby_rstring_flags {
*/
RSTRING_EMBED_LEN_MASK = RUBY_FL_USER2 | RUBY_FL_USER3 | RUBY_FL_USER4 |
RUBY_FL_USER5 | RUBY_FL_USER6,
#endif
/* Actually, string encodings are also encoded into the flags, using
* remaining bits.*/
@ -198,6 +202,7 @@ enum ruby_rstring_flags {
RSTRING_FSTR = RUBY_FL_USER17
};
#if !USE_RVARGC
/**
* This is an enum because GDB wants it (rather than a macro). People need not
* bother.
@ -209,6 +214,7 @@ enum ruby_rstring_consts {
/** Max possible number of characters that can be embedded. */
RSTRING_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(char) - 1
};
#endif
/**
* Ruby's String. A string in ruby conceptually has these information:
@ -278,7 +284,17 @@ struct RString {
* here. Could be sufficiently large. In this case the length is
* encoded into the flags.
*/
#if USE_RVARGC
short len;
/* This is a length 1 array because:
* 1. GCC has a bug that does not optimize C flexible array members
* (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452)
* 2. Zero length arrays are not supported by all compilers
*/
char ary[1];
#else
char ary[RSTRING_EMBED_LEN_MAX + 1];
#endif
} embed;
} as;
};
@ -407,9 +423,13 @@ RSTRING_EMBED_LEN(VALUE str)
RBIMPL_ASSERT_TYPE(str, RUBY_T_STRING);
RBIMPL_ASSERT_OR_ASSUME(! RB_FL_ANY_RAW(str, RSTRING_NOEMBED));
#if USE_RVARGC
short f = RSTRING(str)->as.embed.len;
#else
VALUE f = RBASIC(str)->flags;
f &= RSTRING_EMBED_LEN_MASK;
f >>= RSTRING_EMBED_LEN_SHIFT;
#endif
return RBIMPL_CAST((long)f);
}

Просмотреть файл

@ -18,10 +18,6 @@
struct rb_execution_context_struct; /* in vm_core.h */
struct rb_objspace; /* in vm_core.h */
#ifndef USE_RVARGC
#define USE_RVARGC 0
#endif
#ifdef NEWOBJ_OF
# undef NEWOBJ_OF
# undef RB_NEWOBJ_OF
@ -30,22 +26,21 @@ struct rb_objspace; /* in vm_core.h */
#define RVALUE_SIZE (sizeof(struct RBasic) + sizeof(VALUE[RBIMPL_RVALUE_EMBED_LEN_MAX]))
/* optimized version of NEWOBJ() */
#define RB_NEWOBJ_OF(var, T, c, f) \
T *(var) = (T *)(((f) & FL_WB_PROTECTED) ? \
rb_wb_protected_newobj_of((c), (f) & ~FL_WB_PROTECTED, RVALUE_SIZE) : \
rb_wb_unprotected_newobj_of((c), (f), RVALUE_SIZE))
#define RB_EC_NEWOBJ_OF(ec, var, T, c, f) \
T *(var) = (T *)(((f) & FL_WB_PROTECTED) ? \
rb_ec_wb_protected_newobj_of((ec), (c), (f) & ~FL_WB_PROTECTED, RVALUE_SIZE) : \
rb_wb_unprotected_newobj_of((c), (f), RVALUE_SIZE))
/* Declares `T *var` and initializes it with a newly allocated object of
 * class `c`, flags `f` and requested size `s`.
 *
 * When `f` contains FL_WB_PROTECTED, rb_wb_protected_newobj_of() is called
 * with that bit stripped from the flags; otherwise
 * rb_wb_unprotected_newobj_of() is called with `f` unchanged.
 * Because the expansion is a declaration, use it where a declaration is
 * allowed. */
#define RB_RVARGC_NEWOBJ_OF(var, T, c, f, s) \
T *(var) = (T *)(((f) & FL_WB_PROTECTED) ? \
rb_wb_protected_newobj_of((c), (f) & ~FL_WB_PROTECTED, s) : \
rb_wb_unprotected_newobj_of((c), (f), s))
/* Same as RB_RVARGC_NEWOBJ_OF, but the write-barrier-protected branch goes
 * through rb_ec_wb_protected_newobj_of() and passes the execution context
 * `ec`.  The unprotected branch does not use `ec`: it calls
 * rb_wb_unprotected_newobj_of() directly. */
#define RB_RVARGC_EC_NEWOBJ_OF(ec, var, T, c, f, s) \
T *(var) = (T *)(((f) & FL_WB_PROTECTED) ? \
rb_ec_wb_protected_newobj_of((ec), (c), (f) & ~FL_WB_PROTECTED, s) : \
rb_wb_unprotected_newobj_of((c), (f), s))
/* optimized version of NEWOBJ() */
#define RB_NEWOBJ_OF(var, T, c, f) RB_RVARGC_NEWOBJ_OF(var, T, c, f, RVALUE_SIZE)
#define RB_EC_NEWOBJ_OF(ec, var, T, c, f) RB_RVARGC_EC_NEWOBJ_OF(ec, var, T, c, f, RVALUE_SIZE)
#define NEWOBJ_OF(var, T, c, f) RB_NEWOBJ_OF((var), T, (c), (f))
#define RVARGC_NEWOBJ_OF(var, T, c, f, s) RB_RVARGC_NEWOBJ_OF((var), T, (c), (f), (s))
#define RB_OBJ_GC_FLAGS_MAX 6 /* used in ext/objspace */
@ -102,6 +97,8 @@ static inline void *ruby_sized_xrealloc2_inlined(void *ptr, size_t new_count, si
static inline void ruby_sized_xfree_inlined(void *ptr, size_t size);
VALUE rb_class_allocate_instance(VALUE klass);
void rb_gc_ractor_newobj_cache_clear(rb_ractor_newobj_cache_t *newobj_cache);
size_t rb_gc_obj_slot_size(VALUE obj);
bool rb_gc_size_allocatable_p(size_t size);
RUBY_SYMBOL_EXPORT_BEGIN
/* gc.c (export) */

Просмотреть файл

@ -190,6 +190,8 @@ def string2cstr(rstring):
cptr = int(rstring.GetValueForExpressionPath(".as.heap.ptr").value, 0)
clen = int(rstring.GetValueForExpressionPath(".as.heap.len").value, 0)
else:
# cptr = int(rstring.GetValueForExpressionPath(".as.embed.ary").location, 0)
# clen = int(rstring.GetValueForExpressionPath(".as.embed.len").value, 0)
cptr = int(rstring.GetValueForExpressionPath(".as.ary").location, 0)
clen = (flags & RSTRING_EMBED_LEN_MASK) >> RSTRING_EMBED_LEN_SHIFT
return cptr, clen
@ -315,7 +317,6 @@ def lldb_inspect(debugger, target, result, val):
else:
len = val.GetValueForExpressionPath("->as.heap.len").GetValueAsSigned()
ptr = val.GetValueForExpressionPath("->as.heap.ptr")
#print(val.GetValueForExpressionPath("->as.heap"), file=result)
result.write("T_ARRAY: %slen=%d" % (flaginfo, len))
if flags & RUBY_FL_USER1:
result.write(" (embed)")

7
ruby.c
Просмотреть файл

@ -566,7 +566,12 @@ static VALUE
runtime_libruby_path(void)
{
#if defined _WIN32 || defined __CYGWIN__
DWORD len = RSTRING_EMBED_LEN_MAX, ret;
DWORD len, ret;
#if USE_RVARGC
len = 32;
#else
len = RSTRING_EMBED_LEN_MAX;
#endif
VALUE path;
VALUE wsopath = rb_str_new(0, len*sizeof(WCHAR));
WCHAR *wlibpath;

Просмотреть файл

@ -108,7 +108,7 @@ describe "C-API String function" do
it "returns a string with the given capacity" do
buf = @s.rb_str_buf_new(256, nil)
@s.rb_str_capacity(buf).should == 256
@s.rb_str_capacity(buf).should >= 256
end
it "returns a string that can be appended to" do
@ -682,27 +682,27 @@ describe "C-API String function" do
describe "rb_str_modify_expand" do
it "grows the capacity to bytesize + expand, not changing the bytesize" do
str = @s.rb_str_buf_new(256, "abcd")
@s.rb_str_capacity(str).should == 256
@s.rb_str_capacity(str).should >= 256
@s.rb_str_set_len(str, 3)
str.bytesize.should == 3
@s.RSTRING_LEN(str).should == 3
@s.rb_str_capacity(str).should == 256
@s.rb_str_capacity(str).should >= 256
@s.rb_str_modify_expand(str, 4)
str.bytesize.should == 3
@s.RSTRING_LEN(str).should == 3
@s.rb_str_capacity(str).should == 7
@s.rb_str_capacity(str).should >= 7
@s.rb_str_modify_expand(str, 1024)
str.bytesize.should == 3
@s.RSTRING_LEN(str).should == 3
@s.rb_str_capacity(str).should == 1027
@s.rb_str_capacity(str).should >= 1027
@s.rb_str_modify_expand(str, 1)
str.bytesize.should == 3
@s.RSTRING_LEN(str).should == 3
@s.rb_str_capacity(str).should == 4
@s.rb_str_capacity(str).should >= 4
end
it "raises an error if the string is frozen" do

323
string.c
Просмотреть файл

@ -106,14 +106,26 @@ VALUE rb_cSymbol;
#define STR_SET_NOEMBED(str) do {\
FL_SET((str), STR_NOEMBED);\
STR_SET_EMBED_LEN((str), 0);\
if (USE_RVARGC) {\
FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
}\
else {\
STR_SET_EMBED_LEN((str), 0);\
}\
} while (0)
#define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE))
#define STR_SET_EMBED_LEN(str, n) do { \
#if USE_RVARGC
# define STR_SET_EMBED_LEN(str, n) do { \
assert(str_embed_capa(str) > (n));\
RSTRING(str)->as.embed.len = (n);\
} while (0)
#else
# define STR_SET_EMBED_LEN(str, n) do { \
long tmp_n = (n);\
RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\
RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\
} while (0)
#endif
#define STR_SET_LEN(str, n) do { \
if (STR_EMBED_P(str)) {\
@ -150,7 +162,7 @@ VALUE rb_cSymbol;
} while (0)
#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
if (STR_EMBED_P(str)) {\
if (!STR_EMBEDDABLE_P(capacity, termlen)) {\
if (str_embed_capa(str) < capacity + termlen) {\
char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
const long tlen = RSTRING_LEN(str);\
memcpy(tmp, RSTRING_PTR(str), tlen);\
@ -170,6 +182,8 @@ VALUE rb_cSymbol;
#define STR_SET_SHARED(str, shared_str) do { \
if (!FL_TEST(str, STR_FAKESTR)) { \
assert(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
assert(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
FL_SET((str), STR_SHARED); \
FL_SET((shared_str), STR_SHARED_ROOT); \
@ -193,8 +207,32 @@ VALUE rb_cSymbol;
#define SHARABLE_SUBSTRING_P(beg, len, end) 1
#endif
#define STR_EMBEDDABLE_P(len, termlen) \
((len) <= RSTRING_EMBED_LEN_MAX + 1 - (termlen))
/* Number of bytes available in the embedded buffer of `str`.
 *
 * With USE_RVARGC this is the size of the GC slot holding `str` minus the
 * offset of as.embed.ary inside struct RString; otherwise it is the fixed
 * RSTRING_EMBED_LEN_MAX + 1. */
static inline long
str_embed_capa(VALUE str)
{
#if USE_RVARGC
    const size_t header_bytes = offsetof(struct RString, as.embed.ary);
    return rb_gc_obj_slot_size(str) - header_bytes;
#else
    return RSTRING_EMBED_LEN_MAX + 1;
#endif
}
/* Allocation size in bytes of an embedded string whose buffer holds `capa`
 * bytes: the offset of as.embed.ary within struct RString plus `capa`. */
static inline size_t
str_embed_size(long capa)
{
    const size_t header_bytes = offsetof(struct RString, as.embed.ary);
    return header_bytes + capa;
}
/* Tells whether a string of `len` bytes plus a `termlen`-byte terminator can
 * be stored embedded.
 *
 * With USE_RVARGC the answer is whether the resulting embed size is
 * allocatable by the GC; otherwise it is a comparison against the fixed
 * RSTRING_EMBED_LEN_MAX + 1 byte buffer. */
static inline bool
STR_EMBEDDABLE_P(long len, long termlen)
{
#if USE_RVARGC
    const size_t needed = str_embed_size(len + termlen);
    return rb_gc_size_allocatable_p(needed);
#else
    const long room = RSTRING_EMBED_LEN_MAX + 1 - termlen;
    return len <= room;
#endif
}
static VALUE str_replace_shared_without_enc(VALUE str2, VALUE str);
static VALUE str_new_frozen(VALUE klass, VALUE orig);
@ -768,7 +806,11 @@ static size_t
str_capacity(VALUE str, const int termlen)
{
if (STR_EMBED_P(str)) {
#if USE_RVARGC
return str_embed_capa(str) - termlen;
#else
return (RSTRING_EMBED_LEN_MAX + 1 - termlen);
#endif
}
else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) {
return RSTRING(str)->as.heap.len;
@ -793,17 +835,36 @@ must_not_null(const char *ptr)
}
static inline VALUE
str_alloc(VALUE klass)
str_alloc(VALUE klass, size_t size)
{
NEWOBJ_OF(str, struct RString, klass, T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0));
assert(size > 0);
RVARGC_NEWOBJ_OF(str, struct RString, klass,
T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0), size);
return (VALUE)str;
}
/* Allocates a string object of class `klass` sized for an embedded buffer
 * of `capa` bytes.
 *
 * Asserts that the resulting size is allocatable by the GC and, without
 * USE_RVARGC, that it does not exceed sizeof(struct RString). */
static inline VALUE
str_alloc_embed(VALUE klass, size_t capa)
{
    const size_t alloc_size = str_embed_size(capa);

    assert(rb_gc_size_allocatable_p(alloc_size));
#if !USE_RVARGC
    assert(alloc_size <= sizeof(struct RString));
#endif

    return str_alloc(klass, alloc_size);
}
/* Allocates a string object of class `klass` whose size is exactly
 * sizeof(struct RString). */
static inline VALUE
str_alloc_heap(VALUE klass)
{
    const size_t rstring_bytes = sizeof(struct RString);
    return str_alloc(klass, rstring_bytes);
}
static inline VALUE
empty_str_alloc(VALUE klass)
{
RUBY_DTRACE_CREATE_HOOK(STRING, 0);
return str_alloc(klass);
return str_alloc_embed(klass, 0);
}
static VALUE
@ -817,8 +878,14 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen)
RUBY_DTRACE_CREATE_HOOK(STRING, len);
str = str_alloc(klass);
if (!STR_EMBEDDABLE_P(len, termlen)) {
if (STR_EMBEDDABLE_P(len, termlen)) {
str = str_alloc_embed(klass, len + termlen);
if (len == 0) {
ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
}
}
else {
str = str_alloc_heap(klass);
RSTRING(str)->as.heap.aux.capa = len;
/* :FIXME: @shyouhei guesses `len + termlen` is guaranteed to never
* integer overflow. If we can STATIC_ASSERT that, the following
@ -827,9 +894,6 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen)
rb_xmalloc_mul_add_mul(sizeof(char), len, sizeof(char), termlen);
STR_SET_NOEMBED(str);
}
else if (len == 0) {
ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
}
if (ptr) {
memcpy(RSTRING_PTR(str), ptr, len);
}
@ -931,7 +995,7 @@ str_new_static(VALUE klass, const char *ptr, long len, int encindex)
}
else {
RUBY_DTRACE_CREATE_HOOK(STRING, len);
str = str_alloc(klass);
str = str_alloc_heap(klass);
RSTRING(str)->as.heap.len = len;
RSTRING(str)->as.heap.ptr = (char *)ptr;
RSTRING(str)->as.heap.aux.capa = len;
@ -1228,8 +1292,8 @@ str_replace_shared_without_enc(VALUE str2, VALUE str)
long len;
RSTRING_GETMEM(str, ptr, len);
if (STR_EMBEDDABLE_P(len, termlen)) {
char *ptr2 = RSTRING(str2)->as.embed.ary;
if (str_embed_capa(str2) >= len + termlen) {
char *ptr2 = RSTRING(str2)->as.embed.ary;
STR_SET_EMBED(str2);
memcpy(ptr2, RSTRING_PTR(str), len);
STR_SET_EMBED_LEN(str2, len);
@ -1245,6 +1309,7 @@ str_replace_shared_without_enc(VALUE str2, VALUE str)
root = rb_str_new_frozen(str);
RSTRING_GETMEM(root, ptr, len);
}
assert(OBJ_FROZEN(root));
if (!STR_EMBED_P(str2) && !FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
if (FL_TEST_RAW(str2, STR_SHARED_ROOT)) {
rb_fatal("about to free a possible shared root");
@ -1273,7 +1338,7 @@ str_replace_shared(VALUE str2, VALUE str)
static VALUE
str_new_shared(VALUE klass, VALUE str)
{
return str_replace_shared(str_alloc(klass), str);
return str_replace_shared(str_alloc_heap(klass), str);
}
VALUE
@ -1335,26 +1400,54 @@ str_new_frozen(VALUE klass, VALUE orig)
return str_new_frozen_buffer(klass, orig, TRUE);
}
/* Creates a new non-embedded string of class `klass` that takes over the
 * buffer pointer, length and capacity of `orig`, then makes `orig` a shared
 * string that refers to the new string.
 *
 * `orig` must be neither embedded nor already shared (asserted below).
 * Returns the newly created string. */
static VALUE
heap_str_make_shared(VALUE klass, VALUE orig)
{
assert(!STR_EMBED_P(orig));
assert(!STR_SHARED_P(orig));
VALUE str = str_alloc_heap(klass);
STR_SET_NOEMBED(str);
/* Only the pointer, length and capacity are copied; no string bytes are. */
RSTRING(str)->as.heap.len = RSTRING_LEN(orig);
RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
RSTRING(str)->as.heap.aux.capa = RSTRING(orig)->as.heap.aux.capa;
/* Move the STR_NOFREE bit (if set) from orig to the new string. */
RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE;
RBASIC(orig)->flags &= ~STR_NOFREE;
/* Record str as the string whose buffer orig shares. */
STR_SET_SHARED(orig, str);
if (klass == 0)
FL_UNSET_RAW(str, STR_BORROWED);
return str;
}
static VALUE
str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding)
{
VALUE str;
if (STR_EMBED_P(orig)) {
str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
long len = RSTRING_LEN(orig);
if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(len, 1)) {
str = str_new(klass, RSTRING_PTR(orig), len);
assert(STR_EMBED_P(str));
}
else {
if (FL_TEST_RAW(orig, STR_SHARED)) {
VALUE shared = RSTRING(orig)->as.heap.aux.shared;
long ofs = RSTRING(orig)->as.heap.ptr - RSTRING(shared)->as.heap.ptr;
long rest = RSTRING(shared)->as.heap.len - ofs - RSTRING(orig)->as.heap.len;
long ofs = RSTRING(orig)->as.heap.ptr - RSTRING_PTR(shared);
long rest = RSTRING_LEN(shared) - ofs - RSTRING(orig)->as.heap.len;
assert(ofs >= 0);
assert(rest >= 0);
assert(ofs + rest <= RSTRING_LEN(shared));
#if !USE_RVARGC
assert(!STR_EMBED_P(shared));
#endif
assert(OBJ_FROZEN(shared));
if ((ofs > 0) || (rest > 0) ||
(klass != RBASIC(shared)->klass) ||
ENCODING_GET(shared) != ENCODING_GET(orig)) {
str = str_new_shared(klass, shared);
assert(!STR_EMBED_P(str));
RSTRING(str)->as.heap.ptr += ofs;
RSTRING(str)->as.heap.len -= ofs + rest;
}
@ -1364,24 +1457,15 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding)
return shared;
}
}
else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
str = str_alloc(klass);
else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
STR_SET_EMBED(str);
memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
STR_SET_EMBED_LEN(str, RSTRING_LEN(orig));
TERM_FILL(RSTRING_END(str), TERM_LEN(orig));
}
else {
str = str_alloc(klass);
STR_SET_NOEMBED(str);
RSTRING(str)->as.heap.len = RSTRING_LEN(orig);
RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
RSTRING(str)->as.heap.aux.capa = RSTRING(orig)->as.heap.aux.capa;
RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE;
RBASIC(orig)->flags &= ~STR_NOFREE;
STR_SET_SHARED(orig, str);
if (klass == 0)
FL_UNSET_RAW(str, STR_BORROWED);
str = heap_str_make_shared(klass, orig);
}
}
@ -1405,17 +1489,24 @@ str_new_empty_String(VALUE str)
}
#define STR_BUF_MIN_SIZE 63
#if !USE_RVARGC
STATIC_ASSERT(STR_BUF_MIN_SIZE, STR_BUF_MIN_SIZE > RSTRING_EMBED_LEN_MAX);
#endif
VALUE
rb_str_buf_new(long capa)
{
VALUE str = str_alloc(rb_cString);
if (STR_EMBEDDABLE_P(capa, 1)) {
return str_alloc_embed(rb_cString, capa + 1);
}
if (capa <= RSTRING_EMBED_LEN_MAX) return str;
VALUE str = str_alloc_heap(rb_cString);
#if !USE_RVARGC
if (capa < STR_BUF_MIN_SIZE) {
capa = STR_BUF_MIN_SIZE;
}
#endif
FL_SET(str, STR_NOEMBED);
RSTRING(str)->as.heap.aux.capa = capa;
RSTRING(str)->as.heap.ptr = ALLOC_N(char, (size_t)capa + 1);
@ -1508,7 +1599,7 @@ str_shared_replace(VALUE str, VALUE str2)
str_discard(str);
termlen = rb_enc_mbminlen(enc);
if (STR_EMBEDDABLE_P(RSTRING_LEN(str2), termlen)) {
if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
STR_SET_EMBED(str);
memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (size_t)RSTRING_LEN(str2) + termlen);
STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
@ -1516,6 +1607,21 @@ str_shared_replace(VALUE str, VALUE str2)
ENC_CODERANGE_SET(str, cr);
}
else {
#if USE_RVARGC
if (STR_EMBED_P(str2)) {
assert(!FL_TEST(str2, STR_SHARED));
long len = RSTRING(str2)->as.embed.len;
assert(len + termlen <= str_embed_capa(str2));
char *new_ptr = ALLOC_N(char, len + termlen);
memcpy(new_ptr, RSTRING(str2)->as.embed.ary, len + termlen);
RSTRING(str2)->as.heap.ptr = new_ptr;
RSTRING(str2)->as.heap.len = len;
RSTRING(str2)->as.heap.aux.capa = len;
STR_SET_NOEMBED(str2);
}
#endif
STR_SET_NOEMBED(str);
FL_UNSET(str, STR_SHARED);
RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
@ -1581,42 +1687,77 @@ str_replace(VALUE str, VALUE str2)
}
/*
 * Allocates a T_STRING object of class klass through execution context ec.
 * FL_WB_PROTECTED is added to the flags when RGENGC_WB_PROTECTED_STRING is
 * non-zero. In the three-argument form, size is the allocation size handed to
 * RB_RVARGC_EC_NEWOBJ_OF and is asserted to be greater than zero.
 *
 * NOTE(review): two signatures and two NEWOBJ calls appear back to back here;
 * this looks like the removed and added lines of a diff hunk shown without
 * +/- markers. Confirm which pair is current before relying on this span.
 */
static inline VALUE
ec_str_alloc(struct rb_execution_context_struct *ec, VALUE klass)
ec_str_alloc(struct rb_execution_context_struct *ec, VALUE klass, size_t size)
{
RB_EC_NEWOBJ_OF(ec, str, struct RString, klass, T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0));
assert(size > 0);
RB_RVARGC_EC_NEWOBJ_OF(ec, str, struct RString, klass,
T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0), size);
return (VALUE)str;
}
/*
 * Allocates a string object of class klass, through ec, sized to keep its
 * bytes inline.
 *
 * capa is turned into an allocation size by str_embed_size(). That size must
 * be one the GC reports as allocatable, and when USE_RVARGC is off it must
 * also fit inside struct RString.
 */
static inline VALUE
ec_str_alloc_embed(struct rb_execution_context_struct *ec, VALUE klass, size_t capa)
{
    const size_t slot_size = str_embed_size(capa);

    assert(rb_gc_size_allocatable_p(slot_size));
#if !USE_RVARGC
    assert(slot_size <= sizeof(struct RString));
#endif

    return ec_str_alloc(ec, klass, slot_size);
}
/*
 * Allocates a string object of class klass, through ec, whose allocation
 * size is exactly sizeof(struct RString).
 */
static inline VALUE
ec_str_alloc_heap(struct rb_execution_context_struct *ec, VALUE klass)
{
    const size_t slot_size = sizeof(struct RString);

    return ec_str_alloc(ec, klass, slot_size);
}
static inline VALUE
str_duplicate_setup(VALUE klass, VALUE str, VALUE dup)
{
enum {embed_size = RSTRING_EMBED_LEN_MAX + 1};
const VALUE flag_mask =
#if !USE_RVARGC
RSTRING_NOEMBED | RSTRING_EMBED_LEN_MASK |
ENC_CODERANGE_MASK | ENCODING_MASK |
#endif
ENC_CODERANGE_MASK | ENCODING_MASK |
FL_FREEZE
;
VALUE flags = FL_TEST_RAW(str, flag_mask);
int encidx = 0;
MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary,
char, embed_size);
if (flags & STR_NOEMBED) {
if (STR_EMBED_P(str)) {
assert(str_embed_capa(dup) >= RSTRING_EMBED_LEN(str));
STR_SET_EMBED_LEN(dup, RSTRING_EMBED_LEN(str));
MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary,
char, RSTRING_EMBED_LEN(str));
flags &= ~RSTRING_NOEMBED;
}
else {
VALUE root = str;
if (FL_TEST_RAW(str, STR_SHARED)) {
str = RSTRING(str)->as.heap.aux.shared;
root = RSTRING(str)->as.heap.aux.shared;
}
else if (UNLIKELY(!(flags & FL_FREEZE))) {
str = str_new_frozen(klass, str);
root = str = str_new_frozen(klass, str);
flags = FL_TEST_RAW(str, flag_mask);
}
if (flags & STR_NOEMBED) {
RB_OBJ_WRITE(dup, &RSTRING(dup)->as.heap.aux.shared, str);
flags |= STR_SHARED;
}
else {
MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary,
char, embed_size);
}
}
assert(!STR_SHARED_P(root));
assert(RB_OBJ_FROZEN_RAW(root));
#if USE_RVARGC
if (1) {
#else
if (STR_EMBED_P(root)) {
MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(root)->as.embed.ary,
char, RSTRING_EMBED_LEN_MAX + 1);
}
else {
#endif
RSTRING(dup)->as.heap.len = RSTRING_LEN(str);
RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
RB_OBJ_WRITE(dup, &RSTRING(dup)->as.heap.aux.shared, root);
flags |= RSTRING_NOEMBED | STR_SHARED;
}
}
if ((flags & ENCODING_MASK) == (ENCODING_INLINE_MAX<<ENCODING_SHIFT)) {
encidx = rb_enc_get_index(str);
flags &= ~ENCODING_MASK;
@ -1629,14 +1770,28 @@ str_duplicate_setup(VALUE klass, VALUE str, VALUE dup)
/*
 * Duplicates str as an instance of klass, allocating through ec.
 *
 * A source flagged STR_NOEMBED gets its copy from ec_str_alloc_heap();
 * otherwise the copy comes from ec_str_alloc_embed() with a capacity of the
 * source's embedded length plus its terminator length. The value returned is
 * whatever str_duplicate_setup(klass, str, dup) returns.
 *
 * NOTE(review): `dup` is declared twice in a row below, the first time
 * initialised through a two-argument ec_str_alloc() call. This looks like the
 * removed and added lines of a diff hunk shown without +/- markers; confirm
 * which declaration is current.
 */
static inline VALUE
ec_str_duplicate(struct rb_execution_context_struct *ec, VALUE klass, VALUE str)
{
VALUE dup = ec_str_alloc(ec, klass);
VALUE dup;
if (FL_TEST(str, STR_NOEMBED)) {
dup = ec_str_alloc_heap(ec, klass);
}
else {
dup = ec_str_alloc_embed(ec, klass, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
}
return str_duplicate_setup(klass, str, dup);
}
/*
 * Duplicates str as an instance of klass.
 *
 * A source flagged STR_NOEMBED gets its copy from str_alloc_heap(); otherwise
 * the copy comes from str_alloc_embed() with a capacity of the source's
 * embedded length plus its terminator length. The value returned is whatever
 * str_duplicate_setup(klass, str, dup) returns.
 *
 * NOTE(review): `dup` is declared twice in a row below, the first time
 * initialised through str_alloc(klass). This looks like the removed and added
 * lines of a diff hunk shown without +/- markers; confirm which declaration
 * is current.
 */
static inline VALUE
str_duplicate(VALUE klass, VALUE str)
{
VALUE dup = str_alloc(klass);
VALUE dup;
if (FL_TEST(str, STR_NOEMBED)) {
dup = str_alloc_heap(klass);
}
else {
dup = str_alloc_embed(klass, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
}
return str_duplicate_setup(klass, str, dup);
}
@ -1745,7 +1900,12 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
str_modifiable(str);
if (STR_EMBED_P(str)) { /* make noembed always */
char *new_ptr = ALLOC_N(char, (size_t)capa + termlen);
#if USE_RVARGC
assert(RSTRING(str)->as.embed.len + 1 <= str_embed_capa(str));
memcpy(new_ptr, RSTRING(str)->as.embed.ary, RSTRING(str)->as.embed.len + 1);
#else
memcpy(new_ptr, RSTRING(str)->as.embed.ary, RSTRING_EMBED_LEN_MAX + 1);
#endif
RSTRING(str)->as.heap.ptr = new_ptr;
}
else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) {
@ -2133,7 +2293,7 @@ rb_str_times(VALUE str, VALUE times)
return str_duplicate(rb_cString, str);
}
if (times == INT2FIX(0)) {
str2 = str_alloc(rb_cString);
str2 = str_alloc_embed(rb_cString, 0);
rb_enc_copy(str2, str);
return str2;
}
@ -2142,8 +2302,11 @@ rb_str_times(VALUE str, VALUE times)
rb_raise(rb_eArgError, "negative argument");
}
if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
str2 = str_alloc(rb_cString);
if (!STR_EMBEDDABLE_P(len, 1)) {
if (STR_EMBEDDABLE_P(len, 1)) {
str2 = str_alloc_embed(rb_cString, len + 1);
}
else {
str2 = str_alloc_heap(rb_cString);
RSTRING(str2)->as.heap.aux.capa = len;
RSTRING(str2)->as.heap.ptr = ZALLOC_N(char, (size_t)len + 1);
STR_SET_NOEMBED(str2);
@ -2244,11 +2407,11 @@ str_make_independent_expand(VALUE str, long len, long expand, const int termlen)
if (len > capa) len = capa;
if (!STR_EMBED_P(str) && STR_EMBEDDABLE_P(capa, termlen)) {
if (!STR_EMBED_P(str) && str_embed_capa(str) >= capa + termlen) {
ptr = RSTRING(str)->as.heap.ptr;
STR_SET_EMBED(str);
memcpy(RSTRING(str)->as.embed.ary, ptr, len);
TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
memcpy(RSTRING(str)->as.embed.ary, ptr, len);
TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
STR_SET_EMBED_LEN(str, len);
return;
}
@ -2646,7 +2809,7 @@ rb_str_subseq(VALUE str, long beg, long len)
}
else {
str2 = rb_str_new(RSTRING_PTR(str)+beg, len);
RB_GC_GUARD(str);
RB_GC_GUARD(str);
}
rb_enc_cr_str_copy_for_substr(str2, str);
@ -2885,19 +3048,19 @@ rb_str_resize(VALUE str, long len)
const int termlen = TERM_LEN(str);
if (STR_EMBED_P(str)) {
if (len == slen) return str;
if (STR_EMBEDDABLE_P(len, termlen)) {
if (str_embed_capa(str) >= len + termlen) {
STR_SET_EMBED_LEN(str, len);
TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
return str;
}
str_make_independent_expand(str, slen, len - slen, termlen);
}
else if (STR_EMBEDDABLE_P(len, termlen)) {
else if (str_embed_capa(str) >= len + termlen) {
char *ptr = STR_HEAP_PTR(str);
STR_SET_EMBED(str);
if (slen > len) slen = len;
if (slen > 0) MEMCPY(RSTRING(str)->as.embed.ary, ptr, char, slen);
TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
if (slen > 0) MEMCPY(RSTRING(str)->as.embed.ary, ptr, char, slen);
TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
STR_SET_EMBED_LEN(str, len);
if (independent) ruby_xfree(ptr);
return str;
@ -2925,7 +3088,9 @@ str_buf_cat(VALUE str, const char *ptr, long len)
long capa, total, olen, off = -1;
char *sptr;
const int termlen = TERM_LEN(str);
#if !USE_RVARGC
assert(termlen < RSTRING_EMBED_LEN_MAX + 1); /* < (LONG_MAX/2) */
#endif
RSTRING_GETMEM(str, sptr, olen);
if (ptr >= sptr && ptr <= sptr + olen) {
@ -2934,8 +3099,8 @@ str_buf_cat(VALUE str, const char *ptr, long len)
rb_str_modify(str);
if (len == 0) return 0;
if (STR_EMBED_P(str)) {
capa = RSTRING_EMBED_LEN_MAX + 1 - termlen;
sptr = RSTRING(str)->as.embed.ary;
capa = str_embed_capa(str) - termlen;
sptr = RSTRING(str)->as.embed.ary;
olen = RSTRING_EMBED_LEN(str);
}
else {
@ -4797,17 +4962,21 @@ rb_str_drop_bytes(VALUE str, long len)
str_modifiable(str);
if (len > olen) len = olen;
nlen = olen - len;
if (STR_EMBEDDABLE_P(nlen, TERM_LEN(str))) {
if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
char *oldptr = ptr;
int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
STR_SET_EMBED(str);
STR_SET_EMBED_LEN(str, nlen);
ptr = RSTRING(str)->as.embed.ary;
ptr = RSTRING(str)->as.embed.ary;
memmove(ptr, oldptr + len, nlen);
if (fl == STR_NOEMBED) xfree(oldptr);
}
else {
if (!STR_SHARED_P(str)) rb_str_new_frozen(str);
if (!STR_SHARED_P(str)) {
VALUE shared = heap_str_make_shared(rb_obj_class(str), str);
rb_enc_cr_str_exact_copy(shared, str);
OBJ_FREEZE(shared);
}
ptr = RSTRING(str)->as.heap.ptr += len;
RSTRING(str)->as.heap.len = nlen;
}
@ -10465,7 +10634,13 @@ rb_str_force_encoding(VALUE str, VALUE enc)
static VALUE
rb_str_b(VALUE str)
{
VALUE str2 = str_alloc(rb_cString);
VALUE str2;
if (FL_TEST(str, STR_NOEMBED)) {
str2 = str_alloc_heap(rb_cString);
}
else {
str2 = str_alloc_embed(rb_cString, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
}
str_replace_shared_without_enc(str2, str);
ENC_CODERANGE_CLEAR(str2);
return str2;

Просмотреть файл

@ -4,13 +4,10 @@ require '-test-/string'
require 'rbconfig/sizeof'
class Test_StringCapacity < Test::Unit::TestCase
# Returns the capacity that Bug::String.capacity reports for +str+.
def capa(str)
Bug::String.capacity(str)
end
# Short strings are expected to report the room available in their slot, not
# their own length: 'foo' is measured against RVALUE_SIZE minus the embed
# header minus one, and strings of max_embed_len and max_embed_len - 1 bytes
# are both expected to report max_embed_len.
# NOTE(review): capa('foo') is asserted against two different expressions;
# this looks like the removed and added lines of a diff hunk kept side by
# side. Confirm both are meant to hold.
def test_capacity_embedded
size = RbConfig::SIZEOF['void*'] * 3 - 1
assert_equal size, capa('foo')
assert_equal GC::INTERNAL_CONSTANTS[:RVALUE_SIZE] - embed_header_size - 1, capa('foo')
assert_equal max_embed_len, capa('1' * max_embed_len)
assert_equal max_embed_len, capa('1' * (max_embed_len - 1))
end
def test_capacity_shared
@ -18,7 +15,8 @@ class Test_StringCapacity < Test::Unit::TestCase
end
# Strings longer than max_embed_len (by 1 and by 100 bytes) are expected to
# report a capacity equal to their length; a fixed 128-byte string is checked
# the same way.
# NOTE(review): the 128-byte assertion may be a removed diff line shown next
# to its replacements. Confirm it is still intended.
def test_capacity_normal
assert_equal 128, capa('1'*128)
assert_equal max_embed_len + 1, capa('1' * (max_embed_len + 1))
assert_equal max_embed_len + 100, capa('1' * (max_embed_len + 100))
end
def test_s_new_capacity
@ -39,7 +37,10 @@ class Test_StringCapacity < Test::Unit::TestCase
end
# A string literal one byte longer than max_embed_len, evaluated from source
# text that starts with a `frozen_string_literal: true` line, is expected to
# have a capacity equal to its length.
# NOTE(review): the first assignment to `s` is overwritten immediately by the
# eval result; it looks like a removed diff line shown next to its
# replacement.
def test_literal_capacity
s = "I am testing string literal capacity"
s = eval(%{
# frozen_string_literal: true
"#{"a" * (max_embed_len + 1)}"
})
assert_equal(s.length, capa(s))
end
@ -51,9 +52,27 @@ class Test_StringCapacity < Test::Unit::TestCase
end
# A string created with `capacity: 1000`, appended to, and then passed through
# unary minus is expected to report a capacity equal to its length.
# NOTE(review): `s` is assigned twice before use; the first String.new looks
# like a removed diff line shown next to its replacement.
def test_capacity_fstring
s = String.new("I am testing", capacity: 1000)
s = String.new("a" * max_embed_len, capacity: 1000)
s << "fstring capacity"
s = -s
assert_equal(s.length, capa(s))
end
private
# Returns the capacity that Bug::String.capacity reports for +str+.
def capa(str)
Bug::String.capacity(str)
end
# Number of bytes subtracted from a slot size before the embedded string
# bytes are counted: two pointer-sized words, plus one `short` when
# GC.using_rvargc? is true.
def embed_header_size
  header_bytes = 2 * RbConfig::SIZEOF['void*']
  header_bytes += RbConfig::SIZEOF['short'] if GC.using_rvargc?
  header_bytes
end
# Longest string length the tests treat as embeddable: the
# RVARGC_MAX_ALLOCATE_SIZE slot size minus the embed header minus one byte
# (presumably the terminator; confirm against the C side).
def max_embed_len
  largest_slot = GC::INTERNAL_CONSTANTS[:RVARGC_MAX_ALLOCATE_SIZE]
  largest_slot - embed_header_size - 1
end
end

Просмотреть файл

@ -3,13 +3,15 @@ require '-test-/string'
class Test_RbStrDup < Test::Unit::TestCase
# Duplicating a duplicate must yield a string that Bug::String reports as
# shared, but not as sharing with another shared string ([Bug #15792]).
# The source string is RVARGC_MAX_ALLOCATE_SIZE bytes long, presumably so it
# cannot be embedded; confirm.
# NOTE(review): `str` is assigned twice; the first assignment (from a 50-byte
# string) looks like a removed diff line shown next to its replacement.
def test_nested_shared_non_frozen
str = Bug::String.rb_str_dup(Bug::String.rb_str_dup("a" * 50))
orig_str = "a" * GC::INTERNAL_CONSTANTS[:RVARGC_MAX_ALLOCATE_SIZE]
str = Bug::String.rb_str_dup(Bug::String.rb_str_dup(orig_str))
assert_send([Bug::String, :shared_string?, str])
assert_not_send([Bug::String, :sharing_with_shared?, str], '[Bug #15792]')
end
# Same check as the non-frozen case, but the inner duplicate is frozen before
# it is duplicated again: the result must be reported as shared, and not as
# sharing with another shared string ([Bug #15792]).
# NOTE(review): `str` is assigned twice; the first assignment (from a 50-byte
# string) looks like a removed diff line shown next to its replacement.
def test_nested_shared_frozen
str = Bug::String.rb_str_dup(Bug::String.rb_str_dup("a" * 50).freeze)
orig_str = "a" * GC::INTERNAL_CONSTANTS[:RVARGC_MAX_ALLOCATE_SIZE]
str = Bug::String.rb_str_dup(Bug::String.rb_str_dup(orig_str).freeze)
assert_send([Bug::String, :shared_string?, str])
assert_not_send([Bug::String, :sharing_with_shared?, str], '[Bug #15792]')
end

Просмотреть файл

@ -29,12 +29,12 @@ class TestObjSpace < Test::Unit::TestCase
end
# After `b = a.dup`, `a` and `b` are each expected to measure exactly
# RVALUE_SIZE, while `c` (the frozen String equal to `a` found through
# ObjectSpace.each_object) is expected to measure RVALUE_SIZE plus the
# string's bytes plus one.
# NOTE(review): `a` is assigned twice and there are two assert_equal calls
# with different expectations (26 matches the 25-byte "hello" * 5 string,
# a.length + 1 matches the later assignment). This looks like removed and
# added diff lines interleaved; confirm which pair is current.
def test_memsize_of_root_shared_string
a = "hello" * 5
a = "a" * GC::INTERNAL_CONSTANTS[:RVARGC_MAX_ALLOCATE_SIZE]
b = a.dup
c = nil
ObjectSpace.each_object(String) {|x| break c = x if x == a and x.frozen?}
rv_size = GC::INTERNAL_CONSTANTS[:RVALUE_SIZE]
assert_equal([rv_size, rv_size, 26 + rv_size], [a, b, c].map {|x| ObjectSpace.memsize_of(x)})
assert_equal([rv_size, rv_size, a.length + 1 + rv_size], [a, b, c].map {|x| ObjectSpace.memsize_of(x)})
end
def test_argf_memsize

Просмотреть файл

@ -3769,7 +3769,11 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
rb_str_modify(output);
if (NIL_P(output_bytesize_v)) {
#if USE_RVARGC
output_bytesize = rb_str_capacity(output);
#else
output_bytesize = RSTRING_EMBED_LEN_MAX;
#endif
if (!NIL_P(input) && output_bytesize < RSTRING_LEN(input))
output_bytesize = RSTRING_LEN(input);
}