Bug 1899160 part 1 - Add support for allocating JS strings with a refcounted StringBuffer. r=sfink

This adds initial support for JS strings that have a refcounted `mozilla::StringBuffer`
instead of a raw `malloc` buffer.

The `newString` testing function has two new options. These let us allocate a new JS string
with a new buffer, or a new JS string that reuses the source string's underlying string buffer.

After this change, we can start using `StringBuffer`-backed strings to replace external strings.
We can also allocate more JS strings with `StringBuffer`s instead of raw `malloc` buffers.

Differential Revision: https://phabricator.services.mozilla.com/D212110
This commit is contained in:
Jan de Mooij 2024-06-24 07:41:48 +00:00
Родитель 9beda1e1e1
Коммит d832c58019
11 изменённых файлов: 336 добавлений и 35 удалений

Просмотреть файл

@ -16,9 +16,11 @@
# include "mozilla/intl/TimeZone.h"
#endif
#include "mozilla/Maybe.h"
#include "mozilla/RefPtr.h"
#include "mozilla/ScopeExit.h"
#include "mozilla/Span.h"
#include "mozilla/Sprintf.h"
#include "mozilla/StringBuffer.h"
#include "mozilla/TextUtils.h"
#include "mozilla/ThreadLocal.h"
@ -3588,6 +3590,8 @@ static bool NewString(JSContext* cx, unsigned argc, Value* vp) {
bool wantTwoByte = false;
bool forceExternal = false;
bool maybeExternal = false;
bool newStringBuffer = false;
bool shareStringBuffer = false;
uint32_t capacity = 0;
if (args.get(1).isObject()) {
@ -3602,7 +3606,9 @@ static bool NewString(JSContext* cx, unsigned argc, Value* vp) {
{BoolSetting{"tenured", &requestTenured},
BoolSetting{"twoByte", &wantTwoByte},
BoolSetting{"external", &forceExternal},
BoolSetting{"maybeExternal", &maybeExternal}}) {
BoolSetting{"maybeExternal", &maybeExternal},
BoolSetting{"newStringBuffer", &newStringBuffer},
BoolSetting{"shareStringBuffer", &shareStringBuffer}}) {
if (!JS_GetProperty(cx, options, name, &v)) {
return false;
}
@ -3630,11 +3636,14 @@ static bool NewString(JSContext* cx, unsigned argc, Value* vp) {
heap = requestTenured ? gc::Heap::Tenured : gc::Heap::Default;
if (forceExternal || maybeExternal) {
wantTwoByte = true;
if (capacity != 0) {
JS_ReportErrorASCII(cx,
"strings cannot be both external and extensible");
return false;
}
}
unsigned kinds = forceExternal + maybeExternal + (capacity != 0) +
newStringBuffer + shareStringBuffer;
if (kinds > 1) {
JS_ReportErrorASCII(cx,
"external, capacity, and stringBuffer options can "
"not be used at the same time");
return false;
}
}
@ -3663,6 +3672,21 @@ static bool NewString(JSContext* cx, unsigned argc, Value* vp) {
if (dest && isExternal) {
(void)buf.release(); // Ownership was transferred.
}
} else if (shareStringBuffer) {
if (!src->isLinear() || !src->asLinear().hasStringBuffer()) {
JS_ReportErrorASCII(cx, "source string must have a string buffer");
return false;
}
RefPtr<mozilla::StringBuffer> buffer = src->asLinear().stringBuffer();
if (src->hasLatin1Chars()) {
auto* bufferChars = static_cast<const Latin1Char*>(buffer->Data());
dest = JSLinearString::newValidLength<CanGC>(cx, std::move(buffer),
bufferChars, len, heap);
} else {
auto* bufferChars = static_cast<const char16_t*>(buffer->Data());
dest = JSLinearString::newValidLength<CanGC>(cx, std::move(buffer),
bufferChars, len, heap);
}
} else {
AutoStableStringChars stable(cx);
if (!wantTwoByte && src->hasLatin1Chars()) {
@ -3674,7 +3698,33 @@ static bool NewString(JSContext* cx, unsigned argc, Value* vp) {
return false;
}
}
if (capacity) {
if (newStringBuffer) {
auto allocString = [&](const auto* chars) -> JSLinearString* {
using CharT =
std::remove_const_t<std::remove_pointer_t<decltype(chars)>>;
if (JSInlineString::lengthFits<CharT>(len)) {
JS_ReportErrorASCII(cx, "Cannot create small non-inline strings");
return nullptr;
}
RefPtr<mozilla::StringBuffer> buffer =
mozilla::StringBuffer::Create(chars, len);
if (!buffer) {
ReportOutOfMemory(cx);
return nullptr;
}
auto* bufferChars = static_cast<const CharT*>(buffer->Data());
return JSLinearString::newValidLength<CanGC, CharT>(
cx, std::move(buffer), bufferChars, len, heap);
};
if (stable.isLatin1()) {
dest = allocString(stable.latin1Chars());
} else {
dest = allocString(stable.twoByteChars());
}
} else if (capacity) {
if (capacity < len) {
capacity = len;
}
@ -9548,6 +9598,11 @@ static const JSFunctionSpecWithHelp TestingFunctions[] = {
" input string's characters. Latin1 will be used by default if possible\n"
" (again regardless of the input string.)\n"
" \n"
" - newStringBuffer: create a new string that uses a refcounted StringBuffer for\n"
" the characters.\n"
" \n"
" - shareStringBuffer: create a new string that shares str's StringBuffer.\n"
" \n"
" - external: create an external string. External strings are always twoByte and\n"
" tenured.\n"
" \n"

Просмотреть файл

@ -15,6 +15,7 @@
#include "js/TracingAPI.h"
#include "vm/JSContext.h"
#include "vm/NativeObject.h"
#include "vm/StringType.h"
namespace js {
namespace gc {
@ -29,6 +30,13 @@ bool js::Nursery::isInside(const SharedMem<T>& p) const {
return isInside(p.unwrap(/*safe - used for value in comparison above*/));
}
// Record nursery string |s| together with its StringBuffer in stringBuffers_.
// |s| must be inside the nursery and must have a StringBuffer. Returns the
// result of appending the entry to the list.
inline bool js::Nursery::addStringBuffer(JSLinearString* s) {
  MOZ_ASSERT(IsInsideNursery(s));
  MOZ_ASSERT(isEnabled());
  MOZ_ASSERT(s->hasStringBuffer());

  // Store the buffer next to the string so both are available from the entry.
  auto* buffer = s->stringBuffer();
  return stringBuffers_.emplaceBack(s, buffer);
}
inline bool js::Nursery::shouldTenure(gc::Cell* cell) {
MOZ_ASSERT(semispaceEnabled());
MOZ_ASSERT(inCollectedRegion(cell));

Просмотреть файл

@ -1925,6 +1925,31 @@ void js::Nursery::sweep() {
return false;
});
// Drop references to all StringBuffers. Strings we tenured must have an
// additional refcount at this point.
stringBuffers_.mutableEraseIf([&](StringAndBuffer& entry) {
auto [str, buffer] = entry;
MOZ_ASSERT(inCollectedRegion(str));
if (!IsForwarded(str)) {
MOZ_ASSERT(str->hasStringBuffer() || str->isAtomRef());
MOZ_ASSERT_IF(str->hasStringBuffer(), str->stringBuffer() == buffer);
buffer->Release();
return true;
}
JSLinearString* dst = Forwarded(str);
if (!IsInsideNursery(dst)) {
MOZ_ASSERT_IF(dst->hasStringBuffer() && dst->stringBuffer() == buffer,
buffer->RefCount() > 1);
buffer->Release();
return true;
}
entry.first = dst;
return false;
});
for (ZonesIter zone(runtime(), SkipAtoms); !zone.done(); zone.next()) {
zone->sweepAfterMinorGC(&trc);
}

Просмотреть файл

@ -56,6 +56,10 @@
template <typename T>
class SharedMem;
// Forward declaration of mozilla::StringBuffer so it can be named here by
// pointer without requiring its full definition.
namespace mozilla {
class StringBuffer;
}  // namespace mozilla
namespace js {
struct StringStats;
@ -246,6 +250,8 @@ class Nursery {
return cellsWithUid_.append(cell);
}
// Registers string |s| and its StringBuffer with the nursery. The result must
// not be ignored.
[[nodiscard]] inline bool addStringBuffer(JSLinearString* s);
size_t sizeOfMallocedBuffers(mozilla::MallocSizeOf mallocSizeOf) const;
// Wasm "trailer" (C++-heap-allocated) blocks.
@ -714,6 +720,15 @@ class Nursery {
using SetObjectVector = Vector<SetObject*, 0, SystemAllocPolicy>;
SetObjectVector setsWithNurseryMemory_;
// List of strings with StringBuffers allocated in the nursery. References
// to the buffers are dropped after minor GC. The list stores both the JS
// string and the StringBuffer to simplify interaction with AtomRefs and
// string deduplication.
using StringAndBuffer = std::pair<JSLinearString*, mozilla::StringBuffer*>;
using StringBufferVector =
JS::GCVector<StringAndBuffer, 8, SystemAllocPolicy>;
StringBufferVector stringBuffers_;
UniquePtr<NurseryDecommitTask> decommitTask;
// A cache of small C++-heap allocated blocks associated with this Nursery.

Просмотреть файл

@ -1265,6 +1265,19 @@ size_t js::gc::TenuringTracer::moveString(JSString* dst, JSString* src,
return size;
}
if (src->asLinear().hasStringBuffer()) {
auto* buffer = src->asLinear().stringBuffer();
if (dst->isTenured()) {
// Increment the buffer's refcount because the tenured string now has a
// reference to it. The nursery's reference will be released at the end of
// the minor GC in Nursery::sweep.
buffer->AddRef();
AddCellMemory(dst, dst->asLinear().allocSize(),
MemoryUse::StringContents);
}
return size;
}
// String data is in the nursery and needs to be moved to the malloc heap.
MOZ_ASSERT(nursery().isInside(src->asLinear().nonInlineCharsRaw()));

Просмотреть файл

@ -0,0 +1,55 @@
// |jit-test| skip-if: !getBuildConfiguration("debug")
// stringRepresentation and the bufferRefCount field aren't available in
// all builds.
gczeal(0);
// Parse the JSON text produced by stringRepresentation(s) into an object.
function representation(s) {
  const json = stringRepresentation(s);
  return JSON.parse(json);
}
// Create a string with a new StringBuffer, check that the buffer has exactly
// one reference, and check that basic string operations give the expected
// values.
function testBasic(tenured) {
  const options = {newStringBuffer: true, tenured: tenured};
  const str = newString("abcdefghijklmnopqrstuvwxyz", options);

  assertEq(representation(str).bufferRefCount, 1);
  assertEq(str, "abcdefghijklmnopqrstuvwxyz");
  assertEq(str.substring(1), "bcdefghijklmnopqrstuvwxyz");
  assertEq(str + str + str, "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz");
}
testBasic(false);
testBasic(true);
// Check the buffer's refcount when a second string sharing the buffer is used
// as a property key and may afterwards have become an AtomRef string.
// Returns the object that was keyed by the second string.
function testAtomRef(tenured) {
var s = newString("abcdefghijklmnopqrstuvwxyz", {newStringBuffer: true, tenured});
// s2 shares s's buffer, so the buffer should now have two references.
// NOTE(review): unlike s, s2 is created without the `tenured` option —
// confirm that this is intentional.
var s2 = newString(s, {shareStringBuffer: true});
assertEq(representation(s).bufferRefCount, 2);
// Use s2 repeatedly as a computed property key.
var o = {[s2]: 1};
for (var i = 0; i < 10; i++) {
o[s2]++;
}
// Run minor GCs before inspecting the refcount again; presumably this gives
// the nursery the chance to drop references it holds — TODO confirm why two
// collections are needed.
minorgc();
minorgc();
// If s2 is now an AtomRef string, then only s holds a reference to
// the buffer.
if (representation(s2).flags.includes("ATOM_REF_BIT")) {
assertEq(representation(s).bufferRefCount, 1);
} else {
assertEq(representation(s).bufferRefCount, 2);
}
return o;
}
testAtomRef(false);
testAtomRef(true);
// Create 100 strings that share one StringBuffer and check the buffer's
// refcount before and after a full GC. Returns the array holding the 100
// sharing strings.
function testDeduplication(tenured) {
  var arr = [];
  // The source characters are built by concatenation rather than written as a
  // single literal.
  var s = newString("abcdefghijklmnopqrstuvwxyz" + "012345".substring(1), {newStringBuffer: true, tenured});
  for (var i = 0; i < 100; i++) {
    arr.push(newString(s, {shareStringBuffer: true, tenured}));
  }
  // One reference for s plus one for each of the 100 sharing strings.
  assertEq(representation(s).bufferRefCount, 101);
  gc();
  // Tenured strings are expected to keep all their references. For
  // non-tenured strings a single reference is expected to remain — presumably
  // because the duplicates are deduplicated during the GC (TODO confirm).
  assertEq(representation(s).bufferRefCount, tenured ? 101 : 1);
  return arr;
}
testDeduplication(false);
testDeduplication(true);

Просмотреть файл

@ -265,7 +265,7 @@ MOZ_ALWAYS_INLINE const JS::Latin1Char* JSString::nonInlineCharsRaw() const {
}
bool JSString::ownsMallocedChars() const {
if (!hasOutOfLineChars()) {
if (!hasOutOfLineChars() || asLinear().hasStringBuffer()) {
return false;
}
@ -303,7 +303,7 @@ inline size_t JSLinearString::maybeMallocCharsOnPromotion(
}
inline size_t JSLinearString::allocSize() const {
MOZ_ASSERT(ownsMallocedChars());
MOZ_ASSERT(ownsMallocedChars() || hasStringBuffer());
size_t charSize =
hasLatin1Chars() ? sizeof(JS::Latin1Char) : sizeof(char16_t);
@ -312,7 +312,10 @@ inline size_t JSLinearString::allocSize() const {
}
inline size_t JSString::allocSize() const {
return ownsMallocedChars() ? asLinear().allocSize() : 0;
if (ownsMallocedChars() || hasStringBuffer()) {
return asLinear().allocSize();
}
return 0;
}
inline JSRope::JSRope(JSString* left, JSString* right, size_t length) {
@ -413,18 +416,30 @@ MOZ_ALWAYS_INLINE JSLinearString* JSDependentString::new_(
return cx->newCell<JSDependentString>(heap, base, start, length);
}
inline JSLinearString::JSLinearString(const char16_t* chars, size_t length) {
setLengthAndFlags(length, INIT_LINEAR_FLAGS);
// Check that the new buffer is located in the StringBufferArena
checkStringCharsArena(chars);
inline JSLinearString::JSLinearString(const char16_t* chars, size_t length,
bool hasBuffer) {
uint32_t flags = INIT_LINEAR_FLAGS | (hasBuffer ? HAS_STRING_BUFFER_BIT : 0);
setLengthAndFlags(length, flags);
// Check that the new buffer is located in the StringBufferArena.
// For now ignore this for StringBuffers because Gecko allocates these in the
// main jemalloc arena.
if (!hasBuffer) {
checkStringCharsArena(chars);
}
d.s.u2.nonInlineCharsTwoByte = chars;
}
inline JSLinearString::JSLinearString(const JS::Latin1Char* chars,
size_t length) {
setLengthAndFlags(length, INIT_LINEAR_FLAGS | LATIN1_CHARS_BIT);
// Check that the new buffer is located in the StringBufferArena
checkStringCharsArena(chars);
size_t length, bool hasBuffer) {
uint32_t flags = INIT_LINEAR_FLAGS | LATIN1_CHARS_BIT |
(hasBuffer ? HAS_STRING_BUFFER_BIT : 0);
setLengthAndFlags(length, flags);
// Check that the new buffer is located in the StringBufferArena.
// For now ignore this for StringBuffers because Gecko allocates these in the
// main jemalloc arena.
if (!hasBuffer) {
checkStringCharsArena(chars);
}
d.s.u2.nonInlineCharsLatin1 = chars;
}
@ -511,6 +526,46 @@ MOZ_ALWAYS_INLINE JSLinearString* JSLinearString::newValidLength(
return str;
}
// Allocate a non-inline linear string whose characters are stored in |buffer|.
//
// |chars| must be the character data of |buffer| (asserted below) and |length|
// must not fit in an inline string. On success the reference held by |buffer|
// is handed over: a tenured string owns it directly, while for a nursery
// string it is tracked through the nursery's StringBuffer list. On failure
// nullptr is returned and |buffer| still holds its reference.
template <js::AllowGC allowGC, typename CharT>
MOZ_ALWAYS_INLINE JSLinearString* JSLinearString::newValidLength(
    JSContext* cx, RefPtr<mozilla::StringBuffer>&& buffer, const CharT* chars,
    size_t length, js::gc::Heap heap) {
  MOZ_ASSERT(!cx->zone()->isAtomsZone());
  MOZ_ASSERT(!JSInlineString::lengthFits<CharT>(length));

  JSLinearString* result = cx->newCell<JSLinearString, allowGC>(
      heap, chars, length, /* hasBuffer = */ true);
  if (!result) {
    return nullptr;
  }

  if (result->isTenured()) {
    // Note: this will overcount if the same buffer is used by multiple JS
    // strings. Unfortunately we don't have a good way to avoid this.
    cx->zone()->addCellMemory(result, length * sizeof(CharT),
                              js::MemoryUse::StringContents);
  } else if (!cx->nursery().addStringBuffer(result)) {
    // Registration with the nursery failed. The string is only partially
    // initialized at this point and must be made valid, or its finalizer may
    // attempt to free uninitialized memory.
    result->disownCharsBecauseError();
    if (allowGC) {
      ReportOutOfMemory(cx);
    }
    return nullptr;
  }

  MOZ_ASSERT(result->stringBuffer() == buffer.get());

  // Either the tenured Cell or the nursery's registry owns the chars now, so
  // give up the RefPtr's reference without releasing it.
  mozilla::StringBuffer* transferred;
  buffer.forget(&transferred);
  return result;
}
template <typename CharT>
MOZ_ALWAYS_INLINE JSAtom* JSAtom::newValidLength(
JSContext* cx, js::UniquePtr<CharT[], JS::FreePolicy> chars, size_t length,
@ -763,8 +818,15 @@ inline void JSLinearString::finalize(JS::GCContext* gcx) {
MOZ_ASSERT(getAllocKind() != js::gc::AllocKind::FAT_INLINE_ATOM);
if (!isInline() && !isDependent()) {
gcx->free_(this, nonInlineCharsRaw(), allocSize(),
js::MemoryUse::StringContents);
size_t size = allocSize();
if (hasStringBuffer()) {
mozilla::StringBuffer* buffer = stringBuffer();
buffer->Release();
gcx->removeCellMemory(this, size, js::MemoryUse::StringContents);
} else {
gcx->free_(this, nonInlineCharsRaw(), size,
js::MemoryUse::StringContents);
}
}
}

Просмотреть файл

@ -112,13 +112,18 @@ size_t JSString::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) {
return 0;
}
JSLinearString& linear = asLinear();
if (hasStringBuffer()) {
return linear.stringBuffer()->SizeOfIncludingThisIfUnshared(mallocSizeOf);
}
// Chars in the nursery are owned by the nursery.
if (!ownsMallocedChars()) {
return 0;
}
// Everything else: measure the space for the chars.
JSLinearString& linear = asLinear();
return linear.hasLatin1Chars() ? mallocSizeOf(linear.rawLatin1Chars())
: mallocSizeOf(linear.rawTwoByteChars());
}
@ -405,6 +410,9 @@ void ForEachStringFlag(const JSString* str, uint32_t flags, KnownF known,
case JSString::LATIN1_CHARS_BIT:
known("LATIN1_CHARS_BIT");
break;
case JSString::HAS_STRING_BUFFER_BIT:
known("HAS_STRING_BUFFER_BIT");
break;
case JSString::ATOM_IS_INDEX_BIT:
if (str->isAtom()) {
known("ATOM_IS_INDEX_BIT");
@ -2277,6 +2285,12 @@ void JSExtensibleString::dumpOwnRepresentationFields(
void JSInlineString::dumpOwnRepresentationFields(js::JSONPrinter& json) const {}
void JSLinearString::dumpOwnRepresentationFields(js::JSONPrinter& json) const {
if (hasStringBuffer()) {
# ifdef DEBUG
json.property("bufferRefCount", stringBuffer()->RefCount());
# endif
return;
}
if (!isInline()) {
// Include whether the chars are in the nursery even for tenured
// strings, which should always be false. For investigating bugs, it's
@ -2540,17 +2554,27 @@ bool JSString::tryReplaceWithAtomRef(JSAtom* atom) {
AutoCheckCannotGC nogc;
if (hasOutOfLineChars()) {
void* buffer = asLinear().nonInlineCharsRaw();
// This is a little cheeky and so deserves a comment. If the string is
// not tenured, then either its buffer lives purely in the nursery, in
// which case it will just be forgotten and blown away in the next
// minor GC, or it is tracked in the nursery's mallocedBuffers hashtable,
// in which case it will be freed for us in the next minor GC. We opt
// to let the GC take care of it since there's a chance it will run
// during idle time.
if (isTenured()) {
RemoveCellMemory(this, allocSize(), MemoryUse::StringContents);
js_free(buffer);
if (asLinear().hasStringBuffer()) {
// If the string is in the nursery, the reference to the buffer will be
// released during the next minor GC (in Nursery::sweep). If the string is
// tenured, we have to release this reference here.
if (isTenured()) {
RemoveCellMemory(this, allocSize(), MemoryUse::StringContents);
asLinear().stringBuffer()->Release();
}
} else {
void* buffer = asLinear().nonInlineCharsRaw();
// This is a little cheeky and so deserves a comment. If the string is
// not tenured, then either its buffer lives purely in the nursery, in
// which case it will just be forgotten and blown away in the next
// minor GC, or it is tracked in the nursery's mallocedBuffers hashtable,
// in which case it will be freed for us in the next minor GC. We opt
// to let the GC take care of it since there's a chance it will run
// during idle time.
if (isTenured()) {
RemoveCellMemory(this, allocSize(), MemoryUse::StringContents);
js_free(buffer);
}
}
}

Просмотреть файл

@ -10,7 +10,9 @@
#include "mozilla/Maybe.h"
#include "mozilla/MemoryReporting.h"
#include "mozilla/Range.h"
#include "mozilla/RefPtr.h"
#include "mozilla/Span.h"
#include "mozilla/StringBuffer.h"
#include "mozilla/TextUtils.h"
#include <string_view> // std::basic_string_view
@ -417,6 +419,15 @@ class JSString : public js::gc::CellWithLengthAndFlags {
static const uint32_t INDEX_VALUE_BIT = js::Bit(11);
static const uint32_t INDEX_VALUE_SHIFT = 16;
// Whether this is a non-inline linear string with a refcounted
// mozilla::StringBuffer.
//
// If set, d.s.u2.nonInlineChars* still points to the string's characters and
// the StringBuffer header is stored immediately before the characters. This
// allows recovering the StringBuffer from the chars pointer with
// StringBuffer::FromData.
static const uint32_t HAS_STRING_BUFFER_BIT = js::Bit(12);
// NON_DEDUP_BIT is used in string deduplication during tenuring. This bit is
// shared with both FLATTEN_FINISH_NODE and ATOM_IS_PERMANENT_BIT, since it
// only applies to linear non-atoms.
@ -747,6 +758,13 @@ class JSString : public js::gc::CellWithLengthAndFlags {
inline bool ownsMallocedChars() const;
// Returns whether HAS_STRING_BUFFER_BIT is set in this string's flags.
// Asserts that the bit is only set on linear strings that are not inline,
// dependent, external or extensible.
bool hasStringBuffer() const {
  const bool hasBuffer = (flags() & HAS_STRING_BUFFER_BIT) != 0;
  MOZ_ASSERT_IF(hasBuffer, isLinear() && !isInline() && !isDependent() &&
                               !isExternal() && !isExtensible());
  return hasBuffer;
}
/* Encode as many scalar values of the string as UTF-8 as can fit
* into the caller-provided buffer replacing unpaired surrogates
* with the REPLACEMENT CHARACTER.
@ -1009,8 +1027,8 @@ class JSLinearString : public JSString {
bool isLinear() const = delete;
JSLinearString& asLinear() const = delete;
JSLinearString(const char16_t* chars, size_t length);
JSLinearString(const JS::Latin1Char* chars, size_t length);
JSLinearString(const char16_t* chars, size_t length, bool hasBuffer);
JSLinearString(const JS::Latin1Char* chars, size_t length, bool hasBuffer);
template <typename CharT>
explicit inline JSLinearString(JS::MutableHandle<OwnedChars<CharT>> chars);
@ -1043,6 +1061,11 @@ class JSLinearString : public JSString {
JSContext* cx, JS::MutableHandle<OwnedChars<CharT>> chars,
js::gc::Heap heap);
template <js::AllowGC allowGC, typename CharT>
static inline JSLinearString* newValidLength(
JSContext* cx, RefPtr<mozilla::StringBuffer>&& buffer, const CharT* chars,
size_t length, js::gc::Heap heap);
// Convert a plain linear string to an extensible string. For testing. The
// caller must ensure that it is a plain or extensible string already, and
// that `capacity` is adequate.
@ -1145,6 +1168,12 @@ class JSLinearString : public JSString {
MOZ_ASSERT(getIndexValue() == index);
}
// Returns the StringBuffer for this string, recovered from the chars pointer
// with StringBuffer::FromData. Must only be called if hasStringBuffer().
mozilla::StringBuffer* stringBuffer() const {
  MOZ_ASSERT(hasStringBuffer());
  return mozilla::StringBuffer::FromData(
      const_cast<void*>(nonInlineCharsRaw()));
}
/*
* Returns a property name represented by this string, or null on failure.
* You must verify that this is not an index per isIndex before calling

Просмотреть файл

@ -79,6 +79,10 @@ class StringBuffer {
size_t aLength) {
return DoCreate(aData, aLength);
}
/**
 * Overload of Create for `unsigned char` character data. Forwards aData and
 * aLength unchanged to DoCreate.
 */
static already_AddRefed<StringBuffer> Create(const unsigned char* aData,
size_t aLength) {
return DoCreate(aData, aLength);
}
/**
* Resizes the given string buffer to the specified storage size. This
@ -210,6 +214,16 @@ class StringBuffer {
#endif
}
#ifdef DEBUG
/**
 * Returns the buffer's reference count. This is only exposed for logging and
 * testing purposes.
 *
 * Only compiled when DEBUG is defined. The count is read from mRefCount with
 * an acquire load.
 */
uint32_t RefCount() const {
return mRefCount.load(std::memory_order_acquire);
}
#endif
/**
* This measures the size only if the StringBuffer is unshared.
*/

Просмотреть файл

@ -7,12 +7,13 @@
#ifndef mozilla_WasiAtomic_h
#define mozilla_WasiAtomic_h
#include <cstddef> // For _LIBCPP_VERSION and ptrdiff_t
// Clang >= 14 supports <atomic> for wasm targets.
#if _LIBCPP_VERSION >= 14000
# include <atomic>
#else
# include <cstddef> // For ptrdiff_t
# include <cstdint>
// WASI doesn't support <atomic> and we use it as single-threaded for now.