Bug 1903037 part 2 - Add APIs for working with JS strings backed by StringBuffers. r=sfink

This mostly follows the external string APIs.

The next patch will convert the main browser uses of external strings to call
these new functions.

Differential Revision: https://phabricator.services.mozilla.com/D213969
This commit is contained in:
Jan de Mooij 2024-06-26 11:03:17 +00:00
Родитель d50f648d3e
Коммит adbacfb5b0
7 изменённых файлов: 323 добавлений и 7 удалений

Просмотреть файл

@ -11,13 +11,15 @@
#include "js/shadow/String.h" // JS::shadow::String
#include "mozilla/Assertions.h" // MOZ_ASSERT
#include "mozilla/Attributes.h" // MOZ_ALWAYS_INLINE
#include "mozilla/Likely.h" // MOZ_LIKELY
#include "mozilla/Maybe.h" // mozilla::Maybe
#include "mozilla/Range.h" // mozilla::Range
#include "mozilla/Span.h" // mozilla::Span
// std::tuple
#include "mozilla/Assertions.h" // MOZ_ASSERT
#include "mozilla/Attributes.h" // MOZ_ALWAYS_INLINE
#include "mozilla/Likely.h" // MOZ_LIKELY
#include "mozilla/Maybe.h" // mozilla::Maybe
#include "mozilla/Range.h" // mozilla::Range
#include "mozilla/RefPtr.h" // RefPtr
#include "mozilla/Span.h" // mozilla::Span
// std::tuple
#include "mozilla/StringBuffer.h" // mozilla::StringBuffer
#include <algorithm> // std::copy_n
#include <stddef.h> // size_t
@ -102,6 +104,41 @@ extern JS_PUBLIC_API JSString* JS_NewUCStringCopyN(JSContext* cx,
extern JS_PUBLIC_API JSString* JS_NewUCStringCopyZ(JSContext* cx,
const char16_t* s);
namespace JS {
/**
 * Create a new JSString possibly backed by |buffer|. The contents of |buffer|
 * will be interpreted as an array of Latin1 characters, and |length| is the
 * number of those characters the string should contain.
 *
 * Note that the returned string is not guaranteed to use |buffer|: as an
 * optimization, this API can return an inline string or a previously allocated
 * string. Use JS::IsLatin1StringWithStringBuffer to find out whether a given
 * string ended up being backed by a StringBuffer.
 *
 * Increments the buffer's refcount iff the JS string holds a reference to it.
 *
 * Returns nullptr if the string could not be created.
 */
extern JS_PUBLIC_API JSString* NewStringFromLatin1Buffer(
JSContext* cx, RefPtr<mozilla::StringBuffer> buffer, size_t length);
/**
 * Similar to NewStringFromLatin1Buffer but for char16_t buffers: the contents
 * of |buffer| are interpreted as an array of char16_t and |length| is the
 * number of char16_t code units.
 *
 * Use JS::IsTwoByteStringWithStringBuffer to find out whether the returned
 * string is backed by a StringBuffer.
 */
extern JS_PUBLIC_API JSString* NewStringFromTwoByteBuffer(
JSContext* cx, RefPtr<mozilla::StringBuffer> buffer, size_t length);
/**
 * Similar to NewStringFromLatin1Buffer but for UTF8 buffers.
 *
 * This can create a Latin1 string backed by |buffer| iff the UTF8 buffer
 * contains only ASCII chars. If there are non-ASCII chars, |buffer| can't be
 * used so this API will copy and inflate the characters for the new JS string.
 *
 * Note that |length| must be the (byte) length of the UTF8 buffer. For
 * non-ASCII input the length of the resulting JS string can therefore be
 * smaller than |length|.
 */
extern JS_PUBLIC_API JSString* NewStringFromUTF8Buffer(
JSContext* cx, RefPtr<mozilla::StringBuffer> buffer, size_t length);
} // namespace JS
extern JS_PUBLIC_API JSString* JS_AtomizeUCStringN(JSContext* cx,
const char16_t* s,
size_t length);
@ -430,6 +467,46 @@ MOZ_ALWAYS_INLINE bool IsExternalUCString(
return true;
}
/**
 * Query whether |str| is a Latin1 string whose characters are stored in a
 * mozilla::StringBuffer.
 *
 * Returns true and stores the backing buffer in |*buffer| if so. Returns false
 * and leaves |*buffer| untouched if the string has no StringBuffer or does not
 * have Latin1 chars.
 *
 * Note: the buffer's refcount is not incremented by this function. The buffer
 * remains valid as long as the provided string is kept alive.
 */
MOZ_ALWAYS_INLINE bool IsLatin1StringWithStringBuffer(
    JSString* str, mozilla::StringBuffer** buffer) {
  shadow::String* shadowStr = shadow::AsShadowString(str);

  const bool isLatin1WithBuffer =
      shadowStr->hasStringBuffer() && shadowStr->hasLatin1Chars();
  if (isLatin1WithBuffer) {
    // The shadow field is a pointer-to-const; drop the constness so the
    // pointer can be handed to StringBuffer::FromData as a plain void*.
    void* chars = const_cast<JS::Latin1Char*>(shadowStr->nonInlineCharsLatin1);
    *buffer = mozilla::StringBuffer::FromData(chars);
    return true;
  }

  return false;
}
/**
 * Query whether |str| is a two-byte (char16_t) string whose characters are
 * stored in a mozilla::StringBuffer.
 *
 * Returns true and stores the backing buffer in |*buffer| if so. Returns false
 * and leaves |*buffer| untouched if the string has no StringBuffer or has
 * Latin1 chars.
 *
 * Note: the buffer's refcount is not incremented by this function. The buffer
 * remains valid as long as the provided string is kept alive.
 */
MOZ_ALWAYS_INLINE bool IsTwoByteStringWithStringBuffer(
    JSString* str, mozilla::StringBuffer** buffer) {
  shadow::String* shadowStr = shadow::AsShadowString(str);

  const bool isTwoByteWithBuffer =
      shadowStr->hasStringBuffer() && !shadowStr->hasLatin1Chars();
  if (isTwoByteWithBuffer) {
    // The shadow field is a pointer-to-const; drop the constness so the
    // pointer can be handed to StringBuffer::FromData as a plain void*.
    void* chars = const_cast<char16_t*>(shadowStr->nonInlineCharsTwoByte);
    *buffer = mozilla::StringBuffer::FromData(chars);
    return true;
  }

  return false;
}
namespace detail {
extern JS_PUBLIC_API JSLinearString* StringToLinearStringSlow(JSContext* cx,

Просмотреть файл

@ -35,6 +35,7 @@ struct String {
// Bits and masks that are tested against the value returned by flags().
// NOTE(review): presumably these must stay in sync with the flag layout of
// the engine's own JSString class — confirm before changing any value.

// Tested by isLinear().
static constexpr uint32_t LINEAR_BIT = js::Bit(4);
static constexpr uint32_t INLINE_CHARS_BIT = js::Bit(6);
// Tested by hasLatin1Chars().
static constexpr uint32_t LATIN1_CHARS_BIT = js::Bit(10);
// Tested by hasStringBuffer(): the string's characters are stored in a
// mozilla::StringBuffer.
static constexpr uint32_t HAS_STRING_BUFFER_BIT = js::Bit(12);
static constexpr uint32_t EXTERNAL_FLAGS = LINEAR_BIT | js::Bit(8);
static constexpr uint32_t TYPE_FLAGS_MASK = js::BitMask(10) - js::BitMask(3);
static constexpr uint32_t PERMANENT_ATOM_MASK = ATOM_BIT | js::Bit(8);
@ -68,6 +69,7 @@ struct String {
// True iff LINEAR_BIT is set in this string's flags.
bool isLinear() const { return flags() & LINEAR_BIT; }
// True iff LATIN1_CHARS_BIT is set in this string's flags.
bool hasLatin1Chars() const { return flags() & LATIN1_CHARS_BIT; }
// True iff HAS_STRING_BUFFER_BIT is set in this string's flags, i.e. the
// string's characters are stored in a mozilla::StringBuffer.
bool hasStringBuffer() const { return flags() & HAS_STRING_BUFFER_BIT; }
// For hot code, prefer other type queries.
bool isExternal() const {

Просмотреть файл

@ -128,6 +128,7 @@ UNIFIED_SOURCES += [
"testSparseBitmap.cpp",
"testStencil.cpp",
"testStringBuffer.cpp",
"testStringBuffers.cpp",
"testStringIsArrayIndex.cpp",
"testStructuredClone.cpp",
"testSymbol.cpp",

Просмотреть файл

@ -0,0 +1,152 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/StringBuffer.h"
#include "jsapi.h"
#include "js/String.h"
#include "jsapi-tests/tests.h"
#include "util/Text.h"
// Checks JS::NewStringFromLatin1Buffer: strings created from one StringBuffer
// must point at the buffer's own character storage, each must hold a
// reference to the buffer, and JS::IsLatin1StringWithStringBuffer must hand
// the same buffer back.
BEGIN_TEST(testStringBuffersLatin1) {
// NOTE(review): the literal is presumably long enough to not fit in an inline
// string; a shorter one would not be backed by the buffer — confirm.
static const JS::Latin1Char chars[] = "This is just some random string";
static const size_t len = js_strlen(chars);
// Create a refcounted StringBuffer from the literal's characters.
RefPtr<mozilla::StringBuffer> buffer =
mozilla::StringBuffer::Create(chars, len);
CHECK(buffer);
auto* bufferChars = static_cast<const JS::Latin1Char*>(buffer->Data());
JS::Rooted<JSString*> str1(cx,
JS::NewStringFromLatin1Buffer(cx, buffer, len));
CHECK(str1);
CHECK_EQUAL(JS_GetStringLength(str1), len);
{
// The string's chars pointer must be the buffer's data pointer itself, which
// shows that the characters were not copied.
JS::AutoCheckCannotGC nogc;
size_t strLen;
const JS::Latin1Char* strChars =
JS_GetLatin1StringCharsAndLength(cx, nogc, str1, &strLen);
CHECK_EQUAL(strLen, len);
CHECK_EQUAL(strChars, bufferChars);
}
// Create two more strings from the same buffer so that the refcount check
// below covers several strings sharing one buffer.
JS::Rooted<JSString*> str2(cx,
JS::NewStringFromLatin1Buffer(cx, buffer, len));
CHECK(str2);
JS::Rooted<JSString*> str3(cx,
JS::NewStringFromLatin1Buffer(cx, buffer, len));
CHECK(str3);
// NOTE(review): RefCount() is presumably only available in DEBUG builds.
#ifdef DEBUG
CHECK_EQUAL(buffer->RefCount(), 4u); // |buffer| and the 3 JS strings.
#endif
// The Latin1 string must not be reported as a two-byte buffer string, and the
// Latin1 query must return the original buffer.
mozilla::StringBuffer* buf;
CHECK(!JS::IsTwoByteStringWithStringBuffer(str2, &buf));
CHECK(JS::IsLatin1StringWithStringBuffer(str2, &buf));
CHECK_EQUAL(buf, buffer);
return true;
}
END_TEST(testStringBuffersLatin1)
// Checks JS::NewStringFromTwoByteBuffer: strings created from one
// StringBuffer must point at the buffer's own character storage, each must
// hold a reference to the buffer, and JS::IsTwoByteStringWithStringBuffer must
// hand the same buffer back.
BEGIN_TEST(testStringBuffersTwoByte) {
// NOTE(review): the literal is presumably long enough to not fit in an inline
// string; a shorter one would not be backed by the buffer — confirm.
static const char16_t chars[] = u"This is just some random string";
static const size_t len = js_strlen(chars);
// Create a refcounted StringBuffer from the literal's characters.
RefPtr<mozilla::StringBuffer> buffer =
mozilla::StringBuffer::Create(chars, len);
CHECK(buffer);
auto* bufferChars = static_cast<const char16_t*>(buffer->Data());
JS::Rooted<JSString*> str1(cx,
JS::NewStringFromTwoByteBuffer(cx, buffer, len));
CHECK(str1);
CHECK_EQUAL(JS_GetStringLength(str1), len);
{
// The string's chars pointer must be the buffer's data pointer itself, which
// shows that the characters were not copied.
JS::AutoCheckCannotGC nogc;
size_t strLen;
const char16_t* strChars =
JS_GetTwoByteStringCharsAndLength(cx, nogc, str1, &strLen);
CHECK_EQUAL(strLen, len);
CHECK_EQUAL(strChars, bufferChars);
}
// Create two more strings from the same buffer so that the refcount check
// below covers several strings sharing one buffer.
JS::Rooted<JSString*> str2(cx,
JS::NewStringFromTwoByteBuffer(cx, buffer, len));
CHECK(str2);
JS::Rooted<JSString*> str3(cx,
JS::NewStringFromTwoByteBuffer(cx, buffer, len));
CHECK(str3);
// NOTE(review): RefCount() is presumably only available in DEBUG builds.
#ifdef DEBUG
CHECK_EQUAL(buffer->RefCount(), 4u); // |buffer| and the 3 JS strings.
#endif
// The two-byte string must not be reported as a Latin1 buffer string, and the
// two-byte query must return the original buffer.
mozilla::StringBuffer* buf;
CHECK(!JS::IsLatin1StringWithStringBuffer(str2, &buf));
CHECK(JS::IsTwoByteStringWithStringBuffer(str2, &buf));
CHECK_EQUAL(buf, buffer);
return true;
}
END_TEST(testStringBuffersTwoByte)
// Checks JS::NewStringFromUTF8Buffer for both of its paths: an ASCII-only
// buffer is shared with the new string as Latin1 storage, while a buffer
// containing non-ASCII UTF-8 is not used by the new string at all.
BEGIN_TEST(testStringBuffersUTF8) {
// UTF8 ASCII string buffer.
{
static const char chars[] = "This is a UTF-8 string but also ASCII";
static const size_t len = js_strlen(chars);
RefPtr<mozilla::StringBuffer> buffer =
mozilla::StringBuffer::Create(chars, len);
CHECK(buffer);
JS::Rooted<JSString*> str1(cx,
JS::NewStringFromUTF8Buffer(cx, buffer, len));
CHECK(str1);
// For ASCII input the string length equals the byte length.
CHECK_EQUAL(JS_GetStringLength(str1), len);
// The string must be a Latin1 string backed by the original buffer.
mozilla::StringBuffer* buf;
CHECK(!JS::IsTwoByteStringWithStringBuffer(str1, &buf));
CHECK(JS::IsLatin1StringWithStringBuffer(str1, &buf));
CHECK_EQUAL(buf, buffer);
#ifdef DEBUG
CHECK_EQUAL(buffer->RefCount(), 2u); // |buffer| and the JS string.
#endif
}
// UTF8 non-ASCII string buffer. The passed in buffer isn't used.
{
// "\xEF\xBC\x98" is a single character encoded as three UTF-8 bytes, so the
// byte length |len| is larger than the resulting string length.
static const char chars[] =
"This is a UTF-\xEF\xBC\x98 string but not ASCII";
static const size_t len = js_strlen(chars);
RefPtr<mozilla::StringBuffer> buffer =
mozilla::StringBuffer::Create(chars, len);
CHECK(buffer);
JS::Rooted<JSString*> str1(cx,
JS::NewStringFromUTF8Buffer(cx, buffer, len));
CHECK(str1);
// 36 characters: the three-byte sequence counts as one character.
CHECK_EQUAL(JS_GetStringLength(str1), 36u);
// The string must not be backed by any StringBuffer.
mozilla::StringBuffer* buf;
CHECK(!JS::IsLatin1StringWithStringBuffer(str1, &buf));
CHECK(!JS::IsTwoByteStringWithStringBuffer(str1, &buf));
#ifdef DEBUG
CHECK_EQUAL(buffer->RefCount(), 1u); // Just |buffer|.
#endif
}
return true;
}
END_TEST(testStringBuffersUTF8)

Просмотреть файл

@ -11,6 +11,7 @@
#include "mozilla/PodOperations.h"
#include "mozilla/Range.h"
#include "mozilla/StringBuffer.h"
#include "gc/GCEnum.h"
#include "gc/MaybeRooted.h"
@ -493,6 +494,17 @@ MOZ_ALWAYS_INLINE JSLinearString* JSLinearString::new_(
return newValidLength<allowGC>(cx, chars, heap);
}
// Create a linear string for the |length| characters at |chars|, handing
// |buffer| on to newValidLength. The length is validated first; if validation
// fails, nullptr is returned and |buffer| is not moved from.
// NOTE(review): |chars| presumably has to point at |buffer|'s data — confirm
// against newValidLength.
template <js::AllowGC allowGC, typename CharT>
MOZ_ALWAYS_INLINE JSLinearString* JSLinearString::new_(
    JSContext* cx, RefPtr<mozilla::StringBuffer>&& buffer, const CharT* chars,
    size_t length, js::gc::Heap heap) {
  const bool lengthIsValid = validateLengthInternal<allowGC>(cx, length);
  if (MOZ_LIKELY(lengthIsValid)) {
    return newValidLength<allowGC>(cx, std::move(buffer), chars, length, heap);
  }
  return nullptr;
}
template <js::AllowGC allowGC, typename CharT>
MOZ_ALWAYS_INLINE JSLinearString* JSLinearString::newValidLength(
JSContext* cx, JS::MutableHandle<JSString::OwnedChars<CharT>> chars,

Просмотреть файл

@ -13,6 +13,7 @@
#include "mozilla/MemoryReporting.h"
#include "mozilla/PodOperations.h"
#include "mozilla/RangedPtr.h"
#include "mozilla/StringBuffer.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Utf8.h"
#include "mozilla/Vector.h"
@ -2276,6 +2277,71 @@ template JSString* NewMaybeExternalString(
} /* namespace js */
// Shared implementation of the JS::NewStringFrom*Buffer APIs. Creates a JS
// string for the first |length| CharT characters stored in |buffer|.
//
// Strategies are tried in order:
//   1. the empty string or a static string,
//   2. a thin inline Latin1 string, shared through the zone's external string
//      cache,
//   3. an inline string of CharT,
//   4. a linear string that is handed |buffer|.
//
// Only the last path is passed |buffer|, so it is the only one that can
// produce a string referencing it. Returns nullptr if creating the inline
// string in step 2 fails; the results of the other creation calls are returned
// as is.
template <typename CharT>
static JSString* NewStringFromBuffer(JSContext* cx,
                                     RefPtr<mozilla::StringBuffer>&& buffer,
                                     size_t length) {
  AssertHeapIsIdle();
  CHECK_THREAD(cx);

  const CharT* chars = static_cast<const CharT*>(buffer->Data());

  JSString* emptyOrStatic = TryEmptyOrStaticString(cx, chars, length);
  if (emptyOrStatic) {
    return emptyOrStatic;
  }

  // Use the inline-string cache that we also use for external strings.
  const bool fitsThinLatin1Inline =
      JSThinInlineString::lengthFits<Latin1Char>(length) &&
      CanStoreCharsAsLatin1(chars, length);
  if (fitsThinLatin1Inline) {
    ExternalStringCache& inlineCache = cx->zone()->externalStringCache();

    JSInlineString* inlineStr = inlineCache.lookupInline(chars, length);
    if (!inlineStr) {
      // Cache miss: create the inline string and remember it for next time.
      inlineStr = NewInlineStringMaybeDeflated<AllowGC::CanGC>(
          cx, mozilla::Range(chars, length), gc::Heap::Default);
      if (!inlineStr) {
        return nullptr;
      }
      inlineCache.putInline(inlineStr);
    }
    return inlineStr;
  }

  if (!JSInlineString::lengthFits<CharT>(length)) {
    // Too long for inline storage: let the string use |buffer|.
    return JSLinearString::new_<CanGC>(cx, std::move(buffer), chars, length,
                                       gc::Heap::Default);
  }

  return NewInlineString<CanGC>(cx, mozilla::Range(chars, length),
                                gc::Heap::Default);
}
// Public entry point for Latin1 buffers: instantiates NewStringFromBuffer with
// Latin1Char and moves the by-value |buffer| reference into it.
JS_PUBLIC_API JSString* JS::NewStringFromLatin1Buffer(
JSContext* cx, RefPtr<mozilla::StringBuffer> buffer, size_t length) {
return NewStringFromBuffer<Latin1Char>(cx, std::move(buffer), length);
}
// Public entry point for char16_t buffers: instantiates NewStringFromBuffer
// with char16_t and moves the by-value |buffer| reference into it.
JS_PUBLIC_API JSString* JS::NewStringFromTwoByteBuffer(
JSContext* cx, RefPtr<mozilla::StringBuffer> buffer, size_t length) {
return NewStringFromBuffer<char16_t>(cx, std::move(buffer), length);
}
// Public entry point for UTF8 buffers. |length| is the byte length of the
// UTF8 data in |buffer|.
//
// If the data is pure ASCII, the bytes are also valid Latin1 characters, so
// the buffer is passed on to NewStringFromBuffer<Latin1Char>. Otherwise the
// buffer cannot be used and the characters are copied via NewStringCopyUTF8N.
JS_PUBLIC_API JSString* JS::NewStringFromUTF8Buffer(
    JSContext* cx, RefPtr<mozilla::StringBuffer> buffer, size_t length) {
  AssertHeapIsIdle();
  CHECK_THREAD(cx);

  const auto* bytes = static_cast<const char*>(buffer->Data());
  const JS::UTF8Chars utf8Chars(bytes, length);

  JS::SmallestEncoding smallest = JS::FindSmallestEncoding(utf8Chars);
  if (smallest != JS::SmallestEncoding::ASCII) {
    // Non-ASCII case cannot use the string buffer.
    return NewStringCopyUTF8N(cx, utf8Chars, smallest);
  }

  // ASCII case can use the string buffer as Latin1 buffer.
  return NewStringFromBuffer<Latin1Char>(cx, std::move(buffer), length);
}
#if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
void JSExtensibleString::dumpOwnRepresentationFields(
js::JSONPrinter& json) const {

Просмотреть файл

@ -1057,6 +1057,12 @@ class JSLinearString : public JSString {
JS::MutableHandle<OwnedChars<CharT>> chars,
js::gc::Heap heap);
// Create a linear string for the |length| characters at |chars|, taking
// |buffer| by rvalue reference. The definition validates |length| first and
// returns nullptr if that validation fails.
// NOTE(review): |chars| presumably has to point at |buffer|'s data (the
// visible caller passes buffer->Data()) — confirm.
template <js::AllowGC allowGC, typename CharT>
static inline JSLinearString* new_(JSContext* cx,
RefPtr<mozilla::StringBuffer>&& buffer,
const CharT* chars, size_t length,
js::gc::Heap heap);
template <js::AllowGC allowGC, typename CharT>
static inline JSLinearString* newValidLength(
JSContext* cx, JS::MutableHandle<OwnedChars<CharT>> chars,