From adbacfb5b00b2db636904cbe224411d61001635c Mon Sep 17 00:00:00 2001
From: Jan de Mooij
Date: Wed, 26 Jun 2024 11:03:17 +0000
Subject: [PATCH] Bug 1903037 part 2 - Add APIs for working with JS strings backed by StringBuffers. r=sfink

This mostly follows the external string APIs. The next patch will convert
the main browser uses of external strings to call these new functions.

Differential Revision: https://phabricator.services.mozilla.com/D213969
---
Reviewer note (this section is ignored by git-am): the copy under review had
lost all line breaks and every "<...>" span (include targets, template
parameter/argument lists, cast targets). They have been reconstructed from
context; the number of added/removed lines matches every hunk header and the
diffstat below. Unchanged context lines and column alignment are best-effort
and should be confirmed against the tree. The author's e-mail address on the
From: line was also missing and has been left as-is.

 js/public/String.h                       |  91 ++++++++++++--
 js/public/shadow/String.h                |   2 +
 js/src/jsapi-tests/moz.build             |   1 +
 js/src/jsapi-tests/testStringBuffers.cpp | 152 +++++++++++++++++++++++
 js/src/vm/StringType-inl.h               |  12 ++
 js/src/vm/StringType.cpp                 |  66 ++++++++++
 js/src/vm/StringType.h                   |   6 +
 7 files changed, 323 insertions(+), 7 deletions(-)
 create mode 100644 js/src/jsapi-tests/testStringBuffers.cpp

diff --git a/js/public/String.h b/js/public/String.h
index 3e003f291c4c..f7e0708cb2ec 100644
--- a/js/public/String.h
+++ b/js/public/String.h
@@ -11,13 +11,15 @@
 
 #include "js/shadow/String.h"  // JS::shadow::String
 
-#include "mozilla/Assertions.h"  // MOZ_ASSERT
-#include "mozilla/Attributes.h"  // MOZ_ALWAYS_INLINE
-#include "mozilla/Likely.h"      // MOZ_LIKELY
-#include "mozilla/Maybe.h"       // mozilla::Maybe
-#include "mozilla/Range.h"       // mozilla::Range
-#include "mozilla/Span.h"        // mozilla::Span
-                                 // std::tuple
+#include "mozilla/Assertions.h"    // MOZ_ASSERT
+#include "mozilla/Attributes.h"    // MOZ_ALWAYS_INLINE
+#include "mozilla/Likely.h"        // MOZ_LIKELY
+#include "mozilla/Maybe.h"         // mozilla::Maybe
+#include "mozilla/Range.h"         // mozilla::Range
+#include "mozilla/RefPtr.h"        // RefPtr
+#include "mozilla/Span.h"          // mozilla::Span
+                                   // std::tuple
+#include "mozilla/StringBuffer.h"  // mozilla::StringBuffer
 
 #include <algorithm>  // std::copy_n
 #include <stddef.h>   // size_t
@@ -102,6 +104,41 @@ extern JS_PUBLIC_API JSString* JS_NewUCStringCopyN(JSContext* cx,
 extern JS_PUBLIC_API JSString* JS_NewUCStringCopyZ(JSContext* cx,
                                                    const char16_t* s);
 
+namespace JS {
+
+/**
+ * Create a new JSString possibly backed by |buffer|. The contents of |buffer|
+ * will be interpreted as an array of Latin1 characters.
+ *
+ * Note that the returned string is not guaranteed to use |buffer|: as an
+ * optimization, this API can return an inline string or a previously allocated
+ * string.
+ *
+ * Increments the buffer's refcount iff the JS string holds a reference to it.
+ */
+extern JS_PUBLIC_API JSString* NewStringFromLatin1Buffer(
+    JSContext* cx, RefPtr<mozilla::StringBuffer> buffer, size_t length);
+
+/**
+ * Similar to NewStringFromLatin1Buffer but for char16_t buffers.
+ */
+extern JS_PUBLIC_API JSString* NewStringFromTwoByteBuffer(
+    JSContext* cx, RefPtr<mozilla::StringBuffer> buffer, size_t length);
+
+/**
+ * Similar to NewStringFromLatin1Buffer but for UTF8 buffers.
+ *
+ * This can create a Latin1 string backed by |buffer| iff the utf8 buffer
+ * contains only ASCII chars. If there are non-ASCII chars, |buffer| can't be
+ * used so this API will copy and inflate the characters for the new JS string.
+ *
+ * Note that |length| must be the (byte) length of the UTF8 buffer.
+ */
+extern JS_PUBLIC_API JSString* NewStringFromUTF8Buffer(
+    JSContext* cx, RefPtr<mozilla::StringBuffer> buffer, size_t length);
+
+}  // namespace JS
+
 extern JS_PUBLIC_API JSString* JS_AtomizeUCStringN(JSContext* cx,
                                                    const char16_t* s,
                                                    size_t length);
@@ -430,6 +467,46 @@ MOZ_ALWAYS_INLINE bool IsExternalUCString(
   return true;
 }
 
+/**
+ * If the provided string is backed by a StringBuffer for latin-1 storage,
+ * return true and set |*buffer| to the string buffer.
+ *
+ * Note: this function doesn't increment the buffer's refcount. The buffer
+ * remains valid as long as the provided string is kept alive.
+ */
+MOZ_ALWAYS_INLINE bool IsLatin1StringWithStringBuffer(
+    JSString* str, mozilla::StringBuffer** buffer) {
+  shadow::String* s = shadow::AsShadowString(str);
+
+  if (!s->hasStringBuffer() || !s->hasLatin1Chars()) {
+    return false;
+  }
+
+  void* data = const_cast<JS::Latin1Char*>(s->nonInlineCharsLatin1);
+  *buffer = mozilla::StringBuffer::FromData(data);
+  return true;
+}
+
+/**
+ * If the provided string is backed by a StringBuffer for char16_t storage,
+ * return true and set |*buffer| to the string buffer.
+ *
+ * Note: this function doesn't increment the buffer's refcount. The buffer
+ * remains valid as long as the provided string is kept alive.
+ */
+MOZ_ALWAYS_INLINE bool IsTwoByteStringWithStringBuffer(
+    JSString* str, mozilla::StringBuffer** buffer) {
+  shadow::String* s = shadow::AsShadowString(str);
+
+  if (!s->hasStringBuffer() || s->hasLatin1Chars()) {
+    return false;
+  }
+
+  void* data = const_cast<char16_t*>(s->nonInlineCharsTwoByte);
+  *buffer = mozilla::StringBuffer::FromData(data);
+  return true;
+}
+
 namespace detail {
 
 extern JS_PUBLIC_API JSLinearString* StringToLinearStringSlow(JSContext* cx,
diff --git a/js/public/shadow/String.h b/js/public/shadow/String.h
index cae853cc5d9a..6e1443199931 100644
--- a/js/public/shadow/String.h
+++ b/js/public/shadow/String.h
@@ -35,6 +35,7 @@ struct String {
   static constexpr uint32_t LINEAR_BIT = js::Bit(4);
   static constexpr uint32_t INLINE_CHARS_BIT = js::Bit(6);
   static constexpr uint32_t LATIN1_CHARS_BIT = js::Bit(10);
+  static constexpr uint32_t HAS_STRING_BUFFER_BIT = js::Bit(12);
   static constexpr uint32_t EXTERNAL_FLAGS = LINEAR_BIT | js::Bit(8);
   static constexpr uint32_t TYPE_FLAGS_MASK = js::BitMask(10) - js::BitMask(3);
   static constexpr uint32_t PERMANENT_ATOM_MASK = ATOM_BIT | js::Bit(8);
@@ -68,6 +69,7 @@ struct String {
 
   bool isLinear() const { return flags() & LINEAR_BIT; }
   bool hasLatin1Chars() const { return flags() & LATIN1_CHARS_BIT; }
+  bool hasStringBuffer() const { return flags() & HAS_STRING_BUFFER_BIT; }
 
   // For hot code, prefer other type queries.
   bool isExternal() const {
diff --git a/js/src/jsapi-tests/moz.build b/js/src/jsapi-tests/moz.build
index ac9afbe814ec..c66ed2878992 100644
--- a/js/src/jsapi-tests/moz.build
+++ b/js/src/jsapi-tests/moz.build
@@ -128,6 +128,7 @@ UNIFIED_SOURCES += [
     "testSparseBitmap.cpp",
     "testStencil.cpp",
     "testStringBuffer.cpp",
+    "testStringBuffers.cpp",
     "testStringIsArrayIndex.cpp",
     "testStructuredClone.cpp",
     "testSymbol.cpp",
diff --git a/js/src/jsapi-tests/testStringBuffers.cpp b/js/src/jsapi-tests/testStringBuffers.cpp
new file mode 100644
index 000000000000..f36432f3cea7
--- /dev/null
+++ b/js/src/jsapi-tests/testStringBuffers.cpp
@@ -0,0 +1,152 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/StringBuffer.h"
+
+#include "jsapi.h"
+
+#include "js/String.h"
+#include "jsapi-tests/tests.h"
+#include "util/Text.h"
+
+BEGIN_TEST(testStringBuffersLatin1) {
+  static const JS::Latin1Char chars[] = "This is just some random string";
+  static const size_t len = js_strlen(chars);
+
+  RefPtr<mozilla::StringBuffer> buffer =
+      mozilla::StringBuffer::Create(chars, len);
+  CHECK(buffer);
+
+  auto* bufferChars = static_cast<const JS::Latin1Char*>(buffer->Data());
+
+  JS::Rooted<JSString*> str1(cx,
+                             JS::NewStringFromLatin1Buffer(cx, buffer, len));
+  CHECK(str1);
+  CHECK_EQUAL(JS_GetStringLength(str1), len);
+  {
+    JS::AutoCheckCannotGC nogc;
+    size_t strLen;
+    const JS::Latin1Char* strChars =
+        JS_GetLatin1StringCharsAndLength(cx, nogc, str1, &strLen);
+    CHECK_EQUAL(strLen, len);
+    CHECK_EQUAL(strChars, bufferChars);
+  }
+
+  JS::Rooted<JSString*> str2(cx,
+                             JS::NewStringFromLatin1Buffer(cx, buffer, len));
+  CHECK(str2);
+
+  JS::Rooted<JSString*> str3(cx,
+                             JS::NewStringFromLatin1Buffer(cx, buffer, len));
+  CHECK(str3);
+
+#ifdef DEBUG
+  CHECK_EQUAL(buffer->RefCount(), 4u);  // |buffer| and the 3 JS strings.
+#endif
+
+  mozilla::StringBuffer* buf;
+  CHECK(!JS::IsTwoByteStringWithStringBuffer(str2, &buf));
+  CHECK(JS::IsLatin1StringWithStringBuffer(str2, &buf));
+  CHECK_EQUAL(buf, buffer);
+
+  return true;
+}
+END_TEST(testStringBuffersLatin1)
+
+BEGIN_TEST(testStringBuffersTwoByte) {
+  static const char16_t chars[] = u"This is just some random string";
+  static const size_t len = js_strlen(chars);
+
+  RefPtr<mozilla::StringBuffer> buffer =
+      mozilla::StringBuffer::Create(chars, len);
+  CHECK(buffer);
+
+  auto* bufferChars = static_cast<const char16_t*>(buffer->Data());
+
+  JS::Rooted<JSString*> str1(cx,
+                             JS::NewStringFromTwoByteBuffer(cx, buffer, len));
+  CHECK(str1);
+  CHECK_EQUAL(JS_GetStringLength(str1), len);
+  {
+    JS::AutoCheckCannotGC nogc;
+    size_t strLen;
+    const char16_t* strChars =
+        JS_GetTwoByteStringCharsAndLength(cx, nogc, str1, &strLen);
+    CHECK_EQUAL(strLen, len);
+    CHECK_EQUAL(strChars, bufferChars);
+  }
+
+  JS::Rooted<JSString*> str2(cx,
+                             JS::NewStringFromTwoByteBuffer(cx, buffer, len));
+  CHECK(str2);
+
+  JS::Rooted<JSString*> str3(cx,
+                             JS::NewStringFromTwoByteBuffer(cx, buffer, len));
+  CHECK(str3);
+
+#ifdef DEBUG
+  CHECK_EQUAL(buffer->RefCount(), 4u);  // |buffer| and the 3 JS strings.
+#endif
+
+  mozilla::StringBuffer* buf;
+  CHECK(!JS::IsLatin1StringWithStringBuffer(str2, &buf));
+  CHECK(JS::IsTwoByteStringWithStringBuffer(str2, &buf));
+  CHECK_EQUAL(buf, buffer);
+
+  return true;
+}
+END_TEST(testStringBuffersTwoByte)
+
+BEGIN_TEST(testStringBuffersUTF8) {
+  // UTF8 ASCII string buffer.
+  {
+    static const char chars[] = "This is a UTF-8 string but also ASCII";
+    static const size_t len = js_strlen(chars);
+
+    RefPtr<mozilla::StringBuffer> buffer =
+        mozilla::StringBuffer::Create(chars, len);
+    CHECK(buffer);
+
+    JS::Rooted<JSString*> str1(cx,
+                               JS::NewStringFromUTF8Buffer(cx, buffer, len));
+    CHECK(str1);
+    CHECK_EQUAL(JS_GetStringLength(str1), len);
+
+    mozilla::StringBuffer* buf;
+    CHECK(!JS::IsTwoByteStringWithStringBuffer(str1, &buf));
+    CHECK(JS::IsLatin1StringWithStringBuffer(str1, &buf));
+    CHECK_EQUAL(buf, buffer);
+
+#ifdef DEBUG
+    CHECK_EQUAL(buffer->RefCount(), 2u);  // |buffer| and the JS string.
+#endif
+  }
+
+  // UTF8 non-ASCII string buffer. The passed in buffer isn't used.
+  {
+    static const char chars[] =
+        "This is a UTF-\xEF\xBC\x98 string but not ASCII";
+    static const size_t len = js_strlen(chars);
+
+    RefPtr<mozilla::StringBuffer> buffer =
+        mozilla::StringBuffer::Create(chars, len);
+    CHECK(buffer);
+
+    JS::Rooted<JSString*> str1(cx,
+                               JS::NewStringFromUTF8Buffer(cx, buffer, len));
+    CHECK(str1);
+    CHECK_EQUAL(JS_GetStringLength(str1), 36u);
+
+    mozilla::StringBuffer* buf;
+    CHECK(!JS::IsLatin1StringWithStringBuffer(str1, &buf));
+    CHECK(!JS::IsTwoByteStringWithStringBuffer(str1, &buf));
+
+#ifdef DEBUG
+    CHECK_EQUAL(buffer->RefCount(), 1u);  // Just |buffer|.
+#endif
+  }
+
+  return true;
+}
+END_TEST(testStringBuffersUTF8)
diff --git a/js/src/vm/StringType-inl.h b/js/src/vm/StringType-inl.h
index 0672d1b83bf8..829aad3cdf86 100644
--- a/js/src/vm/StringType-inl.h
+++ b/js/src/vm/StringType-inl.h
@@ -11,6 +11,7 @@
 
 #include "mozilla/PodOperations.h"
 #include "mozilla/Range.h"
+#include "mozilla/StringBuffer.h"
 
 #include "gc/GCEnum.h"
 #include "gc/MaybeRooted.h"
@@ -493,6 +494,17 @@ MOZ_ALWAYS_INLINE JSLinearString* JSLinearString::new_(
   return newValidLength<allowGC>(cx, chars, heap);
 }
 
+template <js::AllowGC allowGC, typename CharT>
+MOZ_ALWAYS_INLINE JSLinearString* JSLinearString::new_(
+    JSContext* cx, RefPtr<mozilla::StringBuffer>&& buffer, const CharT* chars,
+    size_t length, js::gc::Heap heap) {
+  if (MOZ_UNLIKELY(!validateLengthInternal<allowGC>(cx, length))) {
+    return nullptr;
+  }
+
+  return newValidLength<allowGC>(cx, std::move(buffer), chars, length, heap);
+}
+
 template <js::AllowGC allowGC, typename CharT>
 MOZ_ALWAYS_INLINE JSLinearString* JSLinearString::newValidLength(
     JSContext* cx, JS::MutableHandle<JSString::OwnedChars<CharT>> chars,
diff --git a/js/src/vm/StringType.cpp b/js/src/vm/StringType.cpp
index 93f0ba07a9fe..119e300c5258 100644
--- a/js/src/vm/StringType.cpp
+++ b/js/src/vm/StringType.cpp
@@ -13,6 +13,7 @@
 #include "mozilla/MemoryReporting.h"
 #include "mozilla/PodOperations.h"
 #include "mozilla/RangedPtr.h"
+#include "mozilla/StringBuffer.h"
 #include "mozilla/TextUtils.h"
 #include "mozilla/Utf8.h"
 #include "mozilla/Vector.h"
@@ -2276,6 +2277,71 @@ template JSString* NewMaybeExternalString(
 
 } /* namespace js */
 
+template <typename CharT>
+static JSString* NewStringFromBuffer(JSContext* cx,
+                                     RefPtr<mozilla::StringBuffer>&& buffer,
+                                     size_t length) {
+  AssertHeapIsIdle();
+  CHECK_THREAD(cx);
+
+  const auto* s = static_cast<const CharT*>(buffer->Data());
+
+  if (JSString* str = TryEmptyOrStaticString(cx, s, length)) {
+    return str;
+  }
+
+  // Use the inline-string cache that we also use for external strings.
+  if (JSThinInlineString::lengthFits<Latin1Char>(length) &&
+      CanStoreCharsAsLatin1(s, length)) {
+    ExternalStringCache& cache = cx->zone()->externalStringCache();
+    if (JSInlineString* str = cache.lookupInline(s, length)) {
+      return str;
+    }
+    JSInlineString* str = NewInlineStringMaybeDeflated<AllowGC::CanGC>(
+        cx, mozilla::Range(s, length), gc::Heap::Default);
+    if (!str) {
+      return nullptr;
+    }
+    cache.putInline(str);
+    return str;
+  }
+
+  if (JSInlineString::lengthFits<CharT>(length)) {
+    return NewInlineString<CanGC>(cx, mozilla::Range(s, length),
+                                  gc::Heap::Default);
+  }
+
+  return JSLinearString::new_<CanGC, CharT>(cx, std::move(buffer), s, length,
+                                            gc::Heap::Default);
+}
+
+JS_PUBLIC_API JSString* JS::NewStringFromLatin1Buffer(
+    JSContext* cx, RefPtr<mozilla::StringBuffer> buffer, size_t length) {
+  return NewStringFromBuffer<Latin1Char>(cx, std::move(buffer), length);
+}
+
+JS_PUBLIC_API JSString* JS::NewStringFromTwoByteBuffer(
+    JSContext* cx, RefPtr<mozilla::StringBuffer> buffer, size_t length) {
+  return NewStringFromBuffer<char16_t>(cx, std::move(buffer), length);
+}
+
+JS_PUBLIC_API JSString* JS::NewStringFromUTF8Buffer(
+    JSContext* cx, RefPtr<mozilla::StringBuffer> buffer, size_t length) {
+  AssertHeapIsIdle();
+  CHECK_THREAD(cx);
+
+  const JS::UTF8Chars utf8(static_cast<const char*>(buffer->Data()), length);
+
+  JS::SmallestEncoding encoding = JS::FindSmallestEncoding(utf8);
+  if (encoding == JS::SmallestEncoding::ASCII) {
+    // ASCII case can use the string buffer as Latin1 buffer.
+    return NewStringFromBuffer<Latin1Char>(cx, std::move(buffer), length);
+  }
+
+  // Non-ASCII case cannot use the string buffer.
+  return NewStringCopyUTF8N(cx, utf8, encoding);
+}
+
 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
 void JSExtensibleString::dumpOwnRepresentationFields(
     js::JSONPrinter& json) const {
diff --git a/js/src/vm/StringType.h b/js/src/vm/StringType.h
index 3dc9ef3addb2..2582ce21f9db 100644
--- a/js/src/vm/StringType.h
+++ b/js/src/vm/StringType.h
@@ -1057,6 +1057,12 @@ class JSLinearString : public JSString {
                                      JS::MutableHandle<OwnedChars<CharT>> chars,
                                      js::gc::Heap heap);
 
+  template <js::AllowGC allowGC, typename CharT>
+  static inline JSLinearString* new_(JSContext* cx,
+                                     RefPtr<mozilla::StringBuffer>&& buffer,
+                                     const CharT* chars, size_t length,
+                                     js::gc::Heap heap);
+
   template <js::AllowGC allowGC, typename CharT>
   static inline JSLinearString* newValidLength(
       JSContext* cx, JS::MutableHandle<OwnedChars<CharT>> chars,