Bug 1367779 - Use stack allocated storage in case conversion operations when the result fits into JSInlineString. r=jandem

2017-06-09 10:54:30 -07:00 · 2017-06-09 10:54:30 -07:00 · a6b3adf87a
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@ -604,23 +604,124 @@ js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, int32_t l
    return NewDependentString(cx, str, begin, len);
 }

-template <typename CharT>
-static auto
-ReallocChars(JSContext* cx, UniquePtr<CharT[], JS::FreePolicy> chars, size_t oldLength,
-             size_t newLength)
-  -> decltype(chars)
-{
-    using AnyCharPtr = decltype(chars);
+template <typename CharT> struct MaximumInlineLength;

-    CharT* oldChars = chars.release();
-    CharT* newChars = cx->pod_realloc(oldChars, oldLength, newLength);
-    if (!newChars) {
-        js_free(oldChars);
-        return AnyCharPtr();
+template<> struct MaximumInlineLength<Latin1Char> {
+    static constexpr size_t value = JSFatInlineString::MAX_LENGTH_LATIN1;
+};
+
+template<> struct MaximumInlineLength<char16_t> {
+    static constexpr size_t value = JSFatInlineString::MAX_LENGTH_TWO_BYTE;
+};
+
+// Character buffer class used for ToLowerCase and ToUpperCase operations.
+//
+// Case conversion operations normally return a string with the same length as
+// the input string. To avoid over-allocation, we optimistically allocate an
+// array with same size as the input string and only when we detect special
+// casing characters, which can change the output string length, we reallocate
+// the output buffer to the final string length.
+//
+// As a further mean to improve runtime performance, the character buffer
+// contains an inline storage, so we don't need to heap-allocate an array when
+// a JSInlineString will be used for the output string.
+//
+// Why not use mozilla::Vector instead? mozilla::Vector doesn't provide enough
+// fine-grained control to avoid over-allocation when (re)allocating for exact
+// buffer sizes. This led to visible performance regressions in µ-benchmarks.
+template <typename CharT>
+class MOZ_NON_PARAM InlineCharBuffer
+{
+    using CharPtr = UniquePtr<CharT[], JS::FreePolicy>;
+    static constexpr size_t InlineCapacity = MaximumInlineLength<CharT>::value;
+
+    CharT inlineStorage[InlineCapacity];
+    CharPtr heapStorage;
+
+#ifdef DEBUG
+    // In debug mode, we keep track of the requested string lengths to ensure
+    // all character buffer methods are called in the correct order and with
+    // the expected argument values.
+    size_t lastRequestedLength = 0;
+
+    void assertValidRequest(size_t expectedLastLength, size_t length) {
+        MOZ_ASSERT(length > expectedLastLength, "cannot shrink requested length");
+        MOZ_ASSERT(lastRequestedLength == expectedLastLength);
+        lastRequestedLength = length;
+    }
+#else
+    void assertValidRequest(size_t expectedLastLength, size_t length) {}
+#endif
+
+  public:
+    CharT* get()
+    {
+        return heapStorage ? heapStorage.get() : inlineStorage;
    }

-    return AnyCharPtr(newChars);
-}
+    bool maybeAlloc(JSContext* cx, size_t length)
+    {
+        assertValidRequest(0, length);
+
+        if (length <= InlineCapacity)
+            return true;
+
+        MOZ_ASSERT(!heapStorage, "heap storage already allocated");
+        heapStorage = cx->make_pod_array<CharT>(length + 1);
+        return !!heapStorage;
+    }
+
+    bool maybeRealloc(JSContext* cx, size_t oldLength, size_t newLength)
+    {
+        assertValidRequest(oldLength, newLength);
+
+        if (newLength <= InlineCapacity)
+            return true;
+
+        if (!heapStorage) {
+            heapStorage = cx->make_pod_array<CharT>(newLength + 1);
+            if (!heapStorage)
+                return false;
+
+            MOZ_ASSERT(oldLength <= InlineCapacity);
+            PodCopy(heapStorage.get(), inlineStorage, oldLength);
+            return true;
+        }
+
+        CharT* oldChars = heapStorage.release();
+        CharT* newChars = cx->pod_realloc(oldChars, oldLength + 1, newLength + 1);
+        if (!newChars) {
+            js_free(oldChars);
+            return false;
+        }
+
+        heapStorage.reset(newChars);
+        return true;
+    }
+
+    JSString* toString(JSContext* cx, size_t length)
+    {
+        MOZ_ASSERT(length == lastRequestedLength);
+
+        if (JSInlineString::lengthFits<CharT>(length)) {
+            MOZ_ASSERT(!heapStorage,
+                       "expected only inline storage when length fits in inline string");
+
+            mozilla::Range<const CharT> range(inlineStorage, length);
+            return NewInlineString<CanGC>(cx, range);
+        }
+
+        MOZ_ASSERT(heapStorage, "heap storage was not allocated for non-inline string");
+
+        heapStorage.get()[length] = '\0'; // Null-terminate
+        JSString* res = NewStringDontDeflate<CanGC>(cx, heapStorage.get(), length);
+        if (!res)
+            return nullptr;
+
+        mozilla::Unused << heapStorage.release();
+        return res;
+    }
+};

 /**
 * U+03A3 GREEK CAPITAL LETTER SIGMA has two different lower case mappings
@ -760,8 +861,6 @@ ToLowerCaseImpl(CharT* destChars, const CharT* srcChars, size_t startIndex, size
    }

    MOZ_ASSERT(j == destLength);
-    destChars[destLength] = '\0';
-
    return srcLength;
 }

@ -792,9 +891,9 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
 {
    // Unlike toUpperCase, toLowerCase has the nice invariant that if the
    // input is a Latin-1 string, the output is also a Latin-1 string.
-    using AnyCharPtr = UniquePtr<CharT[], JS::FreePolicy>;

-    AnyCharPtr newChars;
+    InlineCharBuffer<CharT> newChars;
+
    const size_t length = str->length();
    size_t resultLength;
    {
@ -809,6 +908,18 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
        MOZ_ASSERT(unicode::CanLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA),
                   "U+03A3 has a simple lower case mapping");

+        // One element Latin-1 strings can be directly retrieved from the
+        // static strings cache.
+        if (IsSame<CharT, Latin1Char>::value) {
+            if (length == 1) {
+                char16_t lower = unicode::ToLowerCase(chars[0]);
+                MOZ_ASSERT(lower <= JSString::MAX_LATIN1_CHAR);
+                MOZ_ASSERT(StaticStrings::hasUnit(lower));
+
+                return cx->staticStrings().getUnit(lower);
+            }
+        }
+
        // Look for the first character that changes when lowercased.
        size_t i = 0;
        for (; i < length; i++) {
@ -834,8 +945,7 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
            return str;

        resultLength = length;
-        newChars = cx->make_pod_array<CharT>(resultLength + 1);
-        if (!newChars)
+        if (!newChars.maybeAlloc(cx, resultLength))
            return nullptr;

        PodCopy(newChars.get(), chars, i);
@ -846,23 +956,15 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
                       "Latin-1 strings don't have special lower case mappings");
            resultLength = ToLowerCaseLength(chars, readChars, length);

-            AnyCharPtr buf = ReallocChars(cx, Move(newChars), length + 1, resultLength + 1);
-            if (!buf)
+            if (!newChars.maybeRealloc(cx, length, resultLength))
                return nullptr;

-            newChars = Move(buf);
-
            MOZ_ALWAYS_TRUE(length ==
                ToLowerCaseImpl(newChars.get(), chars, readChars, length, resultLength));
        }
    }

-    JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), resultLength);
-    if (!res)
-        return nullptr;
-
-    mozilla::Unused << newChars.release();
-    return res;
+    return newChars.toString(cx, resultLength);
 }

 JSString*
@ -1023,8 +1125,6 @@ ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t startIndex,
    }

    MOZ_ASSERT(j == destLength);
-    destChars[destLength] = '\0';
-
    return srcLength;
 }

@ -1068,53 +1168,68 @@ CopyChars(CharT* destChars, const CharT* srcChars, size_t length)
 }

 template <typename DestChar, typename SrcChar>
-static inline UniquePtr<DestChar[], JS::FreePolicy>
-ToUpperCase(JSContext* cx, const SrcChar* chars, size_t startIndex, size_t length,
-            size_t* resultLength)
+static inline bool
+ToUpperCase(JSContext* cx, InlineCharBuffer<DestChar>& newChars, const SrcChar* chars,
+            size_t startIndex, size_t length, size_t* resultLength)
 {
    MOZ_ASSERT(startIndex < length);

-    using DestCharPtr = UniquePtr<DestChar[], JS::FreePolicy>;
-
    *resultLength = length;
-    DestCharPtr buf = cx->make_pod_array<DestChar>(length + 1);
-    if (!buf)
-        return buf;
+    if (!newChars.maybeAlloc(cx, length))
+        return false;

-    CopyChars(buf.get(), chars, startIndex);
+    CopyChars(newChars.get(), chars, startIndex);

-    size_t readChars = ToUpperCaseImpl(buf.get(), chars, startIndex, length, length);
+    size_t readChars = ToUpperCaseImpl(newChars.get(), chars, startIndex, length, length);
    if (readChars < length) {
        size_t actualLength = ToUpperCaseLength(chars, readChars, length);

        *resultLength = actualLength;
-        DestCharPtr buf2 = ReallocChars(cx, Move(buf), length + 1, actualLength + 1);
-        if (!buf2)
-            return buf2;
-
-        buf = Move(buf2);
+        if (!newChars.maybeRealloc(cx, length, actualLength))
+            return false;

        MOZ_ALWAYS_TRUE(length ==
-            ToUpperCaseImpl(buf.get(), chars, readChars, length, actualLength));
+            ToUpperCaseImpl(newChars.get(), chars, readChars, length, actualLength));
    }

-    return buf;
+    return true;
 }

 template <typename CharT>
 static JSString*
 ToUpperCase(JSContext* cx, JSLinearString* str)
 {
-    using Latin1CharPtr = UniquePtr<Latin1Char[], JS::FreePolicy>;
-    using TwoByteCharPtr = UniquePtr<char16_t[], JS::FreePolicy>;
+    using Latin1Buffer = InlineCharBuffer<Latin1Char>;
+    using TwoByteBuffer = InlineCharBuffer<char16_t>;

-    mozilla::MaybeOneOf<Latin1CharPtr, TwoByteCharPtr> newChars;
+    mozilla::MaybeOneOf<Latin1Buffer, TwoByteBuffer> newChars;
    const size_t length = str->length();
    size_t resultLength;
    {
        AutoCheckCannotGC nogc;
        const CharT* chars = str->chars<CharT>(nogc);

+        // Most one element Latin-1 strings can be directly retrieved from the
+        // static strings cache.
+        if (IsSame<CharT, Latin1Char>::value) {
+            if (length == 1) {
+                Latin1Char c = chars[0];
+                if (c != unicode::MICRO_SIGN &&
+                    c != unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS &&
+                    c != unicode::LATIN_SMALL_LETTER_SHARP_S)
+                {
+                    char16_t upper = unicode::ToUpperCase(c);
+                    MOZ_ASSERT(upper <= JSString::MAX_LATIN1_CHAR);
+                    MOZ_ASSERT(StaticStrings::hasUnit(upper));
+
+                    return cx->staticStrings().getUnit(upper);
+                }
+
+                MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR ||
+                           CanUpperCaseSpecialCasing(c));
+            }
+        }
+
        // Look for the first character that changes when uppercased.
        size_t i = 0;
        for (; i < length; i++) {
@ -1172,36 +1287,21 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
        }

        if (resultIsLatin1) {
-            Latin1CharPtr buf = ToUpperCase<Latin1Char>(cx, chars, i, length, &resultLength);
-            if (!buf)
-                return nullptr;
+            newChars.construct<Latin1Buffer>();

-            newChars.construct<Latin1CharPtr>(Move(buf));
+            if (!ToUpperCase(cx, newChars.ref<Latin1Buffer>(), chars, i, length, &resultLength))
+                return nullptr;
        } else {
-            TwoByteCharPtr buf = ToUpperCase<char16_t>(cx, chars, i, length, &resultLength);
-            if (!buf)
-                return nullptr;
+            newChars.construct<TwoByteBuffer>();

-            newChars.construct<TwoByteCharPtr>(Move(buf));
+            if (!ToUpperCase(cx, newChars.ref<TwoByteBuffer>(), chars, i, length, &resultLength))
+                return nullptr;
        }
    }

-    JSString* res;
-    if (newChars.constructed<Latin1CharPtr>()) {
-        res = NewStringDontDeflate<CanGC>(cx, newChars.ref<Latin1CharPtr>().get(), resultLength);
-        if (!res)
-            return nullptr;
-
-        mozilla::Unused << newChars.ref<Latin1CharPtr>().release();
-    } else {
-        res = NewStringDontDeflate<CanGC>(cx, newChars.ref<TwoByteCharPtr>().get(), resultLength);
-        if (!res)
-            return nullptr;
-
-        mozilla::Unused << newChars.ref<TwoByteCharPtr>().release();
-    }
-
-    return res;
+    return newChars.constructed<Latin1Buffer>()
+           ? newChars.ref<Latin1Buffer>().toString(cx, resultLength)
+           : newChars.ref<TwoByteBuffer>().toString(cx, resultLength);
 }

 JSString*
@ -1342,12 +1442,8 @@ js::str_normalize(JSContext* cx, unsigned argc, Value* vp)
        }
    }

-    JSLinearString* linear = str->ensureLinear(cx);
-    if (!linear)
-        return false;
-
    // Latin-1 strings are already in Normalization Form C.
-    if (form == NFC && linear->hasLatin1Chars()) {
+    if (form == NFC && str->hasLatin1Chars()) {
        // Step 7.
        args.rval().setString(str);
        return true;
@ -1355,7 +1451,7 @@ js::str_normalize(JSContext* cx, unsigned argc, Value* vp)

    // Step 6.
    AutoStableStringChars stableChars(cx);
-    if (!stableChars.initTwoByte(cx, linear))
+    if (!stableChars.initTwoByte(cx, str))
        return false;

    mozilla::Range<const char16_t> srcChars = stableChars.twoByteRange();