Bug 1367779 - Use stack allocated storage in case conversion operations when the result fits into JSInlineString. r=jandem

This commit is contained in:
André Bargull 2017-06-09 10:54:30 -07:00
Родитель c168ba5421
Коммит a6b3adf87a
1 изменённых файлов: 178 добавлений и 82 удалений

Просмотреть файл

@ -604,23 +604,124 @@ js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, int32_t l
return NewDependentString(cx, str, begin, len);
}
template <typename CharT>
static auto
ReallocChars(JSContext* cx, UniquePtr<CharT[], JS::FreePolicy> chars, size_t oldLength,
size_t newLength)
-> decltype(chars)
{
using AnyCharPtr = decltype(chars);
template <typename CharT> struct MaximumInlineLength;
CharT* oldChars = chars.release();
CharT* newChars = cx->pod_realloc(oldChars, oldLength, newLength);
if (!newChars) {
js_free(oldChars);
return AnyCharPtr();
template<> struct MaximumInlineLength<Latin1Char> {
static constexpr size_t value = JSFatInlineString::MAX_LENGTH_LATIN1;
};
template<> struct MaximumInlineLength<char16_t> {
static constexpr size_t value = JSFatInlineString::MAX_LENGTH_TWO_BYTE;
};
// Character buffer class used for ToLowerCase and ToUpperCase operations.
//
// Case conversion operations normally return a string with the same length as
// the input string. To avoid over-allocation, we optimistically allocate an
// array with same size as the input string and only when we detect special
// casing characters, which can change the output string length, we reallocate
// the output buffer to the final string length.
//
// As a further mean to improve runtime performance, the character buffer
// contains an inline storage, so we don't need to heap-allocate an array when
// a JSInlineString will be used for the output string.
//
// Why not use mozilla::Vector instead? mozilla::Vector doesn't provide enough
// fine-grained control to avoid over-allocation when (re)allocating for exact
// buffer sizes. This led to visible performance regressions in µ-benchmarks.
template <typename CharT>
class MOZ_NON_PARAM InlineCharBuffer
{
using CharPtr = UniquePtr<CharT[], JS::FreePolicy>;
static constexpr size_t InlineCapacity = MaximumInlineLength<CharT>::value;
CharT inlineStorage[InlineCapacity];
CharPtr heapStorage;
#ifdef DEBUG
// In debug mode, we keep track of the requested string lengths to ensure
// all character buffer methods are called in the correct order and with
// the expected argument values.
size_t lastRequestedLength = 0;
void assertValidRequest(size_t expectedLastLength, size_t length) {
MOZ_ASSERT(length > expectedLastLength, "cannot shrink requested length");
MOZ_ASSERT(lastRequestedLength == expectedLastLength);
lastRequestedLength = length;
}
#else
void assertValidRequest(size_t expectedLastLength, size_t length) {}
#endif
public:
CharT* get()
{
return heapStorage ? heapStorage.get() : inlineStorage;
}
return AnyCharPtr(newChars);
}
bool maybeAlloc(JSContext* cx, size_t length)
{
assertValidRequest(0, length);
if (length <= InlineCapacity)
return true;
MOZ_ASSERT(!heapStorage, "heap storage already allocated");
heapStorage = cx->make_pod_array<CharT>(length + 1);
return !!heapStorage;
}
bool maybeRealloc(JSContext* cx, size_t oldLength, size_t newLength)
{
assertValidRequest(oldLength, newLength);
if (newLength <= InlineCapacity)
return true;
if (!heapStorage) {
heapStorage = cx->make_pod_array<CharT>(newLength + 1);
if (!heapStorage)
return false;
MOZ_ASSERT(oldLength <= InlineCapacity);
PodCopy(heapStorage.get(), inlineStorage, oldLength);
return true;
}
CharT* oldChars = heapStorage.release();
CharT* newChars = cx->pod_realloc(oldChars, oldLength + 1, newLength + 1);
if (!newChars) {
js_free(oldChars);
return false;
}
heapStorage.reset(newChars);
return true;
}
JSString* toString(JSContext* cx, size_t length)
{
MOZ_ASSERT(length == lastRequestedLength);
if (JSInlineString::lengthFits<CharT>(length)) {
MOZ_ASSERT(!heapStorage,
"expected only inline storage when length fits in inline string");
mozilla::Range<const CharT> range(inlineStorage, length);
return NewInlineString<CanGC>(cx, range);
}
MOZ_ASSERT(heapStorage, "heap storage was not allocated for non-inline string");
heapStorage.get()[length] = '\0'; // Null-terminate
JSString* res = NewStringDontDeflate<CanGC>(cx, heapStorage.get(), length);
if (!res)
return nullptr;
mozilla::Unused << heapStorage.release();
return res;
}
};
/**
* U+03A3 GREEK CAPITAL LETTER SIGMA has two different lower case mappings
@ -760,8 +861,6 @@ ToLowerCaseImpl(CharT* destChars, const CharT* srcChars, size_t startIndex, size
}
MOZ_ASSERT(j == destLength);
destChars[destLength] = '\0';
return srcLength;
}
@ -792,9 +891,9 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
{
// Unlike toUpperCase, toLowerCase has the nice invariant that if the
// input is a Latin-1 string, the output is also a Latin-1 string.
using AnyCharPtr = UniquePtr<CharT[], JS::FreePolicy>;
AnyCharPtr newChars;
InlineCharBuffer<CharT> newChars;
const size_t length = str->length();
size_t resultLength;
{
@ -809,6 +908,18 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
MOZ_ASSERT(unicode::CanLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA),
"U+03A3 has a simple lower case mapping");
// One element Latin-1 strings can be directly retrieved from the
// static strings cache.
if (IsSame<CharT, Latin1Char>::value) {
if (length == 1) {
char16_t lower = unicode::ToLowerCase(chars[0]);
MOZ_ASSERT(lower <= JSString::MAX_LATIN1_CHAR);
MOZ_ASSERT(StaticStrings::hasUnit(lower));
return cx->staticStrings().getUnit(lower);
}
}
// Look for the first character that changes when lowercased.
size_t i = 0;
for (; i < length; i++) {
@ -834,8 +945,7 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
return str;
resultLength = length;
newChars = cx->make_pod_array<CharT>(resultLength + 1);
if (!newChars)
if (!newChars.maybeAlloc(cx, resultLength))
return nullptr;
PodCopy(newChars.get(), chars, i);
@ -846,23 +956,15 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
"Latin-1 strings don't have special lower case mappings");
resultLength = ToLowerCaseLength(chars, readChars, length);
AnyCharPtr buf = ReallocChars(cx, Move(newChars), length + 1, resultLength + 1);
if (!buf)
if (!newChars.maybeRealloc(cx, length, resultLength))
return nullptr;
newChars = Move(buf);
MOZ_ALWAYS_TRUE(length ==
ToLowerCaseImpl(newChars.get(), chars, readChars, length, resultLength));
}
}
JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), resultLength);
if (!res)
return nullptr;
mozilla::Unused << newChars.release();
return res;
return newChars.toString(cx, resultLength);
}
JSString*
@ -1023,8 +1125,6 @@ ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t startIndex,
}
MOZ_ASSERT(j == destLength);
destChars[destLength] = '\0';
return srcLength;
}
@ -1068,53 +1168,68 @@ CopyChars(CharT* destChars, const CharT* srcChars, size_t length)
}
template <typename DestChar, typename SrcChar>
static inline UniquePtr<DestChar[], JS::FreePolicy>
ToUpperCase(JSContext* cx, const SrcChar* chars, size_t startIndex, size_t length,
size_t* resultLength)
static inline bool
ToUpperCase(JSContext* cx, InlineCharBuffer<DestChar>& newChars, const SrcChar* chars,
size_t startIndex, size_t length, size_t* resultLength)
{
MOZ_ASSERT(startIndex < length);
using DestCharPtr = UniquePtr<DestChar[], JS::FreePolicy>;
*resultLength = length;
DestCharPtr buf = cx->make_pod_array<DestChar>(length + 1);
if (!buf)
return buf;
if (!newChars.maybeAlloc(cx, length))
return false;
CopyChars(buf.get(), chars, startIndex);
CopyChars(newChars.get(), chars, startIndex);
size_t readChars = ToUpperCaseImpl(buf.get(), chars, startIndex, length, length);
size_t readChars = ToUpperCaseImpl(newChars.get(), chars, startIndex, length, length);
if (readChars < length) {
size_t actualLength = ToUpperCaseLength(chars, readChars, length);
*resultLength = actualLength;
DestCharPtr buf2 = ReallocChars(cx, Move(buf), length + 1, actualLength + 1);
if (!buf2)
return buf2;
buf = Move(buf2);
if (!newChars.maybeRealloc(cx, length, actualLength))
return false;
MOZ_ALWAYS_TRUE(length ==
ToUpperCaseImpl(buf.get(), chars, readChars, length, actualLength));
ToUpperCaseImpl(newChars.get(), chars, readChars, length, actualLength));
}
return buf;
return true;
}
template <typename CharT>
static JSString*
ToUpperCase(JSContext* cx, JSLinearString* str)
{
using Latin1CharPtr = UniquePtr<Latin1Char[], JS::FreePolicy>;
using TwoByteCharPtr = UniquePtr<char16_t[], JS::FreePolicy>;
using Latin1Buffer = InlineCharBuffer<Latin1Char>;
using TwoByteBuffer = InlineCharBuffer<char16_t>;
mozilla::MaybeOneOf<Latin1CharPtr, TwoByteCharPtr> newChars;
mozilla::MaybeOneOf<Latin1Buffer, TwoByteBuffer> newChars;
const size_t length = str->length();
size_t resultLength;
{
AutoCheckCannotGC nogc;
const CharT* chars = str->chars<CharT>(nogc);
// Most one element Latin-1 strings can be directly retrieved from the
// static strings cache.
if (IsSame<CharT, Latin1Char>::value) {
if (length == 1) {
Latin1Char c = chars[0];
if (c != unicode::MICRO_SIGN &&
c != unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS &&
c != unicode::LATIN_SMALL_LETTER_SHARP_S)
{
char16_t upper = unicode::ToUpperCase(c);
MOZ_ASSERT(upper <= JSString::MAX_LATIN1_CHAR);
MOZ_ASSERT(StaticStrings::hasUnit(upper));
return cx->staticStrings().getUnit(upper);
}
MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR ||
CanUpperCaseSpecialCasing(c));
}
}
// Look for the first character that changes when uppercased.
size_t i = 0;
for (; i < length; i++) {
@ -1172,36 +1287,21 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
}
if (resultIsLatin1) {
Latin1CharPtr buf = ToUpperCase<Latin1Char>(cx, chars, i, length, &resultLength);
if (!buf)
return nullptr;
newChars.construct<Latin1Buffer>();
newChars.construct<Latin1CharPtr>(Move(buf));
if (!ToUpperCase(cx, newChars.ref<Latin1Buffer>(), chars, i, length, &resultLength))
return nullptr;
} else {
TwoByteCharPtr buf = ToUpperCase<char16_t>(cx, chars, i, length, &resultLength);
if (!buf)
return nullptr;
newChars.construct<TwoByteBuffer>();
newChars.construct<TwoByteCharPtr>(Move(buf));
if (!ToUpperCase(cx, newChars.ref<TwoByteBuffer>(), chars, i, length, &resultLength))
return nullptr;
}
}
JSString* res;
if (newChars.constructed<Latin1CharPtr>()) {
res = NewStringDontDeflate<CanGC>(cx, newChars.ref<Latin1CharPtr>().get(), resultLength);
if (!res)
return nullptr;
mozilla::Unused << newChars.ref<Latin1CharPtr>().release();
} else {
res = NewStringDontDeflate<CanGC>(cx, newChars.ref<TwoByteCharPtr>().get(), resultLength);
if (!res)
return nullptr;
mozilla::Unused << newChars.ref<TwoByteCharPtr>().release();
}
return res;
return newChars.constructed<Latin1Buffer>()
? newChars.ref<Latin1Buffer>().toString(cx, resultLength)
: newChars.ref<TwoByteBuffer>().toString(cx, resultLength);
}
JSString*
@ -1342,12 +1442,8 @@ js::str_normalize(JSContext* cx, unsigned argc, Value* vp)
}
}
JSLinearString* linear = str->ensureLinear(cx);
if (!linear)
return false;
// Latin-1 strings are already in Normalization Form C.
if (form == NFC && linear->hasLatin1Chars()) {
if (form == NFC && str->hasLatin1Chars()) {
// Step 7.
args.rval().setString(str);
return true;
@ -1355,7 +1451,7 @@ js::str_normalize(JSContext* cx, unsigned argc, Value* vp)
// Step 6.
AutoStableStringChars stableChars(cx);
if (!stableChars.initTwoByte(cx, linear))
if (!stableChars.initTwoByte(cx, str))
return false;
mozilla::Range<const char16_t> srcChars = stableChars.twoByteRange();