зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1494942 - Improve AtomizeUTF8Chars performance. (r=Waldo)
--HG-- extra : rebase_source : 520a97331ee2358617acbf9db3bfc9eb425248e0
This commit is contained in:
Родитель
de0535b7e0
Коммит
ea3bcd2b8b
|
@ -377,6 +377,28 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, OutputFn dst)
|
|||
return true;
|
||||
}
|
||||
|
||||
template <OnUTF8Error ErrorAction, typename CharT>
|
||||
static void
|
||||
CopyAndInflateUTF8IntoBuffer(JSContext* cx, const UTF8Chars src, CharT *dst, size_t outlen, bool allASCII)
|
||||
{
|
||||
if (allASCII) {
|
||||
size_t srclen = src.length();
|
||||
MOZ_ASSERT(outlen == srclen);
|
||||
for (uint32_t i = 0; i < srclen; i++) {
|
||||
dst[i] = CharT(src[i]);
|
||||
}
|
||||
} else {
|
||||
size_t j = 0;
|
||||
auto push = [dst, &j](char16_t c) -> LoopDisposition {
|
||||
dst[j++] = CharT(c);
|
||||
return LoopDisposition::Continue;
|
||||
};
|
||||
MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<ErrorAction>(cx, src, push)));
|
||||
MOZ_ASSERT(j == outlen);
|
||||
}
|
||||
dst[outlen] = CharT('\0'); // NUL char
|
||||
}
|
||||
|
||||
template <OnUTF8Error ErrorAction, typename CharsT>
|
||||
static CharsT
|
||||
InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, size_t* outlen)
|
||||
|
@ -406,25 +428,10 @@ InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, size_t* outlen)
|
|||
return CharsT();
|
||||
}
|
||||
|
||||
if (allASCII) {
|
||||
size_t srclen = src.length();
|
||||
MOZ_ASSERT(*outlen == srclen);
|
||||
for (uint32_t i = 0; i < srclen; i++) {
|
||||
dst[i] = CharT(src[i]);
|
||||
}
|
||||
} else {
|
||||
constexpr OnUTF8Error errorMode = std::is_same<CharT, Latin1Char>::value
|
||||
? OnUTF8Error::InsertQuestionMark
|
||||
: OnUTF8Error::InsertReplacementCharacter;
|
||||
size_t j = 0;
|
||||
auto push = [dst, &j](char16_t c) -> LoopDisposition {
|
||||
dst[j++] = CharT(c);
|
||||
return LoopDisposition::Continue;
|
||||
};
|
||||
MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<errorMode>(cx, src, push)));
|
||||
MOZ_ASSERT(j == len);
|
||||
}
|
||||
dst[*outlen] = 0; // NUL char
|
||||
constexpr OnUTF8Error errorMode = std::is_same<CharT, Latin1Char>::value
|
||||
? OnUTF8Error::InsertQuestionMark
|
||||
: OnUTF8Error::InsertReplacementCharacter;
|
||||
CopyAndInflateUTF8IntoBuffer<errorMode>(cx, src, dst, *outlen, allASCII);
|
||||
|
||||
return CharsT(dst, *outlen);
|
||||
}
|
||||
|
@ -455,20 +462,31 @@ JS::LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const JS::ConstUTF8CharsZ& u
|
|||
return InflateUTF8StringHelper<OnUTF8Error::InsertReplacementCharacter, TwoByteCharsZ>(cx, chars, outlen);
|
||||
}
|
||||
|
||||
static void
|
||||
UpdateSmallestEncodingForChar(char16_t c, JS::SmallestEncoding* encoding)
|
||||
{
|
||||
JS::SmallestEncoding newEncoding = JS::SmallestEncoding::ASCII;
|
||||
if (c >= 0x80) {
|
||||
if (c < 0x100) {
|
||||
newEncoding = JS::SmallestEncoding::Latin1;
|
||||
} else {
|
||||
newEncoding = JS::SmallestEncoding::UTF16;
|
||||
}
|
||||
}
|
||||
if (newEncoding > *encoding) {
|
||||
*encoding = newEncoding;
|
||||
}
|
||||
}
|
||||
|
||||
JS::SmallestEncoding
|
||||
JS::FindSmallestEncoding(UTF8Chars utf8)
|
||||
{
|
||||
JS::SmallestEncoding encoding = JS::SmallestEncoding::ASCII;
|
||||
auto onChar = [&encoding](char16_t c) -> LoopDisposition {
|
||||
if (c >= 0x80) {
|
||||
if (c < 0x100) {
|
||||
encoding = JS::SmallestEncoding::Latin1;
|
||||
} else {
|
||||
encoding = JS::SmallestEncoding::UTF16;
|
||||
return LoopDisposition::Break;
|
||||
}
|
||||
}
|
||||
return LoopDisposition::Continue;
|
||||
UpdateSmallestEncodingForChar(c, &encoding);
|
||||
return encoding == JS::SmallestEncoding::UTF16
|
||||
? LoopDisposition::Break
|
||||
: LoopDisposition::Continue;
|
||||
};
|
||||
MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<OnUTF8Error::InsertReplacementCharacter>(
|
||||
/* cx = */ nullptr, utf8, onChar)));
|
||||
|
@ -487,6 +505,86 @@ JS::LossyUTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t*
|
|||
return InflateUTF8StringHelper<OnUTF8Error::InsertQuestionMark, Latin1CharsZ>(cx, utf8, outlen);
|
||||
}
|
||||
|
||||
/**
|
||||
* Atomization Helpers.
|
||||
*
|
||||
* These functions are extremely single-use, and are not intended for general
|
||||
* consumption.
|
||||
*/
|
||||
|
||||
bool
|
||||
GetUTF8AtomizationData(JSContext* cx, const JS::UTF8Chars utf8, size_t* outlen,
|
||||
JS::SmallestEncoding* encoding, HashNumber* hashNum)
|
||||
{
|
||||
*outlen = 0;
|
||||
*encoding = JS::SmallestEncoding::ASCII;
|
||||
*hashNum = 0;
|
||||
|
||||
auto getMetadata = [outlen, encoding, hashNum](char16_t c) -> LoopDisposition {
|
||||
(*outlen)++;
|
||||
UpdateSmallestEncodingForChar(c, encoding);
|
||||
*hashNum = mozilla::AddToHash(*hashNum, c);
|
||||
return LoopDisposition::Continue;
|
||||
};
|
||||
if (!InflateUTF8ToUTF16<OnUTF8Error::Throw>(cx, utf8, getMetadata)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
bool
|
||||
UTF8EqualsChars(const JS::UTF8Chars utfChars, const CharT* chars)
|
||||
{
|
||||
size_t ind = 0;
|
||||
bool isEqual = true;
|
||||
|
||||
auto checkEqual = [&isEqual, &ind, chars](char16_t c) -> LoopDisposition {
|
||||
#ifdef DEBUG
|
||||
JS::SmallestEncoding encoding = JS::SmallestEncoding::ASCII;
|
||||
UpdateSmallestEncodingForChar(c, &encoding);
|
||||
if (std::is_same<CharT, JS::Latin1Char>::value) {
|
||||
MOZ_ASSERT(encoding <= JS::SmallestEncoding::Latin1);
|
||||
} else if (!std::is_same<CharT, char16_t>::value) {
|
||||
MOZ_CRASH("Invalid character type in UTF8EqualsChars");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (CharT(c) != chars[ind]) {
|
||||
isEqual = false;
|
||||
return LoopDisposition::Break;
|
||||
}
|
||||
|
||||
ind++;
|
||||
return LoopDisposition::Continue;
|
||||
};
|
||||
|
||||
// To get here, you must have checked your work.
|
||||
InflateUTF8ToUTF16<OnUTF8Error::Crash>(/* cx = */ nullptr, utfChars, checkEqual);
|
||||
|
||||
return isEqual;
|
||||
}
|
||||
|
||||
template bool UTF8EqualsChars<char16_t>(const JS::UTF8Chars, const char16_t*);
|
||||
template bool UTF8EqualsChars<JS::Latin1Char>(const JS::UTF8Chars, const JS::Latin1Char*);
|
||||
|
||||
template <typename CharT>
|
||||
void
|
||||
InflateUTF8CharsToBufferAndTerminate(const UTF8Chars src, CharT* dst, size_t dstLen,
|
||||
JS::SmallestEncoding encoding)
|
||||
{
|
||||
CopyAndInflateUTF8IntoBuffer<OnUTF8Error::Crash>(/* cx = */ nullptr, src, dst, dstLen,
|
||||
encoding == JS::SmallestEncoding::ASCII);
|
||||
}
|
||||
|
||||
template void
|
||||
InflateUTF8CharsToBufferAndTerminate<char16_t>(const UTF8Chars src, char16_t* dst, size_t dstLen,
|
||||
JS::SmallestEncoding encoding);
|
||||
template void
|
||||
InflateUTF8CharsToBufferAndTerminate<JS::Latin1Char>(const UTF8Chars src, JS::Latin1Char* dst,
|
||||
size_t dstLen, JS::SmallestEncoding encoding);
|
||||
|
||||
#ifdef DEBUG
|
||||
void
|
||||
JS::ConstUTF8CharsZ::validate(size_t aLength)
|
||||
|
|
|
@ -41,37 +41,59 @@ using mozilla::Maybe;
|
|||
using mozilla::Nothing;
|
||||
using mozilla::RangedPtr;
|
||||
|
||||
template <typename CharT>
|
||||
extern void InflateUTF8CharsToBufferAndTerminate(const UTF8Chars src, CharT* dst, size_t dstLen,
|
||||
JS::SmallestEncoding encoding);
|
||||
|
||||
template <typename CharT>
|
||||
extern bool UTF8EqualsChars(const JS::UTF8Chars utf8, const CharT* chars);
|
||||
|
||||
extern bool
|
||||
GetUTF8AtomizationData(JSContext* cx, const JS::UTF8Chars utf8, size_t* outlen, JS::SmallestEncoding* encoding,
|
||||
HashNumber* hashNum);
|
||||
|
||||
struct js::AtomHasher::Lookup
|
||||
{
|
||||
union {
|
||||
const JS::Latin1Char* latin1Chars;
|
||||
const char16_t* twoByteChars;
|
||||
const char* utf8Bytes;
|
||||
};
|
||||
bool isLatin1;
|
||||
enum {
|
||||
TwoByteChar,
|
||||
Latin1,
|
||||
UTF8
|
||||
} type;
|
||||
size_t length;
|
||||
size_t byteLength;
|
||||
const JSAtom* atom; /* Optional. */
|
||||
JS::AutoCheckCannotGC nogc;
|
||||
|
||||
HashNumber hash;
|
||||
|
||||
MOZ_ALWAYS_INLINE Lookup(const char* utf8Bytes, size_t byteLen, size_t length, HashNumber hash)
|
||||
: utf8Bytes(utf8Bytes), type(UTF8), length(length), byteLength(byteLen), atom(nullptr), hash(hash)
|
||||
{}
|
||||
|
||||
MOZ_ALWAYS_INLINE Lookup(const char16_t* chars, size_t length)
|
||||
: twoByteChars(chars), isLatin1(false), length(length), atom(nullptr),
|
||||
: twoByteChars(chars), type(TwoByteChar), length(length), atom(nullptr),
|
||||
hash(mozilla::HashString(chars, length))
|
||||
{}
|
||||
|
||||
MOZ_ALWAYS_INLINE Lookup(const JS::Latin1Char* chars, size_t length)
|
||||
: latin1Chars(chars), isLatin1(true), length(length), atom(nullptr),
|
||||
: latin1Chars(chars), type(Latin1), length(length), atom(nullptr),
|
||||
hash(mozilla::HashString(chars, length))
|
||||
{}
|
||||
|
||||
inline explicit Lookup(const JSAtom* atom)
|
||||
: isLatin1(atom->hasLatin1Chars()), length(atom->length()), atom(atom),
|
||||
: type(atom->hasLatin1Chars() ? Latin1 : TwoByteChar), length(atom->length()), atom(atom),
|
||||
hash(atom->hash())
|
||||
{
|
||||
if (isLatin1) {
|
||||
if (type == Latin1) {
|
||||
latin1Chars = atom->latin1Chars(nogc);
|
||||
MOZ_ASSERT(mozilla::HashString(latin1Chars, length) == hash);
|
||||
} else {
|
||||
MOZ_ASSERT(type == TwoByteChar);
|
||||
twoByteChars = atom->twoByteChars(nogc);
|
||||
MOZ_ASSERT(mozilla::HashString(twoByteChars, length) == hash);
|
||||
}
|
||||
|
@ -97,17 +119,29 @@ js::AtomHasher::match(const AtomStateEntry& entry, const Lookup& lookup)
|
|||
|
||||
if (key->hasLatin1Chars()) {
|
||||
const Latin1Char* keyChars = key->latin1Chars(lookup.nogc);
|
||||
if (lookup.isLatin1) {
|
||||
switch (lookup.type) {
|
||||
case Lookup::Latin1:
|
||||
return mozilla::ArrayEqual(keyChars, lookup.latin1Chars, lookup.length);
|
||||
case Lookup::TwoByteChar:
|
||||
return EqualChars(keyChars, lookup.twoByteChars, lookup.length);
|
||||
case Lookup::UTF8: {
|
||||
JS::UTF8Chars utf8(lookup.utf8Bytes, lookup.byteLength);
|
||||
return UTF8EqualsChars(utf8, keyChars);
|
||||
}
|
||||
}
|
||||
return EqualChars(keyChars, lookup.twoByteChars, lookup.length);
|
||||
}
|
||||
|
||||
const char16_t* keyChars = key->twoByteChars(lookup.nogc);
|
||||
if (lookup.isLatin1) {
|
||||
switch (lookup.type) {
|
||||
case Lookup::Latin1:
|
||||
return EqualChars(lookup.latin1Chars, keyChars, lookup.length);
|
||||
case Lookup::TwoByteChar:
|
||||
return mozilla::ArrayEqual(keyChars, lookup.twoByteChars, lookup.length);
|
||||
case Lookup::UTF8: {
|
||||
JS::UTF8Chars utf8(lookup.utf8Bytes, lookup.byteLength);
|
||||
return UTF8EqualsChars(utf8, keyChars);
|
||||
}
|
||||
}
|
||||
return mozilla::ArrayEqual(keyChars, lookup.twoByteChars, lookup.length);
|
||||
}
|
||||
|
||||
inline JSAtom*
|
||||
|
@ -620,6 +654,12 @@ MOZ_ALWAYS_INLINE static JSAtom*
|
|||
AllocateNewAtom(JSContext* cx, const CharT* tbchars, size_t length, PinningBehavior pin,
|
||||
const Maybe<uint32_t>& indexValue, const AtomHasher::Lookup& lookup);
|
||||
|
||||
template <typename CharT>
|
||||
MOZ_ALWAYS_INLINE
|
||||
static JSAtom*
|
||||
AtomizeAndCopyCharsFromLookup(JSContext* cx, const CharT* tbchars, size_t length, const AtomHasher::Lookup& lookup,
|
||||
PinningBehavior pin, const Maybe<uint32_t>& indexValue);
|
||||
|
||||
/* |tbchars| must not point into an inline or short string. */
|
||||
template <typename CharT>
|
||||
MOZ_ALWAYS_INLINE
|
||||
|
@ -632,7 +672,16 @@ AtomizeAndCopyChars(JSContext* cx, const CharT* tbchars, size_t length, PinningB
|
|||
}
|
||||
|
||||
AtomHasher::Lookup lookup(tbchars, length);
|
||||
return AtomizeAndCopyCharsFromLookup(cx, tbchars, length, lookup, pin, indexValue);
|
||||
}
|
||||
|
||||
|
||||
template <typename CharT>
|
||||
MOZ_ALWAYS_INLINE
|
||||
static JSAtom*
|
||||
AtomizeAndCopyCharsFromLookup(JSContext* cx, const CharT* tbchars, size_t length, const AtomHasher::Lookup& lookup,
|
||||
PinningBehavior pin, const Maybe<uint32_t>& indexValue)
|
||||
{
|
||||
// Try the per-Zone cache first. If we find the atom there we can avoid the
|
||||
// atoms lock, the markAtom call, and the multiple HashSet lookups below.
|
||||
// We don't use the per-Zone cache if we want a pinned atom: handling that
|
||||
|
@ -810,6 +859,75 @@ PermanentlyAtomizeAndCopyChars(JSContext* cx,
|
|||
return atom;
|
||||
}
|
||||
|
||||
struct AtomizeUTF8CharsWrapper
|
||||
{
|
||||
JS::UTF8Chars utf8;
|
||||
JS::SmallestEncoding encoding;
|
||||
|
||||
AtomizeUTF8CharsWrapper(const JS::UTF8Chars& chars, JS::SmallestEncoding minEncode)
|
||||
: utf8(chars), encoding(minEncode)
|
||||
{ }
|
||||
};
|
||||
|
||||
template <typename CharT>
|
||||
MOZ_ALWAYS_INLINE
|
||||
static JSFlatString*
|
||||
MakeFlatStringForAtomization(JSContext* cx, const CharT* tbchars, size_t length)
|
||||
{
|
||||
return NewStringCopyN<NoGC>(cx, tbchars, length);
|
||||
}
|
||||
|
||||
template<typename CharT>
|
||||
MOZ_ALWAYS_INLINE
|
||||
static JSFlatString*
|
||||
MakeUTF8AtomHelper(JSContext* cx, const AtomizeUTF8CharsWrapper* chars, size_t length)
|
||||
{
|
||||
if (JSInlineString::lengthFits<CharT>(length)) {
|
||||
CharT* storage;
|
||||
JSInlineString* str = AllocateInlineString<NoGC>(cx, length, &storage);
|
||||
if (!str) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
InflateUTF8CharsToBufferAndTerminate(chars->utf8, storage, length, chars->encoding);
|
||||
return str;
|
||||
}
|
||||
|
||||
// MakeAtomUTF8Helper is called from deep in the Atomization path, which expects
|
||||
// functions to fail gracefully with nullptr on OOM, without throwing.
|
||||
//
|
||||
// Flat strings are null-terminated. Leave room with length + 1
|
||||
UniquePtr<CharT[], JS::FreePolicy> newStr(js_pod_malloc<CharT>(length + 1));
|
||||
if (!newStr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
InflateUTF8CharsToBufferAndTerminate(chars->utf8, newStr.get(), length, chars->encoding);
|
||||
|
||||
JSFlatString* str = JSFlatString::new_<NoGC>(cx, newStr.get(), length);
|
||||
if (!str) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
mozilla::Unused << newStr.release();
|
||||
return str;
|
||||
}
|
||||
|
||||
template<>
|
||||
MOZ_ALWAYS_INLINE
|
||||
/* static */ JSFlatString*
|
||||
MakeFlatStringForAtomization(JSContext* cx, const AtomizeUTF8CharsWrapper* chars, size_t length)
|
||||
{
|
||||
if (length == 0) {
|
||||
return cx->emptyString();
|
||||
}
|
||||
|
||||
if (chars->encoding == JS::SmallestEncoding::UTF16) {
|
||||
return MakeUTF8AtomHelper<char16_t>(cx, chars, length);
|
||||
}
|
||||
return MakeUTF8AtomHelper<JS::Latin1Char>(cx, chars, length);
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
MOZ_ALWAYS_INLINE static JSAtom*
|
||||
AllocateNewAtom(JSContext* cx, const CharT* tbchars, size_t length, PinningBehavior pin,
|
||||
|
@ -817,7 +935,7 @@ AllocateNewAtom(JSContext* cx, const CharT* tbchars, size_t length, PinningBehav
|
|||
{
|
||||
AutoAllocInAtomsZone ac(cx);
|
||||
|
||||
JSFlatString* flat = NewStringCopyN<NoGC>(cx, tbchars, length);
|
||||
JSFlatString* flat = MakeFlatStringForAtomization(cx, tbchars, length);
|
||||
if (!flat) {
|
||||
// Grudgingly forgo last-ditch GC. The alternative would be to release
|
||||
// the lock, manually GC here, and retry from the top. If you fix this,
|
||||
|
@ -919,19 +1037,22 @@ js::AtomizeChars(JSContext* cx, const char16_t* chars, size_t length, PinningBeh
|
|||
JSAtom*
|
||||
js::AtomizeUTF8Chars(JSContext* cx, const char* utf8Chars, size_t utf8ByteLength)
|
||||
{
|
||||
// This could be optimized to hand the char16_t's directly to the JSAtom
|
||||
// instead of making a copy. UTF8CharsToNewTwoByteCharsZ should be
|
||||
// refactored to take an JSContext so that this function could also.
|
||||
|
||||
UTF8Chars utf8(utf8Chars, utf8ByteLength);
|
||||
// Since the static strings are all ascii, we can check them before trying anything else.
|
||||
if (JSAtom* s = cx->staticStrings().lookup(utf8Chars, utf8ByteLength)) {
|
||||
return s;
|
||||
}
|
||||
|
||||
size_t length;
|
||||
UniqueTwoByteChars chars(JS::UTF8CharsToNewTwoByteCharsZ(cx, utf8, &length).get());
|
||||
if (!chars) {
|
||||
HashNumber hash;
|
||||
JS::SmallestEncoding forCopy;
|
||||
UTF8Chars utf8(utf8Chars, utf8ByteLength);
|
||||
if (!GetUTF8AtomizationData(cx, utf8, &length, &forCopy, &hash)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return AtomizeChars(cx, chars.get(), length);
|
||||
AtomizeUTF8CharsWrapper chars(utf8, forCopy);
|
||||
AtomHasher::Lookup lookup(utf8Chars, utf8ByteLength, length, hash);
|
||||
return AtomizeAndCopyCharsFromLookup(cx, &chars, length, lookup, DoNotPinAtom, Nothing());
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
Загрузка…
Ссылка в новой задаче