From b307e7031d2d24d05b8b61a3272167e90d180e9f Mon Sep 17 00:00:00 2001 From: Kannan Vijayan Date: Wed, 17 Jun 2020 22:31:35 +0000 Subject: [PATCH] Bug 1645845 - Add ParserAtomsTable, parser atoms types, common parser names table, and base parser atoms implementation. r=mgaudet,tcampbell Differential Revision: https://phabricator.services.mozilla.com/D79714 --- js/moz.configure | 6 + js/public/Utility.h | 2 + js/src/NamespaceImports.h | 1 + js/src/frontend/CompilationInfo.h | 5 + js/src/frontend/ParserAtom.cpp | 442 ++++++++++++++++++++++++++++++ js/src/frontend/ParserAtom.h | 360 ++++++++++++++++++++++++ js/src/frontend/TokenStream.cpp | 6 +- js/src/frontend/TokenStream.h | 62 ++++- js/src/frontend/moz.build | 1 + js/src/jsapi.cpp | 4 + js/src/jsnum.cpp | 14 +- js/src/jsnum.h | 5 + js/src/util/Text.cpp | 35 +++ js/src/util/Text.h | 13 + js/src/vm/JSContext.h | 7 + js/src/vm/Runtime.h | 7 + js/src/vm/StringType.cpp | 16 +- js/src/vm/StringType.h | 3 + 18 files changed, 970 insertions(+), 19 deletions(-) create mode 100644 js/src/frontend/ParserAtom.cpp create mode 100644 js/src/frontend/ParserAtom.h diff --git a/js/moz.configure b/js/moz.configure index 8c792c44b014..3fed13541604 100644 --- a/js/moz.configure +++ b/js/moz.configure @@ -62,6 +62,12 @@ set_define('JS_64BIT', depends(target)(lambda t: t.bitness == 64 or None)) set_define('JS_PUNBOX64', depends(target)(lambda t: t.bitness == 64 or None)) set_define('JS_NUNBOX32', depends(target)(lambda t: t.bitness == 32 or None)) +# Bits of Stencil-related parser-atoms work are being landed before +# being enabled. This define controls that code, and will be removed, +# along with guard code in ParserAtom.cpp, when the final transition +# to parser atoms lands. 
+set_define('JS_PARSER_ATOMS', None) + # SpiderMonkey as a shared library, and how its symbols are exported # ================================================================== diff --git a/js/public/Utility.h b/js/public/Utility.h index 8bb35be48467..3e366a159ce9 100644 --- a/js/public/Utility.h +++ b/js/public/Utility.h @@ -21,6 +21,7 @@ #include "jstypes.h" #include "mozmemory.h" +#include "js/TypeDecls.h" /* The public JS engine namespace. */ namespace JS {} @@ -650,6 +651,7 @@ struct FreePolicy { typedef mozilla::UniquePtr UniqueChars; typedef mozilla::UniquePtr UniqueTwoByteChars; +typedef mozilla::UniquePtr UniqueLatin1Chars; } // namespace JS diff --git a/js/src/NamespaceImports.h b/js/src/NamespaceImports.h index 204bdb753da2..fd32c6212ee0 100644 --- a/js/src/NamespaceImports.h +++ b/js/src/NamespaceImports.h @@ -74,6 +74,7 @@ using JS::Latin1CharsZ; using JS::TwoByteChars; using JS::TwoByteCharsZ; using JS::UniqueChars; +using JS::UniqueLatin1Chars; using JS::UniqueTwoByteChars; using JS::UTF8Chars; using JS::UTF8CharsZ; diff --git a/js/src/frontend/CompilationInfo.h b/js/src/frontend/CompilationInfo.h index 654cf0066512..976259b9cab8 100644 --- a/js/src/frontend/CompilationInfo.h +++ b/js/src/frontend/CompilationInfo.h @@ -12,6 +12,7 @@ #include "mozilla/Variant.h" #include "ds/LifoAlloc.h" +#include "frontend/ParserAtom.h" #include "frontend/SharedContext.h" #include "frontend/Stencil.h" #include "frontend/UsedNameTracker.h" @@ -75,6 +76,9 @@ struct MOZ_RAII CompilationInfo : public JS::CustomAutoRooter { // onto them. AutoKeepAtoms keepAtoms; + // Table of parser atoms for this compilation. 
+ ParserAtomsTable parserAtoms; + Directives directives; ScopeContext scopeContext; @@ -142,6 +146,7 @@ struct MOZ_RAII CompilationInfo : public JS::CustomAutoRooter { cx(cx), options(options), keepAtoms(cx), + parserAtoms(cx), directives(options.forceStrictMode()), scopeContext(enclosingScope, enclosingEnv), script(cx), diff --git a/js/src/frontend/ParserAtom.cpp b/js/src/frontend/ParserAtom.cpp new file mode 100644 index 000000000000..fac57bb4ec2f --- /dev/null +++ b/js/src/frontend/ParserAtom.cpp @@ -0,0 +1,442 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "frontend/ParserAtom.h" + +#include + +#include "jsnum.h" + +#include "frontend/NameCollections.h" +#include "vm/JSContext.h" +#include "vm/Printer.h" +#include "vm/Runtime.h" +#include "vm/StringType.h" + +// +// Parser-Atoms should be disabled for now. This check ensures that. +// NOTE: This will be removed when the final transition patches from +// JS-atoms to parser-atoms lands. +// +#ifdef JS_PARSER_ATOMS +# error "Parser atoms define should remain disabled until this is removed." +#endif + +using namespace js; +using namespace js::frontend; + +namespace js { +namespace frontend { + +static JS::OOM PARSER_ATOMS_OOM; + +mozilla::GenericErrorResult RaiseParserAtomsOOMError(JSContext* cx) { + js::ReportOutOfMemory(cx); + return mozilla::Err(PARSER_ATOMS_OOM); +} + +bool ParserAtomEntry::equalsJSAtom(JSAtom* other) const { + // Compare hashes first. + if (hash_ != other->hash()) { + return false; + } + if (length_ != other->length()) { + return false; + } + + JS::AutoCheckCannotGC nogc; + + if (hasTwoByteChars()) { + // Compare heap-allocated 16-bit chars to atom. + return other->hasLatin1Chars() + ? 
EqualChars(twoByteChars(), other->latin1Chars(nogc), length_) + : EqualChars(twoByteChars(), other->twoByteChars(nogc), length_); + } + + MOZ_ASSERT(hasLatin1Chars()); + return other->hasLatin1Chars() + ? EqualChars(latin1Chars(), other->latin1Chars(nogc), length_) + : EqualChars(latin1Chars(), other->twoByteChars(nogc), length_); +} + +UniqueChars ParserAtomToPrintableString(JSContext* cx, ParserAtomId atom) { + const ParserAtomEntry* entry = atom.entry(); + Sprinter sprinter(cx); + if (!sprinter.init()) { + return nullptr; + } + size_t length = entry->length(); + if (entry->hasLatin1Chars()) { + if (!QuoteString( + &sprinter, mozilla::Range(entry->latin1Chars(), length))) { + return nullptr; + } + } else { + if (!QuoteString( + &sprinter, mozilla::Range(entry->twoByteChars(), length))) { + return nullptr; + } + } + return sprinter.release(); +} + +bool ParserAtomEntry::isIndex(uint32_t* indexp) const { + if (hasLatin1Chars()) { + return js::CheckStringIsIndex(latin1Chars(), length(), indexp); + } + return js::CheckStringIsIndex(twoByteChars(), length(), indexp); +} + +JS::Result ParserAtomEntry::toJSAtom(JSContext* cx) const { + if (jsatom_) { + return jsatom_; + } + + if (hasLatin1Chars()) { + jsatom_ = AtomizeChars(cx, latin1Chars(), length()); + } else { + jsatom_ = AtomizeChars(cx, twoByteChars(), length()); + } + if (!jsatom_) { + return RaiseParserAtomsOOMError(cx); + } + return jsatom_; +} + +bool ParserAtomEntry::toNumber(JSContext* cx, double* result) const { + return hasLatin1Chars() ? 
CharsToNumber(cx, latin1Chars(), length(), result) + : CharsToNumber(cx, twoByteChars(), length(), result); +} + +ParserAtomsTable::ParserAtomsTable(JSContext* cx) + : entrySet_(cx), wellKnownTable_(*cx->runtime()->commonParserNames) {} + +JS::Result ParserAtomsTable::addEntry( + JSContext* cx, EntrySet::AddPtr addPtr, ParserAtomEntry&& entry) { + UniquePtr uniqueEntry( + cx->new_(std::move(entry))); + if (!uniqueEntry) { + return RaiseParserAtomsOOMError(cx); + } + ParserAtomEntry* entryPtr = uniqueEntry.get(); + + if (!entrySet_.add(addPtr, std::move(uniqueEntry))) { + return RaiseParserAtomsOOMError(cx); + } + ParserAtomId id(entryPtr); + + return id; +} + +static const uint16_t MAX_LATIN1_CHAR = 0xff; + +template +static void DrainChar16Seq(CharT* buf, InflatedChar16Sequence seq) { + static_assert( + std::is_same_v || std::is_same_v, + "Invalid target buffer type."); + CharT* cur = buf; + while (seq.hasMore()) { + char16_t ch = seq.next(); + if constexpr (std::is_same_v) { + MOZ_ASSERT(ch <= MAX_LATIN1_CHAR); + } + *cur = ch; + cur++; + } +} + +template +JS::Result ParserAtomsTable::internChar16Seq( + JSContext* cx, EntrySet::AddPtr add, InflatedChar16Sequence seq, + uint32_t length, HashNumber hash) { + using UniqueCharsT = mozilla::UniquePtr; + UniqueCharsT copy(cx->pod_malloc(length)); + if (!copy) { + return RaiseParserAtomsOOMError(cx); + } + DrainChar16Seq(copy.get(), seq); + ParserAtomEntry ent = ParserAtomEntry::make(std::move(copy), length, hash); + return addEntry(cx, add, std::move(ent)); +} + +template +JS::Result ParserAtomsTable::lookupOrInternChar16Seq( + JSContext* cx, InflatedChar16Sequence seq) { + // Check against well-known. + ParserAtomId wk = wellKnownTable_.lookupChar16Seq(seq); + if (wk) { + return wk; + } + + // Check for existing atom. 
+ SpecificParserAtomLookup lookup(seq); + EntrySet::AddPtr add = entrySet_.lookupForAdd(lookup); + if (add) { + return ParserAtomId(add->get()); + } + + // Compute the total length and the storage requirements. + bool wide = false; + uint32_t length = 0; + InflatedChar16Sequence seqCopy = seq; + while (seqCopy.hasMore()) { + char16_t ch = seqCopy.next(); + wide = wide || (ch > MAX_LATIN1_CHAR); + length += 1; + } + + HashNumber hash = lookup.hash(); + return wide ? internChar16Seq(cx, add, seq, length, hash) + : internChar16Seq(cx, add, seq, length, hash); +} + +JS::Result ParserAtomsTable::internChar16( + JSContext* cx, const char16_t* char16Ptr, uint32_t length) { + InflatedChar16Sequence seq(char16Ptr, length); + + return lookupOrInternChar16Seq(cx, seq); +} + +JS::Result ParserAtomsTable::internAscii( + JSContext* cx, const char* asciiPtr, uint32_t length) { + const Latin1Char* latin1Ptr = reinterpret_cast(asciiPtr); + return internLatin1(cx, latin1Ptr, length); +} + +JS::Result ParserAtomsTable::internLatin1( + JSContext* cx, const Latin1Char* latin1Ptr, uint32_t length) { + // ASCII strings are strict subsets of Latin1 strings, and so can be used + // in the same (const) ways. + InflatedChar16Sequence seq(latin1Ptr, length); + + // Check against well-known. + ParserAtomId wk = wellKnownTable_.lookupChar16Seq(seq); + if (wk) { + return wk; + } + + // Look up. + SpecificParserAtomLookup lookup(seq); + EntrySet::AddPtr add = entrySet_.lookupForAdd(lookup); + if (add) { + return ParserAtomId(add->get()); + } + + // Existing entry not found, heap-allocate a copy and add it to the table. 
+ UniqueLatin1Chars copy = js::DuplicateString(cx, latin1Ptr, length); + if (!copy) { + return RaiseParserAtomsOOMError(cx); + } + ParserAtomEntry ent = + ParserAtomEntry::make(std::move(copy), length, lookup.hash()); + return addEntry(cx, add, std::move(ent)); +} + +JS::Result ParserAtomsTable::internUtf8( + JSContext* cx, const mozilla::Utf8Unit* utf8Ptr, uint32_t length) { + // If source text is ASCII, then the length of the target char buffer + // is the same as the length of the UTF8 input. Convert it to a Latin1 + // encoded string on the heap. + UTF8Chars utf8(utf8Ptr, length); + if (FindSmallestEncoding(utf8) == JS::SmallestEncoding::ASCII) { + // As ascii strings are a subset of Latin1 strings, and each encoding + // unit is the same size, we can reliably cast this `Utf8Unit*` + // to a `Latin1Char*`. + const Latin1Char* latin1Ptr = reinterpret_cast(utf8Ptr); + return internLatin1(cx, latin1Ptr, length); + } + + InflatedChar16Sequence seq(utf8Ptr, length); + + // Otherwise, slowpath lookup/interning path that identifies the + // proper target encoding. + return lookupOrInternChar16Seq(cx, seq); +} + +JS::Result ParserAtomsTable::internJSAtom(JSContext* cx, + JSAtom* atom) { + JS::AutoCheckCannotGC nogc; + + auto result = + atom->hasLatin1Chars() + ? 
internLatin1(cx, atom->latin1Chars(nogc), atom->length()) + : internChar16(cx, atom->twoByteChars(nogc), atom->length()); + if (result.isErr()) { + return result; + } + ParserAtomId id = result.unwrap(); + id.entry()->setAtom(atom); + return id; +} + +static void FillChar16Buffer(char16_t* buf, const ParserAtomEntry* ent) { + if (ent->hasLatin1Chars()) { + std::copy(ent->latin1Chars(), ent->latin1Chars() + ent->length(), buf); + } else { + std::copy(ent->twoByteChars(), ent->twoByteChars() + ent->length(), buf); + } +} + +JS::Result ParserAtomsTable::concatAtoms( + JSContext* cx, ParserAtomId prefix, ParserAtomId suffix) { + const ParserAtomEntry* prefixEntry = prefix.entry(); + const ParserAtomEntry* suffixEntry = suffix.entry(); + + bool latin1 = prefixEntry->hasLatin1Chars() && suffixEntry->hasLatin1Chars(); + size_t prefixLength = prefixEntry->length(); + size_t suffixLength = suffixEntry->length(); + size_t concatLength = prefixLength + suffixLength; + + if (latin1) { + // Concatenate a latin1 string and add it to the table. + UniqueLatin1Chars copy(cx->pod_malloc(concatLength)); + if (!copy) { + return RaiseParserAtomsOOMError(cx); + } + mozilla::PodCopy(copy.get(), prefixEntry->latin1Chars(), prefixLength); + mozilla::PodCopy(copy.get() + prefixLength, suffixEntry->latin1Chars(), + suffixLength); + + InflatedChar16Sequence seq(copy.get(), concatLength); + + // Check against well-known. + ParserAtomId wk = wellKnownTable_.lookupChar16Seq(seq); + if (wk) { + return wk; + } + + SpecificParserAtomLookup lookup(seq); + EntrySet::AddPtr add = entrySet_.lookupForAdd(lookup); + if (add) { + return ParserAtomId(add->get()); + } + + ParserAtomEntry ent = + ParserAtomEntry::make(std::move(copy), concatLength, lookup.hash()); + + return addEntry(cx, add, std::move(ent)); + } + + // Concatenate a char16 string and add it to the table. 
+ UniqueTwoByteChars copy(cx->pod_malloc(concatLength)); + if (!copy) { + return RaiseParserAtomsOOMError(cx); + } + FillChar16Buffer(copy.get(), prefixEntry); + FillChar16Buffer(copy.get() + prefixLength, suffixEntry); + + InflatedChar16Sequence seq(copy.get(), concatLength); + + // Check against well-known. + ParserAtomId wk = wellKnownTable_.lookupChar16Seq(seq); + if (wk) { + return wk; + } + + SpecificParserAtomLookup lookup(seq); + EntrySet::AddPtr add = entrySet_.lookupForAdd(lookup); + if (add) { + return ParserAtomId(add->get()); + } + + ParserAtomEntry ent = + ParserAtomEntry::make(std::move(copy), concatLength, lookup.hash()); + + return addEntry(cx, add, std::move(ent)); +} + +template +ParserAtomId WellKnownParserAtoms::lookupChar16Seq( + InflatedChar16Sequence seq) const { + SpecificParserAtomLookup lookup(seq); + EntrySet::Ptr get = entrySet_.readonlyThreadsafeLookup(lookup); + if (get) { + return ParserAtomId(get->get()); + } + return ParserAtomId::Invalid(); +} + +bool WellKnownParserAtoms::initSingle(JSContext* cx, ParserNameId* name, + const char* str) { + MOZ_ASSERT(name != nullptr); + + unsigned int len = strlen(str); + + MOZ_ASSERT(FindSmallestEncoding(UTF8Chars(str, len)) == + JS::SmallestEncoding::ASCII); + + UniqueLatin1Chars copy(cx->pod_malloc(len)); + if (!copy) { + return false; + } + mozilla::PodCopy(copy.get(), reinterpret_cast(str), len); + + InflatedChar16Sequence seq(copy.get(), len); + SpecificParserAtomLookup lookup(seq); + + ParserAtomEntry ent = + ParserAtomEntry::make(std::move(copy), len, lookup.hash()); + + UniquePtr uniqueEntry( + cx->new_(std::move(ent))); + if (!uniqueEntry) { + return false; + } + ParserNameId nm(uniqueEntry.get()); + + if (!entrySet_.putNew(lookup, std::move(uniqueEntry))) { + return false; + } + + *name = nm; + return true; +} + +bool WellKnownParserAtoms::init(JSContext* cx) { +#define COMMON_NAME_INIT(idpart, id, text) \ + if (!initSingle(cx, &(id), text)) { \ + return false; \ + } + 
FOR_EACH_COMMON_PROPERTYNAME(COMMON_NAME_INIT) +#undef COMMON_NAME_INIT + return true; +} + +} /* namespace frontend */ +} /* namespace js */ + +bool JSRuntime::initializeParserAtoms(JSContext* cx) { +#ifdef JS_PARSER_ATOMS + MOZ_ASSERT(!commonParserNames); + + if (parentRuntime) { + commonParserNames = parentRuntime->commonParserNames; + return true; + } + + UniquePtr names( + js_new(cx)); + if (!names || !names->init(cx)) { + return false; + } + + commonParserNames = names.release(); +#else + commonParserNames = nullptr; +#endif // JS_PARSER_ATOMS + return true; +} + +void JSRuntime::finishParserAtoms() { +#ifdef JS_PARSER_ATOMS + if (!parentRuntime) { + js_delete(commonParserNames.ref()); + } +#else + MOZ_ASSERT(!commonParserNames); +#endif // JS_PARSER_ATOMS +} diff --git a/js/src/frontend/ParserAtom.h b/js/src/frontend/ParserAtom.h new file mode 100644 index 000000000000..4c85d7ca8fba --- /dev/null +++ b/js/src/frontend/ParserAtom.h @@ -0,0 +1,360 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef frontend_ParserAtom_h +#define frontend_ParserAtom_h + +#include "mozilla/HashFunctions.h" // HashString +#include "mozilla/Variant.h" // mozilla::Variant + +#include "ds/LifoAlloc.h" // LifoAlloc +#include "js/HashTable.h" // HashSet +#include "js/UniquePtr.h" // js::UniquePtr +#include "js/Vector.h" // Vector +#include "vm/CommonPropertyNames.h" +#include "vm/StringType.h" // CompareChars, StringEqualsAscii + +namespace js { +namespace frontend { + +class ParserNameId; + +template +class SpecificParserAtomLookup; + +class ParserAtomsTable; + +mozilla::GenericErrorResult RaiseParserAtomsOOMError(JSContext* cx); + +/** + * A ParserAtomEntry is an in-parser representation of an interned atomic + * string. It mostly mirrors the information carried by a JSAtom*. + * + * ParserAtomEntry structs are individually heap-allocated and own their + * heap-allocated contents. + */ +class ParserAtomEntry { + friend class ParserAtomsTable; + + public: + // Owned characters, either 8-bit Latin1, or 16-bit Char16 + mozilla::Variant chars_; + + // The length of the buffer in chars_. + uint32_t length_; + + // The JSAtom-compatible hash of the string. + HashNumber hash_; + + // Used to dynamically optimize the mapping of ParserAtoms to JSAtom*s. + // If the entry comes from an atom or has been mapped to an + // atom previously, the atom reference is kept here. + mutable JSAtom* jsatom_ = nullptr; + + template + ParserAtomEntry(CharsT&& chars, uint32_t length, HashNumber hash) + : chars_(std::forward(chars)), length_(length), hash_(hash) {} + + public: + // ParserAtomEntries own their content buffers in chars_, and thus cannot + // be copy-constructed - as a new chars would need to be allocated. 
+ ParserAtomEntry(const ParserAtomEntry&) = delete; + + ParserAtomEntry(ParserAtomEntry&& other) = default; + + template + static ParserAtomEntry make(mozilla::UniquePtr&& ptr, + uint32_t length, HashNumber hash) { + return ParserAtomEntry(std::move(ptr), length, hash); + } + + bool hasLatin1Chars() const { return chars_.is(); } + bool hasTwoByteChars() const { return chars_.is(); } + + const Latin1Char* latin1Chars() const { + MOZ_ASSERT(hasLatin1Chars()); + return chars_.as().get(); + } + const char16_t* twoByteChars() const { + MOZ_ASSERT(hasTwoByteChars()); + return chars_.as().get(); + } + + bool isIndex(uint32_t* indexp) const; + + HashNumber hash() const { return hash_; } + uint32_t length() const { return length_; } + + bool equalsJSAtom(JSAtom* other) const; + + template + bool equalsSeq(HashNumber hash, InflatedChar16Sequence seq) const; + + void setAtom(JSAtom* atom) const { + MOZ_ASSERT(atom != nullptr); + if (jsatom_ != nullptr) { + MOZ_ASSERT(jsatom_ == atom); + return; + } + MOZ_ASSERT(equalsJSAtom(atom)); + jsatom_ = atom; + } + + // Convert this entry to a js-atom. The first time this method is called + // the entry will cache the JSAtom pointer to return later. + JS::Result toJSAtom(JSContext* cx) const; + + // Convert this entry to a number. 
+ bool toNumber(JSContext* cx, double* result) const; +}; + +class ParserAtomId { + protected: + const ParserAtomEntry* entry_; + + struct InitInvalid {}; + + explicit ParserAtomId(InitInvalid) : entry_(nullptr) {} + + public: + explicit ParserAtomId(const ParserAtomEntry* entry) : entry_(entry) { + MOZ_ASSERT(entry_ != nullptr); + } + ParserAtomId() = default; + + bool isValid() const { return entry_ != nullptr; } + const ParserAtomEntry* entry() const { + MOZ_ASSERT(isValid()); + return entry_; + } + MOZ_IMPLICIT operator bool() const { return isValid(); } + + static ParserAtomId Invalid() { return ParserAtomId(InitInvalid{}); } + + // As the "unchecked" tag signifies, this method should only be called + // after it has been confirmed that this atom is a name and not an index. + inline ParserNameId toNameIdUnchecked() const; + + bool operator==(const ParserAtomId& other) { return entry_ == other.entry_; } + bool operator!=(const ParserAtomId& other) { return !(*this == other); } + + bool isIndex(uint32_t* indexp) const { return entry()->isIndex(indexp); } + bool equalsJSAtom(JSAtom* other) const { + return entry()->equalsJSAtom(other); + } + + size_t length() const { return entry()->length(); } + size_t empty() const { return length() == 0; } + + struct Hasher { + using Lookup = ParserAtomId; + + static inline HashNumber hash(const Lookup& l) { + return DefaultHasher::hash(l.entry()); + } + static inline bool match(const ParserAtomId& entry, + const ParserAtomId& lookup) { + return lookup == entry; + } + }; +}; + +class ParserNameId : public ParserAtomId { + explicit ParserNameId(InitInvalid) : ParserAtomId(InitInvalid{}) {} + + public: + ParserNameId() = default; + explicit ParserNameId(const ParserAtomEntry* entry) : ParserAtomId(entry) {} + ParserNameId(const ParserNameId& other) = default; + + static ParserNameId Invalid() { return ParserNameId(InitInvalid{}); } +}; + +inline ParserNameId ParserAtomId::toNameIdUnchecked() const { + return 
ParserNameId(entry_); +} + +UniqueChars ParserAtomToPrintableString(JSContext* cx, ParserAtomId atom); + +/** + * A lookup structure that allows for querying ParserAtoms in + * a hashtable using a flexible input type that supports string + * representations of various forms. + */ +class ParserAtomLookup { + protected: + HashNumber hash_; + + ParserAtomLookup(HashNumber hash) : hash_(hash) {} + + public: + HashNumber hash() const { return hash_; } + + virtual bool equalsEntry(const ParserAtomEntry* entry) const = 0; +}; + +struct ParserAtomLookupHasher { + using Lookup = ParserAtomLookup; + + static inline HashNumber hash(const Lookup& l) { return l.hash(); } + static inline bool match(const UniquePtr& entry, + const Lookup& l) { + return l.equalsEntry(entry.get()); + } +}; + +/** + * WellKnown maintains a well-structured reference to common names. + * A single instance of it is held on the main Runtime, and allows + * for the looking up of names, but not addition after initialization. + */ +class WellKnownParserAtoms { + public: + /* Various built-in or commonly-used names. */ +#define PROPERTYNAME_FIELD(idpart, id, text) ParserNameId id{}; + FOR_EACH_COMMON_PROPERTYNAME(PROPERTYNAME_FIELD) +#undef PROPERTYNAME_FIELD + + private: + using EntrySet = HashSet, ParserAtomLookupHasher, + TempAllocPolicy>; + EntrySet entrySet_; + + bool initSingle(JSContext* cx, ParserNameId* name, const char* str); + + public: + explicit WellKnownParserAtoms(JSContext* cx) : entrySet_(cx) {} + + bool init(JSContext* cx); + + template + ParserAtomId lookupChar16Seq(InflatedChar16Sequence seq) const; +}; + +/** + * A ParserAtomsTable owns and manages the set of ParserAtom entries 
+ */ +class ParserAtomsTable { + private: + using EntrySet = HashSet, ParserAtomLookupHasher, + TempAllocPolicy>; + EntrySet entrySet_; + const WellKnownParserAtoms& wellKnownTable_; + + public: + explicit ParserAtomsTable(JSContext* cx); + + private: + JS::Result addEntry(JSContext* cx, + EntrySet::AddPtr addPtr, + ParserAtomEntry&& entry); + + template + JS::Result internChar16Seq( + JSContext* cx, EntrySet::AddPtr add, InflatedChar16Sequence seq, + uint32_t length, HashNumber hash); + + template + JS::Result lookupOrInternChar16Seq( + JSContext* cx, InflatedChar16Sequence seq); + + public: + JS::Result internChar16(JSContext* cx, + const char16_t* char16Ptr, + uint32_t length); + + JS::Result internAscii(JSContext* cx, + const char* asciiPtr, + uint32_t length); + + JS::Result internLatin1(JSContext* cx, + const Latin1Char* latin1Ptr, + uint32_t length); + + JS::Result internUtf8(JSContext* cx, + const mozilla::Utf8Unit* utf8Ptr, + uint32_t length); + + JS::Result internJSAtom(JSContext* cx, JSAtom* atom); + + JS::Result concatAtoms(JSContext* cx, ParserAtomId prefix, + ParserAtomId suffix); + + // Lift this code + // Once all the parser code has been changed to use a ParserAtomId, these + // can go away. + JS::Result toJSAtom(JSContext* cx, ParserAtomId id) const { + return id.entry()->toJSAtom(cx); + } +}; + +template +class SpecificParserAtomLookup : public ParserAtomLookup { + // The sequence of characters to look up. 
+ InflatedChar16Sequence seq_; + + public: + explicit SpecificParserAtomLookup(const InflatedChar16Sequence& seq) + : SpecificParserAtomLookup(seq, computeHash(seq)) {} + + SpecificParserAtomLookup(const InflatedChar16Sequence& seq, + HashNumber hash) + : ParserAtomLookup(hash), seq_(seq) { + MOZ_ASSERT(computeHash(seq_) == hash); + } + + virtual bool equalsEntry(const ParserAtomEntry* entry) const override { + return entry->equalsSeq(hash_, seq_); + } + + private: + static HashNumber computeHash(InflatedChar16Sequence seq) { + HashNumber hash = 0; + while (seq.hasMore()) { + hash = mozilla::AddToHash(hash, seq.next()); + } + return hash; + } +}; + +template +inline bool ParserAtomEntry::equalsSeq( + HashNumber hash, InflatedChar16Sequence seq) const { + // Compare hashes first. + if (hash_ != hash) { + return false; + } + + if (hasTwoByteChars()) { + const char16_t* chars = twoByteChars(); + for (uint32_t i = 0; i < length_; i++) { + if (!seq.hasMore() || chars[i] != seq.next()) { + return false; + } + } + if (seq.hasMore()) { + return false; + } + + } else { + const Latin1Char* chars = latin1Chars(); + for (uint32_t i = 0; i < length_; i++) { + if (!seq.hasMore() || char16_t(chars[i]) != seq.next()) { + return false; + } + } + if (seq.hasMore()) { + return false; + } + } + return true; +} + +} /* namespace frontend */ +} /* namespace js */ + +#endif // frontend_ParserAtom_h diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp index fc1d434c372f..2724d39001dc 100644 --- a/js/src/frontend/TokenStream.cpp +++ b/js/src/frontend/TokenStream.cpp @@ -2231,7 +2231,7 @@ MOZ_MUST_USE bool TokenStreamSpecific::identifierName( return false; } - atom = drainCharBufferIntoAtom(anyCharsAccess().cx); + atom = drainCharBufferIntoAtom(); } else { // Escape-free identifiers can be created directly from sourceUnits. 
const Unit* chars = identStart; @@ -2247,7 +2247,7 @@ MOZ_MUST_USE bool TokenStreamSpecific::identifierName( } } - atom = atomizeSourceChars(anyCharsAccess().cx, MakeSpan(chars, length)); + atom = atomizeSourceChars(MakeSpan(chars, length)); } if (!atom) { return false; @@ -3668,7 +3668,7 @@ bool TokenStreamSpecific::getStringOrTemplateToken( } } - JSAtom* atom = drainCharBufferIntoAtom(anyCharsAccess().cx); + JSAtom* atom = drainCharBufferIntoAtom(); if (!atom) { return false; } diff --git a/js/src/frontend/TokenStream.h b/js/src/frontend/TokenStream.h index 2f95fceb1949..1d80eb2f44b5 100644 --- a/js/src/frontend/TokenStream.h +++ b/js/src/frontend/TokenStream.h @@ -1537,8 +1537,22 @@ class TokenStreamCharsShared { return mozilla::IsAscii(static_cast(unit)); } - JSAtom* drainCharBufferIntoAtom(JSContext* cx) { - JSAtom* atom = AtomizeChars(cx, charBuffer.begin(), charBuffer.length()); + JSAtom* drainCharBufferIntoAtom() { + JSAtom* atom = AtomizeChars(this->compilationInfo->cx, charBuffer.begin(), + charBuffer.length()); + if (!atom) { + return nullptr; + } + + // Add to parser atoms table. +#ifdef JS_PARSER_ATOMS + auto maybeId = this->compilationInfo->parserAtoms.internChar16( + this->compilationInfo->cx, charBuffer.begin(), charBuffer.length()); + if (maybeId.isErr()) { + return nullptr; + } +#endif // JS_PARSER_ATOMS + charBuffer.clear(); return atom; } @@ -1602,8 +1616,7 @@ class TokenStreamCharsBase : public TokenStreamCharsShared { sourceUnits.ungetCodeUnit(); } - static MOZ_ALWAYS_INLINE JSAtom* atomizeSourceChars( - JSContext* cx, mozilla::Span units); + MOZ_ALWAYS_INLINE JSAtom* atomizeSourceChars(mozilla::Span units); /** * Try to match a non-LineTerminator ASCII code point. 
Return true iff it @@ -1689,18 +1702,45 @@ inline void TokenStreamCharsBase::consumeKnownCodeUnit(int32_t unit) { } template <> -/* static */ MOZ_ALWAYS_INLINE JSAtom* -TokenStreamCharsBase::atomizeSourceChars( - JSContext* cx, mozilla::Span units) { - return AtomizeChars(cx, units.data(), units.size()); +MOZ_ALWAYS_INLINE JSAtom* TokenStreamCharsBase::atomizeSourceChars( + mozilla::Span units) { + JSAtom* atom = + AtomizeChars(this->compilationInfo->cx, units.data(), units.size()); + if (!atom) { + return nullptr; + } + +#ifdef JS_PARSER_ATOMS + auto maybeId = this->compilationInfo->parserAtoms.internChar16( + this->compilationInfo->cx, units.data(), units.size()); + if (maybeId.isErr()) { + return nullptr; + } +#endif // JS_PARSER_ATOMS + + return atom; } template <> /* static */ MOZ_ALWAYS_INLINE JSAtom* TokenStreamCharsBase::atomizeSourceChars( - JSContext* cx, mozilla::Span units) { + mozilla::Span units) { auto chars = ToCharSpan(units); - return AtomizeUTF8Chars(cx, chars.data(), chars.size()); + JSAtom* atom = + AtomizeUTF8Chars(this->compilationInfo->cx, chars.data(), chars.size()); + if (!atom) { + return nullptr; + } + +#ifdef JS_PARSER_ATOMS + auto maybeId = this->compilationInfo->parserAtoms.internUtf8( + this->compilationInfo->cx, units.data(), units.size()); + if (maybeId.isErr()) { + return nullptr; + } +#endif // JS_PARSER_ATOMS + + return atom; } template @@ -2141,7 +2181,7 @@ class GeneralTokenStreamChars : public SpecializedTokenStreamCharsBase { return nullptr; } - return drainCharBufferIntoAtom(anyChars.cx); + return drainCharBufferIntoAtom(); } }; diff --git a/js/src/frontend/moz.build b/js/src/frontend/moz.build index b46addcfa023..70bcbef95e52 100644 --- a/js/src/frontend/moz.build +++ b/js/src/frontend/moz.build @@ -58,6 +58,7 @@ UNIFIED_SOURCES += [ 'ParseContext.cpp', 'ParseNode.cpp', 'ParseNodeVerify.cpp', + 'ParserAtom.cpp', 'PropOpEmitter.cpp', 'SharedContext.cpp', 'SourceNotes.cpp', diff --git a/js/src/jsapi.cpp b/js/src/jsapi.cpp 
index 6a1c53855b73..71f50b7a6df8 100644 --- a/js/src/jsapi.cpp +++ b/js/src/jsapi.cpp @@ -464,6 +464,10 @@ JS_PUBLIC_API bool JS::InitSelfHostedCode(JSContext* cx) { return false; } + if (!rt->initializeParserAtoms(cx)) { + return false; + } + #ifndef JS_CODEGEN_NONE if (!rt->createJitRuntime(cx)) { return false; diff --git a/js/src/jsnum.cpp b/js/src/jsnum.cpp index 62e422d65776..acfc2c188c48 100644 --- a/js/src/jsnum.cpp +++ b/js/src/jsnum.cpp @@ -1661,8 +1661,8 @@ bool JS_FASTCALL js::NumberValueToStringBuffer(JSContext* cx, const Value& v, } template -static bool CharsToNumber(JSContext* cx, const CharT* chars, size_t length, - double* result) { +static bool CharsToNumberImpl(JSContext* cx, const CharT* chars, size_t length, + double* result) { if (length == 1) { CharT c = chars[0]; if ('0' <= c && c <= '9') { @@ -1731,6 +1731,16 @@ static bool CharsToNumber(JSContext* cx, const CharT* chars, size_t length, return true; } +bool js::CharsToNumber(JSContext* cx, const Latin1Char* chars, size_t length, + double* result) { + return CharsToNumberImpl(cx, chars, length, result); +} + +bool js::CharsToNumber(JSContext* cx, const char16_t* chars, size_t length, + double* result) { + return CharsToNumberImpl(cx, chars, length, result); +} + bool js::StringToNumber(JSContext* cx, JSString* str, double* result) { AutoCheckCannotGC nogc; JSLinearString* linearStr = str->ensureLinear(cx); diff --git a/js/src/jsnum.h b/js/src/jsnum.h index 75337664d687..27ad712178eb 100644 --- a/js/src/jsnum.h +++ b/js/src/jsnum.h @@ -176,6 +176,11 @@ template extern MOZ_MUST_USE bool GetDecimalNonInteger(JSContext* cx, const CharT* start, const CharT* end, double* dp); +bool CharsToNumber(JSContext* cx, const Latin1Char* chars, size_t length, + double* result); +bool CharsToNumber(JSContext* cx, const char16_t* chars, size_t length, + double* result); + extern MOZ_MUST_USE bool StringToNumber(JSContext* cx, JSString* str, double* result); diff --git a/js/src/util/Text.cpp 
b/js/src/util/Text.cpp index ddb9962f63fb..b93db9afe55b 100644 --- a/js/src/util/Text.cpp +++ b/js/src/util/Text.cpp @@ -73,6 +73,19 @@ UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, JSContext* cx, return ret; } +UniqueLatin1Chars js::DuplicateStringToArena(arena_id_t destArenaId, + JSContext* cx, + const JS::Latin1Char* s, + size_t n) { + auto ret = cx->make_pod_arena_array(destArenaId, n + 1); + if (!ret) { + return nullptr; + } + PodCopy(ret.get(), s, n); + ret[n] = '\0'; + return ret; +} + UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId, JSContext* cx, const char16_t* s) { @@ -106,6 +119,19 @@ UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, const char* s, return ret; } +UniqueLatin1Chars js::DuplicateStringToArena(arena_id_t destArenaId, + const JS::Latin1Char* s, + size_t n) { + UniqueLatin1Chars ret( + js_pod_arena_malloc(destArenaId, n + 1)); + if (!ret) { + return nullptr; + } + PodCopy(ret.get(), s, n); + ret[n] = '\0'; + return ret; +} + UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId, const char16_t* s) { return DuplicateStringToArena(destArenaId, s, js_strlen(s)); @@ -130,6 +156,11 @@ UniqueChars js::DuplicateString(JSContext* cx, const char* s) { return DuplicateStringToArena(js::MallocArena, cx, s); } +UniqueLatin1Chars js::DuplicateString(JSContext* cx, const JS::Latin1Char* s, + size_t n) { + return DuplicateStringToArena(js::MallocArena, cx, s, n); +} + UniqueTwoByteChars js::DuplicateString(JSContext* cx, const char16_t* s) { return DuplicateStringToArena(js::MallocArena, cx, s); } @@ -147,6 +178,10 @@ UniqueChars js::DuplicateString(const char* s, size_t n) { return DuplicateStringToArena(js::MallocArena, s, n); } +UniqueLatin1Chars js::DuplicateString(const JS::Latin1Char* s, size_t n) { + return DuplicateStringToArena(js::MallocArena, s, n); +} + UniqueTwoByteChars js::DuplicateString(const char16_t* s) { return DuplicateStringToArena(js::MallocArena, s); } diff --git 
a/js/src/util/Text.h b/js/src/util/Text.h index fee533fb78a8..655eb02de890 100644 --- a/js/src/util/Text.h +++ b/js/src/util/Text.h @@ -108,6 +108,10 @@ extern UniqueChars DuplicateStringToArena(arena_id_t destArenaId, JSContext* cx, extern UniqueChars DuplicateStringToArena(arena_id_t destArenaId, JSContext* cx, const char* s, size_t n); +extern UniqueLatin1Chars DuplicateStringToArena(arena_id_t destArenaId, + JSContext* cx, + const Latin1Char* s, size_t n); + extern UniqueTwoByteChars DuplicateStringToArena(arena_id_t destArenaId, JSContext* cx, const char16_t* s); @@ -126,6 +130,10 @@ extern UniqueChars DuplicateStringToArena(arena_id_t destArenaId, extern UniqueChars DuplicateStringToArena(arena_id_t destArenaId, const char* s, size_t n); +extern UniqueLatin1Chars DuplicateStringToArena(arena_id_t destArenaId, + const JS::Latin1Char* s, + size_t n); + extern UniqueTwoByteChars DuplicateStringToArena(arena_id_t destArenaId, const char16_t* s); @@ -136,6 +144,9 @@ extern UniqueChars DuplicateString(JSContext* cx, const char* s); extern UniqueChars DuplicateString(JSContext* cx, const char* s, size_t n); +extern UniqueLatin1Chars DuplicateString(JSContext* cx, const JS::Latin1Char* s, + size_t n); + extern UniqueTwoByteChars DuplicateString(JSContext* cx, const char16_t* s); extern UniqueTwoByteChars DuplicateString(JSContext* cx, const char16_t* s, @@ -149,6 +160,8 @@ extern UniqueChars DuplicateString(const char* s); extern UniqueChars DuplicateString(const char* s, size_t n); +extern UniqueLatin1Chars DuplicateString(const JS::Latin1Char* s, size_t n); + extern UniqueTwoByteChars DuplicateString(const char16_t* s); extern UniqueTwoByteChars DuplicateString(const char16_t* s, size_t n); diff --git a/js/src/vm/JSContext.h b/js/src/vm/JSContext.h index ae5e44c4ffc4..c04280be4148 100644 --- a/js/src/vm/JSContext.h +++ b/js/src/vm/JSContext.h @@ -40,6 +40,10 @@ class AutoAllocInAtomsZone; class AutoMaybeLeaveAtomsZone; class AutoRealm; +namespace frontend { +class 
WellKnownParserAtoms; +} // namespace frontend + namespace jit { class JitActivation; class JitContext; @@ -265,6 +269,9 @@ struct JS_PUBLIC_API JSContext : public JS::RootingContext, // Accessors for immutable runtime data. JSAtomState& names() { return *runtime_->commonNames; } + js::frontend::WellKnownParserAtoms& parserNames() { + return *runtime_->commonParserNames; + } js::StaticStrings& staticStrings() { return *runtime_->staticStrings; } js::SharedImmutableStringsCache& sharedImmutableStrings() { return runtime_->sharedImmutableStrings(); diff --git a/js/src/vm/Runtime.h b/js/src/vm/Runtime.h index 798ce54523b9..3a0a046744f2 100644 --- a/js/src/vm/Runtime.h +++ b/js/src/vm/Runtime.h @@ -109,6 +109,10 @@ class Simulator; #endif } // namespace jit +namespace frontend { +class WellKnownParserAtoms; +} // namespace frontend + // [SMDOC] JS Engine Threading // // Threads interacting with a runtime are divided into two categories: @@ -742,7 +746,9 @@ struct JSRuntime { public: bool initializeAtoms(JSContext* cx); + bool initializeParserAtoms(JSContext* cx); void finishAtoms(); + void finishParserAtoms(); bool atomsAreFinished() const { return !atoms_ && !permanentAtomsDuringInit_; } @@ -783,6 +789,7 @@ struct JSRuntime { // Cached pointers to various permanent property names. js::WriteOnceData commonNames; + js::WriteOnceData commonParserNames; // All permanent atoms in the runtime, other than those in staticStrings. 
// Access to this does not require a lock because it is frozen and thus diff --git a/js/src/vm/StringType.cpp b/js/src/vm/StringType.cpp index 3c8267cfff67..1833096f4833 100644 --- a/js/src/vm/StringType.cpp +++ b/js/src/vm/StringType.cpp @@ -1152,9 +1152,7 @@ bool js::StringEqualsAscii(JSLinearString* str, const char* asciiBytes, } template -/* static */ -bool JSLinearString::isIndexSlow(const CharT* s, size_t length, - uint32_t* indexp) { +bool js::CheckStringIsIndex(const CharT* s, size_t length, uint32_t* indexp) { MOZ_ASSERT(length > 0); MOZ_ASSERT(length <= UINT32_CHAR_BUFFER_LENGTH); MOZ_ASSERT(IsAsciiDigit(*s), @@ -1194,6 +1192,18 @@ bool JSLinearString::isIndexSlow(const CharT* s, size_t length, return false; } +template bool js::CheckStringIsIndex(const Latin1Char* s, size_t length, + uint32_t* indexp); +template bool js::CheckStringIsIndex(const char16_t* s, size_t length, + uint32_t* indexp); + +template +/* static */ +bool JSLinearString::isIndexSlow(const CharT* s, size_t length, + uint32_t* indexp) { + return js::CheckStringIsIndex(s, length, indexp); +} + template bool JSLinearString::isIndexSlow(const Latin1Char* s, size_t length, uint32_t* indexp); diff --git a/js/src/vm/StringType.h b/js/src/vm/StringType.h index 301a817a0cb5..5359efc0c9d5 100644 --- a/js/src/vm/StringType.h +++ b/js/src/vm/StringType.h @@ -1259,6 +1259,9 @@ MOZ_ALWAYS_INLINE JSAtom* JSLinearString::morphAtomizedStringIntoPermanentAtom( namespace js { +template +bool CheckStringIsIndex(const CharT* s, size_t length, uint32_t* indexp); + /** * An indexable characters class exposing unaligned, little-endian encoded * char16_t data.