Bug 1499170 - Add an atom bit to know whether we're ascii lowercase. r=njn

And thus massively speed up ascii-case-insensitive atom comparisons when both
atoms are lowercase (which is the common case by far).

This removes almost all the slow selector-matching in this page, and it seems
an easier fix than storing the lowercased version of all class-names in quirks
mode in elements and selectors...

Differential Revision: https://phabricator.services.mozilla.com/D10945

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Emilio Cobos Álvarez 2018-11-13 12:47:40 +00:00
Родитель 601e1dfa28
Коммит 6ca25ecbe6
10 изменённых файлов: 148 добавлений и 124 удалений

Просмотреть файл

@ -1169,16 +1169,13 @@ nsAttrValue::Contains(nsAtom* aValue, nsCaseTreatment aCaseSensitive) const
case eAtomBase:
{
nsAtom* atom = GetAtomValue();
if (aCaseSensitive == eCaseMatters) {
return aValue == atom;
}
// For performance reasons, don't do a full-on Unicode case-insensitive
// string comparison. This is only used for quirks mode anyway.
return
nsContentUtils::EqualsIgnoreASCIICase(nsDependentAtomString(aValue),
nsDependentAtomString(atom));
return nsContentUtils::EqualsIgnoreASCIICase(aValue, atom);
}
default:
{
@ -1188,16 +1185,11 @@ nsAttrValue::Contains(nsAtom* aValue, nsCaseTreatment aCaseSensitive) const
return array->Contains(aValue);
}
nsDependentAtomString val1(aValue);
for (RefPtr<nsAtom> *cur = array->Elements(),
*end = cur + array->Length();
cur != end; ++cur) {
for (RefPtr<nsAtom>& cur : *array) {
// For performance reasons, don't do a full-on Unicode
// case-insensitive string comparison. This is only used for quirks mode
// anyway.
if (nsContentUtils::EqualsIgnoreASCIICase(val1,
nsDependentAtomString(*cur))) {
if (nsContentUtils::EqualsIgnoreASCIICase(aValue, cur)) {
return true;
}
}

Просмотреть файл

@ -2058,6 +2058,25 @@ public:
static JSContext *GetCurrentJSContext();
/**
 * ASCII-case-insensitive comparison between two atoms.
 *
 * Identical atom pointers always match. When both atoms report
 * IsAsciiLowercase(), two distinct pointers are treated as a mismatch without
 * looking at the characters; otherwise the string overload decides.
 */
static bool EqualsIgnoreASCIICase(nsAtom* aAtom1, nsAtom* aAtom2)
{
  if (aAtom1 != aAtom2) {
    // Two different atoms that are both already ASCII lowercase would make
    // the slow string comparison return false, so skip it in that case.
    const bool bothLowercase =
      aAtom1->IsAsciiLowercase() && aAtom2->IsAsciiLowercase();
    return !bothLowercase &&
           EqualsIgnoreASCIICase(nsDependentAtomString(aAtom1),
                                 nsDependentAtomString(aAtom2));
  }
  return true;
}
/**
* Case insensitive comparison between two strings. However it only ignores
* case for ASCII characters a-z.

Просмотреть файл

@ -16,8 +16,8 @@ sys.path.insert(0, os.path.join(os.path.dirname(GECKO_DIR), "properties"))
import build
# Matches lines like `GK_ATOM(foo, "foo", 0x12345678, nsStaticAtom, PseudoElementAtom)`.
PATTERN = re.compile('^GK_ATOM\(([^,]*),[^"]*"([^"]*)",\s*(0x[0-9a-f]+),\s*([^,]*),\s*([^)]*)\)',
# Matches lines like `GK_ATOM(foo, "foo", 0x12345678, true, nsStaticAtom, PseudoElementAtom)`.
PATTERN = re.compile('^GK_ATOM\(([^,]*),[^"]*"([^"]*)",\s*(0x[0-9a-f]+),\s*[^,]*,\s*([^,]*),\s*([^)]*)\)',
re.MULTILINE)
FILE = "include/nsGkAtomList.h"

Просмотреть файл

@ -175,13 +175,19 @@ impl WeakAtom {
/// Returns whether this atom is static.
#[inline]
pub fn is_static(&self) -> bool {
unsafe { (*self.as_ptr()).mIsStatic() != 0 }
self.0.mIsStatic() != 0
}
/// Returns whether this atom's ASCII-lowercase flag is set.
#[inline]
fn is_ascii_lowercase(&self) -> bool {
    let flag = self.0.mIsAsciiLowercase();
    flag != 0
}
/// Returns the length of the atom string.
#[inline]
pub fn len(&self) -> u32 {
unsafe { (*self.as_ptr()).mLength() }
self.0.mLength()
}
/// Returns whether this atom is the empty string.
@ -199,41 +205,54 @@ impl WeakAtom {
/// Convert this atom to ASCII lower-case
pub fn to_ascii_lowercase(&self) -> Atom {
let slice = self.as_slice();
match slice
.iter()
.position(|&char16| (b'A' as u16) <= char16 && char16 <= (b'Z' as u16))
{
None => self.clone(),
Some(i) => {
let mut buffer: [u16; 64] = unsafe { mem::uninitialized() };
let mut vec;
let mutable_slice = if let Some(buffer_prefix) = buffer.get_mut(..slice.len()) {
buffer_prefix.copy_from_slice(slice);
buffer_prefix
} else {
vec = slice.to_vec();
&mut vec
};
for char16 in &mut mutable_slice[i..] {
if *char16 <= 0x7F {
*char16 = (*char16 as u8).to_ascii_lowercase() as u16
}
}
Atom::from(&*mutable_slice)
},
if self.is_ascii_lowercase() {
return self.clone();
}
let slice = self.as_slice();
let mut buffer: [u16; 64] = unsafe { mem::uninitialized() };
let mut vec;
let mutable_slice = if let Some(buffer_prefix) = buffer.get_mut(..slice.len()) {
buffer_prefix.copy_from_slice(slice);
buffer_prefix
} else {
vec = slice.to_vec();
&mut vec
};
for char16 in &mut *mutable_slice {
if *char16 <= 0x7F {
*char16 = (*char16 as u8).to_ascii_lowercase() as u16
}
}
Atom::from(&*mutable_slice)
}
/// Compares two atoms for equality, ignoring ASCII case.
///
/// Equal atoms match immediately. If both atoms are flagged as ASCII
/// lowercase, unequal atoms are reported as a mismatch without comparing
/// characters (debug builds assert that the slow comparison agrees).
/// Every other pair falls through to the character-by-character comparison.
#[inline]
pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
    if self == other {
        return true;
    }

    let both_lowercase = self.is_ascii_lowercase() && other.is_ascii_lowercase();
    if !both_lowercase {
        return self.eq_ignore_ascii_case_slow(other);
    }

    // Both atoms are already ASCII lowercase, so the pointer-equality check
    // above was the whole answer.
    debug_assert!(!self.eq_ignore_ascii_case_slow(other));
    false
}
fn eq_ignore_ascii_case_slow(&self, other: &Self) -> bool {
let a = self.as_slice();
let b = other.as_slice();
a.len() == b.len() && a.iter().zip(b).all(|(&a16, &b16)| {
if a.len() != b.len() {
return false;
}
a.iter().zip(b).all(|(&a16, &b16)| {
if a16 <= 0x7F && b16 <= 0x7F {
(a16 as u8).eq_ignore_ascii_case(&(b16 as u8))
} else {
@ -241,13 +260,6 @@ impl WeakAtom {
}
})
}
/// Return whether this atom is an ASCII-case-insensitive match for the given string
///
/// Both sides are walked as `char` sequences with every `char` mapped through
/// `to_ascii_lowercase`. The atom side yields `Result`s, so each `char` of
/// `other` is wrapped in `Ok` to make the two sequences comparable.
pub fn eq_str_ignore_ascii_case(&self, other: &str) -> bool {
self.chars()
.map(|r| r.map(|c: char| c.to_ascii_lowercase()))
.eq(other.chars().map(|c: char| Ok(c.to_ascii_lowercase())))
}
}
impl fmt::Debug for WeakAtom {

Просмотреть файл

@ -10,6 +10,7 @@ class Atom():
self.ty = ty
self.atom_type = self.__class__.__name__
self.hash = hash_string(string)
self.is_ascii_lowercase = is_ascii_lowercase(string)
class PseudoElementAtom(Atom):
@ -52,3 +53,12 @@ def hash_string(s):
for c in s:
h = wrapping_multiply(GOLDEN_RATIO_U32, rotate_left_5(h) ^ ord(c))
return h
# Returns True if lowercasing this string in an ASCII-case-insensitive way
# would leave the string unchanged, i.e. it contains none of the characters
# 'A'..'Z'. Non-ASCII characters are never treated as uppercase, and the empty
# string counts as lowercase.
def is_ascii_lowercase(s):
    return not any('A' <= c <= 'Z' for c in s)

Просмотреть файл

@ -2355,9 +2355,9 @@ def generate_nsgkatomlist_h(output, *ignore):
"#ifdef small\n"
"#undef small\n"
"#endif\n\n"
"// GK_ATOM(identifier, string, hash, gecko_type, atom_type)\n" +
"".join(["GK_ATOM(%s, \"%s\", 0x%08x, %s, %s)\n" %
(a.ident, a.string, a.hash, a.ty, a.atom_type)
"// GK_ATOM(identifier, string, hash, is_ascii_lower, gecko_type, atom_type)\n" +
"".join(["GK_ATOM(%s, \"%s\", 0x%08x, %s, %s, %s)\n" %
(a.ident, a.string, a.hash, str(a.is_ascii_lowercase).lower(), a.ty, a.atom_type)
for a in STATIC_ATOMS]))

Просмотреть файл

@ -21,9 +21,7 @@ class nsDynamicAtom;
// This class encompasses both static and dynamic atoms.
//
// - In places where static and dynamic atoms can be used, use RefPtr<nsAtom>.
// This is by far the most common case. (The exception to this is the HTML5
// parser, which does its own weird thing, and uses non-refcounted dynamic
// atoms.)
// This is by far the most common case.
//
// - In places where only static atoms can appear, use nsStaticAtom* to avoid
// unnecessary refcounting. This is a moderately common case.
@ -75,6 +73,13 @@ public:
//
uint32_t hash() const { return mHash; }
// Returns true if ToLowercaseASCII would return the string unchanged, as
// recorded in the mIsAsciiLowercase bit when the atom was constructed.
bool IsAsciiLowercase() const { return mIsAsciiLowercase; }
// We can't use NS_INLINE_DECL_THREADSAFE_REFCOUNTING because the refcounting
// of this type is special.
MozExternalRefCountType AddRef();
@ -84,16 +89,20 @@ public:
protected:
// Used by nsStaticAtom.
constexpr nsAtom(uint32_t aLength, uint32_t aHash)
constexpr nsAtom(uint32_t aLength, uint32_t aHash, bool aIsAsciiLowercase)
: mLength(aLength)
, mIsStatic(true)
, mIsAsciiLowercase(aIsAsciiLowercase)
, mHash(aHash)
{}
// Used by nsDynamicAtom.
nsAtom(const nsAString& aString, uint32_t aHash)
nsAtom(const nsAString& aString,
uint32_t aHash,
bool aIsAsciiLowercase)
: mLength(aString.Length())
, mIsStatic(false)
, mIsAsciiLowercase(aIsAsciiLowercase)
, mHash(aHash)
{
}
@ -101,8 +110,8 @@ protected:
~nsAtom() = default;
const uint32_t mLength:30;
// NOTE: There's one free bit here.
const uint32_t mIsStatic:1;
const uint32_t mIsAsciiLowercase:1;
const uint32_t mHash;
};
@ -123,8 +132,8 @@ public:
// Atom.py and assert in nsAtomTable::RegisterStaticAtoms that the two
// hashes match.
constexpr nsStaticAtom(uint32_t aLength, uint32_t aHash,
uint32_t aStringOffset)
: nsAtom(aLength, aHash)
uint32_t aStringOffset, bool aIsAsciiLowercase)
: nsAtom(aLength, aHash, aIsAsciiLowercase)
, mStringOffset(aStringOffset)
{}
@ -167,14 +176,10 @@ private:
// These shouldn't be used directly, even by friend classes. The
// Create()/Destroy() methods use them.
static nsDynamicAtom* CreateInner(const nsAString& aString, uint32_t aHash);
nsDynamicAtom(const nsAString& aString, uint32_t aHash);
nsDynamicAtom(const nsAString& aString, uint32_t aHash, bool aIsAsciiLowercase);
~nsDynamicAtom() {}
// Creation/destruction is done by friend classes. The first Create() is for
// dynamic normal atoms, the second is for dynamic HTML5 atoms.
static nsDynamicAtom* Create(const nsAString& aString, uint32_t aHash);
static nsDynamicAtom* Create(const nsAString& aString);
static void Destroy(nsDynamicAtom* aAtom);
mozilla::ThreadSafeAutoRefCnt mRefCnt;

Просмотреть файл

@ -66,42 +66,45 @@ enum class GCKind {
// replaying.
static Atomic<int32_t, ReleaseAcquire, recordreplay::Behavior::DontPreserve> gUnusedAtomCount(0);
nsDynamicAtom::nsDynamicAtom(const nsAString& aString, uint32_t aHash)
: nsAtom(aString, aHash)
nsDynamicAtom::nsDynamicAtom(const nsAString& aString, uint32_t aHash, bool aIsAsciiLowercase)
: nsAtom(aString, aHash, aIsAsciiLowercase)
, mRefCnt(1)
{
}
nsDynamicAtom*
nsDynamicAtom::CreateInner(const nsAString& aString, uint32_t aHash)
// Returns true if ToLowercaseASCII would return the string unchanged.
static bool
IsAsciiLowercase(const char16_t* aString, const uint32_t aLength)
{
// We tack the chars onto the end of the nsDynamicAtom object.
size_t numCharBytes = (aString.Length() + 1) * sizeof(char16_t);
size_t numTotalBytes = sizeof(nsDynamicAtom) + numCharBytes;
for (uint32_t i = 0; i < aLength; ++i) {
if (IS_ASCII_UPPER(aString[i])) {
return false;
}
}
nsDynamicAtom* atom = (nsDynamicAtom*)moz_xmalloc(numTotalBytes);
new (atom) nsDynamicAtom(aString, aHash);
memcpy(const_cast<char16_t*>(atom->String()),
PromiseFlatString(aString).get(), numCharBytes);
MOZ_ASSERT(atom->String()[atom->GetLength()] == char16_t(0));
MOZ_ASSERT(atom->Equals(aString));
return atom;
return true;
}
nsDynamicAtom*
nsDynamicAtom::Create(const nsAString& aString, uint32_t aHash)
{
nsDynamicAtom* atom = CreateInner(aString, aHash);
MOZ_ASSERT(atom->mHash == HashString(atom->String(), atom->GetLength()));
return atom;
}
// We tack the chars onto the end of the nsDynamicAtom object.
size_t numCharBytes = (aString.Length() + 1) * sizeof(char16_t);
size_t numTotalBytes = sizeof(nsDynamicAtom) + numCharBytes;
nsDynamicAtom*
nsDynamicAtom::Create(const nsAString& aString)
{
return CreateInner(aString, /* hash */ 0);
bool isAsciiLower = ::IsAsciiLowercase(aString.Data(), aString.Length());
nsDynamicAtom* atom = (nsDynamicAtom*)moz_xmalloc(numTotalBytes);
new (atom) nsDynamicAtom(aString, aHash, isAsciiLower);
memcpy(const_cast<char16_t*>(atom->String()),
PromiseFlatString(aString).get(), numCharBytes);
MOZ_ASSERT(atom->String()[atom->GetLength()] == char16_t(0));
MOZ_ASSERT(atom->Equals(aString));
MOZ_ASSERT(atom->mHash == HashString(atom->String(), atom->GetLength()));
MOZ_ASSERT(atom->mIsAsciiLowercase == isAsciiLower);
return atom;
}
void
@ -181,26 +184,22 @@ struct AtomTableKey
MOZ_ASSERT(HashString(mUTF16String, mLength) == mHash);
}
AtomTableKey(const char16_t* aUTF16String, uint32_t aLength,
uint32_t* aHashOut)
AtomTableKey(const char16_t* aUTF16String, uint32_t aLength)
: mUTF16String(aUTF16String)
, mUTF8String(nullptr)
, mLength(aLength)
{
mHash = HashString(mUTF16String, mLength);
*aHashOut = mHash;
}
AtomTableKey(const char* aUTF8String,
uint32_t aLength,
uint32_t* aHashOut,
bool* aErr)
: mUTF16String(nullptr)
, mUTF8String(aUTF8String)
, mLength(aLength)
{
mHash = HashUTF8AsUTF16(mUTF8String, mLength, aErr);
*aHashOut = mHash;
}
const char16_t* mUTF16String;
@ -641,6 +640,7 @@ nsAtomTable::RegisterStaticAtoms(const nsStaticAtom* aAtoms, size_t aAtomsLen)
const nsStaticAtom* atom = &aAtoms[i];
MOZ_ASSERT(nsCRT::IsAscii(atom->String()));
MOZ_ASSERT(NS_strlen(atom->String()) == atom->GetLength());
MOZ_ASSERT(atom->IsAsciiLowercase() == ::IsAsciiLowercase(atom->String(), atom->GetLength()));
// This assertion ensures the static atom's precomputed hash value matches
// what would be computed by mozilla::HashString(aStr), which is what we use
@ -677,9 +677,8 @@ NS_Atomize(const char* aUTF8String)
already_AddRefed<nsAtom>
nsAtomTable::Atomize(const nsACString& aUTF8String)
{
uint32_t hash;
bool err;
AtomTableKey key(aUTF8String.Data(), aUTF8String.Length(), &hash, &err);
AtomTableKey key(aUTF8String.Data(), aUTF8String.Length(), &err);
if (MOZ_UNLIKELY(err)) {
MOZ_ASSERT_UNREACHABLE("Tried to atomize invalid UTF-8.");
// The input was invalid UTF-8. Let's replace the errors with U+FFFD
@ -694,13 +693,12 @@ nsAtomTable::Atomize(const nsACString& aUTF8String)
if (he->mAtom) {
RefPtr<nsAtom> atom = he->mAtom;
return atom.forget();
}
nsString str;
CopyUTF8toUTF16(aUTF8String, str);
RefPtr<nsAtom> atom = dont_AddRef(nsDynamicAtom::Create(str, hash));
RefPtr<nsAtom> atom = dont_AddRef(nsDynamicAtom::Create(str, key.mHash));
he->mAtom = atom;
@ -724,19 +722,18 @@ NS_Atomize(const char16_t* aUTF16String)
already_AddRefed<nsAtom>
nsAtomTable::Atomize(const nsAString& aUTF16String)
{
uint32_t hash;
AtomTableKey key(aUTF16String.Data(), aUTF16String.Length(), &hash);
AtomTableKey key(aUTF16String.Data(), aUTF16String.Length());
nsAtomSubTable& table = SelectSubTable(key);
MutexAutoLock lock(table.mLock);
AtomTableEntry* he = table.Add(key);
if (he->mAtom) {
RefPtr<nsAtom> atom = he->mAtom;
return atom.forget();
}
RefPtr<nsAtom> atom = dont_AddRef(nsDynamicAtom::Create(aUTF16String, hash));
RefPtr<nsAtom> atom =
dont_AddRef(nsDynamicAtom::Create(aUTF16String, key.mHash));
he->mAtom = atom;
return atom.forget();
@ -754,8 +751,7 @@ nsAtomTable::AtomizeMainThread(const nsAString& aUTF16String)
{
MOZ_ASSERT(NS_IsMainThread());
RefPtr<nsAtom> retVal;
uint32_t hash;
AtomTableKey key(aUTF16String.Data(), aUTF16String.Length(), &hash);
AtomTableKey key(aUTF16String.Data(), aUTF16String.Length());
auto p = sRecentlyUsedMainThreadAtoms.Lookup(key);
if (p) {
retVal = p.Data();
@ -770,7 +766,7 @@ nsAtomTable::AtomizeMainThread(const nsAString& aUTF16String)
retVal = he->mAtom;
} else {
RefPtr<nsAtom> newAtom =
dont_AddRef(nsDynamicAtom::Create(aUTF16String, hash));
dont_AddRef(nsDynamicAtom::Create(aUTF16String, key.mHash));
he->mAtom = newAtom;
retVal = newAtom.forget();
}
@ -810,8 +806,7 @@ NS_GetStaticAtom(const nsAString& aUTF16String)
nsStaticAtom*
nsAtomTable::GetStaticAtom(const nsAString& aUTF16String)
{
uint32_t hash;
AtomTableKey key(aUTF16String.Data(), aUTF16String.Length(), &hash);
AtomTableKey key(aUTF16String.Data(), aUTF16String.Length());
nsAtomSubTable& table = SelectSubTable(key);
MutexAutoLock lock(table.mLock);
AtomTableEntry* he = table.Search(key);
@ -823,21 +818,11 @@ nsAtomTable::GetStaticAtom(const nsAString& aUTF16String)
void ToLowerCaseASCII(RefPtr<nsAtom>& aAtom)
{
// Assume the common case is that the atom is already ASCII lowercase.
bool reAtomize = false;
const nsDependentString existing(aAtom->GetUTF16String(), aAtom->GetLength());
for (size_t i = 0; i < existing.Length(); ++i) {
if (IS_ASCII_UPPER(existing[i])) {
reAtomize = true;
break;
}
}
// If the string was already lowercase, we're done.
if (!reAtomize) {
if (aAtom->IsAsciiLowercase()) {
return;
}
nsAutoString lowercased;
ToLowerCaseASCII(existing, lowercased);
ToLowerCaseASCII(nsDependentAtomString(aAtom), lowercased);
aAtom = NS_Atomize(lowercased);
}

Просмотреть файл

@ -20,7 +20,7 @@ extern constexpr GkAtoms gGkAtoms = {
// u"bb",
// u"ccc",
//
#define GK_ATOM(name_, value_, hash_, type_, atom_type_) \
#define GK_ATOM(name_, value_, hash_, is_ascii_lower_, type_, atom_type_) \
u"" value_,
#include "nsGkAtomList.h"
#undef GK_ATOM
@ -48,12 +48,12 @@ extern constexpr GkAtoms gGkAtoms = {
// offsetof(GkAtoms, mAtoms[static_cast<size_t>(GkAtoms::Atoms::ccc)]) -
// offsetof(GkAtoms, ccc_string)),
//
#define GK_ATOM(name_, value_, hash_, type_, atom_type_) \
#define GK_ATOM(name_, value_, hash_, is_ascii_lower_, type_, atom_type_) \
nsStaticAtom( \
sizeof(value_) - 1, hash_, \
offsetof(GkAtoms, \
mAtoms[static_cast<size_t>(GkAtoms::Atoms::name_)]) - \
offsetof(GkAtoms, name_##_string)),
offsetof(GkAtoms, name_##_string), is_ascii_lower_),
#include "nsGkAtomList.h"
#undef GK_ATOM
}

Просмотреть файл

@ -58,8 +58,9 @@
class name_ : public nsStaticAtom \
{ \
public: \
constexpr name_(uint32_t aLength, uint32_t aHash, uint32_t aOffset) \
: nsStaticAtom(aLength, aHash, aOffset) {} \
constexpr name_(uint32_t aLength, uint32_t aHash, uint32_t aOffset, \
bool aIsAsciiLowercase) \
: nsStaticAtom(aLength, aHash, aOffset, aIsAsciiLowercase) {} \
};
DEFINE_STATIC_ATOM_SUBCLASS(nsCSSAnonBoxPseudoStaticAtom)
@ -86,7 +87,7 @@ struct GkAtoms
// const char16_t bb_string[sizeof("bb")];
// const char16_t ccc_string[sizeof("ccc")];
//
#define GK_ATOM(name_, value_, hash_, type_, atom_type_) \
#define GK_ATOM(name_, value_, hash_, is_ascii_lower_, type_, atom_type_) \
const char16_t name_##_string[sizeof(value_)];
#include "nsGkAtomList.h"
#undef GK_ATOM
@ -99,7 +100,7 @@ struct GkAtoms
// bb,
// ccc,
//
#define GK_ATOM(name_, value_, hash_, type_, atom_type_) \
#define GK_ATOM(name_, value_, hash_, is_ascii_lower_, type_, atom_type_) \
name_,
#include "nsGkAtomList.h"
#undef GK_ATOM
@ -166,7 +167,7 @@ public:
// &mozilla::detail::gGkAtoms.mAtoms[
// static_cast<size_t>(mozilla::detail::GkAtoms::Atoms::ccc)]);
//
#define GK_ATOM(name_, value_, hash_, type_, atom_type_) \
#define GK_ATOM(name_, value_, hash_, is_ascii_lower_, type_, atom_type_) \
static constexpr nsStaticAtom* name_ = \
const_cast<nsStaticAtom*>( \
&mozilla::detail::gGkAtoms.mAtoms[ \