Bug 1845095: Bloom filter for fast-rejecting `:has()`. r=emilio

Differential Revision: https://phabricator.services.mozilla.com/D184525
This commit is contained in:
David Shin 2023-07-28 13:13:34 +00:00
Родитель 1665e6df48
Коммит 398ecb5c1f
12 изменённых файлов: 241 добавлений и 20 удалений

Просмотреть файл

@ -7,6 +7,7 @@ use crate::bloom::BloomFilter;
use crate::nth_index_cache::{NthIndexCache, NthIndexCacheInner};
use crate::parser::{Selector, SelectorImpl};
use crate::relative_selector::cache::RelativeSelectorCache;
use crate::relative_selector::filter::RelativeSelectorFilterMap;
use crate::tree::{Element, OpaqueElement};
/// What kind of selector matching mode we should use.
@ -142,13 +143,15 @@ impl RelativeSelectorMatchingState {
}
}
/// Set of caches that speed up expensive selector matches.
/// Set of caches (And cache-likes) that speed up expensive selector matches.
#[derive(Default)]
pub struct SelectorCaches {
/// A cache to speed up nth-index-like selectors.
pub nth_index: NthIndexCache,
/// A cache to speed up relative selector matches. See module documentation.
pub relative_selector: RelativeSelectorCache,
/// A map of bloom filters to fast-reject relative selector matches.
pub relative_selector_filter_map: RelativeSelectorFilterMap,
}
/// Data associated with the matching process for a element. This context is

Просмотреть файл

@ -463,6 +463,25 @@ fn matches_relative_selectors<E: Element>(
// Did not match, continue on.
continue;
}
// See if we can fast-reject.
if context
.selector_caches
.relative_selector_filter_map
.fast_reject(
element,
relative_selector,
context.quirks_mode(),
)
{
// Alright, add as unmatched to cache.
context.selector_caches.relative_selector.add(
element.opaque(),
relative_selector,
RelativeSelectorCachedMatch::NotMatched,
);
// Then continue on.
continue;
}
let matched = matches_relative_selector(relative_selector, element, context, rightmost);
context.selector_caches.relative_selector.add(

Просмотреть файл

@ -203,9 +203,9 @@ macro_rules! with_all_bounds {
pub trait SelectorImpl: Clone + Debug + Sized + 'static {
type ExtraMatchingData<'a>: Sized + Default;
type AttrValue: $($InSelector)*;
type Identifier: $($InSelector)*;
type LocalName: $($InSelector)* + Borrow<Self::BorrowedLocalName>;
type NamespaceUrl: $($CommonBounds)* + Default + Borrow<Self::BorrowedNamespaceUrl>;
type Identifier: $($InSelector)* + PrecomputedHash;
type LocalName: $($InSelector)* + Borrow<Self::BorrowedLocalName> + PrecomputedHash;
type NamespaceUrl: $($CommonBounds)* + Default + Borrow<Self::BorrowedNamespaceUrl> + PrecomputedHash;
type NamespacePrefix: $($InSelector)* + Default;
type BorrowedNamespaceUrl: ?Sized + Eq;
type BorrowedLocalName: ?Sized + Eq;
@ -523,18 +523,17 @@ pub struct AncestorHashes {
pub packed_hashes: [u32; 3],
}
fn collect_ancestor_hashes<Impl: SelectorImpl>(
iter: SelectorIter<Impl>,
pub(crate) fn collect_selector_hashes<'a, Impl: SelectorImpl, Iter>(
iter: Iter,
quirks_mode: QuirksMode,
hashes: &mut [u32; 4],
len: &mut usize,
create_inner_iterator: fn(&'a Selector<Impl>) -> Iter,
) -> bool
where
Impl::Identifier: PrecomputedHash,
Impl::LocalName: PrecomputedHash,
Impl::NamespaceUrl: PrecomputedHash,
Iter: Iterator<Item = &'a Component<Impl>>,
{
for component in AncestorIter::new(iter) {
for component in iter {
let hash = match *component {
Component::LocalName(LocalName {
ref name,
@ -590,7 +589,13 @@ where
// in the filter if there's more than one selector, as that'd
// exclude elements that may match one of the other selectors.
if list.len() == 1 &&
!collect_ancestor_hashes(list.slice()[0].iter(), quirks_mode, hashes, len)
!collect_selector_hashes(
create_inner_iterator(&list.slice()[0]),
quirks_mode,
hashes,
len,
create_inner_iterator,
)
{
return false;
}
@ -608,12 +613,17 @@ where
true
}
fn collect_ancestor_hashes<Impl: SelectorImpl>(
iter: SelectorIter<Impl>,
quirks_mode: QuirksMode,
hashes: &mut [u32; 4],
len: &mut usize,
) {
collect_selector_hashes(AncestorIter::new(iter), quirks_mode, hashes, len, |s| AncestorIter(s.iter()));
}
impl AncestorHashes {
pub fn new<Impl: SelectorImpl>(selector: &Selector<Impl>, quirks_mode: QuirksMode) -> Self
where
Impl::Identifier: PrecomputedHash,
Impl::LocalName: PrecomputedHash,
Impl::NamespaceUrl: PrecomputedHash,
{
// Compute ancestor hashes for the bloom filter.
let mut hashes = [0u32; 4];
@ -3455,6 +3465,12 @@ pub mod tests {
}
}
impl PrecomputedHash for DummyAtom {
fn precomputed_hash(&self) -> u32 {
self.0.as_ptr() as u32
}
}
impl<'i> Parser<'i> for DummyParser {
type Impl = DummySelectorImpl;
type Error = SelectorParseErrorKind<'i>;

Просмотреть файл

@ -0,0 +1,158 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
/// Bloom filter for relative selectors.
use fxhash::FxHashMap;
use crate::bloom::BloomFilter;
use crate::context::QuirksMode;
use crate::parser::{
collect_selector_hashes, RelativeSelector, RelativeSelectorMatchHint,
};
use crate::tree::{Element, OpaqueElement};
use crate::SelectorImpl;
enum Entry {
/// Filter lookup happened once. Construction of the filter is expensive,
/// so this is set when the element for subtree traversal is encountered.
Lookup,
/// Filter lookup happened more than once, and the filter for this element's
/// subtree traversal is constructed. Could use special handlings for pseudo-classes
/// such as `:hover` and `:focus`, see Bug 1845503.
HasFilter(Box<BloomFilter>),
}
#[derive(Clone, Copy, Hash, Eq, PartialEq)]
enum TraversalKind {
Children,
Descendants,
}
fn add_to_filter<E: Element>(element: &E, filter: &mut BloomFilter, kind: TraversalKind) -> bool {
let mut child = element.first_element_child();
while let Some(e) = child {
if !e.add_element_unique_hashes(filter) {
return false;
}
if kind == TraversalKind::Descendants {
if !add_to_filter(&e, filter, kind) {
return false;
}
}
child = e.next_sibling_element();
}
true
}
#[derive(Clone, Copy, Hash, Eq, PartialEq)]
struct Key(OpaqueElement, TraversalKind);
/// Map of bloom filters for fast-rejecting relative selectors.
#[derive(Default)]
pub struct RelativeSelectorFilterMap {
map: FxHashMap<Key, Entry>,
}
fn fast_reject<Impl: SelectorImpl>(
selector: &RelativeSelector<Impl>,
quirks_mode: QuirksMode,
filter: &BloomFilter,
) -> bool {
let mut hashes = [0u32; 4];
let mut len = 0;
// For inner selectors, we only collect from the single rightmost compound.
// This is because inner selectors can cause breakouts: e.g. `.anchor:has(:is(.a .b) .c)`
// can match when `.a` is the ancestor of `.anchor`. Including `.a` would possibly fast
// reject the subtree for not having `.a`, even if the selector would match.
// Technically, if the selector's traversal is non-sibling subtree, we can traverse
// inner selectors up to the point where a descendant/child combinator is encountered
// (e.g. In `.anchor:has(:is(.a ~ .b) .c)`, `.a` is guaranteed to be the a descendant
// of `.anchor`). While that can be separately handled, well, this is simpler.
collect_selector_hashes(
selector.selector.iter(),
quirks_mode,
&mut hashes,
&mut len,
|s| s.iter(),
);
for i in 0..len {
if !filter.might_contain_hash(hashes[i]) {
// Definitely rejected.
return true;
}
}
false
}
impl RelativeSelectorFilterMap {
fn get_filter<E: Element>(&mut self, element: &E, kind: TraversalKind) -> Option<&BloomFilter> {
// Insert flag to indicate that we looked up the filter once, and
// create the filter if and only if that flag is there.
let key = Key(element.opaque(), kind);
let entry = self
.map
.entry(key)
.and_modify(|entry| {
if !matches!(entry, Entry::Lookup) {
return;
}
let mut filter = BloomFilter::new();
// Go through all children/descendants of this element and add their hashes.
if add_to_filter(element, &mut filter, kind) {
*entry = Entry::HasFilter(Box::new(filter));
}
})
.or_insert(Entry::Lookup);
match entry {
Entry::Lookup => None,
Entry::HasFilter(ref filter) => Some(filter.as_ref()),
}
}
/// Potentially reject the given selector for this element.
/// This may seem redundant in presence of the cache, but the cache keys into the
/// selector-element pair specifically, while this filter keys to the element
/// and the traversal kind, so it is useful for handling multiple selectors
/// that effectively end up looking at the same(-ish, for siblings) subtree.
pub fn fast_reject<Impl: SelectorImpl, E: Element>(
&mut self,
element: &E,
selector: &RelativeSelector<Impl>,
quirks_mode: QuirksMode,
) -> bool {
if matches!(selector.match_hint, RelativeSelectorMatchHint::InNextSibling) {
// Don't bother.
return false;
}
let is_sibling = matches!(
selector.match_hint,
RelativeSelectorMatchHint::InSibling |
RelativeSelectorMatchHint::InNextSiblingSubtree |
RelativeSelectorMatchHint::InSiblingSubtree
);
let is_subtree = matches!(
selector.match_hint,
RelativeSelectorMatchHint::InSubtree |
RelativeSelectorMatchHint::InNextSiblingSubtree |
RelativeSelectorMatchHint::InSiblingSubtree
);
let kind = if is_subtree {
TraversalKind::Descendants
} else {
TraversalKind::Children
};
if is_sibling {
// Contain the entirety of the parent's children/subtree in the filter, and use that.
// This is less likely to reject, especially for sibling subtree matches; however, it's less
// expensive memory-wise, compared to storing filters for each sibling.
element.parent_element().map_or(false, |parent| {
self.get_filter(&parent, kind)
.map_or(false, |filter| fast_reject(selector, quirks_mode, filter))
})
} else {
self.get_filter(element, kind)
.map_or(false, |filter| fast_reject(selector, quirks_mode, filter))
}
}
}

Просмотреть файл

@ -3,3 +3,4 @@
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
pub mod cache;
pub mod filter;

Просмотреть файл

@ -6,6 +6,7 @@
//! between layout and style.
use crate::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint};
use crate::bloom::BloomFilter;
use crate::matching::{ElementSelectorFlags, MatchingContext};
use crate::parser::SelectorImpl;
use std::fmt::Debug;
@ -160,4 +161,8 @@ pub trait Element: Sized + Clone + Debug {
fn ignores_nth_child_selectors(&self) -> bool {
false
}
/// Add hashes unique to this element to the given filter, returning true
/// if any got added.
fn add_element_unique_hashes(&self, filter: &mut BloomFilter) -> bool;
}

Просмотреть файл

@ -111,7 +111,8 @@ pub fn is_attr_name_excluded_from_filter(atom: &crate::Atom) -> bool {
*atom == atom!("class") || *atom == atom!("id") || *atom == atom!("style")
}
fn each_relevant_element_hash<E, F>(element: E, mut f: F)
/// Gather all relevant hash for fast-reject filters from an element.
pub fn each_relevant_element_hash<E, F>(element: E, mut f: F)
where
E: TElement,
F: FnMut(u32),

Просмотреть файл

@ -651,7 +651,7 @@ pub struct ThreadLocalStyleContext<E: TElement> {
/// A checker used to ensure that parallel.rs does not recurse indefinitely
/// even on arbitrarily deep trees. See Gecko bug 1376883.
pub stack_limit_checker: StackLimitChecker,
/// Collection of caches for speeding up expensive selector matches.
/// Collection of caches (And cache-likes) for speeding up expensive selector matches.
pub selector_caches: SelectorCaches,
}

Просмотреть файл

@ -312,8 +312,12 @@ impl ElementData {
return InvalidationResult::empty();
}
let mut processor =
StateAndAttrInvalidationProcessor::new(shared_context, element, self, selector_caches);
let mut processor = StateAndAttrInvalidationProcessor::new(
shared_context,
element,
self,
selector_caches,
);
let invalidator = TreeStyleInvalidator::new(element, stack_limit_checker, &mut processor);

Просмотреть файл

@ -15,6 +15,7 @@
//! the separation between the style system implementation and everything else.
use crate::applicable_declarations::ApplicableDeclarationBlock;
use crate::bloom::each_relevant_element_hash;
use crate::context::{PostAnimationTasks, QuirksMode, SharedStyleContext, UpdateAnimationsTasks};
use crate::data::ElementData;
use crate::dom::{LayoutIterator, NodeInfo, OpaqueNode, TDocument, TElement, TNode, TShadowRoot};
@ -69,6 +70,7 @@ use dom::{DocumentState, ElementState};
use euclid::default::Size2D;
use fxhash::FxHashMap;
use selectors::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint};
use selectors::bloom::{BloomFilter, BLOOM_HASH_MASK};
use selectors::matching::VisitedHandlingMode;
use selectors::matching::{ElementSelectorFlags, MatchingContext};
use selectors::sink::Push;
@ -2090,4 +2092,9 @@ impl<'le> ::selectors::Element for GeckoElement<'le> {
fn ignores_nth_child_selectors(&self) -> bool {
self.is_root_of_native_anonymous_subtree()
}
fn add_element_unique_hashes(&self, filter: &mut BloomFilter) -> bool {
each_relevant_element_hash(*self, |hash| filter.insert_hash(hash & BLOOM_HASH_MASK));
true
}
}

Просмотреть файл

@ -12,6 +12,7 @@ use crate::values::AtomIdent;
use crate::{CaseSensitivityExt, LocalName, Namespace, WeakAtom};
use dom::ElementState;
use selectors::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint};
use selectors::bloom::BloomFilter;
use selectors::matching::{ElementSelectorFlags, MatchingContext};
use selectors::{Element, OpaqueElement};
use std::cell::Cell;
@ -388,4 +389,9 @@ where
.assigned_slot()
.map(|e| ElementWrapper::new(e, self.snapshot_map))
}
fn add_element_unique_hashes(&self, _filter: &mut BloomFilter) -> bool {
// Should not be relevant in the context of checking past elements in invalidation.
false
}
}

Просмотреть файл

@ -137,7 +137,8 @@ where
let for_element = target.revalidation_match_results(stylist, bloom, selector_caches);
let for_candidate = candidate.revalidation_match_results(stylist, bloom, selector_caches);
let for_candidate =
candidate.revalidation_match_results(stylist, bloom, selector_caches);
// This assert "ensures", to some extent, that the two candidates have
// matched the same rulehash buckets, and as such, that the bits we're