зеркало из https://github.com/mozilla/gecko-dev.git
servo: Merge #18231 - Bug 1376883 - stylo: Drop thread pool stack size to 100k. r=bholley (from julian-seward1:master); r=bholley
<!-- Please describe your changes on the following line: --> Bug 1376883 - stylo: Drop thread pool stack size to 100k. r=bholley. --- <!-- Thank you for contributing to Servo! Please replace each `[ ]` by `[X]` when the step is complete, and replace `__` with appropriate data: --> - [ ] `./mach build -d` does not report any errors - [ ] `./mach test-tidy` does not report any errors - [ ] These changes fix #__ (github issue number if applicable). <!-- Either: --> - [ ] There are tests for these changes OR - [ ] These changes do not require tests because _____ <!-- Also, please make sure that "Allow edits from maintainers" checkbox is checked, so that we can help you if you get stuck somewhere along the way.--> <!-- Pull requests that do not address these steps are welcome, but they will require additional verification as part of the review process. --> Source-Repo: https://github.com/servo/servo Source-Revision: 7fa6e355836901726e20af0d5e74cf3d37b944db --HG-- extra : subtree_source : https%3A//hg.mozilla.org/projects/converted-servo-linear extra : subtree_revision : eeac903d3db70a77e940762fa1fc81a60cb07d2c
This commit is contained in:
Родитель
276defc2a7
Коммит
d25fe41b86
|
@ -16,6 +16,7 @@ use euclid::Size2D;
|
||||||
use fnv::FnvHashMap;
|
use fnv::FnvHashMap;
|
||||||
use font_metrics::FontMetricsProvider;
|
use font_metrics::FontMetricsProvider;
|
||||||
#[cfg(feature = "gecko")] use gecko_bindings::structs;
|
#[cfg(feature = "gecko")] use gecko_bindings::structs;
|
||||||
|
use parallel::STYLE_THREAD_STACK_SIZE_KB;
|
||||||
#[cfg(feature = "servo")] use parking_lot::RwLock;
|
#[cfg(feature = "servo")] use parking_lot::RwLock;
|
||||||
use properties::ComputedValues;
|
use properties::ComputedValues;
|
||||||
#[cfg(feature = "servo")] use properties::PropertyId;
|
#[cfg(feature = "servo")] use properties::PropertyId;
|
||||||
|
@ -605,6 +606,61 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Helper for checking how much of the current thread's stack is in use.
///
/// The checker assumes that stacks grow downwards (towards lower addresses),
/// which holds for all non-ancient CPU architectures.
pub struct StackLimitChecker {
    // Lowest stack address that may be reached before the limit counts as
    // exceeded.
    lower_limit: usize,
}
|
||||||
|
|
||||||
|
impl StackLimitChecker {
|
||||||
|
/// Create a new limit checker, for this thread, allowing further use
|
||||||
|
/// of up to |stack_size| bytes beyond (below) the current stack pointer.
|
||||||
|
#[inline(never)]
|
||||||
|
pub fn new(stack_size_limit: usize) -> Self {
|
||||||
|
StackLimitChecker {
|
||||||
|
lower_limit: StackLimitChecker::get_sp() - stack_size_limit
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Checks whether the previously stored stack limit has now been exceeded.
|
||||||
|
#[inline(never)]
|
||||||
|
pub fn limit_exceeded(&self) -> bool {
|
||||||
|
let curr_sp = StackLimitChecker::get_sp();
|
||||||
|
|
||||||
|
// Try to assert if we're called from a different thread than the
|
||||||
|
// one that originally created this object. This is a bit subtle
|
||||||
|
// and relies on wraparound behaviour of unsigned integers.
|
||||||
|
//
|
||||||
|
// * If we're called from a thread whose stack has a higher address
|
||||||
|
// than the one that created this object, then
|
||||||
|
// |curr_sp - self.lower_limit| will (almost certainly) be larger
|
||||||
|
// than the thread stack size, so the check will fail.
|
||||||
|
//
|
||||||
|
// * If we're called from a thread whose stack has a lower address
|
||||||
|
// than the one that created this object, then
|
||||||
|
// |curr_sp - self.lower_limit| will be negative, which will look
|
||||||
|
// like a very large unsigned value, so the check will also fail.
|
||||||
|
//
|
||||||
|
// The correctness of depends on the assumption that no stack wraps
|
||||||
|
// around the end of the address space.
|
||||||
|
debug_assert!(curr_sp - self.lower_limit
|
||||||
|
<= STYLE_THREAD_STACK_SIZE_KB * 1024);
|
||||||
|
|
||||||
|
// The actual bounds check.
|
||||||
|
curr_sp <= self.lower_limit
|
||||||
|
}
|
||||||
|
|
||||||
|
// Technically, rustc can optimize this away, but shouldn't for now.
|
||||||
|
// We should fix this once black_box is stable.
|
||||||
|
#[inline(always)]
|
||||||
|
fn get_sp() -> usize {
|
||||||
|
let mut foo: usize = 42;
|
||||||
|
(&mut foo as *mut usize) as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/// A thread-local style context.
|
/// A thread-local style context.
|
||||||
///
|
///
|
||||||
/// This context contains data that needs to be used during restyling, but is
|
/// This context contains data that needs to be used during restyling, but is
|
||||||
|
@ -639,6 +695,9 @@ pub struct ThreadLocalStyleContext<E: TElement> {
|
||||||
/// The struct used to compute and cache font metrics from style
|
/// The struct used to compute and cache font metrics from style
|
||||||
/// for evaluation of the font-relative em/ch units and font-size
|
/// for evaluation of the font-relative em/ch units and font-size
|
||||||
pub font_metrics_provider: E::FontMetricsProvider,
|
pub font_metrics_provider: E::FontMetricsProvider,
|
||||||
|
/// A checker used to ensure that parallel.rs does not recurse indefinitely
|
||||||
|
/// even on arbitrarily deep trees. See Gecko bug 1376883.
|
||||||
|
pub stack_limit_checker: StackLimitChecker,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<E: TElement> ThreadLocalStyleContext<E> {
|
impl<E: TElement> ThreadLocalStyleContext<E> {
|
||||||
|
@ -654,6 +713,8 @@ impl<E: TElement> ThreadLocalStyleContext<E> {
|
||||||
statistics: TraversalStatistics::default(),
|
statistics: TraversalStatistics::default(),
|
||||||
current_element_info: None,
|
current_element_info: None,
|
||||||
font_metrics_provider: E::FontMetricsProvider::create_from(shared),
|
font_metrics_provider: E::FontMetricsProvider::create_from(shared),
|
||||||
|
stack_limit_checker: StackLimitChecker::new(
|
||||||
|
(STYLE_THREAD_STACK_SIZE_KB - 40) * 1024),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -668,6 +729,15 @@ impl<E: TElement> ThreadLocalStyleContext<E> {
|
||||||
statistics: TraversalStatistics::default(),
|
statistics: TraversalStatistics::default(),
|
||||||
current_element_info: None,
|
current_element_info: None,
|
||||||
font_metrics_provider: E::FontMetricsProvider::create_from(shared),
|
font_metrics_provider: E::FontMetricsProvider::create_from(shared),
|
||||||
|
// Threads in the styling pool have small stacks, and we have to
|
||||||
|
// be careful not to run out of stack during recursion in
|
||||||
|
// parallel.rs. Therefore set up a stack limit checker, in
|
||||||
|
// which we reserve 40KB of stack as a safety buffer. Currently
|
||||||
|
// the stack size is 128KB, so this allows 88KB for recursive
|
||||||
|
// DOM traversal, which encompasses 53 levels of recursion before
|
||||||
|
// the limiter kicks in, on x86_64-Linux. See Gecko bug 1376883.
|
||||||
|
stack_limit_checker: StackLimitChecker::new(
|
||||||
|
(STYLE_THREAD_STACK_SIZE_KB - 40) * 1024),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,7 @@ use gecko_bindings::bindings;
|
||||||
use gecko_bindings::bindings::{Gecko_RegisterProfilerThread, Gecko_UnregisterProfilerThread};
|
use gecko_bindings::bindings::{Gecko_RegisterProfilerThread, Gecko_UnregisterProfilerThread};
|
||||||
use gecko_bindings::bindings::Gecko_SetJemallocThreadLocalArena;
|
use gecko_bindings::bindings::Gecko_SetJemallocThreadLocalArena;
|
||||||
use num_cpus;
|
use num_cpus;
|
||||||
|
use parallel::STYLE_THREAD_STACK_SIZE_KB;
|
||||||
use rayon;
|
use rayon;
|
||||||
use shared_lock::SharedRwLock;
|
use shared_lock::SharedRwLock;
|
||||||
use std::cmp;
|
use std::cmp;
|
||||||
|
@ -92,7 +93,9 @@ lazy_static! {
|
||||||
.breadth_first()
|
.breadth_first()
|
||||||
.thread_name(thread_name)
|
.thread_name(thread_name)
|
||||||
.start_handler(thread_startup)
|
.start_handler(thread_startup)
|
||||||
.exit_handler(thread_shutdown);
|
.exit_handler(thread_shutdown)
|
||||||
|
// Set thread stack size to 128KB. See Gecko bug 1376883.
|
||||||
|
.stack_size(STYLE_THREAD_STACK_SIZE_KB * 1024);
|
||||||
let pool = rayon::ThreadPool::new(configuration).ok();
|
let pool = rayon::ThreadPool::new(configuration).ok();
|
||||||
pool
|
pool
|
||||||
};
|
};
|
||||||
|
|
|
@ -32,6 +32,9 @@ use std::borrow::Borrow;
|
||||||
use time;
|
use time;
|
||||||
use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken};
|
use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken};
|
||||||
|
|
||||||
|
/// Minimum stack size, in kilobytes, of each thread in the styling pool.
pub const STYLE_THREAD_STACK_SIZE_KB: usize = 128;
|
||||||
|
|
||||||
/// The maximum number of child nodes that we will process as a single unit.
|
/// The maximum number of child nodes that we will process as a single unit.
|
||||||
///
|
///
|
||||||
/// Larger values will increase style sharing cache hits and general DOM
|
/// Larger values will increase style sharing cache hits and general DOM
|
||||||
|
@ -77,7 +80,7 @@ pub fn traverse_dom<E, D>(traversal: &D,
|
||||||
let root_opaque = root.opaque();
|
let root_opaque = root.opaque();
|
||||||
traverse_nodes(&[root],
|
traverse_nodes(&[root],
|
||||||
DispatchMode::TailCall,
|
DispatchMode::TailCall,
|
||||||
0,
|
true,
|
||||||
root_opaque,
|
root_opaque,
|
||||||
traversal_data,
|
traversal_data,
|
||||||
scope,
|
scope,
|
||||||
|
@ -132,7 +135,6 @@ fn create_thread_local_context<'scope, E, D>(
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
#[allow(unsafe_code)]
|
#[allow(unsafe_code)]
|
||||||
fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
|
fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
|
||||||
recursion_depth: usize,
|
|
||||||
root: OpaqueNode,
|
root: OpaqueNode,
|
||||||
mut traversal_data: PerLevelTraversalData,
|
mut traversal_data: PerLevelTraversalData,
|
||||||
scope: &'a rayon::Scope<'scope>,
|
scope: &'a rayon::Scope<'scope>,
|
||||||
|
@ -144,6 +146,10 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
|
||||||
{
|
{
|
||||||
debug_assert!(nodes.len() <= WORK_UNIT_MAX);
|
debug_assert!(nodes.len() <= WORK_UNIT_MAX);
|
||||||
|
|
||||||
|
// We set this below, when we have a borrow of the thread-local-context
|
||||||
|
// available.
|
||||||
|
let recursion_ok;
|
||||||
|
|
||||||
// Collect all the children of the elements in our work unit. This will
|
// Collect all the children of the elements in our work unit. This will
|
||||||
// contain the combined children of up to WORK_UNIT_MAX nodes, which may
|
// contain the combined children of up to WORK_UNIT_MAX nodes, which may
|
||||||
// be numerous. As such, we store it in a large SmallVec to minimize heap-
|
// be numerous. As such, we store it in a large SmallVec to minimize heap-
|
||||||
|
@ -154,6 +160,10 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
|
||||||
// a potential recursive call when we pass TailCall.
|
// a potential recursive call when we pass TailCall.
|
||||||
let mut tlc = tls.ensure(
|
let mut tlc = tls.ensure(
|
||||||
|slot: &mut Option<ThreadLocalStyleContext<E>>| create_thread_local_context(traversal, slot));
|
|slot: &mut Option<ThreadLocalStyleContext<E>>| create_thread_local_context(traversal, slot));
|
||||||
|
|
||||||
|
// Check that we're not in danger of running out of stack.
|
||||||
|
recursion_ok = !tlc.stack_limit_checker.limit_exceeded();
|
||||||
|
|
||||||
let mut context = StyleContext {
|
let mut context = StyleContext {
|
||||||
shared: traversal.shared_context(),
|
shared: traversal.shared_context(),
|
||||||
thread_local: &mut *tlc,
|
thread_local: &mut *tlc,
|
||||||
|
@ -202,7 +212,7 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
|
||||||
traversal_data_copy.current_dom_depth += 1;
|
traversal_data_copy.current_dom_depth += 1;
|
||||||
traverse_nodes(&*discovered_child_nodes,
|
traverse_nodes(&*discovered_child_nodes,
|
||||||
DispatchMode::NotTailCall,
|
DispatchMode::NotTailCall,
|
||||||
recursion_depth,
|
recursion_ok,
|
||||||
root,
|
root,
|
||||||
traversal_data_copy,
|
traversal_data_copy,
|
||||||
scope,
|
scope,
|
||||||
|
@ -232,7 +242,7 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
|
||||||
traversal_data.current_dom_depth += 1;
|
traversal_data.current_dom_depth += 1;
|
||||||
traverse_nodes(&discovered_child_nodes,
|
traverse_nodes(&discovered_child_nodes,
|
||||||
DispatchMode::TailCall,
|
DispatchMode::TailCall,
|
||||||
recursion_depth,
|
recursion_ok,
|
||||||
root,
|
root,
|
||||||
traversal_data,
|
traversal_data,
|
||||||
scope,
|
scope,
|
||||||
|
@ -254,16 +264,10 @@ impl DispatchMode {
|
||||||
fn is_tail_call(&self) -> bool { matches!(*self, DispatchMode::TailCall) }
|
fn is_tail_call(&self) -> bool { matches!(*self, DispatchMode::TailCall) }
|
||||||
}
|
}
|
||||||
|
|
||||||
// On x86_64-linux, a recursive cycle requires 3472 bytes of stack. Limiting
|
|
||||||
// the depth to 150 therefore should keep the stack use by the recursion to
|
|
||||||
// 520800 bytes, which would give a generously conservative margin should we
|
|
||||||
// decide to reduce the thread stack size from its default of 2MB down to 1MB.
|
|
||||||
const RECURSION_DEPTH_LIMIT: usize = 150;
|
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>],
|
fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>],
|
||||||
mode: DispatchMode,
|
mode: DispatchMode,
|
||||||
recursion_depth: usize,
|
recursion_ok: bool,
|
||||||
root: OpaqueNode,
|
root: OpaqueNode,
|
||||||
traversal_data: PerLevelTraversalData,
|
traversal_data: PerLevelTraversalData,
|
||||||
scope: &'a rayon::Scope<'scope>,
|
scope: &'a rayon::Scope<'scope>,
|
||||||
|
@ -279,12 +283,11 @@ fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>],
|
||||||
// want to actually dispatch the job as a tail call if there's nothing left
|
// want to actually dispatch the job as a tail call if there's nothing left
|
||||||
// in our local queue. Otherwise we need to return to it to maintain proper
|
// in our local queue. Otherwise we need to return to it to maintain proper
|
||||||
// breadth-first ordering. We also need to take care to avoid stack
|
// breadth-first ordering. We also need to take care to avoid stack
|
||||||
// overflow due to excessive tail recursion. The stack overflow isn't
|
// overflow due to excessive tail recursion. The stack overflow avoidance
|
||||||
// observable to content -- we're still completely correct, just not
|
// isn't observable to content -- we're still completely correct, just not
|
||||||
// using tail recursion any more. See bug 1368302.
|
// using tail recursion any more. See Gecko bugs 1368302 and 1376883.
|
||||||
debug_assert!(recursion_depth <= RECURSION_DEPTH_LIMIT);
|
|
||||||
let may_dispatch_tail = mode.is_tail_call() &&
|
let may_dispatch_tail = mode.is_tail_call() &&
|
||||||
recursion_depth != RECURSION_DEPTH_LIMIT &&
|
recursion_ok &&
|
||||||
!pool.current_thread_has_pending_tasks().unwrap();
|
!pool.current_thread_has_pending_tasks().unwrap();
|
||||||
|
|
||||||
// In the common case, our children fit within a single work unit, in which
|
// In the common case, our children fit within a single work unit, in which
|
||||||
|
@ -292,12 +295,12 @@ fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>],
|
||||||
if nodes.len() <= WORK_UNIT_MAX {
|
if nodes.len() <= WORK_UNIT_MAX {
|
||||||
let work = nodes.iter().cloned().collect::<WorkUnit<E::ConcreteNode>>();
|
let work = nodes.iter().cloned().collect::<WorkUnit<E::ConcreteNode>>();
|
||||||
if may_dispatch_tail {
|
if may_dispatch_tail {
|
||||||
top_down_dom(&work, recursion_depth + 1, root,
|
top_down_dom(&work, root,
|
||||||
traversal_data, scope, pool, traversal, tls);
|
traversal_data, scope, pool, traversal, tls);
|
||||||
} else {
|
} else {
|
||||||
scope.spawn(move |scope| {
|
scope.spawn(move |scope| {
|
||||||
let work = work;
|
let work = work;
|
||||||
top_down_dom(&work, 0, root,
|
top_down_dom(&work, root,
|
||||||
traversal_data, scope, pool, traversal, tls);
|
traversal_data, scope, pool, traversal, tls);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -307,7 +310,7 @@ fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>],
|
||||||
let traversal_data_copy = traversal_data.clone();
|
let traversal_data_copy = traversal_data.clone();
|
||||||
scope.spawn(move |scope| {
|
scope.spawn(move |scope| {
|
||||||
let n = nodes;
|
let n = nodes;
|
||||||
top_down_dom(&*n, 0, root,
|
top_down_dom(&*n, root,
|
||||||
traversal_data_copy, scope, pool, traversal, tls)
|
traversal_data_copy, scope, pool, traversal, tls)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
Загрузка…
Ссылка в новой задаче