зеркало из https://github.com/mozilla/gecko-dev.git
servo: Merge #18231 - Bug 1376883 - stylo: Drop thread pool stack size to 100k. r=bholley (from julian-seward1:master); r=bholley
<!-- Please describe your changes on the following line: --> Bug 1376883 - stylo: Drop thread pool stack size to 100k. r=bholley. --- <!-- Thank you for contributing to Servo! Please replace each `[ ]` by `[X]` when the step is complete, and replace `__` with appropriate data: --> - [ ] `./mach build -d` does not report any errors - [ ] `./mach test-tidy` does not report any errors - [ ] These changes fix #__ (github issue number if applicable). <!-- Either: --> - [ ] There are tests for these changes OR - [ ] These changes do not require tests because _____ <!-- Also, please make sure that "Allow edits from maintainers" checkbox is checked, so that we can help you if you get stuck somewhere along the way.--> <!-- Pull requests that do not address these steps are welcome, but they will require additional verification as part of the review process. --> Source-Repo: https://github.com/servo/servo Source-Revision: 7fa6e355836901726e20af0d5e74cf3d37b944db --HG-- extra : subtree_source : https%3A//hg.mozilla.org/projects/converted-servo-linear extra : subtree_revision : eeac903d3db70a77e940762fa1fc81a60cb07d2c
This commit is contained in:
Родитель
276defc2a7
Коммит
d25fe41b86
|
@ -16,6 +16,7 @@ use euclid::Size2D;
|
|||
use fnv::FnvHashMap;
|
||||
use font_metrics::FontMetricsProvider;
|
||||
#[cfg(feature = "gecko")] use gecko_bindings::structs;
|
||||
use parallel::STYLE_THREAD_STACK_SIZE_KB;
|
||||
#[cfg(feature = "servo")] use parking_lot::RwLock;
|
||||
use properties::ComputedValues;
|
||||
#[cfg(feature = "servo")] use properties::PropertyId;
|
||||
|
@ -605,6 +606,61 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/// A helper type for stack limit checking. This assumes that stacks grow
|
||||
/// down, which is true for all non-ancient CPU architectures.
|
||||
pub struct StackLimitChecker {
|
||||
/// Stack address at or below which `limit_exceeded` reports that the limit
/// has been exceeded. Computed in `new` as the approximate stack pointer at
/// creation time minus the number of bytes the caller is allowed to use.
lower_limit: usize
|
||||
}
|
||||
|
||||
impl StackLimitChecker {
|
||||
/// Create a new limit checker, for this thread, allowing further use
|
||||
/// of up to |stack_size| bytes beyond (below) the current stack pointer.
|
||||
#[inline(never)]
|
||||
pub fn new(stack_size_limit: usize) -> Self {
|
||||
StackLimitChecker {
|
||||
lower_limit: StackLimitChecker::get_sp() - stack_size_limit
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks whether the previously stored stack limit has now been exceeded.
|
||||
#[inline(never)]
|
||||
pub fn limit_exceeded(&self) -> bool {
|
||||
let curr_sp = StackLimitChecker::get_sp();
|
||||
|
||||
// Try to assert if we're called from a different thread than the
|
||||
// one that originally created this object. This is a bit subtle
|
||||
// and relies on wraparound behaviour of unsigned integers.
|
||||
//
|
||||
// * If we're called from a thread whose stack has a higher address
|
||||
// than the one that created this object, then
|
||||
// |curr_sp - self.lower_limit| will (almost certainly) be larger
|
||||
// than the thread stack size, so the check will fail.
|
||||
//
|
||||
// * If we're called from a thread whose stack has a lower address
|
||||
// than the one that created this object, then
|
||||
// |curr_sp - self.lower_limit| will be negative, which will look
|
||||
// like a very large unsigned value, so the check will also fail.
|
||||
//
|
||||
// The correctness of depends on the assumption that no stack wraps
|
||||
// around the end of the address space.
|
||||
debug_assert!(curr_sp - self.lower_limit
|
||||
<= STYLE_THREAD_STACK_SIZE_KB * 1024);
|
||||
|
||||
// The actual bounds check.
|
||||
curr_sp <= self.lower_limit
|
||||
}
|
||||
|
||||
// Technically, rustc can optimize this away, but shouldn't for now.
|
||||
// We should fix this once black_box is stable.
|
||||
#[inline(always)]
|
||||
fn get_sp() -> usize {
|
||||
let mut foo: usize = 42;
|
||||
(&mut foo as *mut usize) as usize
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// A thread-local style context.
|
||||
///
|
||||
/// This context contains data that needs to be used during restyling, but is
|
||||
|
@ -639,6 +695,9 @@ pub struct ThreadLocalStyleContext<E: TElement> {
|
|||
/// The struct used to compute and cache font metrics from style
|
||||
/// for evaluation of the font-relative em/ch units and font-size
|
||||
pub font_metrics_provider: E::FontMetricsProvider,
|
||||
/// A checker used to ensure that parallel.rs does not recurse indefinitely
|
||||
/// even on arbitrarily deep trees. See Gecko bug 1376883.
|
||||
pub stack_limit_checker: StackLimitChecker,
|
||||
}
|
||||
|
||||
impl<E: TElement> ThreadLocalStyleContext<E> {
|
||||
|
@ -654,6 +713,8 @@ impl<E: TElement> ThreadLocalStyleContext<E> {
|
|||
statistics: TraversalStatistics::default(),
|
||||
current_element_info: None,
|
||||
font_metrics_provider: E::FontMetricsProvider::create_from(shared),
|
||||
stack_limit_checker: StackLimitChecker::new(
|
||||
(STYLE_THREAD_STACK_SIZE_KB - 40) * 1024),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -668,6 +729,15 @@ impl<E: TElement> ThreadLocalStyleContext<E> {
|
|||
statistics: TraversalStatistics::default(),
|
||||
current_element_info: None,
|
||||
font_metrics_provider: E::FontMetricsProvider::create_from(shared),
|
||||
// Threads in the styling pool have small stacks, and we have to
|
||||
// be careful not to run out of stack during recursion in
|
||||
// parallel.rs. Therefore set up a stack limit checker, in
|
||||
// which we reserve 40KB of stack as a safety buffer. Currently
|
||||
// the stack size is 128KB, so this allows 88KB for recursive
|
||||
// DOM traversal, which encompasses 53 levels of recursion before
|
||||
// the limiter kicks in, on x86_64-Linux. See Gecko bug 1376883.
|
||||
stack_limit_checker: StackLimitChecker::new(
|
||||
(STYLE_THREAD_STACK_SIZE_KB - 40) * 1024),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@ use gecko_bindings::bindings;
|
|||
use gecko_bindings::bindings::{Gecko_RegisterProfilerThread, Gecko_UnregisterProfilerThread};
|
||||
use gecko_bindings::bindings::Gecko_SetJemallocThreadLocalArena;
|
||||
use num_cpus;
|
||||
use parallel::STYLE_THREAD_STACK_SIZE_KB;
|
||||
use rayon;
|
||||
use shared_lock::SharedRwLock;
|
||||
use std::cmp;
|
||||
|
@ -92,7 +93,9 @@ lazy_static! {
|
|||
.breadth_first()
|
||||
.thread_name(thread_name)
|
||||
.start_handler(thread_startup)
|
||||
.exit_handler(thread_shutdown);
|
||||
.exit_handler(thread_shutdown)
|
||||
// Set thread stack size to 128KB. See Gecko bug 1376883.
|
||||
.stack_size(STYLE_THREAD_STACK_SIZE_KB * 1024);
|
||||
let pool = rayon::ThreadPool::new(configuration).ok();
|
||||
pool
|
||||
};
|
||||
|
|
|
@ -32,6 +32,9 @@ use std::borrow::Borrow;
|
|||
use time;
|
||||
use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken};
|
||||
|
||||
/// The minimum stack size for a thread in the styling pool, in kilobytes.
///
/// Callers multiply this by 1024 wherever a byte count is needed: for the
/// style thread pool's `stack_size` setting and for the limits handed to and
/// checked by `StackLimitChecker`.
|
||||
pub const STYLE_THREAD_STACK_SIZE_KB: usize = 128;
|
||||
|
||||
/// The maximum number of child nodes that we will process as a single unit.
|
||||
///
|
||||
/// Larger values will increase style sharing cache hits and general DOM
|
||||
|
@ -77,7 +80,7 @@ pub fn traverse_dom<E, D>(traversal: &D,
|
|||
let root_opaque = root.opaque();
|
||||
traverse_nodes(&[root],
|
||||
DispatchMode::TailCall,
|
||||
0,
|
||||
true,
|
||||
root_opaque,
|
||||
traversal_data,
|
||||
scope,
|
||||
|
@ -132,7 +135,6 @@ fn create_thread_local_context<'scope, E, D>(
|
|||
#[inline(always)]
|
||||
#[allow(unsafe_code)]
|
||||
fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
|
||||
recursion_depth: usize,
|
||||
root: OpaqueNode,
|
||||
mut traversal_data: PerLevelTraversalData,
|
||||
scope: &'a rayon::Scope<'scope>,
|
||||
|
@ -144,6 +146,10 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
|
|||
{
|
||||
debug_assert!(nodes.len() <= WORK_UNIT_MAX);
|
||||
|
||||
// We set this below, when we have a borrow of the thread-local-context
|
||||
// available.
|
||||
let recursion_ok;
|
||||
|
||||
// Collect all the children of the elements in our work unit. This will
|
||||
// contain the combined children of up to WORK_UNIT_MAX nodes, which may
|
||||
// be numerous. As such, we store it in a large SmallVec to minimize heap-
|
||||
|
@ -154,6 +160,10 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
|
|||
// a potential recursive call when we pass TailCall.
|
||||
let mut tlc = tls.ensure(
|
||||
|slot: &mut Option<ThreadLocalStyleContext<E>>| create_thread_local_context(traversal, slot));
|
||||
|
||||
// Check that we're not in danger of running out of stack.
|
||||
recursion_ok = !tlc.stack_limit_checker.limit_exceeded();
|
||||
|
||||
let mut context = StyleContext {
|
||||
shared: traversal.shared_context(),
|
||||
thread_local: &mut *tlc,
|
||||
|
@ -202,7 +212,7 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
|
|||
traversal_data_copy.current_dom_depth += 1;
|
||||
traverse_nodes(&*discovered_child_nodes,
|
||||
DispatchMode::NotTailCall,
|
||||
recursion_depth,
|
||||
recursion_ok,
|
||||
root,
|
||||
traversal_data_copy,
|
||||
scope,
|
||||
|
@ -232,7 +242,7 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
|
|||
traversal_data.current_dom_depth += 1;
|
||||
traverse_nodes(&discovered_child_nodes,
|
||||
DispatchMode::TailCall,
|
||||
recursion_depth,
|
||||
recursion_ok,
|
||||
root,
|
||||
traversal_data,
|
||||
scope,
|
||||
|
@ -254,16 +264,10 @@ impl DispatchMode {
|
|||
fn is_tail_call(&self) -> bool { matches!(*self, DispatchMode::TailCall) }
|
||||
}
|
||||
|
||||
// On x86_64-linux, a recursive cycle requires 3472 bytes of stack. Limiting
|
||||
// the depth to 150 therefore should keep the stack use by the recursion to
|
||||
// 520800 bytes, which would give a generously conservative margin should we
|
||||
// decide to reduce the thread stack size from its default of 2MB down to 1MB.
|
||||
const RECURSION_DEPTH_LIMIT: usize = 150;
|
||||
|
||||
#[inline]
|
||||
fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>],
|
||||
mode: DispatchMode,
|
||||
recursion_depth: usize,
|
||||
recursion_ok: bool,
|
||||
root: OpaqueNode,
|
||||
traversal_data: PerLevelTraversalData,
|
||||
scope: &'a rayon::Scope<'scope>,
|
||||
|
@ -279,12 +283,11 @@ fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>],
|
|||
// want to actually dispatch the job as a tail call if there's nothing left
|
||||
// in our local queue. Otherwise we need to return to it to maintain proper
|
||||
// breadth-first ordering. We also need to take care to avoid stack
|
||||
// overflow due to excessive tail recursion. The stack overflow isn't
|
||||
// observable to content -- we're still completely correct, just not
|
||||
// using tail recursion any more. See bug 1368302.
|
||||
debug_assert!(recursion_depth <= RECURSION_DEPTH_LIMIT);
|
||||
// overflow due to excessive tail recursion. The stack overflow avoidance
|
||||
// isn't observable to content -- we're still completely correct, just not
|
||||
// using tail recursion any more. See Gecko bugs 1368302 and 1376883.
|
||||
let may_dispatch_tail = mode.is_tail_call() &&
|
||||
recursion_depth != RECURSION_DEPTH_LIMIT &&
|
||||
recursion_ok &&
|
||||
!pool.current_thread_has_pending_tasks().unwrap();
|
||||
|
||||
// In the common case, our children fit within a single work unit, in which
|
||||
|
@ -292,12 +295,12 @@ fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>],
|
|||
if nodes.len() <= WORK_UNIT_MAX {
|
||||
let work = nodes.iter().cloned().collect::<WorkUnit<E::ConcreteNode>>();
|
||||
if may_dispatch_tail {
|
||||
top_down_dom(&work, recursion_depth + 1, root,
|
||||
top_down_dom(&work, root,
|
||||
traversal_data, scope, pool, traversal, tls);
|
||||
} else {
|
||||
scope.spawn(move |scope| {
|
||||
let work = work;
|
||||
top_down_dom(&work, 0, root,
|
||||
top_down_dom(&work, root,
|
||||
traversal_data, scope, pool, traversal, tls);
|
||||
});
|
||||
}
|
||||
|
@ -307,7 +310,7 @@ fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>],
|
|||
let traversal_data_copy = traversal_data.clone();
|
||||
scope.spawn(move |scope| {
|
||||
let n = nodes;
|
||||
top_down_dom(&*n, 0, root,
|
||||
top_down_dom(&*n, root,
|
||||
traversal_data_copy, scope, pool, traversal, tls)
|
||||
});
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче