servo: Merge #18231 - Bug 1376883 - stylo: Drop thread pool stack size to 100k. r=bholley (from julian-seward1:master); r=bholley

<!-- Please describe your changes on the following line: -->
Bug 1376883 - stylo: Drop thread pool stack size to 100k.  r=bholley.

---
<!-- Thank you for contributing to Servo! Please replace each `[ ]` by `[X]` when the step is complete, and replace `__` with appropriate data: -->
- [ ] `./mach build -d` does not report any errors
- [ ] `./mach test-tidy` does not report any errors
- [ ] These changes fix #__ (github issue number if applicable).

<!-- Either: -->
- [ ] There are tests for these changes OR
- [ ] These changes do not require tests because _____

<!-- Also, please make sure that "Allow edits from maintainers" checkbox is checked, so that we can help you if you get stuck somewhere along the way.-->

<!-- Pull requests that do not address these steps are welcome, but they will require additional verification as part of the review process. -->

Source-Repo: https://github.com/servo/servo
Source-Revision: 7fa6e355836901726e20af0d5e74cf3d37b944db

--HG--
extra : subtree_source : https%3A//hg.mozilla.org/projects/converted-servo-linear
extra : subtree_revision : eeac903d3db70a77e940762fa1fc81a60cb07d2c
This commit is contained in:
Julian Seward 2017-08-25 10:14:23 -05:00
Родитель 276defc2a7
Коммит d25fe41b86
3 изменённых файлов: 96 добавлений и 20 удалений

Просмотреть файл

@ -16,6 +16,7 @@ use euclid::Size2D;
use fnv::FnvHashMap;
use font_metrics::FontMetricsProvider;
#[cfg(feature = "gecko")] use gecko_bindings::structs;
use parallel::STYLE_THREAD_STACK_SIZE_KB;
#[cfg(feature = "servo")] use parking_lot::RwLock;
use properties::ComputedValues;
#[cfg(feature = "servo")] use properties::PropertyId;
@ -605,6 +606,61 @@ where
}
}
/// A helper type for stack limit checking. This assumes that stacks grow
/// down, which is true for all non-ancient CPU architectures.
pub struct StackLimitChecker {
    /// Stack address at or below which `limit_exceeded` reports `true`.
    lower_limit: usize
}

impl StackLimitChecker {
    /// Create a new limit checker, for this thread, allowing further use
    /// of up to |stack_size_limit| bytes beyond (below) the current stack
    /// pointer.
    #[inline(never)]
    pub fn new(stack_size_limit: usize) -> Self {
        StackLimitChecker {
            lower_limit: StackLimitChecker::get_sp() - stack_size_limit
        }
    }

    /// Checks whether the previously stored stack limit has now been exceeded.
    ///
    /// Returns `true` once the current stack pointer is at or below the
    /// limit computed in `new`.
    #[inline(never)]
    pub fn limit_exceeded(&self) -> bool {
        let curr_sp = StackLimitChecker::get_sp();

        // Try to assert if we're called from a different thread than the
        // one that originally created this object.
        //
        // On the creating thread the current stack pointer can sit on
        // either side of the limit: above it while there is still budget
        // left, below it once the budget has been used up (which is exactly
        // the case this method exists to report). Either way it stays
        // within one thread stack size of the limit. A stack pointer that
        // is further away than that (almost certainly) belongs to another
        // thread's stack.
        //
        // The distance is computed without ever subtracting a larger value
        // from a smaller one, so that a legitimately exceeded limit does
        // not trip an arithmetic-overflow panic in debug builds.
        //
        // The correctness of this depends on the assumption that no stack
        // wraps around the end of the address space.
        let distance_from_limit = if curr_sp >= self.lower_limit {
            curr_sp - self.lower_limit
        } else {
            self.lower_limit - curr_sp
        };
        debug_assert!(distance_from_limit
                      <= STYLE_THREAD_STACK_SIZE_KB * 1024);

        // The actual bounds check.
        curr_sp <= self.lower_limit
    }

    /// Returns an approximation of the current stack pointer, namely the
    /// address of a local variable in the (inlined) caller's frame.
    // Technically, rustc can optimize this away, but shouldn't for now.
    // We should fix this once black_box is stable.
    #[inline(always)]
    fn get_sp() -> usize {
        let mut foo: usize = 42;
        (&mut foo as *mut usize) as usize
    }
}
/// A thread-local style context.
///
/// This context contains data that needs to be used during restyling, but is
@ -639,6 +695,9 @@ pub struct ThreadLocalStyleContext<E: TElement> {
/// The struct used to compute and cache font metrics from style
/// for evaluation of the font-relative em/ch units and font-size
pub font_metrics_provider: E::FontMetricsProvider,
/// A checker used to ensure that parallel.rs does not recurse indefinitely
/// even on arbitrarily deep trees. See Gecko bug 1376883.
pub stack_limit_checker: StackLimitChecker,
}
impl<E: TElement> ThreadLocalStyleContext<E> {
@ -654,6 +713,8 @@ impl<E: TElement> ThreadLocalStyleContext<E> {
statistics: TraversalStatistics::default(),
current_element_info: None,
font_metrics_provider: E::FontMetricsProvider::create_from(shared),
stack_limit_checker: StackLimitChecker::new(
(STYLE_THREAD_STACK_SIZE_KB - 40) * 1024),
}
}
@ -668,6 +729,15 @@ impl<E: TElement> ThreadLocalStyleContext<E> {
statistics: TraversalStatistics::default(),
current_element_info: None,
font_metrics_provider: E::FontMetricsProvider::create_from(shared),
// Threads in the styling pool have small stacks, and we have to
// be careful not to run out of stack during recursion in
// parallel.rs. Therefore set up a stack limit checker, in
// which we reserve 40KB of stack as a safety buffer. Currently
// the stack size is 128KB, so this allows 88KB for recursive
// DOM traversal, which encompasses 53 levels of recursion before
// the limiter kicks in, on x86_64-Linux. See Gecko bug 1376883.
stack_limit_checker: StackLimitChecker::new(
(STYLE_THREAD_STACK_SIZE_KB - 40) * 1024),
}
}

Просмотреть файл

@ -9,6 +9,7 @@ use gecko_bindings::bindings;
use gecko_bindings::bindings::{Gecko_RegisterProfilerThread, Gecko_UnregisterProfilerThread};
use gecko_bindings::bindings::Gecko_SetJemallocThreadLocalArena;
use num_cpus;
use parallel::STYLE_THREAD_STACK_SIZE_KB;
use rayon;
use shared_lock::SharedRwLock;
use std::cmp;
@ -92,7 +93,9 @@ lazy_static! {
.breadth_first()
.thread_name(thread_name)
.start_handler(thread_startup)
.exit_handler(thread_shutdown);
.exit_handler(thread_shutdown)
// Set thread stack size to 128KB. See Gecko bug 1376883.
.stack_size(STYLE_THREAD_STACK_SIZE_KB * 1024);
let pool = rayon::ThreadPool::new(configuration).ok();
pool
};

Просмотреть файл

@ -32,6 +32,9 @@ use std::borrow::Borrow;
use time;
use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken};
/// The minimum stack size for a thread in the styling pool, in kilobytes.
///
/// This value (multiplied by 1024) is handed to the style thread pool
/// configuration as its stack size, and the per-thread stack limit checker
/// derives both its recursion budget and its debug sanity bound from it, so
/// all of them change together when this constant is adjusted.
pub const STYLE_THREAD_STACK_SIZE_KB: usize = 128;
/// The maximum number of child nodes that we will process as a single unit.
///
/// Larger values will increase style sharing cache hits and general DOM
@ -77,7 +80,7 @@ pub fn traverse_dom<E, D>(traversal: &D,
let root_opaque = root.opaque();
traverse_nodes(&[root],
DispatchMode::TailCall,
0,
true,
root_opaque,
traversal_data,
scope,
@ -132,7 +135,6 @@ fn create_thread_local_context<'scope, E, D>(
#[inline(always)]
#[allow(unsafe_code)]
fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
recursion_depth: usize,
root: OpaqueNode,
mut traversal_data: PerLevelTraversalData,
scope: &'a rayon::Scope<'scope>,
@ -144,6 +146,10 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
{
debug_assert!(nodes.len() <= WORK_UNIT_MAX);
// We set this below, when we have a borrow of the thread-local-context
// available.
let recursion_ok;
// Collect all the children of the elements in our work unit. This will
// contain the combined children of up to WORK_UNIT_MAX nodes, which may
// be numerous. As such, we store it in a large SmallVec to minimize heap-
@ -154,6 +160,10 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
// a potential recursive call when we pass TailCall.
let mut tlc = tls.ensure(
|slot: &mut Option<ThreadLocalStyleContext<E>>| create_thread_local_context(traversal, slot));
// Check that we're not in danger of running out of stack.
recursion_ok = !tlc.stack_limit_checker.limit_exceeded();
let mut context = StyleContext {
shared: traversal.shared_context(),
thread_local: &mut *tlc,
@ -202,7 +212,7 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
traversal_data_copy.current_dom_depth += 1;
traverse_nodes(&*discovered_child_nodes,
DispatchMode::NotTailCall,
recursion_depth,
recursion_ok,
root,
traversal_data_copy,
scope,
@ -232,7 +242,7 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
traversal_data.current_dom_depth += 1;
traverse_nodes(&discovered_child_nodes,
DispatchMode::TailCall,
recursion_depth,
recursion_ok,
root,
traversal_data,
scope,
@ -254,16 +264,10 @@ impl DispatchMode {
/// Returns true only when this dispatch mode is `TailCall`.
fn is_tail_call(&self) -> bool {
    match *self {
        DispatchMode::TailCall => true,
        _ => false,
    }
}
}
// On x86_64-linux, a recursive cycle requires 3472 bytes of stack. Limiting
// the depth to 150 therefore should keep the stack use by the recursion to
// 520800 bytes, which would give a generously conservative margin should we
// decide to reduce the thread stack size from its default of 2MB down to 1MB.
const RECURSION_DEPTH_LIMIT: usize = 150;
#[inline]
fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>],
mode: DispatchMode,
recursion_depth: usize,
recursion_ok: bool,
root: OpaqueNode,
traversal_data: PerLevelTraversalData,
scope: &'a rayon::Scope<'scope>,
@ -279,12 +283,11 @@ fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>],
// want to actually dispatch the job as a tail call if there's nothing left
// in our local queue. Otherwise we need to return to it to maintain proper
// breadth-first ordering. We also need to take care to avoid stack
// overflow due to excessive tail recursion. The stack overflow isn't
// observable to content -- we're still completely correct, just not
// using tail recursion any more. See bug 1368302.
debug_assert!(recursion_depth <= RECURSION_DEPTH_LIMIT);
// overflow due to excessive tail recursion. The stack overflow avoidance
// isn't observable to content -- we're still completely correct, just not
// using tail recursion any more. See Gecko bugs 1368302 and 1376883.
let may_dispatch_tail = mode.is_tail_call() &&
recursion_depth != RECURSION_DEPTH_LIMIT &&
recursion_ok &&
!pool.current_thread_has_pending_tasks().unwrap();
// In the common case, our children fit within a single work unit, in which
@ -292,12 +295,12 @@ fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>],
if nodes.len() <= WORK_UNIT_MAX {
let work = nodes.iter().cloned().collect::<WorkUnit<E::ConcreteNode>>();
if may_dispatch_tail {
top_down_dom(&work, recursion_depth + 1, root,
top_down_dom(&work, root,
traversal_data, scope, pool, traversal, tls);
} else {
scope.spawn(move |scope| {
let work = work;
top_down_dom(&work, 0, root,
top_down_dom(&work, root,
traversal_data, scope, pool, traversal, tls);
});
}
@ -307,7 +310,7 @@ fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>],
let traversal_data_copy = traversal_data.clone();
scope.spawn(move |scope| {
let n = nodes;
top_down_dom(&*n, 0, root,
top_down_dom(&*n, root,
traversal_data_copy, scope, pool, traversal, tls)
});
}