diff --git a/servo/components/script/dom/servoparser/async_html.rs b/servo/components/script/dom/servoparser/async_html.rs index 243e4452bd66..59411fda1c73 100644 --- a/servo/components/script/dom/servoparser/async_html.rs +++ b/servo/components/script/dom/servoparser/async_html.rs @@ -7,9 +7,8 @@ use dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods; use dom::bindings::codegen::Bindings::NodeBinding::NodeMethods; use dom::bindings::inheritance::Castable; -use dom::bindings::js::{JS, MutNullableJS, Root}; +use dom::bindings::js::{JS, Root}; use dom::bindings::str::DOMString; -use dom::bindings::trace::JSTraceable; use dom::comment::Comment; use dom::document::Document; use dom::documenttype::DocumentType; @@ -20,25 +19,167 @@ use dom::htmltemplateelement::HTMLTemplateElement; use dom::node::Node; use dom::processinginstruction::ProcessingInstruction; use dom::virtualmethods::vtable_for; -use html5ever::{Attribute, LocalName, QualName, ExpandedName}; +use html5ever::{Attribute as HtmlAttribute, ExpandedName, LocalName, QualName}; use html5ever::buffer_queue::BufferQueue; -use html5ever::tendril::StrTendril; +use html5ever::tendril::{SendTendril, StrTendril, Tendril}; +use html5ever::tendril::fmt::UTF8; use html5ever::tokenizer::{Tokenizer as HtmlTokenizer, TokenizerOpts, TokenizerResult}; -use html5ever::tree_builder::{NodeOrText, TreeSink, NextParserState, QuirksMode, ElementFlags}; -use html5ever::tree_builder::{Tracer as HtmlTracer, TreeBuilder, TreeBuilderOpts}; -use js::jsapi::JSTracer; +use html5ever::tree_builder::{ElementFlags, NodeOrText as HtmlNodeOrText, NextParserState, QuirksMode, TreeSink}; +use html5ever::tree_builder::{TreeBuilder, TreeBuilderOpts}; use servo_url::ServoUrl; use std::ascii::AsciiExt; use std::borrow::Cow; use std::cell::Cell; use std::collections::HashMap; +use std::collections::vec_deque::VecDeque; +use std::sync::mpsc::{channel, Receiver, Sender}; +use std::thread; use 
style::context::QuirksMode as ServoQuirksMode; +type ParseNodeId = usize; + +#[derive(Clone, HeapSizeOf, JSTraceable)] +pub struct ParseNode { + id: ParseNodeId, + qual_name: Option, +} + +#[derive(HeapSizeOf, JSTraceable)] +enum NodeOrText { + Node(ParseNode), + Text(String), +} + +#[derive(HeapSizeOf, JSTraceable)] +struct Attribute { + name: QualName, + value: String, +} + +#[derive(HeapSizeOf, JSTraceable)] +enum ParseOperation { + GetTemplateContents { target: ParseNodeId, contents: ParseNodeId }, + + CreateElement { + node: ParseNodeId, + name: QualName, + attrs: Vec, + current_line: u64 + }, + + CreateComment { text: String, node: ParseNodeId }, + AppendBeforeSibling { sibling: ParseNodeId, node: NodeOrText }, + Append { parent: ParseNodeId, node: NodeOrText }, + + AppendDoctypeToDocument { + name: String, + public_id: String, + system_id: String + }, + + AddAttrsIfMissing { target: ParseNodeId, attrs: Vec }, + RemoveFromParent { target: ParseNodeId }, + MarkScriptAlreadyStarted { node: ParseNodeId }, + ReparentChildren { parent: ParseNodeId, new_parent: ParseNodeId }, + AssociateWithForm { target: ParseNodeId, form: ParseNodeId }, + + CreatePI { + node: ParseNodeId, + target: String, + data: String + }, + + Pop { node: ParseNodeId }, + + SetQuirksMode { + #[ignore_heap_size_of = "Defined in style"] + mode: ServoQuirksMode + }, +} + +#[derive(HeapSizeOf)] +enum ToTokenizerMsg { + // From HtmlTokenizer + TokenizerResultDone { + #[ignore_heap_size_of = "Defined in html5ever"] + updated_input: VecDeque> + }, + TokenizerResultScript { + script: ParseNode, + #[ignore_heap_size_of = "Defined in html5ever"] + updated_input: VecDeque> + }, + End, // Sent to Tokenizer to signify HtmlTokenizer's end method has returned + + // From Sink + ProcessOperation(ParseOperation), + IsSameTree(ParseNodeId, ParseNodeId), + HasParentNode(ParseNodeId), +} + +#[derive(HeapSizeOf)] +enum ToHtmlTokenizerMsg { + Feed { + #[ignore_heap_size_of = "Defined in html5ever"] + input: 
VecDeque> + }, + End, + SetPlainTextState, +} + +// Responses to the queries that the Sink sends to the Tokenizer +// using the IsSameTree and HasParentNode variants of ToTokenizerMsg. +#[derive(HeapSizeOf)] +enum ToSinkMsg { + IsSameTree(bool), + HasParentNode(bool), +} + +fn create_buffer_queue(mut buffers: VecDeque>) -> BufferQueue { + let mut buffer_queue = BufferQueue::new(); + while let Some(st) = buffers.pop_front() { + buffer_queue.push_back(StrTendril::from(st)); + } + buffer_queue +} + +// The async HTML Tokenizer consists of two separate types working together: the Tokenizer +// (defined below), which lives on the main thread, and the HtmlTokenizer, defined in html5ever, which +// lives on the parser thread. +// Steps: +// 1. A call to Tokenizer::new will spin up a new parser thread, creating an HtmlTokenizer instance, +// which starts listening for messages from Tokenizer. +// 2. Upon receiving an input from ServoParser, the Tokenizer forwards it to HtmlTokenizer, where it starts +// creating the necessary tree actions based on the input. +// 3. HtmlTokenizer sends these tree actions to the Tokenizer as soon as it creates them. The Tokenizer +// then executes the received actions. 
+// +// _____________ _______________ +// | | ToHtmlTokenizerMsg | | +// | |------------------------>| | +// | | | | +// | | ToTokenizerMsg | HtmlTokenizer | +// | |<------------------------| | +// | Tokenizer | | | +// | | ToTokenizerMsg | ________ | +// | |<------------------------|---| | | +// | | | | Sink | | +// | | ToSinkMsg | | | | +// | |-------------------------|-->|________| | +// |_____________| |_______________| +// #[derive(HeapSizeOf, JSTraceable)] #[must_root] pub struct Tokenizer { - #[ignore_heap_size_of = "Defined in html5ever"] - inner: HtmlTokenizer>, + document: JS, + #[ignore_heap_size_of = "Defined in std"] + receiver: Receiver, + #[ignore_heap_size_of = "Defined in std"] + html_tokenizer_sender: Sender, + #[ignore_heap_size_of = "Defined in std"] + sink_sender: Sender, + nodes: HashMap>, + url: ServoUrl, } impl Tokenizer { @@ -47,171 +188,125 @@ impl Tokenizer { url: ServoUrl, fragment_context: Option) -> Self { - let mut sink = Sink::new(url, document); + // Messages from the Tokenizer (main thread) to HtmlTokenizer (parser thread) + let (to_html_tokenizer_sender, html_tokenizer_receiver) = channel(); + // Messages from the Tokenizer (main thread) to Sink (parser thread) + let (to_sink_sender, sink_receiver) = channel(); + // Messages from HtmlTokenizer and Sink (parser thread) to Tokenizer (main thread) + let (to_tokenizer_sender, tokenizer_receiver) = channel(); - let options = TreeBuilderOpts { - ignore_missing_rules: true, - .. 
Default::default() + let mut tokenizer = Tokenizer { + document: JS::from_ref(document), + receiver: tokenizer_receiver, + html_tokenizer_sender: to_html_tokenizer_sender, + sink_sender: to_sink_sender, + nodes: HashMap::new(), + url: url }; + tokenizer.insert_node(0, JS::from_ref(document.upcast())); - let inner = if let Some(fc) = fragment_context { - let ctxt_parse_node = sink.new_parse_node(); - sink.nodes.insert(ctxt_parse_node.id, JS::from_ref(fc.context_elem)); + let mut sink = Sink::new(to_tokenizer_sender.clone(), sink_receiver); + let mut ctxt_parse_node = None; + let mut form_parse_node = None; + let mut fragment_context_is_some = false; + if let Some(fc) = fragment_context { + let node = sink.new_parse_node(); + tokenizer.insert_node(node.id, JS::from_ref(fc.context_elem)); + ctxt_parse_node = Some(node); - let form_parse_node = fc.form_elem.map(|form_elem| { + form_parse_node = fc.form_elem.map(|form_elem| { let node = sink.new_parse_node(); - sink.nodes.insert(node.id, JS::from_ref(form_elem)); + tokenizer.insert_node(node.id, JS::from_ref(form_elem)); node }); - let tb = TreeBuilder::new_for_fragment( - sink, - ctxt_parse_node, - form_parse_node, - options); - - let tok_options = TokenizerOpts { - initial_state: Some(tb.tokenizer_state_for_context_elem()), - .. Default::default() - }; - - HtmlTokenizer::new(tb, tok_options) - } else { - HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default()) + fragment_context_is_some = true; }; - Tokenizer { - inner: inner, - } + // Create new thread for HtmlTokenizer. This is where parser actions + // will be generated from the input provided. These parser actions are then passed + // onto the main thread to be executed. 
+ thread::Builder::new().name(String::from("HTML Parser")).spawn(move || { + run(sink, + fragment_context_is_some, + ctxt_parse_node, + form_parse_node, + to_tokenizer_sender, + html_tokenizer_receiver); + }).expect("HTML Parser thread spawning failed"); + + tokenizer } pub fn feed(&mut self, input: &mut BufferQueue) -> Result<(), Root> { - match self.inner.feed(input) { - TokenizerResult::Done => Ok(()), - TokenizerResult::Script(script) => { - let nodes = &self.inner.sink.sink.nodes; - let script = nodes.get(&script.id).unwrap(); - Err(Root::from_ref(script.downcast().unwrap())) - }, + let mut send_tendrils = VecDeque::new(); + while let Some(str) = input.pop_front() { + send_tendrils.push_back(SendTendril::from(str)); + } + + // Send message to parser thread, asking it to start reading from the input. + // Parser operation messages will be sent to main thread as they are evaluated. + self.html_tokenizer_sender.send(ToHtmlTokenizerMsg::Feed { input: send_tendrils }).unwrap(); + + loop { + match self.receiver.recv().expect("Unexpected channel panic in main thread.") { + ToTokenizerMsg::ProcessOperation(parse_op) => self.process_operation(parse_op), + ToTokenizerMsg::IsSameTree(ref x_id, ref y_id) => { + let x = self.get_node(x_id); + let y = self.get_node(y_id); + + let x = x.downcast::().expect("Element node expected"); + let y = y.downcast::().expect("Element node expected"); + self.sink_sender.send(ToSinkMsg::IsSameTree(x.is_in_same_home_subtree(y))).unwrap(); + }, + ToTokenizerMsg::HasParentNode(ref id) => { + let res = self.get_node(id).GetParentNode().is_some(); + self.sink_sender.send(ToSinkMsg::HasParentNode(res)).unwrap(); + }, + ToTokenizerMsg::TokenizerResultDone { updated_input } => { + let buffer_queue = create_buffer_queue(updated_input); + *input = buffer_queue; + return Ok(()); + }, + ToTokenizerMsg::TokenizerResultScript { script, updated_input } => { + let buffer_queue = create_buffer_queue(updated_input); + *input = buffer_queue; + let script = 
self.get_node(&script.id); + return Err(Root::from_ref(script.downcast().unwrap())); + } + ToTokenizerMsg::End => unreachable!(), + }; } } pub fn end(&mut self) { - self.inner.end(); + self.html_tokenizer_sender.send(ToHtmlTokenizerMsg::End).unwrap(); + loop { + match self.receiver.recv().expect("Unexpected channel panic in main thread.") { + ToTokenizerMsg::ProcessOperation(parse_op) => self.process_operation(parse_op), + ToTokenizerMsg::IsSameTree(ref x_id, ref y_id) => { + let x = self.get_node(x_id); + let y = self.get_node(y_id); + + let x = x.downcast::().expect("Element node expected"); + let y = y.downcast::().expect("Element node expected"); + self.sink_sender.send(ToSinkMsg::IsSameTree(x.is_in_same_home_subtree(y))).unwrap(); + }, + ToTokenizerMsg::HasParentNode(ref id) => { + let res = self.get_node(id).GetParentNode().is_some(); + self.sink_sender.send(ToSinkMsg::HasParentNode(res)).unwrap(); + }, + ToTokenizerMsg::End => return, + _ => unreachable!(), + }; + } } pub fn url(&self) -> &ServoUrl { - &self.inner.sink.sink.base_url + &self.url } pub fn set_plaintext_state(&mut self) { - self.inner.set_plaintext_state(); - } -} - -#[allow(unsafe_code)] -unsafe impl JSTraceable for HtmlTokenizer> { - unsafe fn trace(&self, trc: *mut JSTracer) { - struct Tracer(*mut JSTracer); - let tracer = Tracer(trc); - - impl HtmlTracer for Tracer { - type Handle = ParseNode; - #[allow(unrooted_must_root)] - fn trace_handle(&self, node: &ParseNode) { - unsafe { node.trace(self.0); } - } - } - - let tree_builder = &self.sink; - tree_builder.trace_handles(&tracer); - tree_builder.sink.trace(trc); - } -} - -type ParseNodeId = usize; - -#[derive(JSTraceable, Clone, HeapSizeOf)] -pub struct ParseNode { - id: ParseNodeId, - qual_name: Option, -} - -#[derive(JSTraceable, HeapSizeOf)] -struct ParseNodeData { - contents: Option, - is_integration_point: bool, -} - -impl Default for ParseNodeData { - fn default() -> ParseNodeData { - ParseNodeData { - contents: None, - 
is_integration_point: false, - } - } -} - -enum ParseOperation { - GetTemplateContents(ParseNodeId, ParseNodeId), - CreateElement(ParseNodeId, QualName, Vec), - CreateComment(StrTendril, ParseNodeId), - // sibling, node to be inserted - AppendBeforeSibling(ParseNodeId, NodeOrText), - // parent, node to be inserted - Append(ParseNodeId, NodeOrText), - AppendDoctypeToDocument(StrTendril, StrTendril, StrTendril), - AddAttrsIfMissing(ParseNodeId, Vec), - RemoveFromParent(ParseNodeId), - MarkScriptAlreadyStarted(ParseNodeId), - ReparentChildren(ParseNodeId, ParseNodeId), - AssociateWithForm(ParseNodeId, ParseNodeId), - CreatePI(ParseNodeId, StrTendril, StrTendril), - Pop(ParseNodeId), -} - -#[derive(JSTraceable, HeapSizeOf)] -#[must_root] -pub struct Sink { - base_url: ServoUrl, - document: JS, - current_line: u64, - script: MutNullableJS, - parse_node_data: HashMap, - next_parse_node_id: Cell, - nodes: HashMap>, - document_node: ParseNode, -} - -impl Sink { - fn new(base_url: ServoUrl, document: &Document) -> Sink { - let mut sink = Sink { - base_url: base_url, - document: JS::from_ref(document), - current_line: 1, - script: Default::default(), - parse_node_data: HashMap::new(), - next_parse_node_id: Cell::new(1), - nodes: HashMap::new(), - document_node: ParseNode { - id: 0, - qual_name: None, - } - }; - let data = ParseNodeData::default(); - sink.insert_parse_node_data(0, data); - sink.insert_node(0, JS::from_ref(document.upcast())); - sink - } - - fn new_parse_node(&mut self) -> ParseNode { - let id = self.next_parse_node_id.get(); - let data = ParseNodeData::default(); - self.insert_parse_node_data(id, data); - self.next_parse_node_id.set(id + 1); - ParseNode { - id: id, - qual_name: None, - } + self.html_tokenizer_sender.send(ToHtmlTokenizerMsg::SetPlainTextState).unwrap(); } fn insert_node(&mut self, id: ParseNodeId, node: JS) { @@ -222,29 +317,17 @@ impl Sink { self.nodes.get(id).expect("Node not found!") } - fn insert_parse_node_data(&mut self, id: ParseNodeId, 
data: ParseNodeData) { - assert!(self.parse_node_data.insert(id, data).is_none()); - } - - fn get_parse_node_data<'a>(&'a self, id: &'a ParseNodeId) -> &'a ParseNodeData { - self.parse_node_data.get(id).expect("Parse Node data not found!") - } - - fn get_parse_node_data_mut<'a>(&'a mut self, id: &'a ParseNodeId) -> &'a mut ParseNodeData { - self.parse_node_data.get_mut(id).expect("Parse Node data not found!") - } - fn process_operation(&mut self, op: ParseOperation) { let document = Root::from_ref(&**self.get_node(&0)); let document = document.downcast::().expect("Document node should be downcasted!"); match op { - ParseOperation::GetTemplateContents(target, contents) => { + ParseOperation::GetTemplateContents { target, contents } => { let target = Root::from_ref(&**self.get_node(&target)); let template = target.downcast::().expect( "Tried to extract contents from non-template element while parsing"); self.insert_node(contents, JS::from_ref(template.Content().upcast())); } - ParseOperation::CreateElement(id, name, attrs) => { + ParseOperation::CreateElement { node, name, attrs, current_line } => { let is = attrs.iter() .find(|attr| attr.name.local.eq_str_ignore_ascii_case("is")) .map(|attr| LocalName::from(&*attr.value)); @@ -252,68 +335,72 @@ impl Sink { let elem = Element::create(name, is, &*self.document, - ElementCreator::ParserCreated(self.current_line), + ElementCreator::ParserCreated(current_line), CustomElementCreationMode::Synchronous); for attr in attrs { - elem.set_attribute_from_parser(attr.name, DOMString::from(String::from(attr.value)), None); + elem.set_attribute_from_parser(attr.name, DOMString::from(attr.value), None); } - self.insert_node(id, JS::from_ref(elem.upcast())); + self.insert_node(node, JS::from_ref(elem.upcast())); } - ParseOperation::CreateComment(text, id) => { - let comment = Comment::new(DOMString::from(String::from(text)), document); - self.insert_node(id, JS::from_ref(&comment.upcast())); + ParseOperation::CreateComment { text, 
node } => { + let comment = Comment::new(DOMString::from(text), document); + self.insert_node(node, JS::from_ref(&comment.upcast())); } - ParseOperation::AppendBeforeSibling(sibling, node) => { + ParseOperation::AppendBeforeSibling { sibling, node } => { let node = match node { - NodeOrText::AppendNode(n) => NodeOrText::AppendNode(JS::from_ref(&**self.get_node(&n.id))), - NodeOrText::AppendText(text) => NodeOrText::AppendText(text) + NodeOrText::Node(n) => HtmlNodeOrText::AppendNode(JS::from_ref(&**self.get_node(&n.id))), + NodeOrText::Text(text) => HtmlNodeOrText::AppendText( + Tendril::from(text) + ) }; let sibling = &**self.get_node(&sibling); let parent = &*sibling.GetParentNode().expect("append_before_sibling called on node without parent"); super::insert(parent, Some(sibling), node); } - ParseOperation::Append(parent, node) => { + ParseOperation::Append { parent, node } => { let node = match node { - NodeOrText::AppendNode(n) => NodeOrText::AppendNode(JS::from_ref(&**self.get_node(&n.id))), - NodeOrText::AppendText(text) => NodeOrText::AppendText(text) + NodeOrText::Node(n) => HtmlNodeOrText::AppendNode(JS::from_ref(&**self.get_node(&n.id))), + NodeOrText::Text(text) => HtmlNodeOrText::AppendText( + Tendril::from(text) + ) }; let parent = &**self.get_node(&parent); super::insert(parent, None, node); } - ParseOperation::AppendDoctypeToDocument(name, public_id, system_id) => { + ParseOperation::AppendDoctypeToDocument { name, public_id, system_id } => { let doctype = DocumentType::new( - DOMString::from(String::from(name)), Some(DOMString::from(String::from(public_id))), - Some(DOMString::from(String::from(system_id))), document); + DOMString::from(String::from(name)), Some(DOMString::from(public_id)), + Some(DOMString::from(system_id)), document); document.upcast::().AppendChild(doctype.upcast()).expect("Appending failed"); } - ParseOperation::AddAttrsIfMissing(target_id, attrs) => { - let elem = self.get_node(&target_id).downcast::() + 
ParseOperation::AddAttrsIfMissing { target, attrs } => { + let elem = self.get_node(&target).downcast::() .expect("tried to set attrs on non-Element in HTML parsing"); for attr in attrs { - elem.set_attribute_from_parser(attr.name, DOMString::from(String::from(attr.value)), None); + elem.set_attribute_from_parser(attr.name, DOMString::from(attr.value), None); } } - ParseOperation::RemoveFromParent(target) => { + ParseOperation::RemoveFromParent { target } => { if let Some(ref parent) = self.get_node(&target).GetParentNode() { parent.RemoveChild(&**self.get_node(&target)).unwrap(); } } - ParseOperation::MarkScriptAlreadyStarted(node) => { + ParseOperation::MarkScriptAlreadyStarted { node } => { let script = self.get_node(&node).downcast::(); script.map(|script| script.set_already_started(true)); } - ParseOperation::ReparentChildren(parent, new_parent) => { + ParseOperation::ReparentChildren { parent, new_parent } => { let parent = self.get_node(&parent); let new_parent = self.get_node(&new_parent); while let Some(child) = parent.GetFirstChild() { new_parent.AppendChild(&child).unwrap(); } } - ParseOperation::AssociateWithForm(target, form) => { + ParseOperation::AssociateWithForm { target, form } => { let form = self.get_node(&form); let form = Root::downcast::(Root::from_ref(&**form)) .expect("Owner must be a form element"); @@ -329,20 +416,141 @@ impl Sink { assert!(node.NodeName() == "KEYGEN", "Unknown form-associatable element"); } } - ParseOperation::Pop(node) => { + ParseOperation::Pop { node } => { vtable_for(self.get_node(&node)).pop(); } - ParseOperation::CreatePI(node, target, data) => { + ParseOperation::CreatePI { node, target, data } => { let pi = ProcessingInstruction::new( - DOMString::from(String::from(target)), - DOMString::from(String::from(data)), - document); + DOMString::from(target), + DOMString::from(data), + document); self.insert_node(node, JS::from_ref(pi.upcast())); } + ParseOperation::SetQuirksMode { mode } => { + 
document.set_quirks_mode(mode); + } } } } +fn run(sink: Sink, + fragment_context_is_some: bool, + ctxt_parse_node: Option, + form_parse_node: Option, + sender: Sender, + receiver: Receiver) { + let options = TreeBuilderOpts { + ignore_missing_rules: true, + .. Default::default() + }; + + let mut html_tokenizer = if fragment_context_is_some { + let tb = TreeBuilder::new_for_fragment( + sink, + ctxt_parse_node.unwrap(), + form_parse_node, + options); + + let tok_options = TokenizerOpts { + initial_state: Some(tb.tokenizer_state_for_context_elem()), + .. Default::default() + }; + + HtmlTokenizer::new(tb, tok_options) + } else { + HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default()) + }; + + loop { + match receiver.recv().expect("Unexpected channel panic in html parser thread") { + ToHtmlTokenizerMsg::Feed { input } => { + let mut input = create_buffer_queue(input); + let res = html_tokenizer.feed(&mut input); + + // Gather changes to 'input' and place them in 'updated_input', + // which will be sent to the main thread to update feed method's 'input' + let mut updated_input = VecDeque::new(); + while let Some(st) = input.pop_front() { + updated_input.push_back(SendTendril::from(st)); + } + + let res = match res { + TokenizerResult::Done => ToTokenizerMsg::TokenizerResultDone { updated_input }, + TokenizerResult::Script(script) => ToTokenizerMsg::TokenizerResultScript { script, updated_input } + }; + sender.send(res).unwrap(); + }, + ToHtmlTokenizerMsg::End => { + html_tokenizer.end(); + sender.send(ToTokenizerMsg::End).unwrap(); + break; + }, + ToHtmlTokenizerMsg::SetPlainTextState => html_tokenizer.set_plaintext_state() + }; + } +} + +#[derive(JSTraceable, HeapSizeOf, Default)] +struct ParseNodeData { + contents: Option, + is_integration_point: bool, +} + +pub struct Sink { + current_line: u64, + parse_node_data: HashMap, + next_parse_node_id: Cell, + document_node: ParseNode, + sender: Sender, + receiver: Receiver, +} + +impl Sink { + fn 
new(sender: Sender, receiver: Receiver) -> Sink { + let mut sink = Sink { + current_line: 1, + parse_node_data: HashMap::new(), + next_parse_node_id: Cell::new(1), + document_node: ParseNode { + id: 0, + qual_name: None, + }, + sender: sender, + receiver: receiver, + }; + let data = ParseNodeData::default(); + sink.insert_parse_node_data(0, data); + sink + } + + fn new_parse_node(&mut self) -> ParseNode { + let id = self.next_parse_node_id.get(); + let data = ParseNodeData::default(); + self.insert_parse_node_data(id, data); + self.next_parse_node_id.set(id + 1); + ParseNode { + id: id, + qual_name: None, + } + } + + fn send_op(&self, op: ParseOperation) { + self.sender.send(ToTokenizerMsg::ProcessOperation(op)).unwrap(); + } + + fn insert_parse_node_data(&mut self, id: ParseNodeId, data: ParseNodeData) { + assert!(self.parse_node_data.insert(id, data).is_none()); + } + + fn get_parse_node_data<'a>(&'a self, id: &'a ParseNodeId) -> &'a ParseNodeData { + self.parse_node_data.get(id).expect("Parse Node data not found!") + } + + fn get_parse_node_data_mut<'a>(&'a mut self, id: &'a ParseNodeId) -> &'a mut ParseNodeData { + self.parse_node_data.get_mut(id).expect("Parse Node data not found!") + } +} + #[allow(unrooted_must_root)] impl TreeSink for Sink { type Output = Self; @@ -363,7 +571,7 @@ impl TreeSink for Sink { let mut data = self.get_parse_node_data_mut(&target.id); data.contents = Some(node.clone()); } - self.process_operation(ParseOperation::GetTemplateContents(target.id, node.id)); + self.send_op(ParseOperation::GetTemplateContents { target: target.id, contents: node.id }); node } @@ -376,21 +584,20 @@ impl TreeSink for Sink { } fn same_tree(&self, x: &Self::Handle, y: &Self::Handle) -> bool { - let x = self.get_node(&x.id); - let y = self.get_node(&y.id); - - let x = x.downcast::().expect("Element node expected"); - let y = y.downcast::().expect("Element node expected"); - x.is_in_same_home_subtree(y) + self.sender.send(ToTokenizerMsg::IsSameTree(x.id, 
y.id)).unwrap(); + match self.receiver.recv().expect("Unexpected channel panic in html parser thread.") { + ToSinkMsg::IsSameTree(result) => result, + _ => unreachable!(), + } } - fn create_element(&mut self, name: QualName, attrs: Vec, _flags: ElementFlags) + fn create_element(&mut self, name: QualName, html_attrs: Vec, _flags: ElementFlags) -> Self::Handle { let mut node = self.new_parse_node(); node.qual_name = Some(name.clone()); { let mut node_data = self.get_parse_node_data_mut(&node.id); - node_data.is_integration_point = attrs.iter() + node_data.is_integration_point = html_attrs.iter() .any(|attr| { let attr_value = &String::from(attr.value.clone()); (attr.name.local == local_name!("encoding") && attr.name.ns == ns!()) && @@ -398,34 +605,57 @@ impl TreeSink for Sink { attr_value.eq_ignore_ascii_case("application/xhtml+xml")) }); } - self.process_operation(ParseOperation::CreateElement(node.id, name, attrs)); + let attrs = html_attrs.into_iter() + .map(|attr| Attribute { name: attr.name, value: String::from(attr.value) }).collect(); + + self.send_op(ParseOperation::CreateElement { + node: node.id, + name, + attrs, + current_line: self.current_line + }); node } fn create_comment(&mut self, text: StrTendril) -> Self::Handle { let node = self.new_parse_node(); - self.process_operation(ParseOperation::CreateComment(text, node.id)); + self.send_op(ParseOperation::CreateComment { text: String::from(text), node: node.id }); node } fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> ParseNode { let node = self.new_parse_node(); - self.process_operation(ParseOperation::CreatePI(node.id, target, data)); + self.send_op(ParseOperation::CreatePI { + node: node.id, + target: String::from(target), + data: String::from(data) + }); node } fn has_parent_node(&self, node: &Self::Handle) -> bool { - self.get_node(&node.id).GetParentNode().is_some() + self.sender.send(ToTokenizerMsg::HasParentNode(node.id)).unwrap(); + match self.receiver.recv().expect("Unexpected 
channel panic in html parser thread.") { + ToSinkMsg::HasParentNode(result) => result, + _ => unreachable!(), + } } fn associate_with_form(&mut self, target: &Self::Handle, form: &Self::Handle) { - self.process_operation(ParseOperation::AssociateWithForm(target.id, form.id)); + self.send_op(ParseOperation::AssociateWithForm { + target: target.id, + form: form.id + }); } fn append_before_sibling(&mut self, sibling: &Self::Handle, - new_node: NodeOrText) { - self.process_operation(ParseOperation::AppendBeforeSibling(sibling.id, new_node)); + new_node: HtmlNodeOrText) { + let new_node = match new_node { + HtmlNodeOrText::AppendNode(node) => NodeOrText::Node(node), + HtmlNodeOrText::AppendText(text) => NodeOrText::Text(String::from(text)) + }; + self.send_op(ParseOperation::AppendBeforeSibling { sibling: sibling.id, node: new_node }); } fn parse_error(&mut self, msg: Cow<'static, str>) { @@ -438,28 +668,38 @@ impl TreeSink for Sink { QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks, QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks, }; - self.document.set_quirks_mode(mode); + self.send_op(ParseOperation::SetQuirksMode { mode }); } - fn append(&mut self, parent: &Self::Handle, child: NodeOrText) { - self.process_operation(ParseOperation::Append(parent.id, child)); + fn append(&mut self, parent: &Self::Handle, child: HtmlNodeOrText) { + let child = match child { + HtmlNodeOrText::AppendNode(node) => NodeOrText::Node(node), + HtmlNodeOrText::AppendText(text) => NodeOrText::Text(String::from(text)) + }; + self.send_op(ParseOperation::Append { parent: parent.id, node: child }); } fn append_doctype_to_document(&mut self, name: StrTendril, public_id: StrTendril, system_id: StrTendril) { - self.process_operation(ParseOperation::AppendDoctypeToDocument(name, public_id, system_id)); + self.send_op(ParseOperation::AppendDoctypeToDocument { + name: String::from(name), + public_id: String::from(public_id), + system_id: String::from(system_id) + }); } - fn 
add_attrs_if_missing(&mut self, target: &Self::Handle, attrs: Vec) { - self.process_operation(ParseOperation::AddAttrsIfMissing(target.id, attrs)); + fn add_attrs_if_missing(&mut self, target: &Self::Handle, html_attrs: Vec) { + let attrs = html_attrs.into_iter() + .map(|attr| Attribute { name: attr.name, value: String::from(attr.value) }).collect(); + self.send_op(ParseOperation::AddAttrsIfMissing { target: target.id, attrs }); } fn remove_from_parent(&mut self, target: &Self::Handle) { - self.process_operation(ParseOperation::RemoveFromParent(target.id)); + self.send_op(ParseOperation::RemoveFromParent { target: target.id }); } fn mark_script_already_started(&mut self, node: &Self::Handle) { - self.process_operation(ParseOperation::MarkScriptAlreadyStarted(node.id)); + self.send_op(ParseOperation::MarkScriptAlreadyStarted { node: node.id }); } fn complete_script(&mut self, _: &Self::Handle) -> NextParserState { @@ -467,7 +707,7 @@ impl TreeSink for Sink { } fn reparent_children(&mut self, parent: &Self::Handle, new_parent: &Self::Handle) { - self.process_operation(ParseOperation::ReparentChildren(parent.id, new_parent.id)); + self.send_op(ParseOperation::ReparentChildren { parent: parent.id, new_parent: new_parent.id }); } /// https://html.spec.whatwg.org/multipage/#html-integration-point @@ -482,6 +722,6 @@ impl TreeSink for Sink { } fn pop(&mut self, node: &Self::Handle) { - self.process_operation(ParseOperation::Pop(node.id)); + self.send_op(ParseOperation::Pop { node: node.id }); } } diff --git a/servo/etc/ci/buildbot_steps.yml b/servo/etc/ci/buildbot_steps.yml index 89fa5303940a..f6b45f11c169 100644 --- a/servo/etc/ci/buildbot_steps.yml +++ b/servo/etc/ci/buildbot_steps.yml @@ -27,6 +27,8 @@ mac-rel-wpt4: - ./mach build --release - ./mach test-wpt --release --processes 4 --total-chunks 4 --this-chunk 4 --log-raw test-wpt.log --log-errorsummary wpt-errorsummary.log --always-succeed - ./mach filter-intermittents wpt-errorsummary.log --log-intermittents 
intermittents.log --log-filteredsummary filtered-wpt-errorsummary.log --use-tracker + - ./mach test-wpt --release --pref dom.servoparser.async_html_tokenizer.enabled --processes=8 --log-raw test-async-parsing.log --log-errorsummary async-parsing-errorsummary.log --always-succeed domparsing html/syntax html/dom/documents html/dom/dynamic-markup-insertion + - ./mach filter-intermittents async-parsing-errorsummary.log --log-intermittents async-parsing-intermittents.log --log-filteredsummary filtered-async-parsing-errorsummary.log --use-tracker mac-dev-unit: - ./mach clean-nightlies --keep 3 --force