diff --git a/servo/src/servo/parser/html.rs b/servo/src/servo/parser/html.rs index 89e53c1adecc..7e472081de2f 100644 --- a/servo/src/servo/parser/html.rs +++ b/servo/src/servo/parser/html.rs @@ -1,14 +1,22 @@ import comm::{port, chan}; +enum parse_state { + ps_normal, + ps_tag +} + type parser = { mut lookahead: option, + mut state: parse_state, reader: io::reader }; enum token { - to_start_tag(str), + to_start_opening_tag(str), + to_end_opening_tag, to_end_tag(str), to_text(str), + to_attr(str, str), to_doctype, to_eof } @@ -18,6 +26,18 @@ enum char_or_eof { coe_eof } +impl u8_methods for u8 { + fn is_alpha() -> bool { + ret (self >= ('A' as u8) && self <= ('Z' as u8)) || + (self >= ('a' as u8) && self <= ('z' as u8)); + } +} + +impl u8_vec_methods for [u8] { + fn to_str() -> str { ret str::from_bytes(self); } + fn to_str_token() -> token { ret to_text(self.to_str()); } +} + impl methods for parser { fn get() -> char_or_eof { alt self.lookahead { @@ -62,8 +82,7 @@ impl methods for parser { loop { alt self.get() { coe_char(c) { - if (c >= ('A' as u8) && c <= ('Z' as u8)) || - (c >= ('a' as u8) && c <= ('z' as u8)) { + if (c.is_alpha()) { result += [c]; } else if result.len() == 0u { self.parse_err("expected ident"); @@ -112,6 +131,14 @@ impl methods for parser { coe_eof { ret to_eof; } } + ret alt self.state { + ps_normal { self.parse_in_normal_state(ch) } + ps_tag { self.parse_in_tag_state(ch) } + } + } + + fn parse_in_normal_state(c: u8) -> token { + let mut ch = c; if ch == ('<' as u8) { alt self.get() { coe_char(c) { ch = c; } @@ -139,8 +166,9 @@ impl methods for parser { self.eat_whitespace(); let ident = self.parse_ident(); self.eat_whitespace(); - self.expect('>' as u8); - ret to_start_tag(ident); + + self.state = ps_tag; + ret to_start_opening_tag(ident); } // Make a text node. @@ -150,18 +178,66 @@ impl methods for parser { coe_char(c) { if c == ('<' as u8) { self.unget(c); - ret to_text(str::from_bytes(s)); + ret s.to_str_token(); } s += [c]; } - coe_eof { ret to_text(str::from_bytes(s)); } + coe_eof { ret s.to_str_token(); } } } } + + fn parse_in_tag_state(c: u8) -> token { + let mut ch = c; + + if ch == ('>' as u8) { + self.state = ps_normal; + ret to_end_opening_tag; + } + + if !ch.is_alpha() { + fail "expected alphabetical in tag"; + } + + // Parse an attribute. + let mut attribute_name = [ch]; + loop { + alt self.get() { + coe_char(c) { + if c == ('=' as u8) { break; } + attribute_name += [c]; + } + coe_eof { + ret to_attr(attribute_name.to_str(), + attribute_name.to_str()); } + } + } + + // Parse the attribute value. + self.expect('"' as u8); + let mut attribute_value = []; + loop { + alt self.get() { + coe_char(c) { + if c == ('"' as u8) { break; } + attribute_value += [c]; + } + coe_eof { + ret to_attr(attribute_name.to_str(), + attribute_value.to_str()); + } + } + } + + // Eat whitespace. + self.eat_whitespace(); + + ret to_attr(attribute_name.to_str(), attribute_value.to_str()); + } } fn parser(reader: io::reader) -> parser { - ret { mut lookahead: none, reader: reader }; + ret { mut lookahead: none, mut state: ps_normal, reader: reader }; } fn spawn_parser_task(filename: str) -> port { diff --git a/servo/src/servo/parser/html_builder.rs b/servo/src/servo/parser/html_builder.rs index 6f764ef19137..67bb0fc1eba1 100644 --- a/servo/src/servo/parser/html_builder.rs +++ b/servo/src/servo/parser/html_builder.rs @@ -1,4 +1,4 @@ -// Constructs a DOM tree from an incoming token stream. +#[doc="Constructs a DOM tree from an incoming token stream."] import dom::rcu::writer_methods; import dom::base::{methods, rd_tree_ops, wr_tree_ops}; @@ -7,6 +7,35 @@ import parser = parser::html; import html::token; import gfx::geom; +fn link_up_attribute(scope: dom::node_scope, node: dom::node, key: str, + value: str) { + // TODO: Implement atoms so that we don't always perform string + // comparisons. + // FIXME: This is wrong... we should not have DIV and IMG be separate types + // of nodes and instead have them inherit from Element, obviously. + scope.rd(node) { + |node_contents| + alt node_contents.kind { + dom::nk_img(dims) if key == "width" { + alt int::from_str(value) { + none { /* drop on the floor */ } + some(s) { dims.width = geom::px_to_au(s); } + } + } + dom::nk_img(dims) if key == "height" { + alt int::from_str(value) { + none { /* drop on the floor */ } + some(s) { dims.height = geom::px_to_au(s); } + } + } + dom::nk_div | dom::nk_img(*) { /* drop on the floor */ } + dom::nk_text(*) { + fail "attempt to link up an attribute to a text node" + } + } + } +} + fn build_dom(scope: dom::node_scope, stream: port) -> dom::node { // The current reference node. @@ -16,14 +45,14 @@ fn build_dom(scope: dom::node_scope, #debug["token=%?", token]; alt token { parser::to_eof { break; } - parser::to_start_tag("div") { + parser::to_start_opening_tag("div") { #debug["DIV"]; let new_node = scope.new_node( dom::nk_div); scope.add_child(cur, new_node); cur = new_node; } - parser::to_start_tag("img") { + parser::to_start_opening_tag("img") { #debug["IMG"]; let new_node = scope.new_node( dom::nk_img({mut width: geom::px_to_au(100), @@ -31,8 +60,15 @@ fn build_dom(scope: dom::node_scope, scope.add_child(cur, new_node); cur = new_node; } - parser::to_start_tag(t) { - fail ("Unrecognized tag: " + t); + parser::to_start_opening_tag(t) { + fail ("Unrecognized tag: " + t); + } + parser::to_attr(key, value) { + #debug["attr: %? = %?", key, value]; + link_up_attribute(scope, cur, key, value); + } + parser::to_end_opening_tag { + #debug("end opening tag"); } parser::to_end_tag(_) { // TODO: Assert that the closing tag has the right name. @@ -54,3 +90,4 @@ fn build_dom(scope: dom::node_scope, } ret cur; } +