diff --git a/servo/src/servo/parser/html.rs b/servo/src/servo/parser/html.rs
index 89e53c1adecc..7e472081de2f 100644
--- a/servo/src/servo/parser/html.rs
+++ b/servo/src/servo/parser/html.rs
@@ -1,14 +1,22 @@
import comm::{port, chan};
+enum parse_state { // Tokenizer mode; the token dispatcher branches on it.
+ ps_normal, // Outside an opening tag; handled by parse_in_normal_state.
+ ps_tag // After a start tag's name, until `>`; handled by parse_in_tag_state.
+}
+
type parser = {
mut lookahead: option,
+ mut state: parse_state, // Current tokenizer mode; the constructor starts it at ps_normal.
reader: io::reader
};
enum token {
- to_start_tag(str),
+ to_start_opening_tag(str), // Start of an opening tag; payload is the tag name.
+ to_end_opening_tag, // The `>` that closes an opening tag.
to_end_tag(str),
to_text(str),
+ to_attr(str, str), // Attribute inside an opening tag: (name, value).
to_doctype,
to_eof
}
@@ -18,6 +26,18 @@ enum char_or_eof {
coe_eof
}
+impl u8_methods for u8 {
+    fn is_alpha() -> bool { // True iff this byte is an ASCII letter.
+        let upper = self >= ('A' as u8) && self <= ('Z' as u8);
+        ret upper || (self >= ('a' as u8) && self <= ('z' as u8));
+    }
+}
+
+impl u8_vec_methods for [u8] { // Conversions from a byte vector.
+ fn to_str() -> str { ret str::from_bytes(self); } // Builds a str from these bytes.
+ fn to_str_token() -> token { ret to_text(self.to_str()); } // Wraps the bytes, as a str, in a to_text token.
+}
+
impl methods for parser {
fn get() -> char_or_eof {
alt self.lookahead {
@@ -62,8 +82,7 @@ impl methods for parser {
loop {
alt self.get() {
coe_char(c) {
- if (c >= ('A' as u8) && c <= ('Z' as u8)) ||
- (c >= ('a' as u8) && c <= ('z' as u8)) {
+ if (c.is_alpha()) {
result += [c];
} else if result.len() == 0u {
self.parse_err("expected ident");
@@ -112,6 +131,14 @@ impl methods for parser {
coe_eof { ret to_eof; }
}
+ ret alt self.state {
+ ps_normal { self.parse_in_normal_state(ch) }
+ ps_tag { self.parse_in_tag_state(ch) }
+ }
+ }
+
+ fn parse_in_normal_state(c: u8) -> token {
+ let mut ch = c;
if ch == ('<' as u8) {
alt self.get() {
coe_char(c) { ch = c; }
@@ -139,8 +166,9 @@ impl methods for parser {
self.eat_whitespace();
let ident = self.parse_ident();
self.eat_whitespace();
- self.expect('>' as u8);
- ret to_start_tag(ident);
+
+ self.state = ps_tag;
+ ret to_start_opening_tag(ident);
}
// Make a text node.
@@ -150,18 +178,66 @@ impl methods for parser {
coe_char(c) {
if c == ('<' as u8) {
self.unget(c);
- ret to_text(str::from_bytes(s));
+ ret s.to_str_token();
}
s += [c];
}
- coe_eof { ret to_text(str::from_bytes(s)); }
+ coe_eof { ret s.to_str_token(); }
}
}
}
+
+    // Tokenizes one item of an opening tag from its first byte `c`: `>` ends
+    // the tag, an ASCII letter starts an attribute; any other byte fails.
+    fn parse_in_tag_state(c: u8) -> token {
+        if c == ('>' as u8) {
+            self.state = ps_normal;
+            ret to_end_opening_tag;
+        }
+        if !c.is_alpha() { fail "expected alphabetical in tag"; }
+        let mut attribute_name = [c]; // Name: every byte up to `=`.
+        loop {
+            alt self.get() {
+                coe_char(b) {
+                    if b == ('=' as u8) { break; }
+                    if b == ('>' as u8) {
+                        // Value-less attribute: push `>` back for the next
+                        // call; the name doubles as the value, as at EOF.
+                        self.unget(b);
+                        ret to_attr(attribute_name.to_str(),
+                                    attribute_name.to_str());
+                    }
+                    attribute_name += [b];
+                }
+                coe_eof {
+                    ret to_attr(attribute_name.to_str(),
+                                attribute_name.to_str());
+                }
+            }
+        }
+
+        // Attribute value: double-quoted; EOF ends it early.
+        self.expect('"' as u8);
+        let mut attribute_value = [];
+        loop {
+            alt self.get() {
+                coe_char(b) {
+                    if b == ('"' as u8) { break; }
+                    attribute_value += [b];
+                }
+                coe_eof {
+                    ret to_attr(attribute_name.to_str(),
+                                attribute_value.to_str());
+                }
+            }
+        }
+        self.eat_whitespace(); // Leave the reader at `>` or the next name.
+        ret to_attr(attribute_name.to_str(), attribute_value.to_str());
+    }
}
fn parser(reader: io::reader) -> parser {
- ret { mut lookahead: none, reader: reader };
+ ret { mut lookahead: none, mut state: ps_normal, reader: reader }; // Empty lookahead, ps_normal mode.
}
fn spawn_parser_task(filename: str) -> port {
diff --git a/servo/src/servo/parser/html_builder.rs b/servo/src/servo/parser/html_builder.rs
index 6f764ef19137..67bb0fc1eba1 100644
--- a/servo/src/servo/parser/html_builder.rs
+++ b/servo/src/servo/parser/html_builder.rs
@@ -1,4 +1,4 @@
-// Constructs a DOM tree from an incoming token stream.
+#[doc="Constructs a DOM tree from an incoming token stream."];
import dom::rcu::writer_methods;
import dom::base::{methods, rd_tree_ops, wr_tree_ops};
@@ -7,6 +7,35 @@ import parser = parser::html;
import html::token;
import gfx::geom;
+fn link_up_attribute(scope: dom::node_scope, node: dom::node, key: str,
+ value: str) {
+ // Applies the attribute `key`=`value` to `node`: "width"/"height" on an
+ // nk_img node set its dimensions via geom::px_to_au; others are ignored.
+ // TODO: Implement atoms so that we don't always perform string comparisons.
+ // FIXME: This is wrong: DIV and IMG should not be separate types of nodes; they should inherit from Element.
+ scope.rd(node) { // NOTE(review): presumably a read-side accessor, yet dims is mutated below - confirm.
+ |node_contents|
+ alt node_contents.kind {
+ dom::nk_img(dims) if key == "width" {
+ alt int::from_str(value) { // Values that do not parse as an integer are ignored.
+ none { /* drop on the floor */ }
+ some(s) { dims.width = geom::px_to_au(s); }
+ }
+ }
+ dom::nk_img(dims) if key == "height" {
+ alt int::from_str(value) { // Values that do not parse as an integer are ignored.
+ none { /* drop on the floor */ }
+ some(s) { dims.height = geom::px_to_au(s); }
+ }
+ }
+ dom::nk_div | dom::nk_img(*) { /* drop on the floor */ } // Any other key on a div or img node.
+ dom::nk_text(*) {
+ fail "attempt to link up an attribute to a text node"
+ }
+ }
+ }
+}
+
fn build_dom(scope: dom::node_scope,
stream: port) -> dom::node {
// The current reference node.
@@ -16,14 +45,14 @@ fn build_dom(scope: dom::node_scope,
#debug["token=%?", token];
alt token {
parser::to_eof { break; }
- parser::to_start_tag("div") {
+ parser::to_start_opening_tag("div") {
#debug["DIV"];
let new_node = scope.new_node(
dom::nk_div);
scope.add_child(cur, new_node);
cur = new_node;
}
- parser::to_start_tag("img") {
+ parser::to_start_opening_tag("img") {
#debug["IMG"];
let new_node = scope.new_node(
dom::nk_img({mut width: geom::px_to_au(100),
@@ -31,8 +60,15 @@ fn build_dom(scope: dom::node_scope,
scope.add_child(cur, new_node);
cur = new_node;
}
- parser::to_start_tag(t) {
- fail ("Unrecognized tag: " + t);
+ parser::to_start_opening_tag(t) {
+ fail ("Unrecognized tag: " + t);
+ }
+ parser::to_attr(key, value) {
+ #debug["attr: %? = %?", key, value];
+ link_up_attribute(scope, cur, key, value);
+ }
+ parser::to_end_opening_tag {
+ #debug("end opening tag");
}
parser::to_end_tag(_) {
// TODO: Assert that the closing tag has the right name.
@@ -54,3 +90,4 @@ fn build_dom(scope: dom::node_scope,
}
ret cur;
}
+