servo: Merge #9 - Added css datastructures, pretty-printing, and a lexer (from mmeyerho:cssmatching)

Source-Repo: https://github.com/servo/servo
Source-Revision: a2174ba81845a51e0d12aa187b1757e87cc5ab4b
This commit is contained in:
Margaret Meyerhofer 2012-05-29 17:32:07 -07:00
Родитель eaaaaa8010
Коммит 21b830a978
8 изменённых файлов: 639 добавлений и 279 удалений

Просмотреть файл

@ -40,7 +40,7 @@ fn content(to_layout: chan<layout::msg>) -> chan<msg> {
// Note: we can parse the next document in parallel
// with any previous documents.
let stream = html::spawn_parser_task(filename);
let stream = lexer::spawn_html_parser_task(filename);
let root = parser::html_builder::build_dom(scope, stream);
// Now, join the layout so that they will see the latest

Просмотреть файл

@ -54,9 +54,10 @@ enum element_subclass {
es_head
}
#[doc="The rd_aux data is a (weak) pointer to the layout data, which contains
the CSS info as well as the primary box. Note that there may be multiple
boxes per DOM node."]
#[doc="The rd_aux data is a (weak) pointer to the layout data, which
contains the CSS info as well as the primary box. Note that
there may be multiple boxes per DOM node."]
type node = rcu::handle<node_data, layout_data>;
type node_scope = rcu::scope<node_data, layout_data>;

Просмотреть файл

@ -0,0 +1,125 @@
import io::println;
// Values carried by the `display` style declaration.
enum display_type{
block,
inline
}
// A single style declaration attached to a rule.
// print_style renders `font_size` with a "px" suffix and the two color
// variants as 6-digit hexadecimal numbers.
enum style_decl{
font_size(uint),
display(display_type),
text_color(uint),
background_color(uint)
}
// An attribute test that is part of an element selector.
// print_attr renders the variants, in order, as `[att]`, `[att = val]`,
// `[att ~= val]` and `[att |= val]`.
enum attr{
exists(str),
exact(str, str),
includes(str, str),
starts_with(str, str)
}
// A selector: either one element (a name plus its attribute tests) or two
// selectors joined by a combinator. print_selector renders `child` with
// `>`, `descendant` with a space and `sibling` with `+`.
enum selector{
element(str, [attr]),
child(~selector, ~selector),
descendant(~selector, ~selector),
sibling(~selector, ~selector)
}
// A rule pairs one selector with the style declarations it carries.
type rule = (selector, [style_decl]);
// A stylesheet is a vector of rules.
type stylesheet = [rule];
fn print_list<T>(list : [T], print : fn(T) -> str) -> str {
let l = vec::len(list);
if l == 0u { ret "" }
let mut res = print(list[0]);
let mut i = 1u;
while i < l {
res += ", ";
res += print(list[i]);
i += 1u;
}
ret res;
}
fn print_display(dis_ty : display_type) -> str {
alt dis_ty {
block { "block" }
inline { "inline" }
}
}
fn print_style(decl : style_decl) -> str{
alt decl {
font_size(s) { #fmt("Font size = %u px", s) }
display(dis_ty) { #fmt("Display style = %s", print_display(dis_ty)) }
text_color(c) { #fmt("Text color = 0x%06x", c) }
background_color(c) { #fmt("Background color = 0x%06x", c) }
}
}
fn print_attr(attribute : attr) -> str {
alt attribute {
exists(att) { #fmt("[%s]", att) }
exact(att, val) { #fmt("[%s = %s]", att, val) }
includes(att, val) { #fmt("[%s ~= %s]", att, val) }
starts_with(att, val) { #fmt("[%s |= %s]", att, val) }
}
}
fn print_selector(select : ~selector) -> str {
alt *select {
element(s, attrs) { #fmt("Element %s with attributes: %s", s,
print_list(attrs, print_attr)) }
child(sel1, sel2) { #fmt("(%s) > (%s)", print_selector(sel1),
print_selector(sel2)) }
descendant(sel1, sel2) { #fmt("(%s) (%s)", print_selector(sel1),
print_selector(sel2)) }
sibling(sel1, sel2) { #fmt("(%s) + (%s)", print_selector(sel1),
print_selector(sel2)) }
}
}
// Renders a rule as its selector followed by its brace-enclosed styles.
fn print_rule(rule : rule) -> str {
    alt rule {
      (sel, styles) {
        // print_selector takes a unique box, so the selector is copied
        // into one first.
        ret #fmt("Selector: %s, Style: {%s}",
                 print_selector(~(copy sel)),
                 print_list(styles, print_style));
      }
    }
}
fn print_sheet(sheet : stylesheet) -> str {
#fmt("CSS Rules: %s", print_list(sheet, print_rule))
}
#[test]
fn test_pretty_print() {
    // Case 1: a bare element selector carrying a single declaration.
    let sheet_a = [(element("p", []), [font_size(32u)])];
    let want_a = "CSS Rules: Selector: Element p with attributes: ," +
        " Style: {Font size = 32 px}";
    let got_a = print_sheet(sheet_a);
    assert(got_a == want_a);

    // Case 2: a descendant selector whose right-hand element has an
    // attribute test, carrying two declarations.
    let ancestor = ~element("*", []);
    let target = ~element("body", [exact("class", "2")]);
    let sheet_b = [(descendant(ancestor, target),
                    [display(block), text_color(0u)])];
    let want_b = "CSS Rules: Selector: (Element * with attributes: ) " +
        "(Element body with attributes: [class = 2]), " +
        "Style: {Display style = block, Text color = 0x000000}";
    let got_b = print_sheet(sheet_b);
    assert(got_b == want_b);
}

Просмотреть файл

@ -1,268 +0,0 @@
import comm::{port, chan};
// Lexer mode. `ps_tag` is entered after a tag name has been read and is
// left again on '>' or '/'; `ps_normal` is the initial mode.
enum parse_state {
ps_normal,
ps_tag
}
// Lexer state: a single pushback slot (`lookahead`), the current mode and
// the reader that bytes are pulled from.
type parser = {
mut lookahead: option<char_or_eof>,
mut state: parse_state,
reader: io::reader
};
// Tokens produced by the HTML lexer:
//   to_start_opening_tag(name)  - '<' followed by a tag name
//   to_end_opening_tag          - '>' while inside a tag
//   to_end_tag(name)            - "</" name '>'
//   to_self_close_tag           - '/' while inside a tag
//   to_text(text)               - a run of bytes outside of any tag
//   to_attr(name, value)        - one attribute inside a tag
//   to_doctype                  - the doctype declaration
//   to_eof                      - end of input
enum token {
to_start_opening_tag(str),
to_end_opening_tag,
to_end_tag(str),
to_self_close_tag,
to_text(str),
to_attr(str, str),
to_doctype,
to_eof
}
// Result of reading from the input: either one byte or end of input.
enum char_or_eof {
coe_char(u8),
coe_eof
}
impl u8_methods for u8 {
fn is_alpha() -> bool {
ret (self >= ('A' as u8) && self <= ('Z' as u8)) ||
(self >= ('a' as u8) && self <= ('z' as u8));
}
}
// Conversions from an accumulated byte vector.
impl u8_vec_methods for [u8] {
// Converts the bytes to a string via str::from_bytes.
fn to_str() -> str { ret str::from_bytes(self); }
// Wraps the bytes, as a string, in a `to_text` token.
fn to_str_token() -> token { ret to_text(self.to_str()); }
}
impl methods for parser {
// Returns the next input item. A pushed-back item is handed out first
// (and the slot cleared); otherwise one byte is read from the reader, or
// `coe_eof` is returned once the reader reports end of input.
fn get() -> char_or_eof {
    alt self.lookahead {
      some(pending) {
        // Take a copy before the slot it was matched from is cleared.
        let taken = pending;
        self.lookahead = none;
        ret taken;
      }
      none {
        /* nothing pushed back; read from the reader below */
      }
    }
    if !self.reader.eof() {
        ret coe_char(self.reader.read_byte() as u8);
    }
    ret coe_eof;
}
// Pushes `ch` back so the next `get` returns it. Only one byte can be
// pending at a time; the assertion fails if the slot is already occupied.
fn unget(ch: u8) {
assert self.lookahead.is_none();
self.lookahead = some(coe_char(ch));
}
// Aborts lexing by failing with the message `err`; never returns.
fn parse_err(err: str) -> ! {
fail err
}
// Consumes the next byte and fails via parse_err unless it equals `ch`.
// End of input is reported with its own message.
fn expect(ch: u8) {
    let wanted = ch as char;
    alt self.get() {
      coe_eof {
        self.parse_err(#fmt("expected '%c' at eof", wanted));
      }
      coe_char(found) {
        if found != ch {
            self.parse_err(#fmt("expected '%c'", wanted));
        }
      }
    }
}
// Reads a run of alphabetic bytes and returns it as a string. The first
// non-alphabetic byte is pushed back. Fails with "expected ident" when no
// alphabetic byte was read, or when end of input is reached.
fn parse_ident() -> str {
    let mut bytes: [u8] = [];
    loop {
        alt self.get() {
          coe_eof {
            self.parse_err("expected ident");
          }
          coe_char(b) {
            if !b.is_alpha() {
                if bytes.len() == 0u {
                    self.parse_err("expected ident");
                }
                // The terminator belongs to whatever is lexed next.
                self.unget(b);
                break;
            }
            bytes += [b];
          }
        }
    }
    ret str::from_bytes(bytes);
}
fn expect_ident(expected: str) {
let actual = self.parse_ident();
if expected != actual {
self.parse_err(#fmt("expected '%s' but found '%s'",
expected, actual));
}
}
// Consumes spaces, newlines and tabs. The first other byte is pushed back;
// end of input just ends the scan.
fn eat_whitespace() {
    loop {
        alt self.get() {
          coe_eof {
            ret;
          }
          coe_char(b) {
            let blank = b == (' ' as u8) || b == ('\n' as u8) ||
                b == ('\t' as u8);
            if !blank {
                self.unget(b);
                ret;
            }
          }
        }
    }
}
// Produces the next token. Reads one byte (returning `to_eof` at end of
// input) and dispatches on the current lexer mode; the token is logged
// with #debug before being returned.
fn parse() -> token {
    let mut first: u8;
    alt self.get() {
      coe_eof { ret to_eof; }
      coe_char(b) { first = b; }
    }
    let tok = alt self.state {
      ps_tag { self.parse_in_tag_state(first) }
      ps_normal { self.parse_in_normal_state(first) }
    };
    #debug["token=%?", tok];
    ret tok;
}
// Lexes one token while outside of a tag; `c` is the byte already read.
// A leading '<' starts a doctype, an end tag or an opening tag; any other
// byte starts a text run.
fn parse_in_normal_state(c: u8) -> token {
let mut ch = c;
if ch == ('<' as u8) {
alt self.get() {
coe_char(c) { ch = c; }
coe_eof { self.parse_err("eof after '<'") }
}
// "<!": only `DOCTYPE html` followed by '>' is accepted, with optional
// whitespace between the pieces.
if ch == ('!' as u8) {
self.eat_whitespace();
self.expect_ident("DOCTYPE");
self.eat_whitespace();
self.expect_ident("html");
self.eat_whitespace();
self.expect('>' as u8);
ret to_doctype;
}
// "</": a tag name immediately followed by '>'.
if ch == ('/' as u8) {
let ident = self.parse_ident();
self.expect('>' as u8);
ret to_end_tag(ident);
}
// Otherwise the byte belongs to the tag name: push it back, read the name
// and switch to tag mode so the following calls lex attributes.
self.unget(ch);
self.eat_whitespace();
let ident = self.parse_ident();
self.eat_whitespace();
self.state = ps_tag;
ret to_start_opening_tag(ident);
}
// Make a text node: collect bytes up to the next '<' or end of input.
let mut s: [u8] = [ch];
loop {
alt self.get() {
coe_char(c) {
// The '<' is pushed back so the next call sees it.
if c == ('<' as u8) {
self.unget(c);
ret s.to_str_token();
}
s += [c];
}
coe_eof { ret s.to_str_token(); }
}
}
}
// Lexes one token while inside a tag; `c` is the byte already read.
// '>' and '/' leave tag mode; an alphabetic byte starts an attribute of
// the form name="value"; anything else fails.
fn parse_in_tag_state(c: u8) -> token {
let mut ch = c;
if ch == ('>' as u8) {
self.state = ps_normal;
ret to_end_opening_tag;
}
// NOTE(review): only the '/' is consumed here; a following '>' is left in
// the input for the next call in normal mode - confirm this is intended.
if ch == ('/' as u8) {
self.state = ps_normal;
ret to_self_close_tag;
}
if !ch.is_alpha() {
fail #fmt("expected alphabetical in tag but found %c", ch as char);
}
// Parse an attribute name: every byte up to (not including) '='.
let mut attribute_name = [ch];
loop {
alt self.get() {
coe_char(c) {
if c == ('=' as u8) { break; }
attribute_name += [c];
}
// End of input inside the name: the name doubles as the value.
coe_eof {
ret to_attr(attribute_name.to_str(),
attribute_name.to_str()); }
}
}
// Parse the attribute value: it must open with '"' and runs to the
// closing '"'.
self.expect('"' as u8);
let mut attribute_value = [];
loop {
alt self.get() {
coe_char(c) {
if c == ('"' as u8) { break; }
attribute_value += [c];
}
// End of input inside the value: return what was collected so far.
coe_eof {
ret to_attr(attribute_name.to_str(),
attribute_value.to_str());
}
}
}
// Eat whitespace.
self.eat_whitespace();
ret to_attr(attribute_name.to_str(), attribute_value.to_str());
}
}
// Builds a lexer over `reader`, starting in normal mode with nothing
// pushed back.
fn parser(reader: io::reader) -> parser {
ret { mut lookahead: none, mut state: ps_normal, reader: reader };
}
fn spawn_parser_task(filename: str) -> port<token> {
let result_port = port();
let result_chan = chan(result_port);
task::spawn {||
let file_data = io::read_whole_file(filename).get();
let reader = io::bytes_reader(file_data);
let parser = parser(reader);
loop {
let token = parser.parse();
result_chan.send(token);
if token == to_eof { break; }
}
};
ret result_port;
}

Просмотреть файл

@ -5,8 +5,8 @@ import dom::base::{attr, element, element_subclass, es_div, es_head, es_img};
import dom::base::{es_unknown, methods, nk_element, nk_text, rd_tree_ops};
import dom::base::{wr_tree_ops};
import dom = dom::base;
import parser = parser::html;
import html::token;
import parser = parser::lexer::html;
import parser::token;
import gfx::geom;
import dvec::extensions;

Просмотреть файл

@ -0,0 +1,501 @@
import comm::{port, chan};
import html::html_methods;
import css::css_methods;
import dom::style;
// Lexer mode. The two `ps_html_*` modes are handled by parse_html, the
// four `ps_css_*` modes by parse_css; each entry point fails when the
// lexer is in a mode belonging to the other.
enum parse_state {
ps_html_normal,
ps_html_tag,
ps_css_elmt,
ps_css_relation,
ps_css_desc,
ps_css_attribute
}
// Lexer state shared by the HTML and CSS lexers: a single pushback slot
// (`lookahead`), the current mode and the reader that bytes are pulled
// from.
type parser = {
mut lookahead: option<char_or_eof>,
mut state: parse_state,
reader: io::reader
};
// Result of reading from the input: either one byte or end of input.
enum char_or_eof {
coe_char(u8),
coe_eof
}
impl u8_methods for u8 {
fn is_whitespace() -> bool {
ret self == ' ' as u8 || self == '\n' as u8
|| self == '\t' as u8;
}
fn is_alpha() -> bool {
ret (self >= ('A' as u8) && self <= ('Z' as u8)) ||
(self >= ('a' as u8) && self <= ('z' as u8));
}
}
// Conversions from an accumulated byte vector.
impl u8_vec_methods for [u8] {
// Converts the bytes to a string via str::from_bytes.
fn to_str() -> str { ret str::from_bytes(self); }
// Wraps the bytes, as a string, in an HTML `to_text` token.
fn to_html_token() -> html::token { ret html::to_text(self.to_str()); }
// NOTE(review): despite its name this also returns an html::token and is
// identical to to_html_token - confirm whether a css::token was intended.
fn to_css_token() -> html::token { ret html::to_text(self.to_str()); }
}
impl util_methods for parser {
// Returns the next input item. A pushed-back item is handed out first
// (and the slot cleared); otherwise one byte is read from the reader, or
// `coe_eof` is returned once the reader reports end of input.
fn get() -> char_or_eof {
    alt self.lookahead {
      some(pending) {
        // Take a copy before the slot it was matched from is cleared.
        let taken = pending;
        self.lookahead = none;
        ret taken;
      }
      none {
        /* nothing pushed back; read from the reader below */
      }
    }
    if !self.reader.eof() {
        ret coe_char(self.reader.read_byte() as u8);
    }
    ret coe_eof;
}
// Pushes `ch` back so the next `get` returns it. Only one byte can be
// pending at a time; the assertion fails if the slot is already occupied.
fn unget(ch: u8) {
assert self.lookahead.is_none();
self.lookahead = some(coe_char(ch));
}
// Aborts lexing by failing with the message `err`; never returns.
fn parse_err(err: str) -> ! {
fail err
}
// Consumes the next byte and fails via parse_err unless it equals `ch`.
// End of input is reported with its own message.
fn expect(ch: u8) {
    let wanted = ch as char;
    alt self.get() {
      coe_eof {
        self.parse_err(#fmt("expected '%c' at eof", wanted));
      }
      coe_char(found) {
        if found != ch {
            self.parse_err(#fmt("expected '%c'", wanted));
        }
      }
    }
}
// Reads a run of alphabetic bytes and returns it as a string. The first
// non-alphabetic byte is pushed back. Fails with "expected ident" when no
// alphabetic byte was read, or when end of input is reached.
fn parse_ident() -> str {
    let mut bytes: [u8] = [];
    loop {
        alt self.get() {
          coe_eof {
            self.parse_err("expected ident");
          }
          coe_char(b) {
            if !b.is_alpha() {
                if bytes.len() == 0u {
                    self.parse_err("expected ident");
                }
                // The terminator belongs to whatever is lexed next.
                self.unget(b);
                break;
            }
            bytes += [b];
          }
        }
    }
    ret str::from_bytes(bytes);
}
fn expect_ident(expected: str) {
let actual = self.parse_ident();
if expected != actual {
self.parse_err(#fmt("expected '%s' but found '%s'",
expected, actual));
}
}
// Skips over a run of whitespace bytes (as defined by `is_whitespace`).
//
// Bytes are consumed for as long as they are whitespace. The first
// non-whitespace byte is pushed back with `unget` so the next `get` sees
// it; reaching end of input simply ends the scan.
fn eat_whitespace() {
    loop {
        alt self.get() {
          coe_char(c) {
            // Stop on the first byte that is NOT whitespace. Without the
            // negation this would push whitespace back and swallow
            // everything else instead.
            if !c.is_whitespace() {
                self.unget(c);
                ret;
            }
          }
          coe_eof {
            ret;
          }
        }
    }
}
// Produces the next HTML token. Reads one byte (returning `html::to_eof`
// at end of input) and dispatches on the current mode, failing if the
// lexer is not in one of the HTML modes. The token is logged with #debug
// before being returned.
fn parse_html() -> html::token {
    let mut first: u8;
    alt self.get() {
      coe_eof { ret html::to_eof; }
      coe_char(b) { first = b; }
    }
    let tok = alt self.state {
      ps_html_tag { self.parse_in_tag_state(first) }
      ps_html_normal { self.parse_in_normal_state(first) }
      _ { fail "Parsing in html mode when not in " +
          "an html state" }
    };
    #debug["token=%?", tok];
    ret tok;
}
// Produces the next CSS token. Reads one byte (returning `css::to_eof` at
// end of input) and dispatches on the current mode, failing if the lexer
// is not in one of the CSS modes. The token is logged with #debug before
// being returned.
fn parse_css() -> css::token {
    let mut first: u8;
    alt self.get() {
      coe_eof { ret css::to_eof; }
      coe_char(b) { first = b; }
    }
    let tok = alt self.state {
      ps_css_elmt { self.parse_css_element(first) }
      ps_css_relation { self.parse_css_relation(first) }
      ps_css_attribute { self.parse_css_attribute(first) }
      ps_css_desc { self.parse_css_description(first) }
      _ { fail "Parsing in css mode when not in " +
          "a css state" }
    };
    #debug["token=%?", tok];
    ret tok;
}
}
mod html {
// Tokens produced by the HTML lexer:
//   to_start_opening_tag(name)  - '<' followed by a tag name
//   to_end_opening_tag          - '>' while inside a tag
//   to_end_tag(name)            - "</" name '>'
//   to_self_close_tag           - '/' while inside a tag
//   to_text(text)               - a run of bytes outside of any tag
//   to_attr(name, value)        - one attribute inside a tag
//   to_doctype                  - the doctype declaration
//   to_eof                      - end of input
enum token {
to_start_opening_tag(str),
to_end_opening_tag,
to_end_tag(str),
to_self_close_tag,
to_text(str),
to_attr(str, str),
to_doctype,
to_eof
}
impl html_methods for parser {
// Lexes one token while outside of a tag; `c` is the byte already read.
// A leading '<' starts a doctype, an end tag or an opening tag; any other
// byte starts a text run.
fn parse_in_normal_state(c: u8) -> token {
let mut ch = c;
if ch == ('<' as u8) {
alt self.get() {
coe_char(c) { ch = c; }
coe_eof { self.parse_err("eof after '<'") }
}
// "<!": only `DOCTYPE html` followed by '>' is accepted, with optional
// whitespace between the pieces.
if ch == ('!' as u8) {
self.eat_whitespace();
self.expect_ident("DOCTYPE");
self.eat_whitespace();
self.expect_ident("html");
self.eat_whitespace();
self.expect('>' as u8);
ret to_doctype;
}
// "</": a tag name immediately followed by '>'.
if ch == ('/' as u8) {
let ident = self.parse_ident();
self.expect('>' as u8);
ret to_end_tag(ident);
}
// Otherwise the byte belongs to the tag name: push it back, read the name
// and switch to tag mode so the following calls lex attributes.
self.unget(ch);
self.eat_whitespace();
let ident = self.parse_ident();
self.eat_whitespace();
self.state = ps_html_tag;
ret to_start_opening_tag(ident);
}
// Make a text node: collect bytes up to the next '<' or end of input.
let mut s: [u8] = [ch];
loop {
alt self.get() {
coe_char(c) {
// The '<' is pushed back so the next call sees it.
if c == ('<' as u8) {
self.unget(c);
ret s.to_html_token();
}
s += [c];
}
coe_eof { ret s.to_html_token(); }
}
}
}
// Lexes one token while inside a tag; `c` is the byte already read.
// '>' and '/' leave tag mode; an alphabetic byte starts an attribute of
// the form name="value"; anything else fails.
fn parse_in_tag_state(c: u8) -> token {
let mut ch = c;
if ch == ('>' as u8) {
self.state = ps_html_normal;
ret to_end_opening_tag;
}
// NOTE(review): only the '/' is consumed here; a following '>' is left in
// the input for the next call in normal mode - confirm this is intended.
if ch == ('/' as u8) {
self.state = ps_html_normal;
ret to_self_close_tag;
}
if !ch.is_alpha() {
fail #fmt("expected alphabetical in tag but found %c",
ch as char);
}
// Parse an attribute name: every byte up to (not including) '='.
let mut attribute_name = [ch];
loop {
alt self.get() {
coe_char(c) {
if c == ('=' as u8) { break; }
attribute_name += [c];
}
// End of input inside the name: the name doubles as the value.
coe_eof {
ret to_attr(attribute_name.to_str(),
attribute_name.to_str()); }
}
}
// Parse the attribute value: it must open with '"' and runs to the
// closing '"'.
self.expect('"' as u8);
let mut attribute_value = [];
loop {
alt self.get() {
coe_char(c) {
if c == ('"' as u8) { break; }
attribute_value += [c];
}
// End of input inside the value: return what was collected so far.
coe_eof {
ret to_attr(attribute_name.to_str(),
attribute_value.to_str());
}
}
}
// Eat whitespace.
self.eat_whitespace();
ret to_attr(attribute_name.to_str(), attribute_value.to_str());
}
}
}
mod css {
// Tokens produced by the CSS lexer:
//   to_start_desc       - '{' opening a block of descriptors
//   to_descendant       - relation between two elements with no explicit
//                         combinator
//   to_child            - '>' combinator
//   to_sibling          - '+' combinator
//   to_comma            - ','
//   to_elmt(name)       - an element name ("*" when implied)
//   to_attr(test)       - one attribute test on the current element
//   to_desc(name, val)  - one `name: value` descriptor
//   to_eof              - end of input
// NOTE(review): nothing in this file appears to emit to_end_desc - confirm
// whether a consumer relies on it.
enum token {
to_start_desc,
to_end_desc,
to_descendant,
to_child,
to_sibling,
to_comma,
to_elmt(str),
to_attr(style::attr),
to_desc(str, str),
to_eof
}
impl css_methods for parser {
// Lexes the relation between one element and whatever follows it; `c` is
// the byte already read.
//
// '{' opens the descriptor block (mode becomes ps_css_desc); '>', '+' and
// ',' are explicit combinators. Any other byte means the next element is a
// descendant: that byte is the first byte of the next element, so it is
// pushed back instead of being dropped. Except after '{', the lexer
// continues in ps_css_elmt mode. Trailing whitespace is skipped.
fn parse_css_relation(c : u8) -> token {
    self.state = ps_css_elmt;
    let token = alt c {
      '{' as u8 { self.state = ps_css_desc; to_start_desc }
      '>' as u8 { to_child }
      '+' as u8 { to_sibling }
      ',' as u8 { to_comma }
      // Not a combinator: `c` already belongs to the next element, so
      // hand it back for parse_css_element to read.
      _ { self.unget(c); to_descendant }
    };
    self.eat_whitespace();
    ret token;
}
// Lexes an element name; `c` is the byte already read. Afterwards the
// lexer is in ps_css_attribute mode.
//
// '.' and '#' introduce an attribute test on an implied element: the byte
// is pushed back for parse_css_attribute and "*" is reported. A literal
// '*' is reported as "*" as well. Otherwise `c` is the first byte of the
// name, so it is pushed back before parse_ident reads the identifier
// (without this the first letter of every element name would be lost).
fn parse_css_element(c : u8) -> token {
    /* Check for special attributes with an implied element, or a
       wildcard, which is not an alphabetic character. */
    if c == '.' as u8 || c == '#' as u8 {
        self.state = ps_css_attribute;
        self.unget(c);
        ret to_elmt("*");
    } else if c == '*' as u8 {
        self.state = ps_css_attribute;
        ret to_elmt("*");
    }
    // parse_ident reads from the input, so give it back the first byte.
    self.unget(c);
    let element = self.parse_ident();
    self.state = ps_css_attribute;
    ret to_elmt(element);
}
// Lexes one token in attribute mode, i.e. after an element name; `c` is
// the byte already read. Whitespace ends the attribute list and hands over
// to parse_css_relation; '.', '#' and '[' introduce attribute tests; any
// other byte fails.
fn parse_css_attribute(c : u8) -> token {
let mut ch = c;
/* If we've reached the end of this list of attributes,
look for the relation to the next element.*/
if c.is_whitespace() {
self.state = ps_css_relation;
self.eat_whitespace();
alt self.get() {
coe_char(c) { ch = c }
coe_eof { fail "File ended before description " +
"of style" }
}
ret self.parse_css_relation(ch);
}
alt ch {
// ".name" is reported as includes("class", name).
'.' as u8 { ret to_attr(
style::includes("class", self.parse_ident())); }
// "#name" is reported as includes("id", name).
'#' as u8 { ret to_attr(
style::includes("id", self.parse_ident())); }
'[' as u8 {
let attr_name = self.parse_ident();
alt self.get() {
coe_char(c) { ch = c; }
coe_eof { fail "File ended before " +
"description finished"; }
}
// The byte after the name selects the kind of test:
// ']' -> exists, '=' -> exact, "~=" -> includes, "|=" -> starts_with.
// Values are read with parse_ident and must be followed by ']'.
if ch == ']' as u8 {
ret to_attr(style::exists(attr_name));
} else if ch == '=' as u8 {
let attr_val = self.parse_ident();
self.expect(']' as u8);
ret to_attr(style::exact(attr_name, attr_val));
} else if ch == '~' as u8 {
self.expect('=' as u8);
let attr_val = self.parse_ident();
self.expect(']' as u8);
ret to_attr(style::includes(attr_name, attr_val));
} else if ch == '|' as u8 {
self.expect('=' as u8);
let attr_val = self.parse_ident();
self.expect(']' as u8);
ret to_attr(style::starts_with(attr_name, attr_val));
}
fail #fmt("Unexpected symbol %c in attribute", ch as char);
}
_ { fail #fmt("Unexpected symbol %c in attribute",
ch as char); }
}
}
// Lexes one `name: value` descriptor inside a block; `c` is the byte
// already read. Returns to_desc(name, value). Whitespace is never added to
// the name or the value. Fails on end of input, on an empty name and on
// an empty value.
fn parse_css_description(c: u8) -> token {
let mut ch = c;
// Skip whitespace in front of the descriptor name.
if ch.is_whitespace() {
self.eat_whitespace();
alt self.get() {
coe_char(c) { ch = c }
coe_eof { fail "Reached end of file " +
"in CSS description" }
}
}
let mut desc_name = [];
// Get the name of the descriptor: every non-whitespace byte up to ':'.
loop {
if ch.is_whitespace() {
self.eat_whitespace();
} else if ch == ':' as u8 {
if desc_name.len() == 0u {
fail "Expected descriptor name";
} else {
break;
}
} else {
desc_name += [ch];
}
alt self.get() {
coe_char(c) { ch = c }
coe_eof { fail "Reached end of file " +
"in CSS description" }
}
}
self.eat_whitespace();
let mut desc_val = [];
// Get the value of the descriptor: every non-whitespace byte up to ';'
// or '}'.
loop {
alt self.get() {
coe_char(c) { ch = c }
coe_eof { fail "Reached end of file " +
"in CSS description" }
}
if ch.is_whitespace() {
self.eat_whitespace();
} else if ch == '}' as u8 {
if desc_val.len() == 0u {
fail "Expected descriptor value";
} else {
// '}' also closes the block, so the next token is an element again.
self.state = ps_css_elmt;
break;
}
} else if ch == ';' as u8 {
if desc_val.len() == 0u {
fail "Expected descriptor value";
} else {
// ';' ends this descriptor; the mode is left unchanged here.
break;
}
} else {
desc_val += [ch];
}
}
ret to_desc(desc_name.to_str(), desc_val.to_str());
}
}
}
// Builds a lexer over `reader` that starts in mode `state` with nothing
// pushed back.
fn parser(reader: io::reader, state : parse_state) -> parser {
ret { mut lookahead: none, mut state: state, reader: reader };
}
fn spawn_html_parser_task(filename: str) -> port<html::token> {
let result_port = port();
let result_chan = chan(result_port);
task::spawn {||
let file_data = io::read_whole_file(filename).get();
let reader = io::bytes_reader(file_data);
assert filename.ends_with(".html");
let parser = parser(reader, ps_html_normal);
loop {
let token = parser.parse_html();
result_chan.send(token);
if token == html::to_eof { break; }
}
};
ret result_port;
}
fn spawn_css_parser_task(filename: str) -> port<css::token> {
let result_port = port();
let result_chan = chan(result_port);
task::spawn {||
let file_data = io::read_whole_file(filename).get();
let reader = io::bytes_reader(file_data);
assert filename.ends_with(".css");
let parser : parser = parser(reader, ps_css_elmt);
loop {
let token = parser.parse_css();
result_chan.send(token);
if token == css::to_eof { break; }
}
};
ret result_port;
}

Просмотреть файл

@ -16,6 +16,7 @@ use stb_image;
mod dom {
mod base;
mod rcu;
mod style;
}
mod gfx {
@ -26,7 +27,7 @@ mod gfx {
}
mod image {
mod base;
mod base;
mod encode {
mod tga;
}
@ -34,7 +35,7 @@ mod image {
mod layout {
mod style {
mod apply;
mod apply;
mod style;
}
@ -48,7 +49,7 @@ mod layout {
}
mod parser {
mod html;
mod lexer;
mod html_builder;
}

Просмотреть файл

@ -1,6 +1,6 @@
import comm::*;
import parser::html;
import parser::html::methods;
import parser::lexer;
//import parser::lexer::util_methods;
import result::extensions;
import gfx::renderer;
import platform::osmain;