Updated markup parser for unicode; fixed bug 648596
This commit is contained in:
Родитель
769e5f7313
Коммит
908287ce76
|
@ -1,24 +1,26 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import validator.testcases.markup.markuptester as markuptester
|
||||
from validator.errorbundler import ErrorBundle
|
||||
from validator.constants import *
|
||||
|
||||
def _do_test(path, should_fail=False, type_=None):
|
||||
return _do_test_raw(open(path).read(),
|
||||
path,
|
||||
should_fail,
|
||||
type_)
|
||||
|
||||
markup_file = open(path)
|
||||
data = markup_file.read()
|
||||
markup_file.close()
|
||||
|
||||
def _do_test_raw(data, path, should_fail=False, type_=None):
|
||||
filename = path.split("/")[-1]
|
||||
extension = filename.split(".")[-1]
|
||||
|
||||
err = ErrorBundle(None, True)
|
||||
err = ErrorBundle()
|
||||
if type_:
|
||||
err.set_type(type_)
|
||||
|
||||
parser = markuptester.MarkupParser(err, debug=True)
|
||||
parser.process(filename, data, extension)
|
||||
|
||||
err.print_summary(True)
|
||||
print err.print_summary(verbose=True)
|
||||
|
||||
if should_fail:
|
||||
assert err.failed()
|
||||
|
@ -31,7 +33,7 @@ def _do_test(path, should_fail=False, type_=None):
|
|||
def test_local_url_detector():
|
||||
"Tests that local URLs can be detected."
|
||||
|
||||
err = ErrorBundle(None, True)
|
||||
err = ErrorBundle()
|
||||
mp = markuptester.MarkupParser(err)
|
||||
tester = mp._is_url_local
|
||||
|
||||
|
@ -135,3 +137,18 @@ def test_invalid_markup():
|
|||
result = _do_test("tests/resources/markup/markuptester/bad_script.xml",
|
||||
False)
|
||||
assert result.notices
|
||||
|
||||
|
||||
def test_self_closing_scripts():
|
||||
"""Tests that self-closing script tags are not deleterious to parsing"""
|
||||
|
||||
_do_test_raw("""
|
||||
<foo>
|
||||
<script type="text/javascript"/>
|
||||
<list_item undecodable=" _ " />
|
||||
<list_item />
|
||||
<list_item />
|
||||
</foo>
|
||||
""", "foo.js")
|
||||
|
||||
|
||||
|
|
|
@ -85,7 +85,7 @@ def main():
|
|||
# Print the output of the tests based on the requested format.
|
||||
if args.output == "text":
|
||||
print error_bundle.print_summary(verbose=args.verbose,
|
||||
no_color=args.boring)
|
||||
no_color=args.boring).encode("utf-8")
|
||||
elif args.output == "json":
|
||||
sys.stdout.write(error_bundle.render_json())
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@ from StringIO import StringIO
|
|||
|
||||
from validator import decorator
|
||||
from validator import submain as testendpoint_validator
|
||||
from validator import unicodehelper
|
||||
import validator.testcases.markup.markuptester as testendpoint_markup
|
||||
import validator.testcases.markup.csstester as testendpoint_css
|
||||
import validator.testcases.scripting as testendpoint_js
|
||||
|
@ -142,9 +143,8 @@ def test_packed_packages(err, package_contents=None, xpi_package=None):
|
|||
if not file_data:
|
||||
continue
|
||||
|
||||
# Skip BOMs and the like
|
||||
while not is_standard_ascii(file_data[0]):
|
||||
file_data = file_data[1:]
|
||||
# Convert the file data to unicode
|
||||
file_data = unicodehelper.decode(file_data)
|
||||
|
||||
if data["extension"] == "css":
|
||||
testendpoint_css.test_css_file(err,
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
|
||||
import re
|
||||
try:
|
||||
from HTMLParser import HTMLParser
|
||||
|
@ -6,6 +5,7 @@ except ImportError: # pragma: no cover
|
|||
from html.parser import HTMLParser
|
||||
|
||||
import validator.testcases.scripting as scripting
|
||||
import validator.unicodehelper as unicodehelper
|
||||
from validator.testcases.markup import csstester
|
||||
from validator.contextgenerator import ContextGenerator
|
||||
from validator.constants import *
|
||||
|
@ -51,7 +51,7 @@ class MarkupParser(HTMLParser):
|
|||
self.xml_state = []
|
||||
self.xml_buffer = []
|
||||
|
||||
self.reported = {}
|
||||
self.reported = set()
|
||||
|
||||
def process(self, filename, data, extension="xul"):
|
||||
"""Processes data by splitting it into individual lines, then
|
||||
|
@ -61,7 +61,7 @@ class MarkupParser(HTMLParser):
|
|||
self.filename = filename
|
||||
self.extension = extension
|
||||
|
||||
self.reported = {}
|
||||
self.reported = set()
|
||||
|
||||
self.context = ContextGenerator(data)
|
||||
|
||||
|
@ -100,6 +100,8 @@ class MarkupParser(HTMLParser):
|
|||
|
||||
try:
|
||||
self.feed(line + "\n")
|
||||
except UnicodeDecodeError:
|
||||
raise
|
||||
except Exception as inst:
|
||||
if DEBUG: # pragma: no cover
|
||||
print self.xml_state, inst
|
||||
|
@ -107,8 +109,8 @@ class MarkupParser(HTMLParser):
|
|||
if "markup" in self.reported:
|
||||
return
|
||||
|
||||
if "script" in self.xml_state or (
|
||||
self.debug and "testscript" in self.xml_state):
|
||||
if ("script" in self.xml_state or
|
||||
self.debug and "testscript" in self.xml_state):
|
||||
if "script_comments" in self.reported or not self.strict:
|
||||
return
|
||||
self.err.notice(("testcases_markup_markuptester",
|
||||
|
@ -122,7 +124,7 @@ class MarkupParser(HTMLParser):
|
|||
self.filename,
|
||||
line=self.line,
|
||||
context=self.context)
|
||||
self.reported["script_comments"] = True
|
||||
self.reported.add("script_comments")
|
||||
return
|
||||
|
||||
if self.strict:
|
||||
|
@ -136,7 +138,7 @@ class MarkupParser(HTMLParser):
|
|||
self.filename,
|
||||
line=self.line,
|
||||
context=self.context)
|
||||
self.reported["markup"] = True
|
||||
self.reported.add("markup")
|
||||
|
||||
def handle_startendtag(self, tag, attrs):
|
||||
# Self closing tags don't have an end tag, so we want to
|
||||
|
@ -154,7 +156,7 @@ class MarkupParser(HTMLParser):
|
|||
self_closing = tag in SELF_CLOSING_TAGS
|
||||
|
||||
if DEBUG: # pragma: no cover
|
||||
print self.xml_state, tag, self_closing
|
||||
print "S: ", self.xml_state, tag, self_closing
|
||||
|
||||
# A fictional tag for testing purposes.
|
||||
if tag == "xbannedxtestx":
|
||||
|
@ -286,17 +288,19 @@ class MarkupParser(HTMLParser):
|
|||
return
|
||||
|
||||
self.xml_state.append(tag)
|
||||
self.xml_buffer.append("")
|
||||
self.xml_buffer.append(unicode(""))
|
||||
|
||||
def handle_endtag(self, tag):
|
||||
|
||||
tag = tag.lower()
|
||||
|
||||
if DEBUG: # pragma: no cover
|
||||
print tag, self.xml_state
|
||||
print "E: ", tag, self.xml_state
|
||||
|
||||
if not self.xml_state:
|
||||
if "closing_tags" in self.reported or not self.strict:
|
||||
if DEBUG:
|
||||
print "Unstrict; extra closing tags ------"
|
||||
return
|
||||
self.err.warning(("testcases_markup_markuptester",
|
||||
"handle_endtag",
|
||||
|
@ -307,16 +311,18 @@ class MarkupParser(HTMLParser):
|
|||
self.filename,
|
||||
line=self.line,
|
||||
context=self.context)
|
||||
self.reported["closing_tags"] = True
|
||||
self.reported.add("closing_tags")
|
||||
if DEBUG: # pragma: no cover
|
||||
print "Too many closing tags ------"
|
||||
return
|
||||
|
||||
elif "script" in self.xml_state:
|
||||
elif "script" in self.xml_state[:-1]:
|
||||
# If we're in a script tag, nothing else matters. Just rush
|
||||
# everything possible into the xml buffer.
|
||||
|
||||
self._save_to_buffer("</" + tag + ">")
|
||||
if DEBUG:
|
||||
print "Markup as text in script ------"
|
||||
return
|
||||
|
||||
elif tag not in self.xml_state:
|
||||
|
@ -344,6 +350,8 @@ class MarkupParser(HTMLParser):
|
|||
# classifies as a self-closing tag, we just recursively close
|
||||
# down to the level of the tag we're actually closing.
|
||||
if old_state != tag and old_state in SELF_CLOSING_TAGS:
|
||||
if DEBUG:
|
||||
print "Self closing tag cascading down ------"
|
||||
return self.handle_endtag(tag)
|
||||
|
||||
# If this is an XML-derived language, everything must nest
|
||||
|
@ -365,17 +373,20 @@ class MarkupParser(HTMLParser):
|
|||
if DEBUG: # pragma: no cover
|
||||
print "Invalid markup nesting ------"
|
||||
|
||||
data_buffer = data_buffer.strip()
|
||||
|
||||
# Perform analysis on collected data.
|
||||
if tag == "script":
|
||||
scripting.test_js_snippet(self.err,
|
||||
data_buffer,
|
||||
self.filename,
|
||||
self.line)
|
||||
elif tag == "style":
|
||||
csstester.test_css_file(self.err,
|
||||
self.filename,
|
||||
data_buffer,
|
||||
self.line)
|
||||
if data_buffer:
|
||||
if tag == "script":
|
||||
scripting.test_js_snippet(self.err,
|
||||
data_buffer,
|
||||
self.filename,
|
||||
self.line)
|
||||
elif tag == "style":
|
||||
csstester.test_css_file(self.err,
|
||||
self.filename,
|
||||
data_buffer,
|
||||
self.line)
|
||||
|
||||
def handle_data(self, data):
|
||||
self._save_to_buffer(data)
|
||||
|
@ -413,6 +424,8 @@ class MarkupParser(HTMLParser):
|
|||
if not self.xml_buffer:
|
||||
return
|
||||
|
||||
data = unicodehelper.decode(data)
|
||||
|
||||
self.xml_buffer[-1] += data
|
||||
|
||||
def _format_args(self, args):
|
||||
|
|
Загрузка…
Ссылка в новой задаче