From 6ee9eceb0536288790e3b7b84ab6fe654bad88ac Mon Sep 17 00:00:00 2001 From: Matt Basta Date: Thu, 29 Jul 2010 15:06:57 -0700 Subject: [PATCH] Fixed bug with CDATA in markuptester; Error messaging fixes --- validator/errorbundler.py | 149 ++++++++++----------- validator/main.py | 6 + validator/testcases/content.py | 2 +- validator/testcases/markup/csstester.py | 10 +- validator/testcases/markup/markuptester.py | 142 ++++++++++++++------ validator/typedetection.py | 5 +- 6 files changed, 193 insertions(+), 121 deletions(-) diff --git a/validator/errorbundler.py b/validator/errorbundler.py index f76fe79..5a377cf 100644 --- a/validator/errorbundler.py +++ b/validator/errorbundler.py @@ -20,8 +20,10 @@ class ErrorBundle(object): self.errors = [] self.warnings = [] self.infos = [] + self.message_tree = {} self.metadata = {} + self.cluster = False self.subpackages = [] self.package_stack = [] @@ -35,7 +37,8 @@ class ErrorBundle(object): def error(self, err_id, error, description='', filename='', line=0): "Stores an error message for the validation process" - self.errors.append({"id": err_id, + self._save_message(self.errors, + {"id": err_id, "message": error, "description": description, "file": filename, @@ -44,22 +47,39 @@ class ErrorBundle(object): def warning(self, err_id, warning, description='', filename='', line=0): "Stores a warning message for the validation process" - self.warnings.append({"id": err_id, - "message": warning, - "description": description, - "file": filename, - "line": line}) + self._save_message(self.warnings, + {"id": err_id, + "message": warning, + "description": description, + "file": filename, + "line": line}) return self def info(self, err_id, info, description='', filename='', line=0): "Stores an informational message about the validation" - self.infos.append({"id": err_id, - "message": info, - "description": description, - "file": filename, - "line": line}) + self._save_message(self.infos, + {"id": err_id, + "message": info, + "description": description, + "file": filename, + "line": line}) return self + def _save_message(self, stack, message): + "Stores a message in the appropriate message stack." + + stack.append(message) + + tree = self.message_tree + last_id = None + for eid in message["id"]: + if last_id is not None: + tree = tree[last_id] + if eid not in tree: + tree[eid] = {} + last_id = eid + tree[last_id] = message + def set_type(self, type_): "Stores the type of addon we're scanning" self.detected_type = type_ @@ -83,7 +103,6 @@ class ErrorBundle(object): self.resources[name] = resource - def is_nested_package(self): "Returns whether the current package is within a PACKAGE_MULTI" @@ -96,12 +115,17 @@ class ErrorBundle(object): "warnings": self.warnings, "infos": self.infos, "detected_type": self.detected_type, - "resources": self.resources}) + "resources": self.resources, + "message_tree": self.message_tree}) + + # Note that the message tree is not saved because it is simply rebuilt + # when the messages are rewritten. self.errors = [] self.warnings = [] self.infos = [] self.resources = {} + self.message_tree = {} self.package_stack.append(new_file) @@ -113,6 +137,7 @@ class ErrorBundle(object): errors = self.errors warnings = self.warnings infos = self.infos + # We only rebuild message_tree anyway. No need to restore. # Copy the existing state back into place self.errors = state["errors"] @@ -120,51 +145,34 @@ class ErrorBundle(object): self.infos = state["infos"] self.detected_type = state["detected_type"] self.resources = state["resources"] + self.message_tree = state["message_tree"] name = self.package_stack.pop() + self._merge_messages(errors, self.error, name) + self._merge_messages(warnings, self.warning, name) + self._merge_messages(infos, self.info, name) + + + def _merge_messages(self, messages, callback, name): + "Merges a stack of messages into another stack of messages" + # Overlay the popped warnings onto the existing ones. - for error in errors: + for message in messages: trace = [name] - # If there are sub-sub-packages, they'll be in a list. - if type(error["file"]) is list: - trace.extend(error["file"]) + if isinstance(message["file"], list): + trace.extend(message["file"]) else: - trace.append(error["file"]) + trace.append(message["file"]) # Write the errors with the file structure delimited by # right carets. - self.error(error["id"], - "%s > %s" % (name, error["message"]), - error["description"], - trace, - error["line"]) - - for warning in warnings: - trace = [name] - if type(warning["file"]) is list: - trace.extend(warning["file"]) - else: - trace.append(warning["file"]) - - self.warning(warning["id"], - "%s > %s" % (name, warning["message"]), - warning["description"], - trace, - warning["line"]) - for info in infos: - trace = [name] - if type(info["file"]) is list: - trace.extend(info["file"]) - else: - trace.append(info["file"]) - - self.info(info["id"], - "%s > %s" % (name, info["message"]), - info["description"], - trace, - info["line"]) + callback(message["id"], + "%s > %s" % (name, message["message"]), + message["description"], + trace, + message["line"]) def _clean_description(self, message, json=False): @@ -176,32 +184,20 @@ class ErrorBundle(object): def _clean_message(self, desc, json=False): "Cleans all the nasty whitespace from a string." - if not isinstance(desc, list): - desc = desc.split("\n") - output = [] - merge = [] - # Loop through each item in the multipart description. - for line in desc: - # If it's a string, add it to the line buffer for concat. - if isinstance(line, str): - merge.append(line.strip()) - # If it's a list, just append it. - elif isinstance(line, list): - # While you're in here, concat and flush the line buffer. - if merge: - output.append(" ".join(merge)) - - # JSON keeps the structure, plain text normalizes. - if json: - output.append(line) - else: - output.append(" ".join(line)) - # Finish up the line buffer. - if merge: - output.append(" ".join(merge)) - return output + if not isinstance(desc, list): + lines = desc.splitlines() + for line in lines: + output.append(line.strip()) + return " ".join(output) + else: + for line in desc: + output.append(self._clean_message(line, json)) + if json: + return output + else: + return "\n".join(output) def print_json(self): "Prints a JSON summary of the validation operation." @@ -286,8 +282,7 @@ class ErrorBundle(object): "Prints a message and takes care of all sorts of nasty code" # Load up the standard output. - output = [prefix, - message["message"]] + output = [prefix, self._clean_message([message["message"]])] # We have some extra stuff for verbose mode. if verbose: @@ -296,8 +291,8 @@ class ErrorBundle(object): # Detailed problem description. if message["description"]: # These are dirty, so strip out whitespace and concat. - verbose_output.append("\n".join( - self._clean_message(message["description"]))) + verbose_output.append( + self._clean_message(message["description"])) # If file information is availe, output that as well. files = message["file"] @@ -327,8 +322,8 @@ class ErrorBundle(object): def _print_verbose(self, verbose): "Prints info code to help prevent code duplication" - mesg = "<>Notice:<>\t" if self.infos and verbose: + mesg = "<>Notice:<>\t" for info in self.infos: self._print_message(mesg, info) \ No newline at end of file diff --git a/validator/main.py b/validator/main.py index bbd8ef1..d7165c1 100644 --- a/validator/main.py +++ b/validator/main.py @@ -56,6 +56,11 @@ def main(): const=True, help="""If the output format supports it, makes the analysis summary include extra info.""") + parser.add_argument("--cluster", + action="store_const", + const=True, + help="""Structures output in clustered format, + grouping similar errors together.""") parser.add_argument("--file", type=argparse.FileType("w"), default=sys.stdout, @@ -78,6 +83,7 @@ def main(): error_bundle = ErrorBundle(args.file, not args.file == sys.stdout or args.boring) + error_bundle.cluster = args.cluster # Emulates the "$status" variable in the original validation.php # test file. Equal to "$status == STATUS_LISTED". diff --git a/validator/testcases/content.py b/validator/testcases/content.py index fe89fc3..4db0ff2 100644 --- a/validator/testcases/content.py +++ b/validator/testcases/content.py @@ -55,7 +55,7 @@ def test_packed_packages(err, package_contents=None, xpi_package=None): # Let the error bunder know we're in a sub-package. err.push_state(data["name_lower"]) err.set_type(PACKAGE_SUBPACKAGE) # Subpackage - testendpoint_validator.test_inner_package(err, + testendpoint_validator.main.test_inner_package(err, temp_contents, sub_xpi) diff --git a/validator/testcases/markup/csstester.py b/validator/testcases/markup/csstester.py index d8b5e19..1dc7319 100644 --- a/validator/testcases/markup/csstester.py +++ b/validator/testcases/markup/csstester.py @@ -13,7 +13,10 @@ def test_css_file(err, filename, data, line_start=1): try: _run_css_tests(err, token_generator, filename, line_start - 1) except UnicodeDecodeError: - err.warning("Unicode decode error.", + err.warning(("testcases_markup_csstester", + "test_css_file", + "unicode_decode"), + "Unicode decode error.", """While decoding a CSS file, an unknown character was encountered, causing some problems.""", filename) @@ -40,7 +43,10 @@ def _run_css_tests(err, tokens, filename, line_start=0): if tok_type == "IDENT": last_descriptor = value.lower() if value.startswith("-webkit"): - err.error("Blasphemy.", + err.error(("testcases_markup_csstester", + "_run_css_tests", + "webkit"), + "Blasphemy.", "WebKit descriptors? Really?", filename, line) diff --git a/validator/testcases/markup/markuptester.py b/validator/testcases/markup/markuptester.py index 228ab33..f4a44fc 100644 --- a/validator/testcases/markup/markuptester.py +++ b/validator/testcases/markup/markuptester.py @@ -8,7 +8,7 @@ except ImportError: # pragma: no cover from validator.testcases.markup import csstester from validator.constants import * -DEBUG = True +DEBUG = False UNSAFE_TAGS = ("script", "object", @@ -45,7 +45,7 @@ class MarkupParser(HTMLParser): self.xml_state = [] self.xml_buffer = [] - self.alerted_script_comments = False + self.reported = {} def process(self, filename, data, extension="xul"): """Processes data by splitting it into individual lines, then @@ -56,48 +56,78 @@ class MarkupParser(HTMLParser): self.filename = filename self.extension = extension - reported = False + self.reported = {} lines = data.split("\n") + line_buffer = [] + force_buffer = False for line in lines: - try: - self.line += 1 - self.feed(line + "\n") - except Exception as inst: - - if DEBUG: # pragma: no cover - print self.xml_state, inst - - if reported: - continue - - if "script" in self.xml_state or ( - self.debug and "testscript" in self.xml_state): - if self.alerted_script_comments: + self.line += 1 + + search_line = line + while True: + if "" in post_cdata: + search_line = post_cdata[post_cdata.find("]]>")+3:] continue - self.err.info(("testcases_markup_markuptester", - "process", - "missing_script_comments"), - "Missing comments in