Fixed bug with CDATA in markuptester; Error messaging fixes

2010-07-29 15:06:57 -07:00 · 2010-07-29 15:06:57 -07:00 · 6ee9eceb05
--- a/validator/errorbundler.py
+++ b/validator/errorbundler.py
@ -20,8 +20,10 @@ class ErrorBundle(object):
        self.errors = []
        self.warnings = []
        self.infos = []
+        self.message_tree = {}
        
        self.metadata = {}
+        self.cluster = False
        
        self.subpackages = []
        self.package_stack = []
@ -35,7 +37,8 @@ class ErrorBundle(object):
        
    def error(self, err_id, error, description='', filename='', line=0):
        "Stores an error message for the validation process"
-        self.errors.append({"id": err_id,
+        self._save_message(self.errors,
+                           {"id": err_id,
                            "message": error,
                            "description": description,
                            "file": filename,
@ -44,22 +47,39 @@ class ErrorBundle(object):
        
    def warning(self, err_id, warning, description='', filename='', line=0):
        "Stores a warning message for the validation process"
-        self.warnings.append({"id": err_id,
-                              "message": warning,
-                              "description": description,
-                              "file": filename,
-                              "line": line})
+        self._save_message(self.warnings,
+                           {"id": err_id,
+                            "message": warning,
+                            "description": description,
+                            "file": filename,
+                            "line": line})
        return self

    def info(self, err_id, info, description='', filename='', line=0):
        "Stores an informational message about the validation"
-        self.infos.append({"id": err_id,
-                           "message": info,
-                           "description": description,
-                           "file": filename,
-                           "line": line})
+        self._save_message(self.infos,
+                           {"id": err_id,
+                            "message": info,
+                            "description": description,
+                            "file": filename,
+                            "line": line})
        return self
        
+    def _save_message(self, stack, message):
+        "Stores a message in the appropriate message stack."
+        
+        stack.append(message)
+        
+        tree = self.message_tree
+        last_id = None
+        for eid in message["id"]:
+            if last_id is not None:
+                tree = tree[last_id]
+            if eid not in tree:
+                tree[eid] = {}
+            last_id = eid
+        tree[last_id] = message
+        
    def set_type(self, type_):
        "Stores the type of addon we're scanning"
        self.detected_type = type_
@ -83,7 +103,6 @@ class ErrorBundle(object):
        
        self.resources[name] = resource
        
-    
    def is_nested_package(self):
        "Returns whether the current package is within a PACKAGE_MULTI"
        
@ -96,12 +115,17 @@ class ErrorBundle(object):
                                 "warnings": self.warnings,
                                 "infos": self.infos,
                                 "detected_type": self.detected_type,
-                                 "resources": self.resources})
+                                 "resources": self.resources,
+                                 "message_tree": self.message_tree})
+        
+        # Note that the message tree is not saved because it is simply rebuilt
+        # when the messages are rewritten.
        
        self.errors = []
        self.warnings = []
        self.infos = []
        self.resources = {}
+        self.message_tree = {}
        
        self.package_stack.append(new_file)
    
@ -113,6 +137,7 @@ class ErrorBundle(object):
        errors = self.errors
        warnings = self.warnings
        infos = self.infos
+        # We only rebuild message_tree anyway. No need to restore.
        
        # Copy the existing state back into place
        self.errors = state["errors"]
@ -120,51 +145,34 @@ class ErrorBundle(object):
        self.infos = state["infos"]
        self.detected_type = state["detected_type"]
        self.resources = state["resources"]
+        self.message_tree = state["message_tree"]
        
        name = self.package_stack.pop()
        
+        self._merge_messages(errors, self.error, name)
+        self._merge_messages(warnings, self.warning, name)
+        self._merge_messages(infos, self.info, name)
+        
+    
+    def _merge_messages(self, messages, callback, name):
+        "Merges a stack of messages into another stack of messages"
+        
        # Overlay the popped warnings onto the existing ones.
-        for error in errors:
+        for message in messages:
            trace = [name]
-            
            # If there are sub-sub-packages, they'll be in a list.
-            if type(error["file"]) is list:
-                trace.extend(error["file"])
+            if isinstance(message["file"], list):
+                trace.extend(message["file"])
            else:
-                trace.append(error["file"])
+                trace.append(message["file"])
            
            # Write the errors with the file structure delimited by
            # right carets.
-            self.error(error["id"],
-                       "%s > %s" % (name, error["message"]),
-                       error["description"],
-                       trace,
-                       error["line"])
-                       
-        for warning in warnings:
-            trace = [name]
-            if type(warning["file"]) is list:
-                trace.extend(warning["file"])
-            else:
-                trace.append(warning["file"])
-            
-            self.warning(warning["id"],
-                         "%s > %s" % (name, warning["message"]),
-                         warning["description"],
-                         trace,
-                         warning["line"])
-        for info in infos:
-            trace = [name]
-            if type(info["file"]) is list:
-                trace.extend(info["file"])
-            else:
-                trace.append(info["file"])
-            
-            self.info(info["id"],
-                      "%s > %s" % (name, info["message"]),
-                      info["description"],
-                      trace,
-                      info["line"])
+            callback(message["id"],
+                     "%s > %s" % (name, message["message"]),
+                     message["description"],
+                     trace,
+                     message["line"])
        
    
    def _clean_description(self, message, json=False):
@ -176,32 +184,20 @@ class ErrorBundle(object):
    def _clean_message(self, desc, json=False):
        "Cleans all the nasty whitespace from a string."
        
-        if not isinstance(desc, list):
-            desc = desc.split("\n")
-        
        output = []
-        merge = []
-        # Loop through each item in the multipart description.
-        for line in desc:
-            # If it's a string, add it to the line buffer for concat.
-            if isinstance(line, str):
-                merge.append(line.strip())
-            # If it's a list, just append it.
-            elif isinstance(line, list):
-                # While you're in here, concat and flush the line buffer.
-                if merge:
-                    output.append(" ".join(merge))
-                
-                # JSON keeps the structure, plain text normalizes.
-                if json:
-                    output.append(line)
-                else:
-                    output.append(" ".join(line))
-        # Finish up the line buffer.
-        if merge:
-            output.append(" ".join(merge))
        
-        return output
+        if not isinstance(desc, list):
+            lines = desc.splitlines()
+            for line in lines:
+                output.append(line.strip())
+            return " ".join(output)
+        else:
+            for line in desc:
+                output.append(self._clean_message(line, json))
+            if json:
+                return output
+            else:
+                return "\n".join(output)
    
    def print_json(self):
        "Prints a JSON summary of the validation operation."
@ -286,8 +282,7 @@ class ErrorBundle(object):
        "Prints a message and takes care of all sorts of nasty code"
        
        # Load up the standard output.
-        output = [prefix,
-                  message["message"]]
+        output = [prefix, self._clean_message([message["message"]])]
        
        # We have some extra stuff for verbose mode.
        if verbose:
@ -296,8 +291,8 @@ class ErrorBundle(object):
            # Detailed problem description.
            if message["description"]:
                # These are dirty, so strip out whitespace and concat.
-                verbose_output.append("\n".join(
-                            self._clean_message(message["description"])))
+                verbose_output.append(
+                            self._clean_message(message["description"]))
            
            # If file information is availe, output that as well.
            files = message["file"]
@ -327,8 +322,8 @@ class ErrorBundle(object):
    def _print_verbose(self, verbose):
        "Prints info code to help prevent code duplication"
        
-        mesg = "<<WHITE>>Notice:<<NORMAL>>\t"
        if self.infos and verbose:
+            mesg = "<<WHITE>>Notice:<<NORMAL>>\t"
            for info in self.infos:
                self._print_message(mesg, info)
        
--- a/validator/main.py
+++ b/validator/main.py
@ -56,6 +56,11 @@ def main():
                        const=True,
                        help="""If the output format supports it, makes
                        the analysis summary include extra info.""")
+    parser.add_argument("--cluster",
+                        action="store_const",
+                        const=True,
+                        help="""Structures output in clustered format,
+                        grouping similar errors together.""")
    parser.add_argument("--file",
                        type=argparse.FileType("w"),
                        default=sys.stdout,
@ -78,6 +83,7 @@ def main():

    error_bundle = ErrorBundle(args.file,
        not args.file == sys.stdout or args.boring)
+    error_bundle.cluster = args.cluster

    # Emulates the "$status" variable in the original validation.php
    # test file. Equal to "$status == STATUS_LISTED".
--- a/validator/testcases/content.py
+++ b/validator/testcases/content.py
@ -55,7 +55,7 @@ def test_packed_packages(err, package_contents=None, xpi_package=None):
            # Let the error bunder know we're in a sub-package.
            err.push_state(data["name_lower"])
            err.set_type(PACKAGE_SUBPACKAGE) # Subpackage
-            testendpoint_validator.test_inner_package(err,
+            testendpoint_validator.main.test_inner_package(err,
                                                      temp_contents,
                                                      sub_xpi)
            
--- a/validator/testcases/markup/csstester.py
+++ b/validator/testcases/markup/csstester.py
@ -13,7 +13,10 @@ def test_css_file(err, filename, data, line_start=1):
    try:
        _run_css_tests(err, token_generator, filename, line_start - 1)
    except UnicodeDecodeError:
-        err.warning("Unicode decode error.",
+        err.warning(("testcases_markup_csstester",
+                     "test_css_file",
+                     "unicode_decode"),
+                    "Unicode decode error.",
                    """While decoding a CSS file, an unknown character
                    was encountered, causing some problems.""",
                    filename)
@ -40,7 +43,10 @@ def _run_css_tests(err, tokens, filename, line_start=0):
        if tok_type == "IDENT":
            last_descriptor = value.lower()
            if value.startswith("-webkit"):
-                err.error("Blasphemy.",
+                err.error(("testcases_markup_csstester",
+                           "_run_css_tests",
+                           "webkit"),
+                          "Blasphemy.",
                          "WebKit descriptors? Really?",
                          filename,
                          line)
--- a/validator/testcases/markup/markuptester.py
+++ b/validator/testcases/markup/markuptester.py
@ -8,7 +8,7 @@ except ImportError: # pragma: no cover
 from validator.testcases.markup import csstester
 from validator.constants import *

-DEBUG = True
+DEBUG = False

 UNSAFE_TAGS = ("script",
               "object",
@ -45,7 +45,7 @@ class MarkupParser(HTMLParser):
        self.xml_state = []
        self.xml_buffer = []
        
-        self.alerted_script_comments = False
+        self.reported = {}
        
    def process(self, filename, data, extension="xul"):
        """Processes data by splitting it into individual lines, then
@ -56,48 +56,78 @@ class MarkupParser(HTMLParser):
        self.filename = filename
        self.extension = extension
        
-        reported = False
+        self.reported = {}
        
        lines = data.split("\n")
+        line_buffer = []
+        force_buffer = False
        for line in lines:
-            try:
-                self.line += 1
-                self.feed(line + "\n")
-            except Exception as inst:
-                
-                if DEBUG: # pragma: no cover
-                    print self.xml_state, inst
-                
-                if reported:
-                    continue
-                
-                if "script" in self.xml_state or (
-                   self.debug and "testscript" in self.xml_state):
-                    if self.alerted_script_comments:
+            self.line += 1
+            
+            search_line = line
+            while True:
+                if "<![CDATA[" in search_line and not force_buffer:
+                    cdatapos = search_line.find("<![CDATA[")
+                    post_cdata = search_line[cdatapos:]
+                    
+                    if "]]>" in post_cdata:
+                        search_line = post_cdata[post_cdata.find("]]>")+3:]
                        continue
-                    self.err.info(("testcases_markup_markuptester",
-                                   "process",
-                                   "missing_script_comments"),
-                                  "Missing comments in <script> tag",
-                                  """Markup parsing errors occurred
-                                  while trying to parse the file. This
-                                  can likely be mitigated by wrapping
-                                  <script> tag contents in HTML comment
-                                  tags (<!-- -->)""",
-                                  self.filename,
-                                  self.line)
-                    self.alerted_script_comments = True
-                    continue
-                
-                self.err.warning(("testcases_markup_markuptester",
-                                  "process",
-                                  "parse_error"),
-                                 "Markup parsing error",
-                                 """There was an error parsing the
-                                 markup document.""",
-                                 self.filename,
-                                 self.line)
-                reported = True
+                    force_buffer = True
+                elif "]]>" in search_line and force_buffer:
+                    force_buffer = False
+                break
+            
+            if force_buffer:
+                line_buffer.append(line)
+            else:
+                if line_buffer:
+                    line_buffer.append(line)
+                    line = "\n".join(line_buffer)
+                    line_buffer = []
+                self._feed_parser(line)
+    
+    def _feed_parser(self, line):
+        "Feeds data into the parser"
+        
+        try:
+            self.feed(line + "\n")
+        except Exception as inst:
+            if DEBUG: # pragma: no cover
+                print self.xml_state, inst
+            
+            if "markup" in self.reported:
+                return
+            
+            if "script" in self.xml_state or (
+               self.debug and "testscript" in self.xml_state):
+                if "script_comments" in self.reported:
+                    return
+                self.err.info(("testcases_markup_markuptester",
+                               "_feed",
+                               "missing_script_comments"),
+                              "Missing comments in <script> tag",
+                              """Markup parsing errors occurred
+                              while trying to parse the file. This
+                              can likely be mitigated by wrapping
+                              <script> tag contents in HTML comment
+                              tags (<!-- -->)""",
+                              self.filename,
+                              self.line)
+                self.reported["script_comments"] = True
+                return
+            
+            self.err.warning(("testcases_markup_markuptester",
+                              "_feed",
+                              "parse_error"),
+                             "Markup parsing error",
+                             ["""There was an error parsing the markup
+                              document.""",
+                              str(inst)],
+                             self.filename,
+                             self.line)
+            self.reported["markup"] = True
+        
    
    def handle_startendtag(self, tag, attrs):
        # Self closing tags don't have an end tag, so we want to
@ -251,6 +281,8 @@ class MarkupParser(HTMLParser):
            print tag, self.xml_state
        
        if not self.xml_state:
+            if "closing_tags" in self.reported:
+                return
            self.err.error(("testcases_markup_markuptester",
                            "handle_endtag",
                            "extra_closing_tags"),
@ -259,6 +291,7 @@ class MarkupParser(HTMLParser):
                           than it has opening tags.""",
                           self.filename,
                           self.line)
+            self.reported["closing_tags"] = True
            if DEBUG: # pragma: no cover
                print "Too many closing tags ------"
            return
@ -328,6 +361,35 @@ class MarkupParser(HTMLParser):
        
    def handle_comment(self, data):
        self._save_to_buffer(data)
+        
+    def parse_marked_section(self, i, report=0):
+        rawdata= self.rawdata
+        _markedsectionclose = re.compile(r']\s*]\s*>')
+        
+        assert rawdata[i:i+3] == '<![', "unexpected call to parse_marked_section()"
+        sectName, j = self._scan_name( i+3, i )
+        if j < 0:
+            return j
+        if sectName in ("temp", "cdata", "ignore", "include", "rcdata"):
+            # look for standard ]]> ending
+            match= _markedsectionclose.search(rawdata, i+3)
+        else:
+            self.error('unknown status keyword %r in marked section' % rawdata[i+3:j])
+        if not match:
+            return -1
+        if report:
+            j = match.start(0)
+            self.unknown_decl(rawdata[i+3: j])
+        return match.end(0)
+        
+    def _unknown_decl(self, decl):
+        "Handles non-DOCTYPE SGML declarations in *ML documents."
+        #print decl
+        decl = decl.strip()
+        split_decl = decl.split()
+        
+        self.out_buffer[split_decl[1]] = split_decl[2].strip('\'"')
+        
    
    def _save_to_buffer(self, data):
        """Save data to the XML buffer for the current tag."""
--- a/validator/typedetection.py
+++ b/validator/typedetection.py
@ -52,7 +52,10 @@ def detect_type(err, install_rdf=None, xpi_package=None):
                      "install.rdf")
            return
    else:
-        err.info("No <em:type> element found in install.rdf",
+        err.info(("typedetection",
+                  "detect_type",
+                  "no_em:type"),
+                 "No <em:type> element found in install.rdf",
                 """It isn't always required, but it is the most
                 reliable method for determining addon type.""",
                 "install.rdf")