Bug 1042921 - Explicitly type-convert all input to mozlog.structured.;r=ahal

2014-08-04 10:29:58 +01:00 · 2014-08-04 10:29:58 +01:00 · 0da7281b20
--- a/testing/mochitest/runtests.py
+++ b/testing/mochitest/runtests.py
@ -1950,7 +1950,7 @@ class Mochitest(MochitestUtilsMixin):
    def fix_stack(self, message):
      if message['action'] == 'log' and self.stackFixerFunction:
-        message['message'] = self.stackFixerFunction(message['message'].encode('ascii', 'replace'))
+        message['message'] = self.stackFixerFunction(message['message'].encode('utf-8', 'replace'))
      return message
    def record_last_test(self, message):
--- a/testing/mochitest/tests/SimpleTest/TestRunner.js
+++ b/testing/mochitest/tests/SimpleTest/TestRunner.js
@ -243,7 +243,7 @@ function StructuredLogger(name) {
        var allData = {action: action,
                       time: new Date().getTime(),
-                       thread: null,
+                       thread: "",
                       pid: null,
                       source: this.name};
--- a/testing/mozbase/mozlog/mozlog/structured/handlers/init.py
+++ b/testing/mozbase/mozlog/mozlog/structured/handlers/init.py
@ -3,6 +3,7 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 from threading import Lock
 import codecs
 from ..structuredlog import log_levels
@ -50,6 +51,16 @@ class StreamHandler(BaseHandler):
    def __init__(self,  stream, formatter):
        assert stream is not None
        # This is a hack to deal with the case where we are passed a
        # StreamWriter (e.g. by mach for stdout). A StreamWriter requires
        # the code to handle unicode in exactly the opposite way compared
        # to a normal stream i.e. you always have to pass in a Unicode
        # object rather than a string object. Cope with that by extracting
        # the underlying raw stream.
        if isinstance(stream, codecs.StreamWriter):
            stream = stream.stream
        self.stream = stream
        BaseHandler.__init__(self, formatter)
@ -61,9 +72,11 @@ class StreamHandler(BaseHandler):
        if not formatted:
            return
        with self._lock:
-            #XXX Should encoding be the formatter's responsibility?
+            if isinstance(formatted, unicode):
-            try:
+                self.stream.write(formatted.encode("utf-8", "replace"))
-                self.stream.write(formatted.encode("utf8", "replace"))
+            elif isinstance(formatted, str):
-            except:
+                self.stream.write(formatted)
-                raise
+            else:
                assert False, "Got output from the formatter of an unexpected type"
            self.stream.flush()
--- a/testing/mozbase/mozlog/mozlog/structured/logtypes.py
+++ b/testing/mozbase/mozlog/mozlog/structured/logtypes.py
@ -0,0 +1,157 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 convertor_registry = {}
 missing = object()
 no_default = object()
 class log_action(object):
    def __init__(self, *args):
        self.args = {}
        self.args_no_default = []
        self.args_with_default = []
        # These are the required fields in a log message that usually aren't
        # supplied by the caller, but can be in the case of log_raw
        self.default_args = [
            Unicode("action"),
            Int("time"),
            Unicode("thread"),
            Int("pid", default=None),
            Unicode("source"),
            Unicode("component")]
        for arg in args:
            if arg.default is no_default:
                self.args_no_default.append(arg.name)
            else:
                self.args_with_default.append(arg.name)
            if arg.name in self.args:
                raise ValueError("Repeated argument name %s" % arg.name)
            self.args[arg.name] = arg
        for extra in self.default_args:
            self.args[extra.name] = extra
    def __call__(self, f):
        convertor_registry[f.__name__] = self
        converter = self
        def inner(self, *args, **kwargs):
            data = converter.convert(*args, **kwargs)
            return f(self, data)
        return inner
    def convert(self, *args, **kwargs):
        data = {}
        values = {}
        values.update(kwargs)
        j = -1
        for i, arg in enumerate(args):
            j += 1
            # Skip over non-defaulted args supplied as keyword arguments
            while self.args_no_default[j] in values:
                j += 1
                if j == len(self.args_no_default):
                    raise TypeError("Too many arguments")
            values[self.args_no_default[j]] = arg
        for k in range(j+1, len(self.args_no_default)):
            if self.args_no_default[k] not in values:
                raise TypeError("Missing required argument %s\n%r\n%r\n%r" %
                                (self.args_no_default[k], args, kwargs, values))
        # Fill in missing arguments
        for name in self.args_with_default:
            if name not in values:
                values[name] = self.args[name].default
        for key, value in values.iteritems():
            out_value = self.args[key](value)
            if out_value is not missing:
                data[key] = out_value
        return data
    def convert_known(self, **kwargs):
        known_kwargs = {name: value for name, value in kwargs.iteritems()
                        if name in self.args}
        return self.convert(**known_kwargs)
 class DataType(object):
    def __init__(self, name, default=no_default, optional=False):
        self.name = name
        self.default = default
        if default is no_default and optional is not False:
            raise ValueError("optional arguments require a default value")
        self.optional = optional
    def __call__(self, value):
        if value == self.default:
            if self.optional:
                return missing
            return self.default
        try:
            return self.convert(value)
        except:
            raise ValueError("Failed to convert value %s of type %s for field %s to type %s" %
                             (value, type(value).__name__, self.name, self.__class__.__name__))
 class Unicode(DataType):
    def convert(self, data):
        if isinstance(data, unicode):
            return data
        if isinstance(data, str):
            return data.decode("utf8", "replace")
        return unicode(data)
 class TestId(DataType):
    def convert(self, data):
        if isinstance(data, unicode):
            return data
        elif isinstance(data, str):
            return data.decode("utf-8", "replace")
        elif isinstance(data, tuple):
            # This is really a bit of a hack; should really split out convertors from the
            # fields they operate on
            func = Unicode(None).convert
            return tuple(func(item) for item in data)
        else:
            raise ValueError
 class Status(DataType):
    allowed = ["PASS", "FAIL", "OK", "ERROR", "TIMEOUT", "CRASH", "ASSERT", "SKIP"]
    def convert(self, data):
        value = data.upper()
        if value not in self.allowed:
            raise ValueError
        return value
 class SubStatus(Status):
    allowed = ["PASS", "FAIL", "ERROR", "TIMEOUT", "CRASH", "ASSERT"]
 class Dict(DataType):
    def convert(self, data):
        return dict(data)
 class List(DataType):
    def __init__(self, name, item_type, default=no_default, optional=False):
        DataType.__init__(self, name, default, optional)
        self.item_type = item_type(None)
    def convert(self, data):
        return [self.item_type.convert(item) for item in data]
 class Int(DataType):
    def convert(self, data):
        return int(data)
--- a/testing/mozbase/mozlog/mozlog/structured/structuredlog.py
+++ b/testing/mozbase/mozlog/mozlog/structured/structuredlog.py
@ -4,12 +4,13 @@
 from __future__ import unicode_literals
 from collections import defaultdict
 from multiprocessing import current_process
 from threading import current_thread, Lock
 import json
 import time
 from logtypes import Unicode, TestId, Status, SubStatus, Dict, List, log_action, convertor_registry
 """Structured Logging for recording test results.
 Allowed actions, and subfields:
@ -129,10 +130,23 @@ class StructuredLogger(object):
        message is logged from this logger"""
        return self._state.handlers
-    def log_raw(self, data):
+    def log_raw(self, raw_data):
-        if "action" not in data:
+        if "action" not in raw_data:
            raise ValueError
-        data = self._make_log_data(data['action'], data)
+
        action = raw_data["action"]
        converted_data = convertor_registry[action].convert_known(**raw_data)
        for k, v in raw_data.iteritems():
            if k not in converted_data:
                converted_data[k] = v
        data = self._make_log_data(action, converted_data)
        if action in ("test_status", "test_end"):
            if (data["expected"] == data["status"] or
                data["status"] == "SKIP"):
                del data["expected"]
        self._handle_log(data)
    def _log_data(self, action, data=None):
@ -158,16 +172,13 @@ class StructuredLogger(object):
        all_data.update(data)
        return all_data
-    def suite_start(self, tests, run_info=None):
+    @log_action(List("tests", Unicode),
                Dict("run_info", default=None, optional=True))
    def suite_start(self, data):
        """Log a suite_start message
        :param tests: List of test identifiers that will be run in the suite.
        """
        data = {"tests": tests}
        if run_info is not None:
            data["run_info"] = run_info
        if self._state.suite_started:
            self.error("Got second suite_start message before suite_end. Logged with data %s" %
                       json.dumps(data))
@ -177,7 +188,8 @@ class StructuredLogger(object):
        self._log_data("suite_start", data)
-    def suite_end(self):
+    @log_action()
    def suite_end(self, data):
        """Log a suite_end message"""
        if not self._state.suite_started:
            self.error("Got suite_end message before suite_start.")
@ -187,22 +199,31 @@ class StructuredLogger(object):
        self._log_data("suite_end")
-    def test_start(self, test):
+    @log_action(TestId("test"))
    def test_start(self, data):
        """Log a test_start message
        :param test: Identifier of the test that will run.
        """
        if not self._state.suite_started:
-            self.error("Got test_start message before suite_start for test %s" % (test,))
+            self.error("Got test_start message before suite_start for test %s" %
                       data["test"])
            return
-        if test in self._state.running_tests:
+        if data["test"] in self._state.running_tests:
-            self.error("test_start for %s logged while in progress." % (test,))
+            self.error("test_start for %s logged while in progress." %
                       data["test"])
            return
-        self._state.running_tests.add(test)
+        self._state.running_tests.add(data["test"])
-        self._log_data("test_start", {"test": test})
+        self._log_data("test_start", data)
-    def test_status(self, test, subtest, status, expected="PASS", message=None,
+    @log_action(TestId("test"),
-                    stack=None, extra=None):
+                Unicode("subtest"),
                SubStatus("status"),
                SubStatus("expected", default="PASS"),
                Unicode("message", default=None, optional=True),
                Unicode("stack", default=None, optional=True),
                Dict("extra", default=None, optional=True))
    def test_status(self, data):
        """
        Log a test_status message indicating a subtest result. Tests that
        do not have subtests are not expected to produce test_status messages.
@ -215,29 +236,25 @@ class StructuredLogger(object):
        :param stack: a stack trace encountered during test execution.
        :param extra: suite-specific data associated with the test result.
        """
        if status.upper() not in ["PASS", "FAIL", "TIMEOUT", "NOTRUN", "ASSERT"]:
            raise ValueError("Unrecognised status %s" % status)
        data = {"test": test,
                "subtest": subtest,
                "status": status.upper()}
        if message is not None:
            data["message"] = unicode(message)
        if expected != data["status"]:
            data["expected"] = expected
        if stack is not None:
            data["stack"] = stack
        if extra is not None:
            data["extra"] = extra
-        if test not in self._state.running_tests:
+        if (data["expected"] == data["status"] or
            data["status"] == "SKIP"):
            del data["expected"]
        if data["test"] not in self._state.running_tests:
            self.error("test_status for %s logged while not in progress. "
-                       "Logged with data: %s" % (test, json.dumps(data)))
+                       "Logged with data: %s" % (data["test"], json.dumps(data)))
            return
        self._log_data("test_status", data)
-    def test_end(self, test, status, expected="OK", message=None, stack=None,
+    @log_action(TestId("test"),
-                 extra=None):
+                Status("status"),
                Status("expected", default="OK"),
                Unicode("message", default=None, optional=True),
                Unicode("stack", default=None, optional=True),
                Dict("extra", default=None, optional=True))
    def test_end(self, data):
        """
        Log a test_end message indicating that a test completed. For tests
        with subtests this indicates whether the overall test completed without
@ -251,28 +268,22 @@ class StructuredLogger(object):
        :param stack: a stack trace encountered during test execution.
        :param extra: suite-specific data associated with the test result.
        """
        if status.upper() not in ["PASS", "FAIL", "OK", "ERROR", "TIMEOUT",
                                  "CRASH", "ASSERT", "SKIP"]:
            raise ValueError("Unrecognised status %s" % status)
        data = {"test": test,
                "status": status.upper()}
        if message is not None:
            data["message"] = unicode(message)
        if expected != data["status"] and status != "SKIP":
            data["expected"] = expected
        if stack is not None:
            data["stack"] = stack
        if extra is not None:
            data["extra"] = extra
-        if test not in self._state.running_tests:
+        if (data["expected"] == data["status"] or
             data["status"] == "SKIP"):
            del data["expected"]
        if data["test"] not in self._state.running_tests:
            self.error("test_end for %s logged while not in progress. "
-                       "Logged with data: %s" % (test, json.dumps(data)))
+                       "Logged with data: %s" % (data["test"], json.dumps(data)))
        else:
-            self._state.running_tests.remove(test)
+            self._state.running_tests.remove(data["test"])
            self._log_data("test_end", data)
-    def process_output(self, process, data, command=None):
+    @log_action(Unicode("process"),
                Unicode("data"),
                Unicode("command", default=None, optional=True))
    def process_output(self, data):
        """Log output from a managed process.
        :param process: A unique identifier for the process producing the output
@ -281,16 +292,15 @@ class StructuredLogger(object):
        :param command: A string representing the full command line used to start
                        the process.
        """
        data = {"process": process, "data": data}
        if command is not None:
            data["command"] = command
        self._log_data("process_output", data)
 def _log_func(level_name):
-    def log(self, message):
+    @log_action(Unicode("message"))
-        data = {"level": level_name, "message": unicode(message)}
+    def log(self, data):
        data["level"] = level_name
        self._log_data("log", data)
    log.__doc__ = """Log a message with level %s
 :param message: The string message to log
--- a/testing/mozbase/mozlog/tests/test_structured.py
+++ b/testing/mozbase/mozlog/tests/test_structured.py
@ -1,9 +1,11 @@
 # -*- coding: utf-8 -*-
 import argparse
 import optparse
 import os
 import unittest
 import StringIO
 import json
 import mozfile
 from mozlog.structured import (
    commandline,
@ -11,6 +13,7 @@ from mozlog.structured import (
    structuredlog,
    stdadapter,
    handlers,
    formatters,
 )
@ -240,7 +243,7 @@ class TestStructuredLog(BaseStructuredTest):
    def test_process(self):
        self.logger.process_output(1234, "test output")
        self.assert_log_equals({"action": "process_output",
-                                "process": 1234,
+                                "process": "1234",
                                "data": "test output"})
    def test_log(self):
@ -321,6 +324,45 @@ class TestStructuredLog(BaseStructuredTest):
                                "level": "INFO",
                                "message": "line 4"})
 class TestTypeconversions(BaseStructuredTest):
    def test_raw(self):
        self.logger.log_raw({"action":"suite_start", "tests":[1], "time": "1234"})
        self.assert_log_equals({"action": "suite_start",
                                "tests":["1"],
                                "time": 1234})
        self.logger.suite_end()
    def test_tuple(self):
        self.logger.suite_start([])
        self.logger.test_start(("\xf0\x90\x8d\x84\xf0\x90\x8c\xb4\xf0\x90\x8d\x83\xf0\x90\x8d\x84", 42, u"\u16a4"))
        self.assert_log_equals({"action": "test_start",
                                "test": (u'\U00010344\U00010334\U00010343\U00010344', u"42", u"\u16a4")})
        self.logger.suite_end()
    def test_non_string_messages(self):
        self.logger.suite_start([])
        self.logger.info(1)
        self.assert_log_equals({"action": "log",
                                "message": "1",
                                "level": "INFO"})
        self.logger.info([1, (2, '3'), "s", "s" + chr(255)])
        self.assert_log_equals({"action": "log",
                                "message": "[1, (2, '3'), 's', 's\\xff']",
                                "level": "INFO"})
        self.logger.suite_end()
    def test_utf8str_write(self):
        with mozfile.NamedTemporaryFile() as logfile:
            _fmt = formatters.TbplFormatter()
            _handler = handlers.StreamHandler(logfile, _fmt)
            self.logger.add_handler(_handler)
            self.logger.suite_start([])
            self.logger.info("☺")
            with open(logfile.name) as f:
                data = f.readlines()
                self.assertEquals(data[-1], "☺\n")
            self.logger.suite_end()
 class TestCommandline(unittest.TestCase):
    def test_setup_logging(self):
        parser = argparse.ArgumentParser()