From 75637885517afe53f3a3e32f2e8c8772369e95f0 Mon Sep 17 00:00:00 2001
From: Geoff Brown <gbrown@mozilla.com>
Date: Wed, 2 Aug 2017 11:08:29 -0600
Subject: [PATCH] Bug 1380126 - Add a copy of mozbase' manifestparser to
 mozharness; r=Callek

--HG--
rename : testing/mozbase/manifestparser/manifestparser/__init__.py => testing/mozharness/manifestparser/__init__.py
rename : testing/mozbase/manifestparser/manifestparser/cli.py => testing/mozharness/manifestparser/cli.py
rename : testing/mozbase/manifestparser/manifestparser/expression.py => testing/mozharness/manifestparser/expression.py
rename : testing/mozbase/manifestparser/manifestparser/filters.py => testing/mozharness/manifestparser/filters.py
rename : testing/mozbase/manifestparser/manifestparser/ini.py => testing/mozharness/manifestparser/ini.py
rename : testing/mozbase/manifestparser/manifestparser/manifestparser.py => testing/mozharness/manifestparser/manifestparser.py
---
 testing/mozharness/manifestparser/__init__.py |   8 +
 testing/mozharness/manifestparser/cli.py      | 247 ++++++
 .../mozharness/manifestparser/expression.py   | 325 +++++++
 testing/mozharness/manifestparser/filters.py  | 421 +++++++++
 testing/mozharness/manifestparser/ini.py      | 168 ++++
 .../manifestparser/manifestparser.py          | 807 ++++++++++++++++++
 6 files changed, 1976 insertions(+)
 create mode 100644 testing/mozharness/manifestparser/__init__.py
 create mode 100644 testing/mozharness/manifestparser/cli.py
 create mode 100644 testing/mozharness/manifestparser/expression.py
 create mode 100644 testing/mozharness/manifestparser/filters.py
 create mode 100644 testing/mozharness/manifestparser/ini.py
 create mode 100644 testing/mozharness/manifestparser/manifestparser.py

diff --git a/testing/mozharness/manifestparser/__init__.py b/testing/mozharness/manifestparser/__init__.py
new file mode 100644
index 000000000000..43c58ae79dfd
--- /dev/null
+++ b/testing/mozharness/manifestparser/__init__.py
@@ -0,0 +1,8 @@
+# flake8: noqa
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from .manifestparser import *
+from .expression import *
+from .ini import *
diff --git a/testing/mozharness/manifestparser/cli.py b/testing/mozharness/manifestparser/cli.py
new file mode 100644
index 000000000000..f5db65272f5f
--- /dev/null
+++ b/testing/mozharness/manifestparser/cli.py
@@ -0,0 +1,247 @@
+#!/usr/bin/env python
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Mozilla universal manifest parser
+"""
+
+from optparse import OptionParser
+import os
+import sys
+
+from .manifestparser import (
+    convert,
+    ManifestParser,
+)
+
+
+class ParserError(Exception):
+    """error for exceptions while parsing the command line"""
+
+
+def parse_args(_args):
+    """
+    parse and return (_dict, tags, args):
+    --key=value (or --key value) -> _dict[key] = value
+    -tag -> appended to tags
+    anything else -> appended to args
+    """
+
+    # return values
+    _dict = {}
+    tags = []
+    args = []
+
+    # parse the arguments; `key` holds a --key still waiting for its value
+    key = None
+    for arg in _args:
+        if arg.startswith('---'):
+            raise ParserError("arguments should start with '-' or '--' only")
+        elif arg.startswith('--'):
+            if key:
+                raise ParserError("Key %s still open" % key)
+            key = arg[2:]
+            if '=' in key:
+                key, value = key.split('=', 1)
+                _dict[key] = value
+                key = None
+        elif arg.startswith('-'):
+            if key:
+                raise ParserError("Key %s still open" % key)
+            tags.append(arg[1:])
+        elif key:
+            # this value closes the pending --key, so later arguments are
+            # neither swallowed by it nor rejected as "still open"
+            _dict[key] = arg
+            key = None
+        else:
+            args.append(arg)
+
+    # return values
+    return (_dict, tags, args)
+
+
+class CLICommand(object):
+    usage = '%prog [options] command'
+
+    def __init__(self, parser):
+        self._parser = parser  # master parser
+
+    def parser(self):
+        return OptionParser(usage=self.usage, description=self.__doc__,
+                            add_help_option=False)
+
+
+class Copy(CLICommand):
+    usage = '%prog [options] copy manifest directory -tag1 -tag2 --key1=value1 --key2=value2 ...'
+
+    def __call__(self, options, args):
+        # parse the arguments
+        try:
+            kwargs, tags, args = parse_args(args)
+        except ParserError, e:
+            self._parser.error(e.message)
+
+        # make sure we have some manifests, otherwise it will
+        # be quite boring
+        if not len(args) == 2:
+            HelpCLI(self._parser)(options, ['copy'])
+            return
+
+        # read the manifests
+        # TODO: should probably ensure these exist here
+        manifests = ManifestParser()
+        manifests.read(args[0])
+
+        # print the resultant query
+        manifests.copy(args[1], None, *tags, **kwargs)
+
+
+class CreateCLI(CLICommand):
+    """
+    create a manifest from a list of directories
+    """
+    usage = '%prog [options] create directory <directory> <...>'
+
+    def parser(self):
+        parser = CLICommand.parser(self)
+        parser.add_option('-p', '--pattern', dest='pattern',
+                          help="glob pattern for files")
+        parser.add_option('-i', '--ignore', dest='ignore',
+                          default=[], action='append',
+                          help='directories to ignore')
+        parser.add_option('-w', '--in-place', dest='in_place',
+                          help='Write .ini files in place; filename to write to')
+        return parser
+
+    def __call__(self, _options, args):
+        parser = self.parser()
+        options, args = parser.parse_args(args)
+
+        # need some directories
+        if not len(args):
+            parser.print_usage()
+            return
+
+        # check every directory first, then convert them all in one call
+        for arg in args:
+            assert os.path.exists(arg)
+            assert os.path.isdir(arg)
+        manifest = convert(args, pattern=options.pattern, ignore=options.ignore,
+                           write=options.in_place)
+        if manifest:
+            print manifest
+
+
+class WriteCLI(CLICommand):
+    """
+    write a manifest based on a query
+    """
+    usage = '%prog [options] write manifest <manifest> -tag1 -tag2 --key1=value1 --key2=value2 ...'
+
+    def __call__(self, options, args):
+
+        # parse the arguments
+        try:
+            kwargs, tags, args = parse_args(args)
+        except ParserError, e:
+            self._parser.error(e.message)
+
+        # make sure we have some manifests, otherwise it will
+        # be quite boring
+        if not args:
+            HelpCLI(self._parser)(options, ['write'])
+            return
+
+        # read the manifests
+        # TODO: should probably ensure these exist here
+        manifests = ManifestParser()
+        manifests.read(*args)
+
+        # print the resultant query
+        manifests.write(global_tags=tags, global_kwargs=kwargs)
+
+
+class HelpCLI(CLICommand):
+    """
+    get help on a command
+    """
+    usage = '%prog [options] help [command]'
+
+    def __call__(self, options, args):
+        if len(args) == 1 and args[0] in commands:
+            commands[args[0]](self._parser).parser().print_help()
+        else:
+            self._parser.print_help()
+            print '\nCommands:'
+            for command in sorted(commands):
+                print '  %s : %s' % (command, commands[command].__doc__.strip())
+
+
+class UpdateCLI(CLICommand):
+    """
+    update the tests as listed in a manifest from a directory
+    """
+    usage = '%prog [options] update manifest directory -tag1 -tag2 --key1=value1 --key2=value2 ...'
+
+    def __call__(self, options, args):
+        # parse the arguments
+        try:
+            kwargs, tags, args = parse_args(args)
+        except ParserError, e:
+            self._parser.error(e.message)
+
+        # make sure we have some manifests, otherwise it will
+        # be quite boring
+        if not len(args) == 2:
+            HelpCLI(self._parser)(options, ['update'])
+            return
+
+        # read the manifests
+        # TODO: should probably ensure these exist here
+        manifests = ManifestParser()
+        manifests.read(args[0])
+
+        # print the resultant query
+        manifests.update(args[1], None, *tags, **kwargs)
+
+
+# command -> class mapping
+commands = {'create': CreateCLI,
+            'help': HelpCLI,
+            'update': UpdateCLI,
+            'write': WriteCLI}
+
+
+def main(args=sys.argv[1:]):
+    """console_script entry point"""
+
+    # set up an option parser
+    usage = '%prog [options] [command] ...'
+    description = "%s. Use `help` to display commands" % __doc__.strip()
+    parser = OptionParser(usage=usage, description=description)
+    parser.add_option('-s', '--strict', dest='strict',
+                      action='store_true', default=False,
+                      help='adhere strictly to errors')
+    parser.disable_interspersed_args()
+
+    options, args = parser.parse_args(args)
+
+    if not args:
+        HelpCLI(parser)(options, args)
+        parser.exit()
+
+    # get the command
+    command = args[0]
+    if command not in commands:
+        parser.error("Command must be one of %s (you gave '%s')" %
+                     (', '.join(sorted(commands.keys())), command))
+
+    handler = commands[command](parser)
+    handler(options, args[1:])
+
+
+if __name__ == '__main__':
+    main()
diff --git a/testing/mozharness/manifestparser/expression.py b/testing/mozharness/manifestparser/expression.py
new file mode 100644
index 000000000000..4f6ec3dc8ed2
--- /dev/null
+++ b/testing/mozharness/manifestparser/expression.py
@@ -0,0 +1,325 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+import sys
+import traceback
+
+__all__ = ['parse', 'ParseError', 'ExpressionParser']
+
+# expr.py
+# from:
+# http://k0s.org/mozilla/hg/expressionparser
+# http://hg.mozilla.org/users/tmielczarek_mozilla.com/expressionparser
+
+# Implements a top-down parser/evaluator for simple boolean expressions.
+# ideas taken from http://effbot.org/zone/simple-top-down-parsing.htm
+#
+# Rough grammar:
+# expr := literal
+#       | '(' expr ')'
+#       | expr '&&' expr
+#       | expr '||' expr
+#       | expr '==' expr
+#       | expr '!=' expr
+#       | expr '<' expr
+#       | expr '>' expr
+#       | expr '<=' expr
+#       | expr '>=' expr
+# literal := BOOL
+#          | INT
+#          | STRING
+#          | IDENT
+# BOOL   := true|false
+# INT    := [0-9]+
+# STRING := "[^"]*"
+# IDENT  := [A-Za-z_]\w*
+
+# Identifiers take their values from a mapping dictionary passed as the second
+# argument.
+
+# Glossary (see above URL for details):
+# - nud: null denotation
+# - led: left denotation
+# - lbp: left binding power
+# - rbp: right binding power
+
+
+class ident_token(object):
+
+    def __init__(self, scanner, value):
+        self.value = value
+
+    def nud(self, parser):
+        # identifiers take their value from the value mappings passed
+        # to the parser
+        return parser.value(self.value)
+
+
+class literal_token(object):
+
+    def __init__(self, scanner, value):
+        self.value = value
+
+    def nud(self, parser):
+        return self.value
+
+
+class eq_op_token(object):
+    "=="
+
+    def led(self, parser, left):
+        return left == parser.expression(self.lbp)
+
+
+class neq_op_token(object):
+    "!="
+
+    def led(self, parser, left):
+        return left != parser.expression(self.lbp)
+
+
+class lt_op_token(object):
+    "<"
+
+    def led(self, parser, left):
+        return left < parser.expression(self.lbp)
+
+
+class gt_op_token(object):
+    ">"
+
+    def led(self, parser, left):
+        return left > parser.expression(self.lbp)
+
+
+class le_op_token(object):
+    "<="
+
+    def led(self, parser, left):
+        return left <= parser.expression(self.lbp)
+
+
+class ge_op_token(object):
+    ">="
+
+    def led(self, parser, left):
+        return left >= parser.expression(self.lbp)
+
+
+class not_op_token(object):
+    "!"
+
+    def nud(self, parser):
+        return not parser.expression(100)
+
+
+class and_op_token(object):
+    "&&"
+
+    def led(self, parser, left):
+        right = parser.expression(self.lbp)
+        return left and right
+
+
+class or_op_token(object):
+    "||"
+
+    def led(self, parser, left):
+        right = parser.expression(self.lbp)
+        return left or right
+
+
+class lparen_token(object):
+    "("
+
+    def nud(self, parser):
+        expr = parser.expression()
+        parser.advance(rparen_token)
+        return expr
+
+
+class rparen_token(object):
+    ")"
+
+
+class end_token(object):
+    """always ends parsing"""
+
+# derived literal tokens
+
+
+class bool_token(literal_token):
+
+    def __init__(self, scanner, value):
+        value = {'true': True, 'false': False}[value]
+        literal_token.__init__(self, scanner, value)
+
+
+class int_token(literal_token):
+
+    def __init__(self, scanner, value):
+        literal_token.__init__(self, scanner, int(value))
+
+
+class string_token(literal_token):
+
+    def __init__(self, scanner, value):
+        literal_token.__init__(self, scanner, value[1:-1])
+
+
+precedence = [(end_token, rparen_token),
+              (or_op_token,),
+              (and_op_token,),
+              (lt_op_token, gt_op_token, le_op_token, ge_op_token,
+               eq_op_token, neq_op_token),
+              (lparen_token,),
+              ]
+for index, rank in enumerate(precedence):
+    for token in rank:
+        token.lbp = index  # lbp = lowest left binding power
+
+
+class ParseError(Exception):
+    """error parsing conditional expression"""
+
+
+class ExpressionParser(object):
+    """
+    A parser for a simple expression language.
+
+    The expression language can be described as follows::
+
+        EXPRESSION ::= LITERAL | '(' EXPRESSION ')' | '!' EXPRESSION | EXPRESSION OP EXPRESSION
+        OP ::= '==' | '!=' | '<' | '>' | '<=' | '>=' | '&&' | '||'
+        LITERAL ::= BOOL | INT | IDENT | STRING
+        BOOL ::= 'true' | 'false'
+        INT ::= [0-9]+
+        IDENT ::= [a-zA-Z_]\w*
+        STRING ::= '"' [^\"]* '"' | ''' [^\']* '''
+
+    At its core, expressions consist of booleans, integers, identifiers and
+    strings. Booleans are one of *true* or *false*. Integers are a series
+    of digits. Identifiers are a letter or underscore followed by any number
+    of letters, digits and underscores. Strings are a pair of matching quote
+    characters (single or double) with zero or more characters inside.
+
+    Expressions can be combined with operators: the equals (==) and not
+    equals (!=) operators compare two expressions and produce a boolean. The
+    and (&&) and or (||) operators take two expressions and produce the logical
+    AND or OR value of them, respectively. An expression can also be prefixed
+    with the not (!) operator, which produces its logical negation.
+
+    Finally, any expression may be contained within parentheses for grouping.
+
+    Identifiers take their values from the mapping provided.
+    """
+
+    scanner = None
+
+    def __init__(self, text, valuemapping, strict=False):
+        """
+        Initialize the parser
+        :param text: The expression to parse as a string.
+        :param valuemapping: A dict mapping identifier names to values.
+        :param strict: If true, referencing an identifier that was not
+                       provided in :valuemapping: will raise an error.
+        """
+        self.text = text
+        self.valuemapping = valuemapping
+        self.strict = strict
+
+    def _tokenize(self):
+        """
+        Lex the input text into tokens and yield them in sequence.
+        """
+        if not ExpressionParser.scanner:
+            ExpressionParser.scanner = re.Scanner([
+                # Note: keep these in sync with the class docstring above.
+                (r"true|false", bool_token),
+                (r"[a-zA-Z_]\w*", ident_token),
+                (r"[0-9]+", int_token),
+                (r'("[^"]*")|(\'[^\']*\')', string_token),
+                (r"==", eq_op_token()),
+                (r"!=", neq_op_token()),
+                (r"<=", le_op_token()),
+                (r">=", ge_op_token()),
+                (r"<", lt_op_token()),
+                (r">", gt_op_token()),
+                (r"\|\|", or_op_token()),
+                (r"!", not_op_token()),
+                (r"&&", and_op_token()),
+                (r"\(", lparen_token()),
+                (r"\)", rparen_token()),
+                (r"\s+", None),  # skip whitespace
+            ])
+        tokens, remainder = ExpressionParser.scanner.scan(self.text)
+        for t in tokens:
+            yield t
+        yield end_token()
+
+    def value(self, ident):
+        """
+        Look up the value of |ident| in the value mapping passed in the
+        constructor.
+        """
+        if self.strict:
+            return self.valuemapping[ident]
+        else:
+            return self.valuemapping.get(ident, None)
+
+    def advance(self, expected):
+        """
+        Assert that the next token is an instance of |expected|, and advance
+        to the next token.
+        """
+        if not isinstance(self.token, expected):
+            raise Exception("Unexpected token!")
+        self.token = self.iter.next()
+
+    def expression(self, rbp=0):
+        """
+        Parse and return the value of an expression until a token with
+        right binding power greater than rbp is encountered.
+        """
+        t = self.token
+        self.token = self.iter.next()
+        left = t.nud(self)
+        while rbp < self.token.lbp:
+            t = self.token
+            self.token = self.iter.next()
+            left = t.led(self, left)
+        return left
+
+    def parse(self):
+        """
+        Parse and return the value of the expression in the text
+        passed to the constructor. Raises a ParseError if the expression
+        could not be parsed.
+        """
+        try:
+            self.iter = self._tokenize()
+            self.token = self.iter.next()
+            return self.expression()
+        except:
+            extype, ex, tb = sys.exc_info()
+            formatted = ''.join(traceback.format_exception_only(extype, ex))
+            raise ParseError("could not parse: "
+                             "%s\nexception: %svariables: %s" % (self.text,
+                                                                 formatted,
+                                                                 self.valuemapping)), None, tb
+
+    __call__ = parse
+
+
+def parse(text, **values):
+    """
+    Parse and evaluate a boolean expression.
+    :param text: The expression to parse, as a string.
+    :param values: A dict containing a name to value mapping for identifiers
+                   referenced in *text*.
+    :rtype: the final value of the expression.
+    :raises: :py:exc::ParseError: will be raised if parsing fails.
+    """
+    return ExpressionParser(text, values).parse()
diff --git a/testing/mozharness/manifestparser/filters.py b/testing/mozharness/manifestparser/filters.py
new file mode 100644
index 000000000000..e832c0da657e
--- /dev/null
+++ b/testing/mozharness/manifestparser/filters.py
@@ -0,0 +1,421 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+A filter is a callable that accepts an iterable of test objects and a
+dictionary of values, and returns a new iterable of test objects. It is
+possible to define custom filters if the built-in ones are not enough.
+"""
+
+from collections import defaultdict, MutableSequence
+import itertools
+import os
+
+from .expression import (
+    parse,
+    ParseError,
+)
+
+
+# built-in filters
+
+def skip_if(tests, values):
+    """
+    Sets disabled on all tests containing the `skip-if` tag and whose condition
+    is True. This filter is added by default.
+    """
+    tag = 'skip-if'
+    for test in tests:
+        if tag in test and parse(test[tag], **values):
+            test.setdefault('disabled', '{}: {}'.format(tag, test[tag]))
+        yield test
+
+
+def run_if(tests, values):
+    """
+    Sets disabled on all tests containing the `run-if` tag and whose condition
+    is False. This filter is added by default.
+    """
+    tag = 'run-if'
+    for test in tests:
+        if tag in test and not parse(test[tag], **values):
+            test.setdefault('disabled', '{}: {}'.format(tag, test[tag]))
+        yield test
+
+
+def fail_if(tests, values):
+    """
+    Sets expected to 'fail' on all tests containing the `fail-if` tag and whose
+    condition is True. This filter is added by default.
+    """
+    tag = 'fail-if'
+    for test in tests:
+        if tag in test and parse(test[tag], **values):
+            test['expected'] = 'fail'
+        yield test
+
+
+def enabled(tests, values):
+    """
+    Removes all tests containing the `disabled` key. This filter can be
+    added by passing `disabled=False` into `active_tests`.
+    """
+    for test in tests:
+        if 'disabled' not in test:
+            yield test
+
+
+def exists(tests, values):
+    """
+    Removes all tests that do not exist on the file system. This filter is
+    added by default, but can be removed by passing `exists=False` into
+    `active_tests`.
+    """
+    for test in tests:
+        if os.path.exists(test['path']):
+            yield test
+
+
+# built-in instance filters
+
+class InstanceFilter(object):
+    """
+    Generally only one instance of a class filter should be applied at a time.
+    Two instances of `InstanceFilter` are considered equal if they have the
+    same class name. This ensures only a single instance is ever added to
+    `filterlist`. This class also formats filters' __str__ method for easier
+    debugging.
+    """
+    unique = True
+
+    def __init__(self, *args, **kwargs):
+        self.fmt_args = ', '.join(itertools.chain(
+            [str(a) for a in args],
+            ['{}={}'.format(k, v) for k, v in kwargs.iteritems()]))
+
+    def __eq__(self, other):
+        if self.unique:
+            return self.__class__ == other.__class__
+        return self.__hash__() == other.__hash__()
+
+    def __str__(self):
+        return "{}({})".format(self.__class__.__name__, self.fmt_args)
+
+
+class subsuite(InstanceFilter):
+    """
+    If `name` is None, removes all tests that have a `subsuite` key.
+    Otherwise removes all tests that do not have a subsuite matching `name`.
+
+    It is possible to specify conditional subsuite keys using:
+       subsuite = foo,condition
+
+    where 'foo' is the subsuite name, and 'condition' is the same type of
+    condition used for skip-if.  If the condition doesn't evaluate to true,
+    the subsuite designation will be removed from the test.
+
+    :param name: The name of the subsuite to run (default None)
+    """
+
+    def __init__(self, name=None):
+        InstanceFilter.__init__(self, name=name)
+        self.name = name
+
+    def __call__(self, tests, values):
+        # Look for conditional subsuites, and replace them with the subsuite
+        # itself (if the condition is true), or nothing.
+        for test in tests:
+            subsuite = test.get('subsuite', '')
+            if ',' in subsuite:
+                try:
+                    subsuite, cond = subsuite.split(',')
+                except ValueError:
+                    raise ParseError("subsuite condition can't contain commas")
+                matched = parse(cond, **values)
+                if matched:
+                    test['subsuite'] = subsuite
+                else:
+                    test['subsuite'] = ''
+
+            # Filter on current subsuite
+            if self.name is None:
+                if not test.get('subsuite'):
+                    yield test
+            else:
+                if test.get('subsuite', '') == self.name:
+                    yield test
+
+
+class chunk_by_slice(InstanceFilter):
+    """
+    Basic chunking algorithm that splits tests evenly across total chunks.
+
+    :param this_chunk: the current chunk, 1 <= this_chunk <= total_chunks
+    :param total_chunks: the total number of chunks
+    :param disabled: Whether to include disabled tests in the chunking
+                     algorithm. If False, each chunk contains an equal number
+                     of non-disabled tests. If True, each chunk contains an
+                     equal number of tests (default False)
+    """
+
+    def __init__(self, this_chunk, total_chunks, disabled=False):
+        assert 1 <= this_chunk <= total_chunks
+        InstanceFilter.__init__(self, this_chunk, total_chunks,
+                                disabled=disabled)
+        self.this_chunk = this_chunk
+        self.total_chunks = total_chunks
+        self.disabled = disabled
+
+    def __call__(self, tests, values):
+        tests = list(tests)
+        if self.disabled:
+            chunk_tests = tests[:]
+        else:
+            chunk_tests = [t for t in tests if 'disabled' not in t]
+
+        tests_per_chunk = float(len(chunk_tests)) / self.total_chunks
+        start = int(round((self.this_chunk - 1) * tests_per_chunk))
+        end = int(round(self.this_chunk * tests_per_chunk))
+
+        if not self.disabled:
+            # map start and end back onto original list of tests. Disabled
+            # tests will still be included in the returned list, but each
+            # chunk will contain an equal number of enabled tests.
+            if self.this_chunk == 1:
+                start = 0
+            elif start < len(chunk_tests):
+                start = tests.index(chunk_tests[start])
+
+            if self.this_chunk == self.total_chunks:
+                end = len(tests)
+            elif end < len(chunk_tests):
+                end = tests.index(chunk_tests[end])
+        return (t for t in tests[start:end])
+
+
+class chunk_by_dir(InstanceFilter):
+    """
+    Basic chunking algorithm that splits directories of tests evenly at a
+    given depth.
+
+    For example, a depth of 2 means all test directories two path nodes away
+    from the base are gathered, then split evenly across the total number of
+    chunks. The number of tests in each of the directories is not taken into
+    account (so chunks will not contain an even number of tests). All test
+    paths must be relative to the same root (typically the root of the source
+    repository).
+
+    :param this_chunk: the current chunk, 1 <= this_chunk <= total_chunks
+    :param total_chunks: the total number of chunks
+    :param depth: the minimum depth of a subdirectory before it will be
+                  considered unique
+    """
+
+    def __init__(self, this_chunk, total_chunks, depth):
+        InstanceFilter.__init__(self, this_chunk, total_chunks, depth)
+        self.this_chunk = this_chunk
+        self.total_chunks = total_chunks
+        self.depth = depth
+
+    def __call__(self, tests, values):
+        tests_by_dir = defaultdict(list)
+        ordered_dirs = []
+        for test in tests:
+            path = test['relpath']
+
+            if path.startswith(os.sep):
+                path = path[1:]
+
+            dirs = path.split(os.sep)
+            dirs = dirs[:min(self.depth, len(dirs) - 1)]
+            path = os.sep.join(dirs)
+
+            # don't count directories that only have disabled tests in them,
+            # but still yield disabled tests that are alongside enabled tests
+            if path not in ordered_dirs and 'disabled' not in test:
+                ordered_dirs.append(path)
+            tests_by_dir[path].append(test)
+
+        tests_per_chunk = float(len(ordered_dirs)) / self.total_chunks
+        start = int(round((self.this_chunk - 1) * tests_per_chunk))
+        end = int(round(self.this_chunk * tests_per_chunk))
+
+        for i in range(start, end):
+            for test in tests_by_dir.pop(ordered_dirs[i]):
+                yield test
+
+        # find directories that only contain disabled tests. They still need to
+        # be yielded for reporting purposes. Put them all in chunk 1 for
+        # simplicity.
+        if self.this_chunk == 1:
+            disabled_dirs = [v for k, v in tests_by_dir.iteritems()
+                             if k not in ordered_dirs]
+            for disabled_test in itertools.chain(*disabled_dirs):
+                yield disabled_test
+
+
+class chunk_by_runtime(InstanceFilter):
+    """
+    Chunking algorithm that attempts to group tests into chunks based on their
+    average runtimes. It keeps manifests of tests together and pairs slow
+    running manifests with fast ones.
+
+    :param this_chunk: the current chunk, 1 <= this_chunk <= total_chunks
+    :param total_chunks: the total number of chunks
+    :param runtimes: dictionary of test runtime data, of the form
+                     {<test path>: <average runtime>}
+    :param default_runtime: value in seconds to assign tests that don't exist
+                            in the runtimes file
+    """
+
+    def __init__(self, this_chunk, total_chunks, runtimes, default_runtime=0):
+        InstanceFilter.__init__(self, this_chunk, total_chunks, runtimes,
+                                default_runtime=default_runtime)
+        self.this_chunk = this_chunk
+        self.total_chunks = total_chunks
+
+        # defaultdict(lambda:<int>) assigns all non-existent keys the value of
+        # <int>. This means all tests we encounter that don't exist in the
+        # runtimes file will be assigned `default_runtime`.
+        self.runtimes = defaultdict(lambda: default_runtime)
+        self.runtimes.update(runtimes)
+
+    def __call__(self, tests, values):
+        tests = list(tests)
+        manifests = set(t['manifest'] for t in tests)
+
+        def total_runtime(tests):
+            return sum(self.runtimes[t['relpath']] for t in tests
+                       if 'disabled' not in t)
+
+        tests_by_manifest = []
+        for manifest in manifests:
+            mtests = [t for t in tests if t['manifest'] == manifest]
+            tests_by_manifest.append((total_runtime(mtests), mtests))
+        tests_by_manifest.sort(reverse=True)
+
+        tests_by_chunk = [[0, []] for i in range(self.total_chunks)]
+        for runtime, batch in tests_by_manifest:
+            # sort first by runtime, then by number of tests in case of a tie.
+            # This guarantees the chunk with the fastest runtime will always
+            # get the next batch of tests.
+            tests_by_chunk.sort(key=lambda x: (x[0], len(x[1])))
+            tests_by_chunk[0][0] += runtime
+            tests_by_chunk[0][1].extend(batch)
+
+        return (t for t in tests_by_chunk[self.this_chunk - 1][1])
+
+
+class tags(InstanceFilter):
+    """
+    Removes tests that don't contain any of the given tags. This sets
+    `unique = False`, so multiple instances can be added to a filterlist.
+    Multiple tag filters are equivalent to joining tags with the AND operator.
+
+    To define a tag in a manifest, add a `tags` attribute to a test or DEFAULT
+    section. Tests can have multiple tags, in which case they should be
+    whitespace delimited. For example:
+
+    [test_foobar.html]
+    tags = foo bar
+
+    :param tags: A tag or list of tags to filter tests on
+    """
+    unique = False
+
+    def __init__(self, tags):
+        InstanceFilter.__init__(self, tags)
+        if isinstance(tags, basestring):
+            tags = [tags]
+        self.tags = tags
+
+    def __call__(self, tests, values):
+        for test in tests:
+            if 'tags' not in test:
+                continue
+
+            test_tags = [t.strip() for t in test['tags'].split()]
+            if any(t in self.tags for t in test_tags):
+                yield test
+
+
+class pathprefix(InstanceFilter):
+    """
+    Removes tests that don't start with any of the given test paths.
+
+    :param paths: A list of test paths to filter on
+    """
+
+    def __init__(self, paths):
+        InstanceFilter.__init__(self, paths)
+        if isinstance(paths, basestring):
+            paths = [paths]
+        self.paths = paths
+
+    def __call__(self, tests, values):
+        for test in tests:
+            for tp in self.paths:
+                tp = os.path.normpath(tp)
+
+                path = test['relpath']
+                if os.path.isabs(tp):
+                    path = test['path']
+
+                if not os.path.normpath(path).startswith(tp):
+                    continue
+
+                # any test path that points to a single file will be run no
+                # matter what, even if it's disabled
+                if 'disabled' in test and os.path.normpath(test['relpath']) == tp:
+                    del test['disabled']
+                yield test
+                break
+
+
+# filter container
+
+DEFAULT_FILTERS = (
+    skip_if,
+    run_if,
+    fail_if,
+)
+"""
+By default :func:`~.active_tests` will run the :func:`~.skip_if`,
+:func:`~.run_if` and :func:`~.fail_if` filters.
+"""
+
+
+class filterlist(MutableSequence):
+    """
+    A MutableSequence that raises TypeError when adding a non-callable and
+    ValueError if the item is already added.
+    """
+
+    def __init__(self, items=None):
+        self.items = []
+        if items:
+            self.items = list(items)
+
+    def _validate(self, item):
+        if not callable(item):
+            raise TypeError("Filters must be callable!")
+        if item in self:
+            raise ValueError("Filter {} is already applied!".format(item))
+
+    def __getitem__(self, key):
+        return self.items[key]
+
+    def __setitem__(self, key, value):
+        self._validate(value)
+        self.items[key] = value
+
+    def __delitem__(self, key):
+        del self.items[key]
+
+    def __len__(self):
+        return len(self.items)
+
+    def insert(self, index, value):
+        self._validate(value)
+        self.items.insert(index, value)
diff --git a/testing/mozharness/manifestparser/ini.py b/testing/mozharness/manifestparser/ini.py
new file mode 100644
index 000000000000..9279f2c0bcd9
--- /dev/null
+++ b/testing/mozharness/manifestparser/ini.py
@@ -0,0 +1,168 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import os
+import sys
+
+__all__ = ['read_ini', 'combine_fields']
+
+
+class IniParseError(Exception):
+    def __init__(self, fp, linenum, msg):
+        if isinstance(fp, basestring):
+            path = fp
+        elif hasattr(fp, 'name'):
+            path = fp.name
+        else:
+            path = getattr(fp, 'path', 'unknown')
+        msg = "Error parsing manifest file '{}', line {}: {}".format(path, linenum, msg)
+        super(IniParseError, self).__init__(msg)
+
+
+def read_ini(fp, variables=None, default='DEFAULT', defaults_only=False,
+             comments=None, separators=None, strict=True, handle_defaults=True):
+    """
+    read an .ini file and return a list of [(section, values)]
+    - fp : file pointer or path to read
+    - variables : default set of variables
+    - default : name of the section for the default section
+    - defaults_only : if True, return the default section only
+    - comments : characters that if they start a line denote a comment
+    - separators : strings that denote key, value separation in order
+    - strict : whether to be strict about parsing
+    - handle_defaults : whether to incorporate defaults into each section
+    """
+
+    # variables
+    variables = variables or {}
+    comments = comments or ('#',)
+    separators = separators or ('=', ':')
+    sections = []
+    key = value = None
+    section_names = set()
+    if isinstance(fp, basestring):
+        fp = file(fp)
+
+    # read the lines
+    for (linenum, line) in enumerate(fp.read().splitlines(), start=1):
+
+        stripped = line.strip()
+
+        # ignore blank lines
+        if not stripped:
+            # reset key and value to avoid continuation lines
+            key = value = None
+            continue
+
+        # ignore comment lines
+        if any(stripped.startswith(c) for c in comments):
+            continue
+
+        # strip inline comments (borrowed from configparser)
+        comment_start = sys.maxsize
+        inline_prefixes = {p: -1 for p in comments}
+        while comment_start == sys.maxsize and inline_prefixes:
+            next_prefixes = {}
+            for prefix, index in inline_prefixes.items():
+                index = line.find(prefix, index+1)
+                if index == -1:
+                    continue
+                next_prefixes[prefix] = index
+                if index == 0 or (index > 0 and line[index-1].isspace()):
+                    comment_start = min(comment_start, index)
+            inline_prefixes = next_prefixes
+        # comment_start is an offset into `line`, so cut `line` (not `stripped`)
+        if comment_start != sys.maxsize:
+            stripped = line[:comment_start].strip()
+
+        # check for a new section
+        if len(stripped) > 2 and stripped[0] == '[' and stripped[-1] == ']':
+            section = stripped[1:-1].strip()
+            key = value = None
+
+            # deal with DEFAULT section
+            if section.lower() == default.lower():
+                if strict:
+                    assert default not in section_names
+                section_names.add(default)
+                current_section = variables
+                continue
+
+            if strict:
+                # make sure this section doesn't already exist
+                assert section not in section_names, "Section '%s' already found in '%s'" % (
+                    section, section_names)
+
+            section_names.add(section)
+            current_section = {}
+            sections.append((section, current_section))
+            continue
+
+        # if there aren't any sections yet, something bad happened
+        if not section_names:
+            raise IniParseError(fp, linenum, "Expected a comment or section, "
+                                             "instead found '{}'".format(stripped))
+
+        # (key, value) pair
+        for separator in separators:
+            if separator in stripped:
+                key, value = stripped.split(separator, 1)
+                key = key.strip()
+                value = value.strip()
+
+                if strict:
+                    # make sure this key isn't already in the section or empty
+                    assert key
+                    if current_section is not variables:
+                        assert key not in current_section
+
+                current_section[key] = value
+                break
+        else:
+            # continuation line ?
+            if line[0].isspace() and key:
+                value = '%s%s%s' % (value, os.linesep, stripped)
+                current_section[key] = value
+            else:
+                # something bad happened!
+                raise IniParseError(fp, linenum, "Unexpected line '{}'".format(stripped))
+
+    # server-root is a special os path declared relative to the manifest file.
+    # inheritance demands we expand it as absolute
+    if 'server-root' in variables:
+        root = os.path.join(os.path.dirname(fp.name),
+                            variables['server-root'])
+        variables['server-root'] = os.path.abspath(root)
+
+    # return the default section only if requested
+    if defaults_only:
+        return [(default, variables)]
+
+    global_vars = variables if handle_defaults else {}
+    sections = [(i, combine_fields(global_vars, j)) for i, j in sections]
+    return sections
+
+
+def combine_fields(global_vars, local_vars):
+    """
+    Merge manifest-level defaults (global_vars) with one test's own entries
+    (local_vars); 'skip-if' and 'support-files' are joined, others overridden.
+    """
+    if not global_vars:
+        return local_vars
+    if not local_vars:
+        return global_vars
+    # %-templates taking (global value, local value) for the joined fields
+    joiners = {
+        'skip-if': '(%s) || (%s)',
+        'support-files': '%s %s',
+    }
+    merged = global_vars.copy()
+    for name, local_value in local_vars.items():
+        if name in joiners and name in global_vars:
+            # present on both sides: combine instead of overriding
+            merged[name] = joiners[name] % (global_vars[name], local_value)
+        else:
+            merged[name] = local_value
+    return merged
diff --git a/testing/mozharness/manifestparser/manifestparser.py b/testing/mozharness/manifestparser/manifestparser.py
new file mode 100644
index 000000000000..f69cbab86e9f
--- /dev/null
+++ b/testing/mozharness/manifestparser/manifestparser.py
@@ -0,0 +1,807 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from StringIO import StringIO
+import json
+import fnmatch
+import os
+import shutil
+import sys
+import types
+
+from .ini import read_ini
+from .filters import (
+    DEFAULT_FILTERS,
+    enabled,
+    exists as _exists,
+    filterlist,
+)
+
+__all__ = ['ManifestParser', 'TestManifest', 'convert']
+
+relpath = os.path.relpath
+string = (basestring,)
+
+
+# path normalization
+
+def normalize_path(path):
+    """normalize a relative path"""
+    if sys.platform.startswith('win'):
+        return path.replace('/', os.path.sep)
+    return path
+
+
+def denormalize_path(path):
+    """denormalize a relative path"""
+    if sys.platform.startswith('win'):
+        return path.replace(os.path.sep, '/')
+    return path
+
+
+# objects for parsing manifests
+
+class ManifestParser(object):
+    """read .ini manifests"""
+
+    def __init__(self, manifests=(), defaults=None, strict=True, rootdir=None,
+                 finder=None, handle_defaults=True):
+        """Creates a ManifestParser from the given manifest files.
+
+        :param manifests: An iterable of file paths or file objects corresponding
+                          to manifests. If a file path refers to a manifest file that
+                          does not exist, an IOError is raised.
+        :param defaults: Variables to pre-define in the environment for evaluating
+                         expressions in manifests.
+        :param strict: If False, the provided manifests may contain references to
+                       listed (test) files that do not exist without raising an
+                       IOError during reading, and certain errors in manifests
+                       are not considered fatal. Those errors include duplicate
+                       section names, redefining variables, and defining empty
+                       variables.
+        :param rootdir: The directory used as the basis for conversion to and from
+                        relative paths during manifest reading.
+        :param finder: If provided, this finder object will be used for filesystem
+                       interactions. Finder objects are part of the mozpack package,
+                       documented at
+                       http://gecko.readthedocs.org/en/latest/python/mozpack.html#module-mozpack.files
+        :param handle_defaults: If not set, do not propagate manifest defaults to individual
+                                test objects. Callers are expected to manage per-manifest
+                                defaults themselves via the manifest_defaults member
+                                variable in this case.
+        """
+        self._defaults = defaults or {}
+        self._ancestor_defaults = {}
+        self.tests = []
+        self.manifest_defaults = {}
+        self.source_files = set()
+        self.strict = strict
+        self.rootdir = rootdir
+        self.relativeRoot = None
+        self.finder = finder
+        self._handle_defaults = handle_defaults
+        if manifests:
+            self.read(*manifests)
+
+    def path_exists(self, path):
+        if self.finder:
+            return self.finder.get(path) is not None
+        return os.path.exists(path)
+
+    # methods for reading manifests
+
+    def _read(self, root, filename, defaults, defaults_only=False, parentmanifest=None):
+        """
+        Internal recursive method for reading and parsing manifests.
+        Stores all found tests in self.tests
+        :param root: The base path
+        :param filename: File object or string path for the base manifest file
+        :param defaults: Options that apply to all items
+        :param defaults_only: If True will only gather options, not include
+                              tests. Used for upstream parent includes
+                              (default False)
+        :param parentmanifest: Filename of the parent manifest (default None)
+        """
+        def read_file(type):
+            include_file = section.split(type, 1)[-1]
+            include_file = normalize_path(include_file)
+            if not os.path.isabs(include_file):
+                include_file = os.path.join(here, include_file)
+            if not self.path_exists(include_file):
+                message = "Included file '%s' does not exist" % include_file
+                if self.strict:
+                    raise IOError(message)
+                else:
+                    sys.stderr.write("%s\n" % message)
+                    return
+            return include_file
+
+        # get directory of this file if not file-like object
+        if isinstance(filename, string):
+            # If we're using mercurial as our filesystem via a finder
+            # during manifest reading, the getcwd() calls that happen
+            # with abspath calls will not be meaningful, so absolute
+            # paths are required.
+            if self.finder:
+                assert os.path.isabs(filename)
+            filename = os.path.abspath(filename)
+            self.source_files.add(filename)
+            if self.finder:
+                fp = self.finder.get(filename)
+            else:
+                fp = open(filename)
+            here = os.path.dirname(filename)
+        else:
+            fp = filename
+            filename = here = None
+        defaults['here'] = here
+
+        # Rootdir is needed for relative path calculation. Precompute it for
+        # the microoptimization used below.
+        if self.rootdir is None:
+            rootdir = ""
+        else:
+            assert os.path.isabs(self.rootdir)
+            rootdir = self.rootdir + os.path.sep
+
+        # read the configuration
+        sections = read_ini(fp=fp, variables=defaults, strict=self.strict,
+                            handle_defaults=self._handle_defaults)
+        self.manifest_defaults[filename] = defaults
+
+        parent_section_found = False
+
+        # get the tests
+        for section, data in sections:
+            # In case of defaults only, no other section than parent: has to
+            # be processed.
+            if defaults_only and not section.startswith('parent:'):
+                continue
+
+            # read the parent manifest if specified
+            if section.startswith('parent:'):
+                parent_section_found = True
+
+                include_file = read_file('parent:')
+                if include_file:
+                    self._read(root, include_file, {}, True)
+                continue
+
+            # a file to include
+            # TODO: keep track of included file structure:
+            # self.manifests = {'manifest.ini': 'relative/path.ini'}
+            if section.startswith('include:'):
+                include_file = read_file('include:')
+                if include_file:
+                    include_defaults = data.copy()
+                    self._read(root, include_file, include_defaults, parentmanifest=filename)
+                continue
+
+            # otherwise an item
+            # apply ancestor defaults, while maintaining current file priority
+            data = dict(self._ancestor_defaults.items() + data.items())
+
+            test = data
+            test['name'] = section
+
+            # Will be None if the manifest being read is a file-like object.
+            test['manifest'] = filename
+
+            # determine the path
+            path = test.get('path', section)
+            _relpath = path
+            if '://' not in path:  # don't futz with URLs
+                path = normalize_path(path)
+                if here and not os.path.isabs(path):
+                    # Profiling indicates 25% of manifest parsing is spent
+                    # in this call to normpath, but almost all calls return
+                    # their argument unmodified, so we avoid the call if
+                    # '..' is not present in the path.
+                    path = os.path.join(here, path)
+                    if '..' in path:
+                        path = os.path.normpath(path)
+
+                # Microoptimization, because relpath is quite expensive.
+                # We know that rootdir is an absolute path or empty. If path
+                # starts with rootdir, then path is also absolute and the tail
+                # of the path is the relative path (possibly non-normalized,
+                # when here is unknown).
+                # For this to work rootdir needs to be terminated with a path
+                # separator, so that references to sibling directories with
+                # a common prefix don't get miscomputed (e.g. /root and
+                # /rootbeer/file).
+                # When the rootdir is unknown, the relpath needs to be left
+                # unchanged. We use an empty string as rootdir in that case,
+                # which leaves relpath unchanged after slicing.
+                if path.startswith(rootdir):
+                    _relpath = path[len(rootdir):]
+                else:
+                    _relpath = relpath(path, rootdir)
+
+            test['path'] = path
+            test['relpath'] = _relpath
+
+            if parentmanifest is not None:
+                # If a test was included by a parent manifest we may need to
+                # indicate that in the test object for the sake of identifying
+                # a test, particularly in the case a test file is included by
+                # multiple manifests.
+                test['ancestor-manifest'] = parentmanifest
+
+            # append the item
+            self.tests.append(test)
+
+        # if no parent: section was found for defaults-only, only read the
+        # defaults section of the manifest without interpreting variables
+        if defaults_only and not parent_section_found:
+            sections = read_ini(fp=fp, variables=defaults, defaults_only=True,
+                                strict=self.strict)
+            (section, self._ancestor_defaults) = sections[0]
+
+    def read(self, *filenames, **defaults):
+        """
+        read and add manifests from file paths or file-like objects
+
+        filenames -- file paths or file-like objects to read as manifests
+        defaults -- default variables
+        """
+
+        # ensure all files exist
+        missing = [filename for filename in filenames
+                   if isinstance(filename, string) and not self.path_exists(filename)]
+        if missing:
+            raise IOError('Missing files: %s' % ', '.join(missing))
+
+        # default variables
+        _defaults = defaults.copy() or self._defaults.copy()
+        _defaults.setdefault('here', None)
+
+        # process each file
+        for filename in filenames:
+            # set the per file defaults
+            defaults = _defaults.copy()
+            here = None
+            if isinstance(filename, string):
+                here = os.path.dirname(os.path.abspath(filename))
+                defaults['here'] = here  # directory of master .ini file
+
+            if self.rootdir is None:
+                # set the root directory
+                # == the directory of the first manifest given
+                self.rootdir = here
+
+            self._read(here, filename, defaults)
+
+    # methods for querying manifests
+
+    def query(self, *checks, **kw):
+        """
+        general query function for tests
+        - checks : callable conditions to test if the test fulfills the query
+        """
+        tests = kw.get('tests', None)
+        if tests is None:
+            tests = self.tests
+        retval = []
+        for test in tests:
+            for check in checks:
+                if not check(test):
+                    break
+            else:
+                retval.append(test)
+        return retval
+
+    def get(self, _key=None, inverse=False, tags=None, tests=None, **kwargs):
+        # TODO: pass a dict instead of kwargs since you might have
+        # e.g. 'inverse' as a key in the dict
+
+        # TODO: tags should just be part of kwargs with None values
+        # (None == any is kinda weird, but probably still better)
+
+        # fix up tags
+        if tags:
+            tags = set(tags)
+        else:
+            tags = set()
+
+        # make some check functions
+        if inverse:
+            def has_tags(test):
+                return not tags.intersection(test.keys())
+
+            def dict_query(test):
+                for key, value in kwargs.items():
+                    if test.get(key) == value:
+                        return False
+                return True
+        else:
+            def has_tags(test):
+                return tags.issubset(test.keys())
+
+            def dict_query(test):
+                for key, value in kwargs.items():
+                    if test.get(key) != value:
+                        return False
+                return True
+
+        # query the tests
+        tests = self.query(has_tags, dict_query, tests=tests)
+
+        # if a key is given, return only a list of that key
+        # useful for keys like 'name' or 'path'
+        if _key:
+            return [test[_key] for test in tests]
+
+        # return the tests
+        return tests
+
+    def manifests(self, tests=None):
+        """
+        return manifests in order in which they appear in the tests
+        """
+        if tests is None:
+            # Make sure to return all the manifests, even ones without tests.
+            return self.manifest_defaults.keys()
+
+        manifests = []
+        for test in tests:
+            manifest = test.get('manifest')
+            if not manifest:
+                continue
+            if manifest not in manifests:
+                manifests.append(manifest)
+        return manifests
+
+    def paths(self):
+        return [i['path'] for i in self.tests]
+
+    # methods for auditing
+
+    def missing(self, tests=None):
+        """
+        return list of tests that do not exist on the filesystem
+        """
+        if tests is None:
+            tests = self.tests
+        existing = list(_exists(tests, {}))
+        return [t for t in tests if t not in existing]
+
+    def check_missing(self, tests=None):
+        missing = self.missing(tests=tests)
+        if missing:
+            missing_paths = [test['path'] for test in missing]
+            if self.strict:
+                raise IOError("Strict mode enabled, test paths must exist. "
+                              "The following test(s) are missing: %s" %
+                              json.dumps(missing_paths, indent=2))
+            print >> sys.stderr, "Warning: The following test(s) are missing: %s" % \
+                json.dumps(missing_paths, indent=2)
+        return missing
+
+    def verifyDirectory(self, directories, pattern=None, extensions=None):
+        """
+        checks what is on the filesystem vs what is in a manifest
+        returns a 2-tuple of sets:
+        (missing_from_filesystem, missing_from_manifest)
+        """
+
+        files = set([])
+        if isinstance(directories, basestring):
+            directories = [directories]
+
+        # get files in directories
+        for directory in directories:
+            for dirpath, dirnames, filenames in os.walk(directory, topdown=True):
+
+                # only add files that match a pattern
+                if pattern:
+                    filenames = fnmatch.filter(filenames, pattern)
+
+                # only add files that have one of the extensions
+                if extensions:
+                    filenames = [filename for filename in filenames
+                                 if os.path.splitext(filename)[-1] in extensions]
+
+                files.update([os.path.join(dirpath, filename) for filename in filenames])
+
+        paths = set(self.paths())
+        missing_from_filesystem = paths.difference(files)
+        missing_from_manifest = files.difference(paths)
+        return (missing_from_filesystem, missing_from_manifest)
+
+    # methods for output
+
+    def write(self, fp=sys.stdout, rootdir=None,
+              global_tags=None, global_kwargs=None,
+              local_tags=None, local_kwargs=None):
+        """
+        write a manifest given a query
+        global and local options will be munged to do the query
+        globals will be written to the top of the file
+        locals (if given) will be written per test
+        rootdir -- base for the written relative paths (default: self.rootdir)
+        """
+        # open file if `fp` given as string
+        close = False
+        if isinstance(fp, string):
+            fp = file(fp, 'w')
+            close = True
+
+        # root directory
+        if rootdir is None:
+            rootdir = self.rootdir
+
+        # sanitize input
+        global_tags = global_tags or set()
+        local_tags = local_tags or set()
+        global_kwargs = global_kwargs or {}
+        local_kwargs = local_kwargs or {}
+
+        # create the query
+        tags = set([])
+        tags.update(global_tags)
+        tags.update(local_tags)
+        kwargs = {}
+        kwargs.update(global_kwargs)
+        kwargs.update(local_kwargs)
+
+        # get matching tests
+        tests = self.get(tags=tags, **kwargs)
+
+        # print the .ini manifest
+        if global_tags or global_kwargs:
+            print >> fp, '[DEFAULT]'
+            for tag in global_tags:
+                print >> fp, '%s =' % tag
+            for key, value in global_kwargs.items():
+                print >> fp, '%s = %s' % (key, value)
+            print >> fp
+
+        for test in tests:
+            test = test.copy()  # don't overwrite
+
+            path = test['name']
+            if not os.path.isabs(path):
+                path = test['path']
+                if rootdir:
+                    path = relpath(test['path'], rootdir)
+                path = denormalize_path(path)
+            print >> fp, '[%s]' % path
+
+            # reserved keywords:
+            reserved = ['path', 'name', 'here', 'manifest', 'relpath', 'ancestor-manifest']
+            for key in sorted(test.keys()):
+                if key in reserved:
+                    continue
+                if key in global_kwargs:
+                    continue
+                if key in global_tags and not test[key]:
+                    continue
+                print >> fp, '%s = %s' % (key, test[key])
+            print >> fp
+
+        if close:
+            # close the created file
+            fp.close()
+
+    def __str__(self):
+        """Return the manifest rendered by write() as a string."""
+        buf = StringIO()
+        self.write(fp=buf)
+        return buf.getvalue()
+
+    def copy(self, directory, rootdir=None, *tags, **kwargs):
+        """
+        copy the manifests and associated tests
+        - directory : directory to copy to
+        - rootdir : root directory to copy to (if not given from manifests)
+        - tags : keywords the tests must have
+        - kwargs : key, values the tests must match
+        """
+        # XXX note that copy does *not* filter the tests out of the
+        # resulting manifest; it just stupidly copies them over.
+        # ideally, it would reread the manifests and filter out the
+        # tests that don't match *tags and **kwargs
+
+        # destination
+        if not os.path.exists(directory):
+            os.makedirs(directory)
+        else:
+            # sanity check
+            assert os.path.isdir(directory)
+
+        # tests to copy
+        tests = self.get(tags=tags, **kwargs)
+        if not tests:
+            return  # nothing to do!
+
+        # root directory
+        if rootdir is None:
+            rootdir = self.rootdir
+
+        # copy the manifests + tests
+        manifests = [relpath(manifest, rootdir) for manifest in self.manifests()]
+        for manifest in manifests:
+            destination = os.path.join(directory, manifest)
+            dirname = os.path.dirname(destination)
+            if not os.path.exists(dirname):
+                os.makedirs(dirname)
+            else:
+                # sanity check
+                assert os.path.isdir(dirname)
+            shutil.copy(os.path.join(rootdir, manifest), destination)
+
+        missing = self.check_missing(tests)
+        tests = [test for test in tests if test not in missing]
+        for test in tests:
+            if os.path.isabs(test['name']):
+                continue
+            source = test['path']
+            destination = os.path.join(directory, relpath(test['path'], rootdir))
+            shutil.copy(source, destination)
+            # TODO: ensure that all of the tests are below the from_dir
+
+    def update(self, from_dir, rootdir=None, *tags, **kwargs):
+        """
+        update the tests as listed in a manifest from a directory
+        - from_dir : directory where the tests live
+        - rootdir : root directory to copy to (if not given from manifests)
+        - tags : keys the tests must have
+        - kwargs : key, values the tests must match
+        """
+
+        # get the tests
+        tests = self.get(tags=tags, **kwargs)
+
+        # get the root directory
+        if not rootdir:
+            rootdir = self.rootdir
+
+        # copy them!
+        for test in tests:
+            if not os.path.isabs(test['name']):
+                _relpath = relpath(test['path'], rootdir)
+                source = os.path.join(from_dir, _relpath)
+                if not os.path.exists(source):
+                    message = "Missing test: '%s' does not exist!" % source
+                    if self.strict:
+                        raise IOError(message)
+                    print >> sys.stderr, message + " Skipping."
+                    continue
+                destination = os.path.join(rootdir, _relpath)
+                shutil.copy(source, destination)
+
+    # directory importers
+
+    @classmethod
+    def _walk_directories(cls, directories, callback, pattern=None, ignore=()):
+        """
+        internal function to import directories
+        """
+
+        if isinstance(pattern, basestring):
+            patterns = [pattern]
+        else:
+            patterns = pattern
+        ignore = set(ignore)
+
+        if not patterns:
+            def accept_filename(filename):
+                return True
+        else:
+            def accept_filename(filename):
+                for pattern in patterns:
+                    if fnmatch.fnmatch(filename, pattern):
+                        return True
+
+        if not ignore:
+            def accept_dirname(dirname):
+                return True
+        else:
+            def accept_dirname(dirname):
+                return dirname not in ignore
+
+        rootdirectories = directories[:]
+        seen_directories = set()
+        for rootdirectory in rootdirectories:
+            # let's recurse directories using list
+            directories = [os.path.realpath(rootdirectory)]
+            while directories:
+                directory = directories.pop(0)
+                if directory in seen_directories:
+                    # eliminate possible infinite recursion due to
+                    # symbolic links
+                    continue
+                seen_directories.add(directory)
+
+                files = []
+                subdirs = []
+                for name in sorted(os.listdir(directory)):
+                    path = os.path.join(directory, name)
+                    if os.path.isfile(path):
+                        # os.path.isfile follows symbolic links, we don't
+                        # need to handle them here.
+                        if accept_filename(name):
+                            files.append(name)
+                        continue
+                    elif os.path.islink(path):
+                        # eliminate symbolic links
+                        path = os.path.realpath(path)
+
+                    # we must have a directory here
+                    if accept_dirname(name):
+                        subdirs.append(name)
+                        # this subdir is added for recursion
+                        directories.insert(0, path)
+
+                # here we got all subdirs and files filtered, we can
+                # call the callback function if directory is not empty
+                if subdirs or files:
+                    callback(rootdirectory, directory, subdirs, files)
+
+    @classmethod
+    def populate_directory_manifests(cls, directories, filename, pattern=None, ignore=(),
+                                     overwrite=False):
+        """
+        walks directories and writes manifests of name `filename` in-place;
+        returns `cls` instance populated with the given manifests
+
+        filename -- filename of manifests to write
+        pattern -- shell pattern (glob) or patterns of filenames to match
+        ignore -- directory names to ignore
+        overwrite -- whether to overwrite existing files of given name
+        """
+
+        manifest_dict = {}
+
+        if os.path.basename(filename) != filename:
+            raise IOError("filename should not include directory name")
+
+        # no need to hit directories more than once
+        _directories = directories
+        directories = []
+        for directory in _directories:
+            if directory not in directories:
+                directories.append(directory)
+
+        def callback(directory, dirpath, dirnames, filenames):
+            """write a manifest for each directory, keeping existing ones unless overwrite"""
+
+            manifest_path = os.path.join(dirpath, filename)
+            if (dirnames or filenames) and (overwrite or not os.path.exists(manifest_path)):
+                with file(manifest_path, 'w') as manifest:
+                    for dirname in dirnames:
+                        print >> manifest, '[include:%s]' % os.path.join(dirname, filename)
+                    for _filename in filenames:
+                        print >> manifest, '[%s]' % _filename
+
+            # record the manifest even when an existing file was kept as-is
+            manifest_dict.setdefault(directory, manifest_path)
+
+        # walk the directories to gather files
+        cls._walk_directories(directories, callback, pattern=pattern, ignore=ignore)
+        # get manifests
+        manifests = [manifest_dict[directory] for directory in _directories]
+
+        # create a `cls` instance with the manifests
+        return cls(manifests=manifests)
+
+    @classmethod
+    def from_directories(cls, directories, pattern=None, ignore=(), write=None, relative_to=None):
+        """
+        convert directories to a simple manifest; returns ManifestParser instance
+
+        directories -- directories to walk for files
+        pattern -- shell pattern (glob) or patterns of filenames to match
+        ignore -- directory names to ignore
+        write -- filename or file-like object of manifests to write;
+                 if `None` then a StringIO instance will be created
+        relative_to -- write paths relative to this path;
+                       if false then the paths are absolute
+        """
+        # determine output
+        opened_manifest_file = None  # name of the manifest file opened here, if any
+        manifest_realpath = None  # its resolved path, to recognize it during the walk
+        if isinstance(write, string):
+            opened_manifest_file = write
+            manifest_realpath = os.path.realpath(write)
+            write = open(write, 'w')
+        if write is None:
+            write = StringIO()
+
+        # walk the directories, generating manifests
+        def callback(directory, dirpath, dirnames, filenames):
+            # join with dirpath to get full paths
+            filenames = [os.path.join(dirpath, filename)
+                         for filename in filenames]
+            # ensure new manifest isn't added, however its path was spelled
+            if manifest_realpath:
+                filenames = [filename for filename in filenames
+                             if os.path.realpath(filename) != manifest_realpath]
+            # normalize paths
+            if relative_to:
+                filenames = [relpath(filename, relative_to)
+                             for filename in filenames]
+            # write to manifest; write() works on both python 2 and 3
+            write.write('\n'.join(['[%s]' % denormalize_path(filename)
+                                   for filename in filenames]) + '\n')
+
+        try:
+            cls._walk_directories(directories, callback, pattern=pattern, ignore=ignore)
+        finally:
+            if opened_manifest_file:
+                # we opened the file, so close it even if the walk failed
+                write.close()
+
+        if opened_manifest_file:
+            manifests = [opened_manifest_file]
+        else:
+            # manifests/write is a file-like object; rewind buffer
+            write.flush()
+            write.seek(0)
+            manifests = [write]
+        return cls(manifests=manifests)
+
+
+convert = ManifestParser.from_directories  # module-level alias for the classmethod
+
+
+class TestManifest(ManifestParser):
+    """
+    ManifestParser plus the filter chain that decides which tests are
+    active; harnesses needing more logic may subclass this.
+    """
+
+    def __init__(self, *args, **kwargs):
+        ManifestParser.__init__(self, *args, **kwargs)
+        self.last_used_filters = []
+        self.filters = filterlist(DEFAULT_FILTERS)
+
+    def active_tests(self, exists=True, disabled=True, filters=None, **values):
+        """
+        Run the tests through every applicable filter.
+
+        :param exists: filter out non-existing tests, or run
+                       `check_missing` instead when `self.strict` (default True)
+        :param disabled: if false, also apply the `enabled` filter (default True)
+        :param filters: list of extra filters applied after the others
+        :param values: keys and values handed to each filter (e.g. `os = linux mac`)
+        :returns: list of test objects that were not filtered out
+        """
+        # per-test shallow copies, each with 'expected' defaulting to 'pass'
+        remaining = []
+        for original in self.tests:
+            entry = original.copy()
+            entry['expected'] = entry.get('expected', 'pass')
+            remaining.append(entry)
+
+        # slice so the instance's own filters aren't modified
+        chain = self.filters[:]
+        if exists and self.strict:
+            self.check_missing(remaining)
+        elif exists:
+            chain.append(_exists)
+        if not disabled:
+            chain.append(enabled)
+        if filters:
+            chain += filters
+
+        # remember what was applied so fmt_filters can report it
+        self.last_used_filters = chain[:]
+        for apply_filter in chain:
+            remaining = apply_filter(remaining, values)
+        return list(remaining)
+
+    def test_paths(self):
+        """Return the 'path' of each test returned by active_tests()."""
+        return [entry['path'] for entry in self.active_tests()]
+
+    def fmt_filters(self, filters=None):
+        """
+        Return a comma-separated string of filter names: `__name__` for
+        plain functions, `str()` otherwise; defaults to the last used filters.
+        """
+        chosen = filters or self.last_used_filters
+        return ', '.join([f.__name__ if isinstance(f, types.FunctionType) else str(f)
+                          for f in chosen])