diff --git a/testing/web-platform/tests/tools/gitignore/gitignore.py b/testing/web-platform/tests/tools/gitignore/gitignore.py index 2b3b4adf6a99..3c12c0e251fb 100644 --- a/testing/web-platform/tests/tools/gitignore/gitignore.py +++ b/testing/web-platform/tests/tools/gitignore/gitignore.py @@ -1,26 +1,29 @@ import re import os +import itertools +from six import itervalues, iteritems +from collections import defaultdict + end_space = re.compile(r"([^\\]\s)*$") -def fnmatch_translate(pat, allow_component_only=True): +def fnmatch_translate(pat): parts = [] - seq = False + seq = None i = 0 - component_pattern = False + any_char = "[^/]" if pat[0] == "/": parts.append("^") - any_char = "[^/]" - if pat[0] == "/": - pat = pat[1:] + pat = pat[1:] else: - any_char = "." - if allow_component_only and "/" not in pat: - component_pattern = True - parts.append("^") - else: - parts.append("^(?:.*/)?") + # By default match the entire path up to a / + # but if / doesn't appear in the pattern we will mark it as + # a name pattern and just produce a pattern that matches against + # the filename + parts.append("^(?:.*/)?") + + name_pattern = True if pat[-1] == "/": # If the last character is / match this directory or any subdirectory pat = pat[:-1] @@ -36,11 +39,10 @@ def fnmatch_translate(pat, allow_component_only=True): parts.append(re.escape(c)) else: raise ValueError - elif seq: + elif seq is not None: + # TODO: this doesn't really handle invalid sequences in the right way if c == "]": - seq = False - # First two cases are to deal with the case where / is the only character - # in the sequence but path_name is True so it shouldn't match anything + seq = None if parts[-1] == "[": parts = parts[:-1] elif parts[-1] == "^" and parts[-2] == "[": @@ -56,28 +58,39 @@ def fnmatch_translate(pat, allow_component_only=True): if i < len(pat) - 1 and pat[i+1] in ("!", "^"): parts.append("^") i += 1 - seq = True + seq = i elif c == "*": if i < len(pat) - 1 and pat[i+1] == "*": - 
parts.append(any_char + "*") + if i > 0 and pat[i-1] != "/": + raise ValueError + parts.append(".*") i += 1 - if i < len(pat) - 1 and pat[i+1] == "*": + if i < len(pat) - 1 and pat[i+1] != "/": raise ValueError else: parts.append(any_char + "*") elif c == "?": parts.append(any_char) + elif c == "/" and not seq: + name_pattern = False + parts.append(c) else: parts.append(re.escape(c)) i += 1 - if seq: + if name_pattern: + parts[0] = "^" + + if seq is not None: raise ValueError parts.append(suffix) try: - return component_pattern, re.compile("".join(parts)) + return name_pattern, re.compile("".join(parts)) except Exception: - raise + raise ValueError + +# Regexp matching rules that have to be converted to patterns +pattern_re = re.compile(r".*[\*\[\?]") def parse_line(line): @@ -94,11 +107,19 @@ def parse_line(line): if dir_only: line = line[:-1] - return invert, dir_only, fnmatch_translate(line, dir_only) + # Could make a special case for **/foo, but we don't have any patterns like that + if not invert and not pattern_re.match(line): + literal = True + pattern = tuple(line.rsplit("/", 1)) + else: + pattern = fnmatch_translate(line) + literal = False + + return invert, dir_only, literal, pattern class PathFilter(object): - def __init__(self, root, extras=None): + def __init__(self, root, extras=None, cache=None): if root: ignore_path = os.path.join(root, ".gitignore") else: @@ -108,51 +129,119 @@ class PathFilter(object): return self.trivial = False - self.rules_file = [] - self.rules_dir = [] + self.literals_file = defaultdict(dict) + self.literals_dir = defaultdict(dict) + self.patterns_file = [] + self.patterns_dir = [] + self.cache = cache if cache is not None else {} if extras is None: extras = [] if ignore_path and os.path.exists(ignore_path): - self._read_ignore(ignore_path) + args = ignore_path, extras + else: + args = None, extras + self._read_ignore(*args) - for item in extras: - self._read_line(item) - - def _read_ignore(self, ignore_path): - with open(ignore_path) as f: - for 
line in f: - self._read_line(line) + def _read_ignore(self, ignore_path, extras): + if ignore_path is not None: + with open(ignore_path) as f: + for line in f: + self._read_line(line) + for line in extras: + self._read_line(line) def _read_line(self, line): parsed = parse_line(line) if not parsed: return - invert, dir_only, regexp = parsed - if dir_only: - self.rules_dir.append((regexp, invert)) + invert, dir_only, literal, rule = parsed + + if invert: + # For exclude rules, we attach the rules to all preceding patterns, so + # that we can match patterns out of order and check if they were later + # overridden by an exclude rule + assert not literal + if not dir_only: + rules_iter = itertools.chain( + itertools.chain(*(iteritems(item) for item in itervalues(self.literals_dir))), + itertools.chain(*(iteritems(item) for item in itervalues(self.literals_file))), + self.patterns_dir, + self.patterns_file) + else: + rules_iter = itertools.chain( + itertools.chain(*(iteritems(item) for item in itervalues(self.literals_dir))), + self.patterns_dir) + + for rules in rules_iter: + rules[1].append(rule) else: - self.rules_file.append((regexp, invert)) + if literal: + if len(rule) == 1: + dir_name, pattern = None, rule[0] + else: + dir_name, pattern = rule + self.literals_dir[dir_name][pattern] = [] + if not dir_only: + self.literals_file[dir_name][pattern] = [] + else: + self.patterns_dir.append((rule, [])) + if not dir_only: + self.patterns_file.append((rule, [])) - def __call__(self, path): - if os.path.sep != "/": - path = path.replace(os.path.sep, "/") + def filter(self, iterator): + empty = {} + for dirpath, dirnames, filenames in iterator: + orig_dirpath = dirpath + if os.path.sep != "/": + dirpath = dirpath.replace(os.path.sep, "/") + keep_dirs = [] + keep_files = [] + + for iter_items, literals, patterns, target, suffix in [ + (dirnames, self.literals_dir, self.patterns_dir, keep_dirs, "/"), + (filenames, self.literals_file, self.patterns_file, keep_files, "")]: + for 
item in iter_items: + name = item[0] + if dirpath: + path = "%s/%s" % (dirpath, name) + suffix + else: + path = name + suffix + if path in self.cache: + if not self.cache[path]: + target.append(item) + continue + for rule_dir in [None, dirpath]: + if name in literals.get(rule_dir, empty): + exclude = literals[rule_dir][name] + if not any(rule.match(name if name_only else path) for name_only, rule in exclude): + # Skip this item + self.cache[path] = True + break + else: + for (component_only, pattern), exclude in patterns: + if component_only: + match = pattern.match(name) + else: + match = pattern.match(path) + if match: + if not any(rule.match(name if name_only else path) + for name_only, rule in exclude): + # Skip this item + self.cache[path] = True + break + else: + self.cache[path] = False + target.append(item) + + dirnames[:] = keep_dirs + assert ".git" not in dirnames + yield orig_dirpath, dirnames, keep_files + + def __call__(self, iterator): if self.trivial: - return True + return iterator - path_is_dir = path[-1] == "/" - if path_is_dir: - path = path[:-1] - rules = self.rules_dir - else: - rules = self.rules_file - - include = True - for regexp, invert in rules: - if not include and invert and regexp.match(path): - include = True - elif include and not invert and regexp.match(path): - include = False - return include + return self.filter(iterator) diff --git a/testing/web-platform/tests/tools/gitignore/tests/test_gitignore.py b/testing/web-platform/tests/tools/gitignore/tests/test_gitignore.py index 75a0d58b2c60..2a310046ffb7 100644 --- a/testing/web-platform/tests/tools/gitignore/tests/test_gitignore.py +++ b/testing/web-platform/tests/tools/gitignore/tests/test_gitignore.py @@ -3,80 +3,98 @@ import pytest from ..gitignore import fnmatch_translate, PathFilter match_data = [ - ("foo", False, ["a/foo", "foo"]), - ("*.a", False, ["foo.a", "a/foo.a", "a/b/foo.a", "a.a/foo.a"]), - ("*.py[co]", False, ["a.pyc", "a.pyo", "a/b/c.pyc"]), - ("\\#*", False, ["#a", "a/#b"]), - ("*#", False, ["a#", 
"a/b#", "#a#"]), - ("/*.c", False, ["a.c", ".c"]), + ("foo", True, ["a/foo", "foo"]), + ("*.a", True, ["foo.a", "a/foo.a", "a/b/foo.a", "a.a/foo.a"]), + ("*.py[co]", True, ["a.pyc", "a.pyo", "a/b/c.pyc"]), + ("\\#*", True, ["#a", "a/#b"]), + ("*#", True, ["a#", "a/b#", "#a#"]), + ("/*.c", True, ["a.c", ".c"]), ("**/b", False, ["a/b", "a/c/b"]), ("*b", True, ["ab"]), - ("**/b", True, ["a/b"]), - ("a/", True, ["a", "a/b", "a/b/c"]) + ("*b", True, ["a/b"]), + ("**/b", False, ["a/b"]), + ("a/", True, ["a"]), + ("a[/]b", True, []), + ("**/b", False, ["a/c/b"]), + ("a?c", True, ["abc"]), + ("a[^b]c", True, ["acc"]), + ("a[b-c]c", True, ["abc", "acc"]), + ("a[^]c", True, ["ac"]), # This is probably wrong + ("a[^]c", True, ["ac"]), # This is probably wrong ] mismatch_data = [ - ("foo", False, ["foob", "afoo"]), - ("*.a", False, ["a", "foo:a", "a.a/foo"]), - ("*.py[co]", False, ["a.pyd", "pyo"]), - ("/*.c", False, ["a/b.c"]), - ("*b", True, ["a/b"]), - ("**b", True, ["a/b"]), - ("a[/]b", True, ["a/b"]), - ("**/b", True, ["a/c/b"]), - ("a", True, ["ab"]) + ("foo", True, ["foob", "afoo"]), + ("*.a", True, ["a", "foo:a", "a.a/foo"]), + ("*.py[co]", True, ["a.pyd", "pyo", "a.py"]), + ("a", True, ["ab"]), + ("a?c", True, ["ac", "abbc"]), + ("a[^b]c", True, ["abc"]), + ("a[b-c]c", True, ["adc"]), ] invalid_data = [ "[a", "***/foo", "a\\", + "**b", + "b**/", + "[[]" ] filter_data = [ - ("foo", True), - ("a", False), - ("a/b", False), - ("a/c", True), - ("a/c/", False), - ("c/b", True) + (["foo", "bar/", "/a", "*.py"], + [("", ["foo", "bar", "baz"], ["a"]), + ("baz", ["a"], ["foo", "bar"])], + [(["baz"], []), + (["a"], ["bar"])]), + (["#foo", "", "a*", "!a.py"], + [("", ["foo"], ["a", "a.foo", "a.py"])], + [(["foo"], ["a.py"])]), ] def expand_data(compact_data): - for pattern, path_name, inputs in compact_data: + for pattern, name_only, inputs in compact_data: for input in inputs: - yield pattern, input, path_name + yield pattern, name_only, input 
-@pytest.mark.parametrize("pattern, input, path_name", expand_data(match_data)) -def tests_match(pattern, input, path_name): - regexp = fnmatch_translate(pattern, path_name) +@pytest.mark.parametrize("pattern, name_only, input", expand_data(match_data)) +def tests_match(pattern, name_only, input): + name_only_result, regexp = fnmatch_translate(pattern) + assert name_only_result == name_only + if name_only: + input = input.rsplit("/", 1)[-1] assert regexp.match(input) is not None -@pytest.mark.parametrize("pattern, input, path_name", expand_data(mismatch_data)) -def tests_no_match(pattern, input, path_name): - regexp = fnmatch_translate(pattern, path_name) +@pytest.mark.parametrize("pattern, name_only, input", expand_data(mismatch_data)) +def tests_no_match(pattern, name_only, input): + name_only_result, regexp = fnmatch_translate(pattern) + assert name_only_result == name_only + if name_only: + input = input.rsplit("/", 1)[-1] assert regexp.match(input) is None @pytest.mark.parametrize("pattern", invalid_data) def tests_invalid(pattern): with pytest.raises(ValueError): - fnmatch_translate(pattern, False) - with pytest.raises(ValueError): - fnmatch_translate(pattern, True) + fnmatch_translate(pattern) -@pytest.mark.parametrize("path, expected", filter_data) -def test_path_filter(path, expected): - extras = [ - "#foo", - "a ", - "**/b", - "a/c/", - "!c/b", - ] - f = PathFilter(None, extras) - assert f(path) == expected +@pytest.mark.parametrize("rules, input, expected", filter_data) +def test_path_filter(rules, input, expected): + f = PathFilter(None, rules) + # Add some fake stat data + for i, item in enumerate(input): + repl = [input[i][0]] + for j in [1, 2]: + repl.append([(name, None) for name in input[i][j]]) + input[i] = tuple(repl) + + for i, output in enumerate(f(input)): + assert output[0] == input[i][0] + for j in [1, 2]: + assert [item[0] for item in output[j]] == expected[i][j-1]