зеркало из https://github.com/microsoft/git.git
tree_entry_interesting: do basedir compare on wildcard patterns when possible
Currently we treat "*.c" and "path/to/*.c" the same way, which means we check all possible paths in the repo against "path/to/*.c". One can see that "path/elsewhere/foo.c" obviously cannot match "path/to/*.c", so we only need to check the paths _inside_ "path/to/" against that pattern. This patch checks the leading fixed part of a pathspec against the base directory and exits early if possible. We could even optimize further in the "path/to/something*.c" case (i.e. check the fixed part against name_entry as well), but that is more complicated and probably does not gain us much. -O2 build on linux-2.6, without and with this patch respectively: $ time git rev-list --quiet HEAD -- 'drivers/*.c' real 1m9.484s user 1m9.128s sys 0m0.181s $ time ~/w/git/git rev-list --quiet HEAD -- 'drivers/*.c' real 0m15.710s user 0m15.564s sys 0m0.107s Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Родитель
8c6abbcd27
Коммит
c904cd89e4
65
tree-walk.c
65
tree-walk.c
|
@ -572,6 +572,54 @@ static int match_dir_prefix(const char *base,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform matching on the leading non-wildcard part of
|
||||
* pathspec. item->nowildcard_len must be greater than zero. Return
|
||||
* non-zero if base is matched.
|
||||
*/
|
||||
static int match_wildcard_base(const struct pathspec_item *item,
|
||||
const char *base, int baselen,
|
||||
int *matched)
|
||||
{
|
||||
const char *match = item->match;
|
||||
/* the wildcard part is not considered in this function */
|
||||
int matchlen = item->nowildcard_len;
|
||||
|
||||
if (baselen) {
|
||||
int dirlen;
|
||||
/*
|
||||
* Return early if base is longer than the
|
||||
* non-wildcard part but it does not match.
|
||||
*/
|
||||
if (baselen >= matchlen) {
|
||||
*matched = matchlen;
|
||||
return !strncmp(base, match, matchlen);
|
||||
}
|
||||
|
||||
dirlen = matchlen;
|
||||
while (dirlen && match[dirlen - 1] != '/')
|
||||
dirlen--;
|
||||
|
||||
/*
|
||||
* Return early if base is shorter than the
|
||||
* non-wildcard part but it does not match. Note that
|
||||
* base ends with '/' so we are sure it really matches
|
||||
* directory
|
||||
*/
|
||||
if (strncmp(base, match, baselen))
|
||||
return 0;
|
||||
*matched = baselen;
|
||||
} else
|
||||
*matched = 0;
|
||||
/*
|
||||
* we could have checked entry against the non-wildcard part
|
||||
* that is not in base and does similar never_interesting
|
||||
* optimization as in match_entry. For now just be happy with
|
||||
* base comparison.
|
||||
*/
|
||||
return entry_interesting;
|
||||
}
|
||||
|
||||
/*
|
||||
* Is a tree entry interesting given the pathspec we have?
|
||||
*
|
||||
|
@ -602,7 +650,7 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
|
|||
const struct pathspec_item *item = ps->items+i;
|
||||
const char *match = item->match;
|
||||
const char *base_str = base->buf + base_offset;
|
||||
int matchlen = item->len;
|
||||
int matchlen = item->len, matched = 0;
|
||||
|
||||
if (baselen >= matchlen) {
|
||||
/* If it doesn't match, move along... */
|
||||
|
@ -647,9 +695,24 @@ match_wildcards:
|
|||
if (item->nowildcard_len == item->len)
|
||||
continue;
|
||||
|
||||
if (item->nowildcard_len &&
|
||||
!match_wildcard_base(item, base_str, baselen, &matched))
|
||||
return entry_not_interesting;
|
||||
|
||||
/*
|
||||
* Concatenate base and entry->path into one and do
|
||||
* fnmatch() on it.
|
||||
*
|
||||
* While we could avoid concatenation in certain cases
|
||||
* [1], which saves a memcpy and potentially a
|
||||
* realloc, it turns out not worth it. Measurement on
|
||||
* linux-2.6 does not show any clear improvements,
|
||||
* partly because of the nowildcard_len optimization
|
||||
* in git_fnmatch(). Avoid micro-optimizations here.
|
||||
*
|
||||
* [1] if match_wildcard_base() says the base
|
||||
* directory is already matched, we only need to match
|
||||
* the rest, which is shorter so _in theory_ faster.
|
||||
*/
|
||||
|
||||
strbuf_add(base, entry->path, pathlen);
|
||||
|
|
Загрузка…
Ссылка в новой задаче