зеркало из https://github.com/microsoft/git.git
unpack-trees:virtualfilesystem: Improve efficiency of clear_ce_flags
When the virtualfilesystem is enabled the previous implementation of clear_ce_flags would iterate all of the cache entries and query whether each one is in the virtual filesystem to determine whether to clear one of the SKIP_WORKTREE bits. For each cache entry, we would do a hash lookup for each parent directory in the is_included_in_virtualfilesystem function. The former approach is slow for a typical Windows OS enlistment with 3 million files where only a small percentage is in the virtual filesystem. The cost is O(n_index_entries * n_chars_per_path * n_parent_directories_per_path). In this change, we use the same approach as apply_virtualfilesystem, which iterates the set of entries in the virtualfilesystem and searches in the cache for the corresponding entries in order to clear their flags. This approach has a cost of O(n_virtual_filesystem_entries * n_chars_per_path * log(n_index_entries)). The apply_virtualfilesystem code was refactored a bit and modified to clear flags for all names that 'alias' a given virtual filesystem name when ignore_case is set. n_virtual_filesystem_entries is typically much less than n_index_entries, in which case the new approach is much faster. We wind up building the name hash for the index, but this occurs quickly thanks to the multi-threading. Signed-off-by: Neeraj Singh <neerajsi@ntdev.microsoft.com>
This commit is contained in:
Родитель
1b535dffd3
Коммит
ffb1d21bb1
1
cache.h
1
cache.h
|
@ -810,6 +810,7 @@ int strcmp_offset(const char *s1, const char *s2, size_t *first_change);
|
||||||
int index_dir_exists(struct index_state *istate, const char *name, int namelen);
|
int index_dir_exists(struct index_state *istate, const char *name, int namelen);
|
||||||
void adjust_dirname_case(struct index_state *istate, char *name);
|
void adjust_dirname_case(struct index_state *istate, char *name);
|
||||||
struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase);
|
struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase);
|
||||||
|
/*
 * Given an entry `ce` that is already in the index name hash, return the
 * next entry in that hash whose name matches ce's name (as decided by
 * same_name() with `igncase`), or NULL when there is no further match.
 * Always returns NULL when `igncase` is zero or `ce` is NULL.
 */
struct cache_entry *index_file_next_match(struct index_state *istate, struct cache_entry *ce, int igncase);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Searches for an entry defined by name and namelen in the given index.
|
* Searches for an entry defined by name and namelen in the given index.
|
||||||
|
|
20
name-hash.c
20
name-hash.c
|
@ -730,6 +730,26 @@ struct cache_entry *index_file_exists(struct index_state *istate, const char *na
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct cache_entry *index_file_next_match(struct index_state *istate, struct cache_entry *ce, int igncase)
|
||||||
|
{
|
||||||
|
struct cache_entry *next;
|
||||||
|
|
||||||
|
if (!igncase || !ce) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
next = hashmap_get_next_entry(&istate->name_hash, ce, ent);
|
||||||
|
if (!next)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
hashmap_for_each_entry_from(&istate->name_hash, next, ent) {
|
||||||
|
if (same_name(next, ce->name, ce_namelen(ce), igncase))
|
||||||
|
return next;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
void free_name_hash(struct index_state *istate)
|
void free_name_hash(struct index_state *istate)
|
||||||
{
|
{
|
||||||
if (!istate->name_hash_initialized)
|
if (!istate->name_hash_initialized)
|
||||||
|
|
|
@ -1612,14 +1612,6 @@ static int clear_ce_flags_1(struct index_state *istate,
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if it's not in the virtual file system, exit early */
|
|
||||||
if (core_virtualfilesystem) {
|
|
||||||
if (is_included_in_virtualfilesystem(ce->name, ce->ce_namelen) > 0)
|
|
||||||
ce->ce_flags &= ~clear_mask;
|
|
||||||
cache++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (prefix->len && strncmp(ce->name, prefix->buf, prefix->len))
|
if (prefix->len && strncmp(ce->name, prefix->buf, prefix->len))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -1696,12 +1688,19 @@ static int clear_ce_flags(struct index_state *istate,
|
||||||
xsnprintf(label, sizeof(label), "clear_ce_flags/0x%08lx_0x%08lx",
|
xsnprintf(label, sizeof(label), "clear_ce_flags/0x%08lx_0x%08lx",
|
||||||
(unsigned long)select_mask, (unsigned long)clear_mask);
|
(unsigned long)select_mask, (unsigned long)clear_mask);
|
||||||
trace2_region_enter("unpack_trees", label, the_repository);
|
trace2_region_enter("unpack_trees", label, the_repository);
|
||||||
rval = clear_ce_flags_1(istate,
|
if (core_virtualfilesystem) {
|
||||||
istate->cache,
|
rval = clear_ce_flags_virtualfilesystem(istate,
|
||||||
istate->cache_nr,
|
select_mask,
|
||||||
&prefix,
|
clear_mask);
|
||||||
select_mask, clear_mask,
|
} else {
|
||||||
pl, 0, 0);
|
rval = clear_ce_flags_1(istate,
|
||||||
|
istate->cache,
|
||||||
|
istate->cache_nr,
|
||||||
|
&prefix,
|
||||||
|
select_mask, clear_mask,
|
||||||
|
pl, 0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
trace2_region_leave("unpack_trees", label, the_repository);
|
trace2_region_leave("unpack_trees", label, the_repository);
|
||||||
|
|
||||||
stop_progress(&istate->progress);
|
stop_progress(&istate->progress);
|
||||||
|
|
|
@ -247,93 +247,133 @@ int is_excluded_from_virtualfilesystem(const char *pathname, int pathlen, int dt
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
struct apply_virtual_filesystem_stats {
|
||||||
* Update the CE_SKIP_WORKTREE bits based on the virtual file system.
|
int nr_unknown;
|
||||||
*/
|
int nr_vfs_dirs;
|
||||||
void apply_virtualfilesystem(struct index_state *istate)
|
int nr_vfs_rows;
|
||||||
|
int nr_bulk_skip;
|
||||||
|
int nr_explicit_skip;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void clear_ce_flags_virtualfilesystem_1(struct index_state *istate, int select_mask, int clear_mask,
|
||||||
|
struct apply_virtual_filesystem_stats *stats)
|
||||||
{
|
{
|
||||||
char *buf, *entry;
|
char *buf, *entry;
|
||||||
int i;
|
int i;
|
||||||
int nr_unknown = 0;
|
|
||||||
int nr_vfs_dirs = 0;
|
|
||||||
int nr_vfs_rows = 0;
|
|
||||||
int nr_bulk_skip = 0;
|
|
||||||
int nr_explicit_skip = 0;
|
|
||||||
|
|
||||||
if (!git_config_get_virtualfilesystem())
|
|
||||||
return;
|
|
||||||
|
|
||||||
trace2_region_enter("vfs", "apply", the_repository);
|
|
||||||
|
|
||||||
if (!virtual_filesystem_data.len)
|
if (!virtual_filesystem_data.len)
|
||||||
get_virtual_filesystem_data(&virtual_filesystem_data);
|
get_virtual_filesystem_data(&virtual_filesystem_data);
|
||||||
|
|
||||||
/* set CE_SKIP_WORKTREE bit on all entries */
|
/* clear specified flag bits for everything in the virtual file system */
|
||||||
for (i = 0; i < istate->cache_nr; i++)
|
|
||||||
istate->cache[i]->ce_flags |= CE_SKIP_WORKTREE;
|
|
||||||
|
|
||||||
/* clear CE_SKIP_WORKTREE bit for everything in the virtual file system */
|
|
||||||
entry = buf = virtual_filesystem_data.buf;
|
entry = buf = virtual_filesystem_data.buf;
|
||||||
for (i = 0; i < virtual_filesystem_data.len; i++) {
|
for (i = 0; i < virtual_filesystem_data.len; i++) {
|
||||||
if (buf[i] == '\0') {
|
if (buf[i] == '\0') {
|
||||||
|
struct cache_entry *ce;
|
||||||
int pos, len;
|
int pos, len;
|
||||||
|
|
||||||
nr_vfs_rows++;
|
stats->nr_vfs_rows++;
|
||||||
|
|
||||||
len = buf + i - entry;
|
len = buf + i - entry;
|
||||||
|
|
||||||
/* look for a directory wild card (ie "dir1/") */
|
/* look for a directory wild card (ie "dir1/") */
|
||||||
if (buf[i - 1] == '/') {
|
if (buf[i - 1] == '/') {
|
||||||
nr_vfs_dirs++;
|
stats->nr_vfs_dirs++;
|
||||||
if (ignore_case)
|
if (ignore_case)
|
||||||
adjust_dirname_case(istate, entry);
|
adjust_dirname_case(istate, entry);
|
||||||
pos = index_name_pos(istate, entry, len);
|
pos = index_name_pos(istate, entry, len);
|
||||||
if (pos < 0) {
|
if (pos < 0) {
|
||||||
pos = -pos - 1;
|
for (pos = -pos - 1; pos < istate->cache_nr; pos++) {
|
||||||
while (pos < istate->cache_nr && !fspathncmp(istate->cache[pos]->name, entry, len)) {
|
ce = istate->cache[pos];
|
||||||
if (istate->cache[pos]->ce_flags & CE_SKIP_WORKTREE)
|
if (fspathncmp(ce->name, entry, len))
|
||||||
nr_bulk_skip++;
|
break;
|
||||||
istate->cache[pos]->ce_flags &= ~CE_SKIP_WORKTREE;
|
|
||||||
pos++;
|
if (select_mask && !(ce->ce_flags & select_mask))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (ce->ce_flags & clear_mask)
|
||||||
|
stats->nr_bulk_skip++;
|
||||||
|
ce->ce_flags &= ~clear_mask;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (ignore_case) {
|
if (ignore_case) {
|
||||||
struct cache_entry *ce = index_file_exists(istate, entry, len, ignore_case);
|
ce = index_file_exists(istate, entry, len, ignore_case);
|
||||||
if (ce) {
|
|
||||||
if (ce->ce_flags & CE_SKIP_WORKTREE)
|
|
||||||
nr_explicit_skip++;
|
|
||||||
ce->ce_flags &= ~CE_SKIP_WORKTREE;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
nr_unknown++;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
int pos = index_name_pos(istate, entry, len);
|
int pos = index_name_pos(istate, entry, len);
|
||||||
if (pos >= 0) {
|
|
||||||
if (istate->cache[pos]->ce_flags & CE_SKIP_WORKTREE)
|
ce = NULL;
|
||||||
nr_explicit_skip++;
|
if (pos >= 0)
|
||||||
istate->cache[pos]->ce_flags &= ~CE_SKIP_WORKTREE;
|
ce = istate->cache[pos];
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
nr_unknown++;
|
if (ce) {
|
||||||
}
|
do {
|
||||||
|
if (!select_mask || (ce->ce_flags & select_mask)) {
|
||||||
|
if (ce->ce_flags & clear_mask)
|
||||||
|
stats->nr_explicit_skip++;
|
||||||
|
ce->ce_flags &= ~clear_mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* There may be aliases with different cases of the same
|
||||||
|
* name that also need to be modified.
|
||||||
|
*/
|
||||||
|
if (ignore_case)
|
||||||
|
ce = index_file_next_match(istate, ce, ignore_case);
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
|
||||||
|
} while (ce);
|
||||||
|
} else {
|
||||||
|
stats->nr_unknown++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
entry += len + 1;
|
entry += len + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (nr_vfs_rows > 0) {
|
/*
|
||||||
trace2_data_intmax("vfs", the_repository, "apply/tracked", nr_bulk_skip + nr_explicit_skip);
|
* Clear the specified flags for all entries in the virtual file system
|
||||||
|
* that match the specified select mask. Returns istate->cache_nr (the
|
||||||
|
* total number of index entries), not the number of entries modified.
|
||||||
|
*/
|
||||||
|
int clear_ce_flags_virtualfilesystem(struct index_state *istate, int select_mask, int clear_mask)
|
||||||
|
{
|
||||||
|
struct apply_virtual_filesystem_stats stats = {0};
|
||||||
|
|
||||||
trace2_data_intmax("vfs", the_repository, "apply/vfs_rows", nr_vfs_rows);
|
clear_ce_flags_virtualfilesystem_1(istate, select_mask, clear_mask, &stats);
|
||||||
trace2_data_intmax("vfs", the_repository, "apply/vfs_dirs", nr_vfs_dirs);
|
return istate->cache_nr;
|
||||||
|
}
|
||||||
|
|
||||||
trace2_data_intmax("vfs", the_repository, "apply/nr_unknown", nr_unknown);
|
/*
|
||||||
trace2_data_intmax("vfs", the_repository, "apply/nr_bulk_skip", nr_bulk_skip);
|
* Update the CE_SKIP_WORKTREE bits based on the virtual file system.
|
||||||
trace2_data_intmax("vfs", the_repository, "apply/nr_explicit_skip", nr_explicit_skip);
|
*/
|
||||||
|
void apply_virtualfilesystem(struct index_state *istate)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
struct apply_virtual_filesystem_stats stats = {0};
|
||||||
|
|
||||||
|
if (!git_config_get_virtualfilesystem())
|
||||||
|
return;
|
||||||
|
|
||||||
|
trace2_region_enter("vfs", "apply", the_repository);
|
||||||
|
|
||||||
|
/* set CE_SKIP_WORKTREE bit on all entries */
|
||||||
|
for (i = 0; i < istate->cache_nr; i++)
|
||||||
|
istate->cache[i]->ce_flags |= CE_SKIP_WORKTREE;
|
||||||
|
|
||||||
|
clear_ce_flags_virtualfilesystem_1(istate, 0, CE_SKIP_WORKTREE, &stats);
|
||||||
|
if (stats.nr_vfs_rows > 0) {
|
||||||
|
trace2_data_intmax("vfs", the_repository, "apply/tracked", stats.nr_bulk_skip + stats.nr_explicit_skip);
|
||||||
|
|
||||||
|
trace2_data_intmax("vfs", the_repository, "apply/vfs_rows", stats.nr_vfs_rows);
|
||||||
|
trace2_data_intmax("vfs", the_repository, "apply/vfs_dirs", stats.nr_vfs_dirs);
|
||||||
|
|
||||||
|
trace2_data_intmax("vfs", the_repository, "apply/nr_unknown", stats.nr_unknown);
|
||||||
|
trace2_data_intmax("vfs", the_repository, "apply/nr_bulk_skip", stats.nr_bulk_skip);
|
||||||
|
trace2_data_intmax("vfs", the_repository, "apply/nr_explicit_skip", stats.nr_explicit_skip);
|
||||||
}
|
}
|
||||||
|
|
||||||
trace2_region_leave("vfs", "apply", the_repository);
|
trace2_region_leave("vfs", "apply", the_repository);
|
||||||
|
|
|
@ -6,6 +6,13 @@
|
||||||
*/
|
*/
|
||||||
void apply_virtualfilesystem(struct index_state *istate);
|
void apply_virtualfilesystem(struct index_state *istate);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Clear the specified flags for all entries in the virtual file system
|
||||||
|
* that match the specified select mask. Returns istate->cache_nr (the
|
||||||
|
* total number of index entries), not the number of entries modified.
|
||||||
|
*/
|
||||||
|
int clear_ce_flags_virtualfilesystem(struct index_state *istate, int select_mask, int clear_mask);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return 1 if the requested item is found in the virtual file system,
|
* Return 1 if the requested item is found in the virtual file system,
|
||||||
* 0 for not found and -1 for undecided.
|
* 0 for not found and -1 for undecided.
|
||||||
|
|
Загрузка…
Ссылка в новой задаче