"git grep" learned to expand the sparse-index more lazily and on
demand in a sparse checkout.

* sy/sparse-grep:
  builtin/grep.c: integrate with sparse index
This commit is contained in:
Junio C Hamano 2022-10-10 10:08:40 -07:00
Родитель 4b4d97cfda 7cae7627c4
Коммит 67bf4a83e9
3 изменённых файлов: 118 добавлений и 3 удалений

Просмотреть файл

@ -458,6 +458,33 @@ static int grep_submodule(struct grep_opt *opt,
* subrepo's odbs to the in-memory alternates list.
*/
obj_read_lock();
/*
* NEEDSWORK: when reading a submodule, the sparsity settings in the
* superproject are incorrectly forgotten or misused. For example:
*
* 1. "command_requires_full_index"
* When this setting is turned on for `grep`, only the superproject
* knows it. All the submodules are read with their own configs
* and get prepare_repo_settings()'d. Therefore, these submodules
* "forget" the sparse-index feature switch. As a result, the indexes
* of these submodules are expanded unexpectedly.
*
* 2. "core_apply_sparse_checkout"
* When running `grep` in the superproject, this setting is
* populated using the superproject's configs. However, once
* initialized, this config is globally accessible and is read by
* prepare_repo_settings() for the submodules. For instance, if a
* submodule is using a sparse-checkout but the superproject is not,
* the config from the superproject will dictate the behavior for
* the submodule, making it "forget" its sparse-checkout state.
*
* 3. "core_sparse_checkout_cone"
* ditto.
*
* Note that this list is not exhaustive.
*/
repo_read_gitmodules(subrepo, 0);
/*
@ -520,8 +547,6 @@ static int grep_cache(struct grep_opt *opt,
if (repo_read_index(repo) < 0)
die(_("index file corrupt"));
/* TODO: audit for interaction with sparse-index. */
ensure_full_index(repo->index);
for (nr = 0; nr < repo->index->cache_nr; nr++) {
const struct cache_entry *ce = repo->index->cache[nr];
@ -530,8 +555,20 @@ static int grep_cache(struct grep_opt *opt,
strbuf_setlen(&name, name_base_len);
strbuf_addstr(&name, ce->name);
if (S_ISSPARSEDIR(ce->ce_mode)) {
enum object_type type;
struct tree_desc tree;
void *data;
unsigned long size;
if (S_ISREG(ce->ce_mode) &&
data = read_object_file(&ce->oid, &type, &size);
init_tree_desc(&tree, data, size);
hit |= grep_tree(opt, pathspec, &tree, &name, 0, 0);
strbuf_setlen(&name, name_base_len);
strbuf_addstr(&name, ce->name);
free(data);
} else if (S_ISREG(ce->ce_mode) &&
match_pathspec(repo->index, pathspec, name.buf, name.len, 0, NULL,
S_ISDIR(ce->ce_mode) ||
S_ISGITLINK(ce->ce_mode))) {
@ -984,6 +1021,11 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
PARSE_OPT_KEEP_DASHDASH |
PARSE_OPT_STOP_AT_NON_OPTION);
if (the_repository->gitdir) {
prepare_repo_settings(the_repository);
the_repository->settings.command_requires_full_index = 0;
}
if (use_index && !startup_info->have_repository) {
int fallback = 0;
git_config_get_bool("grep.fallbacktonoindex", &fallback);

Просмотреть файл

@ -124,5 +124,6 @@ test_perf_on_all git read-tree -mu HEAD
test_perf_on_all git checkout-index -f --all
test_perf_on_all git update-index --add --remove $SPARSE_CONE/a
test_perf_on_all "git rm -f $SPARSE_CONE/a && git checkout HEAD -- $SPARSE_CONE/a"
test_perf_on_all git grep --cached --sparse bogus -- "f2/f1/f1/*"
test_done

Просмотреть файл

@ -162,6 +162,19 @@ init_repos () {
git -C sparse-index sparse-checkout set deep
}
# Set up the test repositories and register each of them as a submodule
# of the current repository, then verify that all three are reported by
# `git submodule status`.
#
# NOTE(review): init_repos is defined earlier in this file and is only
# partly visible here; presumably it (re)creates the full-checkout,
# sparse-checkout and sparse-index repositories -- confirm.
init_repos_as_submodules () {
# Reset the index and working tree to HEAD before setting up the repos.
git reset --hard &&
init_repos &&
git submodule add ./full-checkout &&
git submodule add ./sparse-checkout &&
git submodule add ./sparse-index &&
# Each added submodule must appear in the status output; any missing
# entry makes the corresponding grep (and thus the function) fail.
git submodule status >actual &&
grep full-checkout actual &&
grep sparse-checkout actual &&
grep sparse-index actual
}
run_on_sparse () {
(
cd sparse-checkout &&
@ -1981,4 +1994,63 @@ test_expect_success 'sparse index is not expanded: rm' '
ensure_not_expanded rm -r deep
'
test_expect_success 'grep with and --cached' '
init_repos &&
test_all_match git grep --cached a &&
test_all_match git grep --cached a -- "folder1/*"
'
test_expect_success 'grep is not expanded' '
init_repos &&
ensure_not_expanded grep a &&
ensure_not_expanded grep a -- deep/* &&
# All files within the folder1/* pathspec are sparse,
# so this command does not find any matches
ensure_not_expanded ! grep a -- folder1/* &&
# test out-of-cone pathspec with or without wildcard
ensure_not_expanded grep --cached a -- "folder1/a" &&
ensure_not_expanded grep --cached a -- "folder1/*" &&
# test in-cone pathspec with or without wildcard
ensure_not_expanded grep --cached a -- "deep/a" &&
ensure_not_expanded grep --cached a -- "deep/*"
'
# NEEDSWORK: when running `grep` in the superproject with --recurse-submodules,
# Git expands the index of the submodules unexpectedly. Even though `grep`
# builtin is marked as "command_requires_full_index = 0", this config is only
# useful for the superproject. Namely, the submodules have their own configs,
# which are _not_ populated by the one-time sparse-index feature switch.
test_expect_failure 'grep within submodules is not expanded' '
init_repos_as_submodules &&
# do not use ensure_not_expanded() here, because `grep` should be
# run in the superproject, not in "./sparse-index"
GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
git grep --cached --recurse-submodules a -- "*/folder1/*" &&
test_region ! index ensure_full_index trace2.txt
'
# NEEDSWORK: this test is not actually testing the code. The design purpose
# of this test is to verify the grep result when the submodules are using a
# sparse-index. Namely, we want "folder1/" as a tree (a sparse directory); but
# because of the index expansion, we are now grepping the "folder1/a" blob.
# Because of the problem stated above 'grep within submodules is not expanded',
# we don't have the ideal test environment yet.
test_expect_success 'grep sparse directory within submodules' '
init_repos_as_submodules &&
cat >expect <<-\EOF &&
full-checkout/folder1/a:a
sparse-checkout/folder1/a:a
sparse-index/folder1/a:a
EOF
git grep --cached --recurse-submodules a -- "*/folder1/*" >actual &&
test_cmp actual expect
'
test_done