Merge branch 'jt/grep-wo-submodule-odb-as-alternate'

The code to make "git grep" recurse into submodules has been
updated to migrate away from the "add submodule's object store as
an alternate object store" mechanism (which is suboptimal).

* jt/grep-wo-submodule-odb-as-alternate:
  t7814: show lack of alternate ODB-adding
  submodule-config: pass repo upon blob config read
  grep: add repository to OID grep sources
  grep: allocate subrepos on heap
  grep: read submodule entry with explicit repo
  grep: typesafe versions of grep_source_init
  grep: use submodule-ODB-as-alternate lazy-addition
  submodule: lazily add submodule ODBs as alternates
This commit is contained in:
Junio C Hamano 2021-09-20 15:20:39 -07:00
Родитель 0649303820 18a2f66d8a
Коммит 11e5d0a262
11 изменённых файлов: 161 добавлений и 55 удалений

Просмотреть файл

@ -65,6 +65,9 @@ static int todo_done;
/* Has all work items been added? */
static int all_work_added;
static struct repository **repos_to_free;
static size_t repos_to_free_nr, repos_to_free_alloc;
/* This lock protects all the variables above. */
static pthread_mutex_t grep_mutex;
@ -168,6 +171,19 @@ static void work_done(struct work_item *w)
grep_unlock();
}
static void free_repos(void)
{
int i;
for (i = 0; i < repos_to_free_nr; i++) {
repo_clear(repos_to_free[i]);
free(repos_to_free[i]);
}
FREE_AND_NULL(repos_to_free);
repos_to_free_nr = 0;
repos_to_free_alloc = 0;
}
static void *run(void *arg)
{
int hit = 0;
@ -333,7 +349,7 @@ static int grep_oid(struct grep_opt *opt, const struct object_id *oid,
struct grep_source gs;
grep_source_name(opt, filename, tree_name_len, &pathbuf);
grep_source_init(&gs, GREP_SOURCE_OID, pathbuf.buf, path, oid);
grep_source_init_oid(&gs, pathbuf.buf, path, oid, opt->repo);
strbuf_release(&pathbuf);
if (num_threads > 1) {
@ -359,7 +375,7 @@ static int grep_file(struct grep_opt *opt, const char *filename)
struct grep_source gs;
grep_source_name(opt, filename, 0, &buf);
grep_source_init(&gs, GREP_SOURCE_FILE, buf.buf, filename, filename);
grep_source_init_file(&gs, buf.buf, filename);
strbuf_release(&buf);
if (num_threads > 1) {
@ -415,19 +431,24 @@ static int grep_submodule(struct grep_opt *opt,
const struct object_id *oid,
const char *filename, const char *path, int cached)
{
struct repository subrepo;
struct repository *subrepo;
struct repository *superproject = opt->repo;
const struct submodule *sub;
struct grep_opt subopt;
int hit;
int hit = 0;
sub = submodule_from_path(superproject, null_oid(), path);
if (!is_submodule_active(superproject, path))
return 0;
if (repo_submodule_init(&subrepo, superproject, sub))
subrepo = xmalloc(sizeof(*subrepo));
if (repo_submodule_init(subrepo, superproject, sub)) {
free(subrepo);
return 0;
}
ALLOC_GROW(repos_to_free, repos_to_free_nr + 1, repos_to_free_alloc);
repos_to_free[repos_to_free_nr++] = subrepo;
/*
* NEEDSWORK: repo_read_gitmodules() might call
@ -438,53 +459,49 @@ static int grep_submodule(struct grep_opt *opt,
* subrepo's odbs to the in-memory alternates list.
*/
obj_read_lock();
repo_read_gitmodules(&subrepo, 0);
repo_read_gitmodules(subrepo, 0);
/*
* NEEDSWORK: This adds the submodule's object directory to the list of
* alternates for the single in-memory object store. This has some bad
* consequences for memory (processed objects will never be freed) and
* performance (this increases the number of pack files git has to pay
* attention to, to the sum of the number of pack files in all the
* repositories processed so far). This can be removed once the object
* store is no longer global and instead is a member of the repository
* object.
* All code paths tested by test code no longer need submodule ODBs to
* be added as alternates, but add it to the list just in case.
* Submodule ODBs added through add_submodule_odb_by_path() will be
* lazily registered as alternates when needed (and except in an
* unexpected code interaction, it won't be needed).
*/
add_to_alternates_memory(subrepo.objects->odb->path);
add_submodule_odb_by_path(subrepo->objects->odb->path);
obj_read_unlock();
memcpy(&subopt, opt, sizeof(subopt));
subopt.repo = &subrepo;
subopt.repo = subrepo;
if (oid) {
struct object *object;
enum object_type object_type;
struct tree_desc tree;
void *data;
unsigned long size;
struct strbuf base = STRBUF_INIT;
obj_read_lock();
object = parse_object_or_die(oid, NULL);
object_type = oid_object_info(subrepo, oid, NULL);
obj_read_unlock();
data = read_object_with_reference(&subrepo,
&object->oid, tree_type,
data = read_object_with_reference(subrepo,
oid, tree_type,
&size, NULL);
if (!data)
die(_("unable to read tree (%s)"), oid_to_hex(&object->oid));
die(_("unable to read tree (%s)"), oid_to_hex(oid));
strbuf_addstr(&base, filename);
strbuf_addch(&base, '/');
init_tree_desc(&tree, data, size);
hit = grep_tree(&subopt, pathspec, &tree, &base, base.len,
object->type == OBJ_COMMIT);
object_type == OBJ_COMMIT);
strbuf_release(&base);
free(data);
} else {
hit = grep_cache(&subopt, pathspec, cached);
}
repo_clear(&subrepo);
return hit;
}
@ -1182,5 +1199,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
run_pager(&opt, prefix);
clear_pathspec(&pathspec);
free_grep_patterns(&opt);
free_repos();
return !hit;
}

Просмотреть файл

@ -1796,6 +1796,7 @@ int git_config_from_mem(config_fn_t fn,
int git_config_from_blob_oid(config_fn_t fn,
const char *name,
struct repository *repo,
const struct object_id *oid,
void *data)
{
@ -1804,7 +1805,7 @@ int git_config_from_blob_oid(config_fn_t fn,
unsigned long size;
int ret;
buf = read_object_file(oid, &type, &size);
buf = repo_read_object_file(repo, oid, &type, &size);
if (!buf)
return error(_("unable to load config blob object '%s'"), name);
if (type != OBJ_BLOB) {
@ -1820,14 +1821,15 @@ int git_config_from_blob_oid(config_fn_t fn,
}
static int git_config_from_blob_ref(config_fn_t fn,
struct repository *repo,
const char *name,
void *data)
{
struct object_id oid;
if (get_oid(name, &oid) < 0)
if (repo_get_oid(repo, name, &oid) < 0)
return error(_("unable to resolve config blob '%s'"), name);
return git_config_from_blob_oid(fn, name, &oid, data);
return git_config_from_blob_oid(fn, name, repo, &oid, data);
}
char *git_system_config(void)
@ -1958,12 +1960,16 @@ int config_with_options(config_fn_t fn, void *data,
* If we have a specific filename, use it. Otherwise, follow the
* regular lookup sequence.
*/
if (config_source && config_source->use_stdin)
if (config_source && config_source->use_stdin) {
return git_config_from_stdin(fn, data);
else if (config_source && config_source->file)
} else if (config_source && config_source->file) {
return git_config_from_file(fn, config_source->file, data);
else if (config_source && config_source->blob)
return git_config_from_blob_ref(fn, config_source->blob, data);
} else if (config_source && config_source->blob) {
struct repository *repo = config_source->repo ?
config_source->repo : the_repository;
return git_config_from_blob_ref(fn, repo, config_source->blob,
data);
}
return do_git_config_sequence(opts, fn, data);
}

Просмотреть файл

@ -49,6 +49,8 @@ const char *config_scope_name(enum config_scope scope);
struct git_config_source {
unsigned int use_stdin:1;
const char *file;
/* The repository if blob is not NULL; leave blank for the_repository */
struct repository *repo;
const char *blob;
enum config_scope scope;
};
@ -136,6 +138,7 @@ int git_config_from_mem(config_fn_t fn,
const char *buf, size_t len,
void *data, const struct config_options *opts);
int git_config_from_blob_oid(config_fn_t fn, const char *name,
struct repository *repo,
const struct object_id *oid, void *data);
void git_config_push_parameter(const char *text);
void git_config_push_env(const char *spec);

51
grep.c
Просмотреть файл

@ -1825,14 +1825,24 @@ int grep_source(struct grep_opt *opt, struct grep_source *gs)
return grep_source_1(opt, gs, 0);
}
static void grep_source_init_buf(struct grep_source *gs, char *buf,
unsigned long size)
{
gs->type = GREP_SOURCE_BUF;
gs->name = NULL;
gs->path = NULL;
gs->buf = buf;
gs->size = size;
gs->driver = NULL;
gs->identifier = NULL;
}
int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
{
struct grep_source gs;
int r;
grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL);
gs.buf = buf;
gs.size = size;
grep_source_init_buf(&gs, buf, size);
r = grep_source(opt, &gs);
@ -1840,28 +1850,30 @@ int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
return r;
}
void grep_source_init(struct grep_source *gs, enum grep_source_type type,
const char *name, const char *path,
const void *identifier)
void grep_source_init_file(struct grep_source *gs, const char *name,
const char *path)
{
gs->type = type;
gs->type = GREP_SOURCE_FILE;
gs->name = xstrdup_or_null(name);
gs->path = xstrdup_or_null(path);
gs->buf = NULL;
gs->size = 0;
gs->driver = NULL;
gs->identifier = xstrdup(path);
}
switch (type) {
case GREP_SOURCE_FILE:
gs->identifier = xstrdup(identifier);
break;
case GREP_SOURCE_OID:
gs->identifier = oiddup(identifier);
break;
case GREP_SOURCE_BUF:
gs->identifier = NULL;
break;
}
void grep_source_init_oid(struct grep_source *gs, const char *name,
const char *path, const struct object_id *oid,
struct repository *repo)
{
gs->type = GREP_SOURCE_OID;
gs->name = xstrdup_or_null(name);
gs->path = xstrdup_or_null(path);
gs->buf = NULL;
gs->size = 0;
gs->driver = NULL;
gs->identifier = oiddup(oid);
gs->repo = repo;
}
void grep_source_clear(struct grep_source *gs)
@ -1890,7 +1902,8 @@ static int grep_source_load_oid(struct grep_source *gs)
{
enum object_type type;
gs->buf = read_object_file(gs->identifier, &type, &gs->size);
gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type,
&gs->size);
if (!gs->buf)
return error(_("'%s': unable to read %s"),
gs->name,

22
grep.h
Просмотреть файл

@ -120,7 +120,20 @@ struct grep_opt {
struct grep_pat *header_list;
struct grep_pat **header_tail;
struct grep_expr *pattern_expression;
/*
* NEEDSWORK: See if we can remove this field, because the repository
* should probably be per-source. That is, grep.c functions using this
* field should probably start using "repo" in "struct grep_source"
* instead.
*
* This is potentially the cause of at least one bug - "git grep"
* ignoring the textconv attributes from submodules. See [1] for more
* information.
* [1] https://lore.kernel.org/git/CAHd-oW5iEQarYVxEXoTG-ua2zdoybTrSjCBKtO0YT292fm0NQQ@mail.gmail.com/
*/
struct repository *repo;
const char *prefix;
int prefix_length;
regex_t regexp;
@ -187,6 +200,7 @@ struct grep_source {
GREP_SOURCE_BUF,
} type;
void *identifier;
struct repository *repo; /* if GREP_SOURCE_OID */
char *buf;
unsigned long size;
@ -195,9 +209,11 @@ struct grep_source {
struct userdiff_driver *driver;
};
void grep_source_init(struct grep_source *gs, enum grep_source_type type,
const char *name, const char *path,
const void *identifier);
void grep_source_init_file(struct grep_source *gs, const char *name,
const char *path);
void grep_source_init_oid(struct grep_source *gs, const char *name,
const char *path, const struct object_id *oid,
struct repository *repo);
void grep_source_clear_data(struct grep_source *gs);
void grep_source_clear(struct grep_source *gs);
void grep_source_load_driver(struct grep_source *gs,

Просмотреть файл

@ -32,6 +32,7 @@
#include "packfile.h"
#include "object-store.h"
#include "promisor-remote.h"
#include "submodule.h"
/* The maximum size for an object header. */
#define MAX_HEADER_LEN 32
@ -1613,6 +1614,10 @@ static int do_oid_object_info_extended(struct repository *r,
break;
}
if (register_all_submodule_odb_as_alternates())
/* We added some alternates; retry */
continue;
/* Check if it is a missing object */
if (fetch_if_missing && repo_has_promisor_remote(r) &&
!already_retried &&

Просмотреть файл

@ -649,9 +649,10 @@ static void config_from_gitmodules(config_fn_t fn, struct repository *repo, void
config_source.file = file;
} else if (repo_get_oid(repo, GITMODULES_INDEX, &oid) >= 0 ||
repo_get_oid(repo, GITMODULES_HEAD, &oid) >= 0) {
config_source.repo = repo;
config_source.blob = oidstr = xstrdup(oid_to_hex(&oid));
if (repo != the_repository)
add_to_alternates_memory(repo->objects->odb->path);
add_submodule_odb_by_path(repo->objects->odb->path);
} else {
goto out;
}
@ -702,7 +703,7 @@ void gitmodules_config_oid(const struct object_id *commit_oid)
if (gitmodule_oid_from_commit(commit_oid, &oid, &rev)) {
git_config_from_blob_oid(gitmodules_cb, rev.buf,
&oid, the_repository);
the_repository, &oid, the_repository);
}
strbuf_release(&rev);

Просмотреть файл

@ -165,6 +165,8 @@ void stage_updated_gitmodules(struct index_state *istate)
die(_("staging updated .gitmodules failed"));
}
static struct string_list added_submodule_odb_paths = STRING_LIST_INIT_NODUP;
/* TODO: remove this function, use repo_submodule_init instead. */
int add_submodule_odb(const char *path)
{
@ -178,12 +180,33 @@ int add_submodule_odb(const char *path)
ret = -1;
goto done;
}
add_to_alternates_memory(objects_directory.buf);
string_list_insert(&added_submodule_odb_paths,
strbuf_detach(&objects_directory, NULL));
done:
strbuf_release(&objects_directory);
return ret;
}
void add_submodule_odb_by_path(const char *path)
{
string_list_insert(&added_submodule_odb_paths, xstrdup(path));
}
int register_all_submodule_odb_as_alternates(void)
{
int i;
int ret = added_submodule_odb_paths.nr;
for (i = 0; i < added_submodule_odb_paths.nr; i++)
add_to_alternates_memory(added_submodule_odb_paths.items[i].string);
if (ret) {
string_list_clear(&added_submodule_odb_paths, 0);
if (git_env_bool("GIT_TEST_FATAL_REGISTER_SUBMODULE_ODB", 0))
BUG("register_all_submodule_odb_as_alternates() called");
}
return ret;
}
void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt,
const char *path)
{

Просмотреть файл

@ -97,7 +97,15 @@ int submodule_uses_gitfile(const char *path);
#define SUBMODULE_REMOVAL_IGNORE_IGNORED_UNTRACKED (1<<2)
int bad_to_remove_submodule(const char *path, unsigned flags);
/*
* Call add_submodule_odb() to add the submodule at the given path to a list.
* When register_all_submodule_odb_as_alternates() is called, the object stores
* of all submodules in that list will be added as alternates in
* the_repository.
*/
int add_submodule_odb(const char *path);
void add_submodule_odb_by_path(const char *path);
int register_all_submodule_odb_as_alternates(void);
/*
* Checks if there are submodule changes in a..b. If a is the null OID,

Просмотреть файл

@ -452,6 +452,16 @@ GIT_TEST_CHECKOUT_WORKERS=<n> overrides the 'checkout.workers' setting
to <n> and 'checkout.thresholdForParallelism' to 0, forcing the
execution of the parallel-checkout code.
GIT_TEST_FATAL_REGISTER_SUBMODULE_ODB=<boolean>, when true, makes
registering submodule ODBs as alternates a fatal action. Support for
this environment variable can be removed once the migration to
explicitly providing repositories when accessing submodule objects is
complete (in which case we might want to replace this with a trace2
call so that users can make it visible if accessing submodule objects
without an explicit repository still happens) or needs to be abandoned
for whatever reason (in which case the migrated codepaths still retain
their performance benefits).
Naming Tests
------------

Просмотреть файл

@ -8,6 +8,9 @@ submodules.
. ./test-lib.sh
GIT_TEST_FATAL_REGISTER_SUBMODULE_ODB=1
export GIT_TEST_FATAL_REGISTER_SUBMODULE_ODB
test_expect_success 'setup directory structure and submodule' '
echo "(1|2)d(3|4)" >a &&
mkdir b &&