2018-04-02 23:34:18 +03:00
|
|
|
#include "builtin.h"
|
|
|
|
#include "config.h"
|
2018-04-02 23:34:20 +03:00
|
|
|
#include "dir.h"
|
|
|
|
#include "lockfile.h"
|
2018-04-02 23:34:18 +03:00
|
|
|
#include "parse-options.h"
|
2018-06-27 16:24:32 +03:00
|
|
|
#include "repository.h"
|
2018-04-02 23:34:20 +03:00
|
|
|
#include "commit-graph.h"
|
2019-06-18 21:14:32 +03:00
|
|
|
#include "object-store.h"
|
2018-04-02 23:34:18 +03:00
|
|
|
|
|
|
|
static char const * const builtin_commit_graph_usage[] = {
|
2019-08-26 19:29:58 +03:00
|
|
|
N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
|
2020-04-06 19:59:51 +03:00
|
|
|
N_("git commit-graph write [--object-dir <objdir>] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] [--changed-paths] [--[no-]progress] <split options>"),
|
2018-04-02 23:34:20 +03:00
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
2018-06-27 16:24:32 +03:00
|
|
|
static const char * const builtin_commit_graph_verify_usage[] = {
|
2019-08-26 19:29:58 +03:00
|
|
|
N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
|
2018-06-27 16:24:32 +03:00
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
2018-04-02 23:34:20 +03:00
|
|
|
static const char * const builtin_commit_graph_write_usage[] = {
|
2020-04-06 19:59:51 +03:00
|
|
|
N_("git commit-graph write [--object-dir <objdir>] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] [--changed-paths] [--[no-]progress] <split options>"),
|
2018-04-02 23:34:18 +03:00
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Command-line state shared by the subcommands in this file. The
 * option tables below store parsed values directly into these fields.
 */
static struct opts_commit_graph {
	/* --object-dir; replaced by get_object_directory() when left unset */
	const char *obj_dir;
	/* write: --reachable */
	int reachable;
	/* write: --stdin-packs */
	int stdin_packs;
	/* write: --stdin-commits */
	int stdin_commits;
	/* write: --append */
	int append;
	/* write: --split */
	int split;
	/* verify: --shallow */
	int shallow;
	/* --[no-]progress; seeded from isatty(2) before option parsing */
	int progress;
	/* write: --changed-paths */
	int enable_changed_paths;
} opts;
|
|
|
|
|
2020-02-04 08:51:50 +03:00
|
|
|
static struct object_directory *find_odb(struct repository *r,
|
|
|
|
const char *obj_dir)
|
|
|
|
{
|
|
|
|
struct object_directory *odb;
|
|
|
|
char *obj_dir_real = real_pathdup(obj_dir, 1);
|
2020-03-10 16:11:22 +03:00
|
|
|
struct strbuf odb_path_real = STRBUF_INIT;
|
2020-02-04 08:51:50 +03:00
|
|
|
|
|
|
|
prepare_alt_odb(r);
|
|
|
|
for (odb = r->objects->odb; odb; odb = odb->next) {
|
2020-03-10 16:11:22 +03:00
|
|
|
strbuf_realpath(&odb_path_real, odb->path, 1);
|
|
|
|
if (!strcmp(obj_dir_real, odb_path_real.buf))
|
2020-02-04 08:51:50 +03:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
free(obj_dir_real);
|
2020-03-10 16:11:22 +03:00
|
|
|
strbuf_release(&odb_path_real);
|
2020-02-04 08:51:50 +03:00
|
|
|
|
|
|
|
if (!odb)
|
|
|
|
die(_("could not find object directory matching %s"), obj_dir);
|
|
|
|
return odb;
|
|
|
|
}
|
|
|
|
|
2018-06-27 16:24:32 +03:00
|
|
|
static int graph_verify(int argc, const char **argv)
|
|
|
|
{
|
|
|
|
struct commit_graph *graph = NULL;
|
2020-02-04 08:51:50 +03:00
|
|
|
struct object_directory *odb = NULL;
|
2018-06-27 16:24:32 +03:00
|
|
|
char *graph_name;
|
2019-03-25 15:08:30 +03:00
|
|
|
int open_ok;
|
|
|
|
int fd;
|
|
|
|
struct stat st;
|
2019-06-18 21:14:32 +03:00
|
|
|
int flags = 0;
|
2018-06-27 16:24:32 +03:00
|
|
|
|
|
|
|
static struct option builtin_commit_graph_verify_options[] = {
|
|
|
|
OPT_STRING(0, "object-dir", &opts.obj_dir,
|
|
|
|
N_("dir"),
|
|
|
|
N_("The object directory to store the graph")),
|
2019-06-18 21:14:32 +03:00
|
|
|
OPT_BOOL(0, "shallow", &opts.shallow,
|
|
|
|
N_("if the commit-graph is split, only verify the tip file")),
|
2019-08-26 19:29:58 +03:00
|
|
|
OPT_BOOL(0, "progress", &opts.progress, N_("force progress reporting")),
|
2018-06-27 16:24:32 +03:00
|
|
|
OPT_END(),
|
|
|
|
};
|
|
|
|
|
2019-08-27 19:56:34 +03:00
|
|
|
trace2_cmd_mode("verify");
|
|
|
|
|
2019-08-26 19:29:58 +03:00
|
|
|
opts.progress = isatty(2);
|
2018-06-27 16:24:32 +03:00
|
|
|
argc = parse_options(argc, argv, NULL,
|
|
|
|
builtin_commit_graph_verify_options,
|
|
|
|
builtin_commit_graph_verify_usage, 0);
|
|
|
|
|
|
|
|
if (!opts.obj_dir)
|
|
|
|
opts.obj_dir = get_object_directory();
|
2019-06-18 21:14:32 +03:00
|
|
|
if (opts.shallow)
|
|
|
|
flags |= COMMIT_GRAPH_VERIFY_SHALLOW;
|
2019-08-26 19:29:58 +03:00
|
|
|
if (opts.progress)
|
|
|
|
flags |= COMMIT_GRAPH_WRITE_PROGRESS;
|
2018-06-27 16:24:32 +03:00
|
|
|
|
2020-02-04 08:51:50 +03:00
|
|
|
odb = find_odb(the_repository, opts.obj_dir);
|
commit-graph.c: remove path normalization, comparison
As of the previous patch, all calls to 'commit-graph.c' functions which
perform path normalization (for e.g., 'get_commit_graph_filename()') are
of the form 'ctx->odb->path', which is always in normalized form.
Now that there are no callers passing non-normalized paths to these
functions, ensure that future callers are bound by the same restrictions
by making these functions take a 'struct object_directory *' instead of
a 'const char *'. To match, replace all calls with arguments of the form
'ctx->odb->path' with 'ctx->odb' To recover the path, functions that
perform path manipulation simply use 'odb->path'.
Further, avoid string comparisons with arguments of the form
'odb->path', and instead prefer raw pointer comparisons, which
accomplish the same effect, but are far less brittle.
This has a pleasant side-effect of making these functions much more
robust to paths that cannot be normalized by 'normalize_path_copy()',
i.e., because they are outside of the current working directory.
For example, prior to this patch, Valgrind reports that the following
uninitialized memory read [1]:
$ ( cd t && GIT_DIR=../.git valgrind git rev-parse HEAD^ )
because 'normalize_path_copy()' can't normalize '../.git' (since it's
relative to but above of the current working directory) [2].
By using a 'struct object_directory *' directly,
'get_commit_graph_filename()' does not need to normalize, because all
paths are relative to the current working directory since they are
always read from the '->path' of an object directory.
[1]: https://lore.kernel.org/git/20191027042116.GA5801@sigill.intra.peff.net.
[2]: The bug here is that 'get_commit_graph_filename()' returns the
result of 'normalize_path_copy()' without checking the return
value.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-04 00:18:02 +03:00
|
|
|
graph_name = get_commit_graph_filename(odb);
|
2019-03-25 15:08:30 +03:00
|
|
|
open_ok = open_commit_graph(graph_name, &fd, &st);
|
2019-06-18 21:14:32 +03:00
|
|
|
if (!open_ok && errno != ENOENT)
|
2019-03-25 15:08:32 +03:00
|
|
|
die_errno(_("Could not open commit-graph '%s'"), graph_name);
|
2019-06-18 21:14:32 +03:00
|
|
|
|
2018-06-27 16:24:32 +03:00
|
|
|
FREE_AND_NULL(graph_name);
|
|
|
|
|
2019-06-18 21:14:32 +03:00
|
|
|
if (open_ok)
|
2020-02-04 00:18:04 +03:00
|
|
|
graph = load_commit_graph_one_fd_st(fd, &st, odb);
|
2020-02-04 08:51:50 +03:00
|
|
|
else
|
2020-02-04 00:18:00 +03:00
|
|
|
graph = read_commit_graph_one(the_repository, odb);
|
2019-06-18 21:14:32 +03:00
|
|
|
|
|
|
|
/* Return failure if open_ok predicted success */
|
2018-06-27 16:24:32 +03:00
|
|
|
if (!graph)
|
2019-06-18 21:14:32 +03:00
|
|
|
return !!open_ok;
|
2018-06-27 16:24:32 +03:00
|
|
|
|
2018-10-03 20:12:17 +03:00
|
|
|
UNLEAK(graph);
|
2019-06-18 21:14:32 +03:00
|
|
|
return verify_commit_graph(the_repository, graph, flags);
|
2018-06-27 16:24:32 +03:00
|
|
|
}
|
|
|
|
|
2018-08-20 21:24:27 +03:00
|
|
|
extern int read_replace_refs;
|
2019-06-18 21:14:32 +03:00
|
|
|
static struct split_commit_graph_opts split_opts;
|
2018-08-20 21:24:27 +03:00
|
|
|
|
2018-04-02 23:34:20 +03:00
|
|
|
static int graph_write(int argc, const char **argv)
|
|
|
|
{
|
2018-06-27 16:24:44 +03:00
|
|
|
struct string_list *pack_indexes = NULL;
|
|
|
|
struct string_list *commit_hex = NULL;
|
2020-02-04 08:51:50 +03:00
|
|
|
struct object_directory *odb = NULL;
|
2018-06-27 16:24:44 +03:00
|
|
|
struct string_list lines;
|
2019-06-12 16:29:37 +03:00
|
|
|
int result = 0;
|
2019-08-26 19:29:58 +03:00
|
|
|
enum commit_graph_write_flags flags = 0;
|
2018-04-10 15:56:06 +03:00
|
|
|
|
2018-04-02 23:34:20 +03:00
|
|
|
static struct option builtin_commit_graph_write_options[] = {
|
|
|
|
OPT_STRING(0, "object-dir", &opts.obj_dir,
|
|
|
|
N_("dir"),
|
|
|
|
N_("The object directory to store the graph")),
|
2018-06-27 16:24:45 +03:00
|
|
|
OPT_BOOL(0, "reachable", &opts.reachable,
|
|
|
|
N_("start walk at all refs")),
|
2018-04-10 15:56:06 +03:00
|
|
|
OPT_BOOL(0, "stdin-packs", &opts.stdin_packs,
|
|
|
|
N_("scan pack-indexes listed by stdin for commits")),
|
2018-04-10 15:56:07 +03:00
|
|
|
OPT_BOOL(0, "stdin-commits", &opts.stdin_commits,
|
|
|
|
N_("start walk at commits listed by stdin")),
|
2018-04-10 15:56:08 +03:00
|
|
|
OPT_BOOL(0, "append", &opts.append,
|
|
|
|
N_("include all commits already in the commit-graph file")),
|
2020-04-06 19:59:51 +03:00
|
|
|
OPT_BOOL(0, "changed-paths", &opts.enable_changed_paths,
|
|
|
|
N_("enable computation for changed paths")),
|
2019-08-26 19:29:58 +03:00
|
|
|
OPT_BOOL(0, "progress", &opts.progress, N_("force progress reporting")),
|
2019-06-18 21:14:28 +03:00
|
|
|
OPT_BOOL(0, "split", &opts.split,
|
|
|
|
N_("allow writing an incremental commit-graph file")),
|
2019-06-18 21:14:32 +03:00
|
|
|
OPT_INTEGER(0, "max-commits", &split_opts.max_commits,
|
|
|
|
N_("maximum number of commits in a non-base split commit-graph")),
|
|
|
|
OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple,
|
|
|
|
N_("maximum ratio between two levels of a split commit-graph")),
|
|
|
|
OPT_EXPIRY_DATE(0, "expire-time", &split_opts.expire_time,
|
|
|
|
N_("maximum number of commits in a non-base split commit-graph")),
|
2018-04-02 23:34:20 +03:00
|
|
|
OPT_END(),
|
|
|
|
};
|
|
|
|
|
2019-08-26 19:29:58 +03:00
|
|
|
opts.progress = isatty(2);
|
2019-06-18 21:14:32 +03:00
|
|
|
split_opts.size_multiple = 2;
|
|
|
|
split_opts.max_commits = 0;
|
|
|
|
split_opts.expire_time = 0;
|
|
|
|
|
2019-08-27 19:56:34 +03:00
|
|
|
trace2_cmd_mode("write");
|
|
|
|
|
2018-04-02 23:34:20 +03:00
|
|
|
argc = parse_options(argc, argv, NULL,
|
|
|
|
builtin_commit_graph_write_options,
|
|
|
|
builtin_commit_graph_write_usage, 0);
|
|
|
|
|
2018-06-27 16:24:45 +03:00
|
|
|
if (opts.reachable + opts.stdin_packs + opts.stdin_commits > 1)
|
|
|
|
die(_("use at most one of --reachable, --stdin-commits, or --stdin-packs"));
|
2018-04-02 23:34:20 +03:00
|
|
|
if (!opts.obj_dir)
|
|
|
|
opts.obj_dir = get_object_directory();
|
2019-06-12 16:29:38 +03:00
|
|
|
if (opts.append)
|
2019-08-05 11:02:39 +03:00
|
|
|
flags |= COMMIT_GRAPH_WRITE_APPEND;
|
2019-06-18 21:14:28 +03:00
|
|
|
if (opts.split)
|
2019-08-05 11:02:39 +03:00
|
|
|
flags |= COMMIT_GRAPH_WRITE_SPLIT;
|
2019-08-26 19:29:58 +03:00
|
|
|
if (opts.progress)
|
|
|
|
flags |= COMMIT_GRAPH_WRITE_PROGRESS;
|
2020-04-06 19:59:55 +03:00
|
|
|
if (opts.enable_changed_paths ||
|
|
|
|
git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0))
|
2020-04-06 19:59:51 +03:00
|
|
|
flags |= COMMIT_GRAPH_WRITE_BLOOM_FILTERS;
|
2018-04-02 23:34:20 +03:00
|
|
|
|
2018-08-20 21:24:27 +03:00
|
|
|
read_replace_refs = 0;
|
2020-02-04 08:51:50 +03:00
|
|
|
odb = find_odb(the_repository, opts.obj_dir);
|
2018-08-20 21:24:27 +03:00
|
|
|
|
2019-06-18 21:14:32 +03:00
|
|
|
if (opts.reachable) {
|
2020-02-04 08:51:50 +03:00
|
|
|
if (write_commit_graph_reachable(odb, flags, &split_opts))
|
2019-06-18 21:14:32 +03:00
|
|
|
return 1;
|
|
|
|
return 0;
|
|
|
|
}
|
2018-06-27 16:24:45 +03:00
|
|
|
|
2018-06-27 16:24:44 +03:00
|
|
|
string_list_init(&lines, 0);
|
2018-04-10 15:56:07 +03:00
|
|
|
if (opts.stdin_packs || opts.stdin_commits) {
|
2018-04-10 15:56:06 +03:00
|
|
|
struct strbuf buf = STRBUF_INIT;
|
2018-06-27 16:24:44 +03:00
|
|
|
|
|
|
|
while (strbuf_getline(&buf, stdin) != EOF)
|
|
|
|
string_list_append(&lines, strbuf_detach(&buf, NULL));
|
|
|
|
|
|
|
|
if (opts.stdin_packs)
|
|
|
|
pack_indexes = &lines;
|
2019-08-05 11:02:40 +03:00
|
|
|
if (opts.stdin_commits) {
|
2018-06-27 16:24:44 +03:00
|
|
|
commit_hex = &lines;
|
2019-08-05 11:02:40 +03:00
|
|
|
flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS;
|
|
|
|
}
|
2018-10-03 20:12:17 +03:00
|
|
|
|
|
|
|
UNLEAK(buf);
|
2018-04-10 15:56:06 +03:00
|
|
|
}
|
|
|
|
|
2020-02-04 08:51:50 +03:00
|
|
|
if (write_commit_graph(odb,
|
2019-06-12 16:29:37 +03:00
|
|
|
pack_indexes,
|
|
|
|
commit_hex,
|
2019-06-18 21:14:32 +03:00
|
|
|
flags,
|
|
|
|
&split_opts))
|
2019-06-12 16:29:37 +03:00
|
|
|
result = 1;
|
2018-04-10 15:56:06 +03:00
|
|
|
|
2018-10-03 20:12:17 +03:00
|
|
|
UNLEAK(lines);
|
2019-06-12 16:29:37 +03:00
|
|
|
return result;
|
2018-04-02 23:34:20 +03:00
|
|
|
}
|
2018-04-02 23:34:18 +03:00
|
|
|
|
|
|
|
int cmd_commit_graph(int argc, const char **argv, const char *prefix)
|
|
|
|
{
|
|
|
|
static struct option builtin_commit_graph_options[] = {
|
|
|
|
OPT_STRING(0, "object-dir", &opts.obj_dir,
|
|
|
|
N_("dir"),
|
|
|
|
N_("The object directory to store the graph")),
|
|
|
|
OPT_END(),
|
|
|
|
};
|
|
|
|
|
|
|
|
if (argc == 2 && !strcmp(argv[1], "-h"))
|
|
|
|
usage_with_options(builtin_commit_graph_usage,
|
|
|
|
builtin_commit_graph_options);
|
|
|
|
|
|
|
|
git_config(git_default_config, NULL);
|
|
|
|
argc = parse_options(argc, argv, prefix,
|
|
|
|
builtin_commit_graph_options,
|
|
|
|
builtin_commit_graph_usage,
|
|
|
|
PARSE_OPT_STOP_AT_NON_OPTION);
|
|
|
|
|
commit-graph: turn off save_commit_buffer
The commit-graph tool may read a lot of commits, but it only cares about
parsing their metadata (parents, trees, etc) and doesn't ever show the
messages to the user. And so it should not need save_commit_buffer,
which is meant for holding onto the object data of parsed commits so
that we can show them later. In fact, it's quite harmful to do so.
According to massif, the max heap of "git commit-graph write
--reachable" in linux.git before/after this patch (removing the commit
graph file in between) goes from ~1.1GB to ~270MB.
Which isn't surprising, since the difference is about the sum of the
uncompressed sizes of all commits in the repository, and this was
equivalent to leaking them.
This obviously helps if you're under memory pressure, but even without
it, things go faster. My before/after times for that command (without
massif) went from 12.521s to 11.874s, a speedup of ~5%.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-07 08:04:40 +03:00
|
|
|
save_commit_buffer = 0;
|
|
|
|
|
2018-04-02 23:34:20 +03:00
|
|
|
if (argc > 0) {
|
2018-06-27 16:24:32 +03:00
|
|
|
if (!strcmp(argv[0], "verify"))
|
|
|
|
return graph_verify(argc, argv);
|
2018-04-02 23:34:20 +03:00
|
|
|
if (!strcmp(argv[0], "write"))
|
|
|
|
return graph_write(argc, argv);
|
|
|
|
}
|
|
|
|
|
2018-04-02 23:34:18 +03:00
|
|
|
usage_with_options(builtin_commit_graph_usage,
|
|
|
|
builtin_commit_graph_options);
|
|
|
|
}
|