2018-04-02 23:34:18 +03:00
|
|
|
#include "builtin.h"
|
|
|
|
#include "config.h"
|
2018-04-02 23:34:20 +03:00
|
|
|
#include "dir.h"
|
|
|
|
#include "lockfile.h"
|
2018-04-02 23:34:18 +03:00
|
|
|
#include "parse-options.h"
|
2018-06-27 16:24:32 +03:00
|
|
|
#include "repository.h"
|
2018-04-02 23:34:20 +03:00
|
|
|
#include "commit-graph.h"
|
2019-06-18 21:14:32 +03:00
|
|
|
#include "object-store.h"
|
2020-05-14 00:59:44 +03:00
|
|
|
#include "progress.h"
|
commit-graph: drop COMMIT_GRAPH_WRITE_CHECK_OIDS flag
Since 7c5c9b9c57 (commit-graph: error out on invalid commit oids in
'write --stdin-commits', 2019-08-05), the commit-graph builtin dies on
receiving non-commit OIDs as input to '--stdin-commits'.
This behavior can be cumbersome to work around in, say, the case of
piping 'git for-each-ref' to 'git commit-graph write --stdin-commits' if
the caller does not want to cull out non-commits themselves. In this
situation, it would be ideal if 'git commit-graph write' wrote the graph
containing the inputs that did pertain to commits, and silently ignored
the remainder of the input.
Some options have been proposed to the effect of '--[no-]check-oids'
which would allow callers to have the commit-graph builtin do just that.
After some discussion, it is difficult to imagine a caller who wouldn't
want to pass '--no-check-oids', suggesting that we should get rid of the
behavior of complaining about non-commit inputs altogether.
If callers do wish to retain this behavior, they can easily work around
this change by doing the following:
git for-each-ref --format='%(objectname) %(objecttype) %(*objecttype)' |
awk '
!/commit/ { print "not-a-commit:"$1 }
/commit/ { print $1 }
' |
git commit-graph write --stdin-commits
To make it so that valid OIDs that refer to non-existent objects are
indeed an error after loosening the error handling, perform an extra
lookup to make sure that object indeed exists before sending it to the
commit-graph internals.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-14 00:59:55 +03:00
|
|
|
#include "tag.h"
|
2018-04-02 23:34:18 +03:00
|
|
|
|
2021-08-23 15:30:15 +03:00
|
|
|
/*
 * One-line synopsis of "git commit-graph verify".  It is listed both in the
 * subcommand's own usage array and in the top-level usage array.
 */
#define BUILTIN_COMMIT_GRAPH_VERIFY_USAGE \
	N_("git commit-graph verify [--object-dir <dir>] " \
	   "[--shallow] [--[no-]progress]")
|
2021-08-23 15:30:15 +03:00
|
|
|
|
|
|
|
/*
 * Multi-line synopsis of "git commit-graph write".  It is listed both in the
 * subcommand's own usage array and in the top-level usage array.
 *
 * Continuation lines are padded with 23 spaces, the width of
 * "git commit-graph write ", so that the wrapped options line up under the
 * first option when the usage text is printed.
 */
#define BUILTIN_COMMIT_GRAPH_WRITE_USAGE \
	N_("git commit-graph write [--object-dir <dir>] [--append]\n" \
	   "                       [--split[=<strategy>]] [--reachable | --stdin-packs | --stdin-commits]\n" \
	   "                       [--changed-paths] [--[no-]max-new-filters <n>] [--[no-]progress]\n" \
	   "                       <split options>")
|
2021-08-23 15:30:15 +03:00
|
|
|
|
|
|
|
static const char * builtin_commit_graph_verify_usage[] = {
|
|
|
|
BUILTIN_COMMIT_GRAPH_VERIFY_USAGE,
|
2018-04-02 23:34:20 +03:00
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
2021-08-23 15:30:15 +03:00
|
|
|
static const char * builtin_commit_graph_write_usage[] = {
|
|
|
|
BUILTIN_COMMIT_GRAPH_WRITE_USAGE,
|
2018-06-27 16:24:32 +03:00
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
2021-08-23 15:30:15 +03:00
|
|
|
static char const * const builtin_commit_graph_usage[] = {
|
|
|
|
BUILTIN_COMMIT_GRAPH_VERIFY_USAGE,
|
|
|
|
BUILTIN_COMMIT_GRAPH_WRITE_USAGE,
|
|
|
|
NULL,
|
2018-04-02 23:34:18 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Command-line state shared by the "verify" and "write" subcommands.  The
 * members are filled in by parse_options() through the option tables defined
 * in this file.
 */
static struct opts_commit_graph {
	/* --object-dir; replaced by get_object_directory() when left NULL */
	const char *obj_dir;
	/* write: --reachable */
	int reachable;
	/* write: --stdin-packs */
	int stdin_packs;
	/* write: --stdin-commits */
	int stdin_commits;
	/* write: --append */
	int append;
	/* write: set to 1 by the --split callback */
	int split;
	/* verify: --shallow */
	int shallow;
	/* --progress; both subcommands preset it to isatty(2) before parsing */
	int progress;
	/*
	 * write: --changed-paths.  graph_write() presets it to -1 before
	 * parsing, so -1, 0 and 1 are handled as three distinct states.
	 */
	int enable_changed_paths;
} opts;
|
|
|
|
|
2021-08-23 15:30:17 +03:00
|
|
|
static struct option common_opts[] = {
|
|
|
|
OPT_STRING(0, "object-dir", &opts.obj_dir,
|
|
|
|
N_("dir"),
|
|
|
|
N_("the object directory to store the graph")),
|
|
|
|
OPT_END()
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct option *add_common_options(struct option *to)
|
|
|
|
{
|
|
|
|
return parse_options_concat(common_opts, to);
|
|
|
|
}
|
|
|
|
|
2022-08-19 19:04:02 +03:00
|
|
|
static int graph_verify(int argc, const char **argv, const char *prefix)
|
2018-06-27 16:24:32 +03:00
|
|
|
{
|
|
|
|
struct commit_graph *graph = NULL;
|
2020-02-04 08:51:50 +03:00
|
|
|
struct object_directory *odb = NULL;
|
2018-06-27 16:24:32 +03:00
|
|
|
char *graph_name;
|
2019-03-25 15:08:30 +03:00
|
|
|
int open_ok;
|
|
|
|
int fd;
|
|
|
|
struct stat st;
|
2019-06-18 21:14:32 +03:00
|
|
|
int flags = 0;
|
2018-06-27 16:24:32 +03:00
|
|
|
|
|
|
|
static struct option builtin_commit_graph_verify_options[] = {
|
2019-06-18 21:14:32 +03:00
|
|
|
OPT_BOOL(0, "shallow", &opts.shallow,
|
|
|
|
N_("if the commit-graph is split, only verify the tip file")),
|
2021-09-18 19:02:37 +03:00
|
|
|
OPT_BOOL(0, "progress", &opts.progress,
|
|
|
|
N_("force progress reporting")),
|
2018-06-27 16:24:32 +03:00
|
|
|
OPT_END(),
|
|
|
|
};
|
2021-08-23 15:30:17 +03:00
|
|
|
struct option *options = add_common_options(builtin_commit_graph_verify_options);
|
2018-06-27 16:24:32 +03:00
|
|
|
|
2019-08-27 19:56:34 +03:00
|
|
|
trace2_cmd_mode("verify");
|
|
|
|
|
2019-08-26 19:29:58 +03:00
|
|
|
opts.progress = isatty(2);
|
pass subcommand "prefix" arguments to parse_options()
Recent commits such as bf0a6b65fc (builtin/multi-pack-index.c: let
parse-options parse subcommands, 2022-08-19) converted a few functions
to match our usual argc/argv/prefix conventions, but the prefix argument
remains unused.
However, there is a good use for it: they should pass it to their own
parse_options() functions, where it may be used to adjust the value of
any filename options. In all but one of these functions, there's no
behavior change, since they don't use OPT_FILENAME. But this is an
actual fix for one option, which you can see by modifying the test suite
like so:
diff --git a/t/t5326-multi-pack-bitmaps.sh b/t/t5326-multi-pack-bitmaps.sh
index 4fe57414c1..d0974d4371 100755
--- a/t/t5326-multi-pack-bitmaps.sh
+++ b/t/t5326-multi-pack-bitmaps.sh
@@ -186,7 +186,11 @@ test_expect_success 'writing a bitmap with --refs-snapshot' '
# Then again, but with a refs snapshot which only sees
# refs/tags/one.
- git multi-pack-index write --bitmap --refs-snapshot=snapshot &&
+ (
+ mkdir subdir &&
+ cd subdir &&
+ git multi-pack-index write --bitmap --refs-snapshot=../snapshot
+ ) &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
I'd emphasize that this wasn't broken by bf0a6b65fc; it has been broken
all along, because the sub-function never got to see the prefix. It is
that commit which is actually enabling us to fix it (and which also
brought attention to the problem because it triggers -Wunused-parameter!)
The other functions changed here don't use OPT_FILENAME at all. In their
cases this isn't fixing anything visible, but it's following the usual
pattern and future-proofing them against somebody adding new options and
being surprised.
I didn't include a test for the one visible case above. We don't
generally test routine parse-options behavior for individual options.
The challenge here was finding the problem, and now that this has been
done, it's not likely to regress. Likewise, we could apply the patch
above to cover it "for free" but it makes reading the rest of the test
unnecessarily complicated.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-25 13:47:00 +03:00
|
|
|
argc = parse_options(argc, argv, prefix,
|
2021-08-23 15:30:17 +03:00
|
|
|
options,
|
2018-06-27 16:24:32 +03:00
|
|
|
builtin_commit_graph_verify_usage, 0);
|
2021-08-23 15:30:20 +03:00
|
|
|
if (argc)
|
|
|
|
usage_with_options(builtin_commit_graph_verify_usage, options);
|
2018-06-27 16:24:32 +03:00
|
|
|
|
|
|
|
if (!opts.obj_dir)
|
|
|
|
opts.obj_dir = get_object_directory();
|
2019-06-18 21:14:32 +03:00
|
|
|
if (opts.shallow)
|
|
|
|
flags |= COMMIT_GRAPH_VERIFY_SHALLOW;
|
2019-08-26 19:29:58 +03:00
|
|
|
if (opts.progress)
|
|
|
|
flags |= COMMIT_GRAPH_WRITE_PROGRESS;
|
2018-06-27 16:24:32 +03:00
|
|
|
|
2020-02-04 08:51:50 +03:00
|
|
|
odb = find_odb(the_repository, opts.obj_dir);
|
commit-graph.c: remove path normalization, comparison
As of the previous patch, all calls to 'commit-graph.c' functions which
perform path normalization (for e.g., 'get_commit_graph_filename()') are
of the form 'ctx->odb->path', which is always in normalized form.
Now that there are no callers passing non-normalized paths to these
functions, ensure that future callers are bound by the same restrictions
by making these functions take a 'struct object_directory *' instead of
a 'const char *'. To match, replace all calls with arguments of the form
'ctx->odb->path' with 'ctx->odb' To recover the path, functions that
perform path manipulation simply use 'odb->path'.
Further, avoid string comparisons with arguments of the form
'odb->path', and instead prefer raw pointer comparisons, which
accomplish the same effect, but are far less brittle.
This has a pleasant side-effect of making these functions much more
robust to paths that cannot be normalized by 'normalize_path_copy()',
i.e., because they are outside of the current working directory.
For example, prior to this patch, Valgrind reports that the following
uninitialized memory read [1]:
$ ( cd t && GIT_DIR=../.git valgrind git rev-parse HEAD^ )
because 'normalize_path_copy()' can't normalize '../.git' (since it's
relative to but above of the current working directory) [2].
By using a 'struct object_directory *' directly,
'get_commit_graph_filename()' does not need to normalize, because all
paths are relative to the current working directory since they are
always read from the '->path' of an object directory.
[1]: https://lore.kernel.org/git/20191027042116.GA5801@sigill.intra.peff.net.
[2]: The bug here is that 'get_commit_graph_filename()' returns the
result of 'normalize_path_copy()' without checking the return
value.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-04 00:18:02 +03:00
|
|
|
graph_name = get_commit_graph_filename(odb);
|
2019-03-25 15:08:30 +03:00
|
|
|
open_ok = open_commit_graph(graph_name, &fd, &st);
|
2019-06-18 21:14:32 +03:00
|
|
|
if (!open_ok && errno != ENOENT)
|
2019-03-25 15:08:32 +03:00
|
|
|
die_errno(_("Could not open commit-graph '%s'"), graph_name);
|
2019-06-18 21:14:32 +03:00
|
|
|
|
2018-06-27 16:24:32 +03:00
|
|
|
FREE_AND_NULL(graph_name);
|
2021-08-23 15:30:17 +03:00
|
|
|
FREE_AND_NULL(options);
|
2018-06-27 16:24:32 +03:00
|
|
|
|
2019-06-18 21:14:32 +03:00
|
|
|
if (open_ok)
|
2020-09-09 18:22:56 +03:00
|
|
|
graph = load_commit_graph_one_fd_st(the_repository, fd, &st, odb);
|
2020-02-04 08:51:50 +03:00
|
|
|
else
|
2020-02-04 00:18:00 +03:00
|
|
|
graph = read_commit_graph_one(the_repository, odb);
|
2019-06-18 21:14:32 +03:00
|
|
|
|
|
|
|
/* Return failure if open_ok predicted success */
|
2018-06-27 16:24:32 +03:00
|
|
|
if (!graph)
|
2019-06-18 21:14:32 +03:00
|
|
|
return !!open_ok;
|
2018-06-27 16:24:32 +03:00
|
|
|
|
2018-10-03 20:12:17 +03:00
|
|
|
UNLEAK(graph);
|
2019-06-18 21:14:32 +03:00
|
|
|
return verify_commit_graph(the_repository, graph, flags);
|
2018-06-27 16:24:32 +03:00
|
|
|
}
|
|
|
|
|
2018-08-20 21:24:27 +03:00
|
|
|
extern int read_replace_refs;
|
2020-09-18 05:59:49 +03:00
|
|
|
static struct commit_graph_opts write_opts;
|
2018-08-20 21:24:27 +03:00
|
|
|
|
2020-04-14 07:04:08 +03:00
|
|
|
static int write_option_parse_split(const struct option *opt, const char *arg,
|
|
|
|
int unset)
|
|
|
|
{
|
builtin/commit-graph.c: introduce split strategy 'no-merge'
In the previous commit, we laid the groundwork for supporting different
splitting strategies. In this commit, we introduce the first splitting
strategy: 'no-merge'.
Passing '--split=no-merge' is useful for callers which wish to write a
new incremental commit-graph, but do not want to spend effort condensing
the incremental chain [1]. Previously, this was possible by passing
'--size-multiple=0', but this no longer the case following 63020f175f
(commit-graph: prefer default size_mult when given zero, 2020-01-02).
When '--split=no-merge' is given, the commit-graph machinery will never
condense an existing chain, and it will always write a new incremental.
[1]: This might occur when, for example, a server administrator running
some program after each push may want to ensure that each job runs
proportional in time to the size of the push, and does not "jump" when
the commit-graph machinery decides to trigger a merge.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-14 07:04:12 +03:00
|
|
|
enum commit_graph_split_flags *flags = opt->value;
|
|
|
|
|
2020-09-30 15:29:02 +03:00
|
|
|
BUG_ON_OPT_NEG(unset);
|
|
|
|
|
2020-04-14 07:04:08 +03:00
|
|
|
opts.split = 1;
|
|
|
|
if (!arg)
|
|
|
|
return 0;
|
|
|
|
|
builtin/commit-graph.c: introduce split strategy 'no-merge'
In the previous commit, we laid the groundwork for supporting different
splitting strategies. In this commit, we introduce the first splitting
strategy: 'no-merge'.
Passing '--split=no-merge' is useful for callers which wish to write a
new incremental commit-graph, but do not want to spend effort condensing
the incremental chain [1]. Previously, this was possible by passing
'--size-multiple=0', but this no longer the case following 63020f175f
(commit-graph: prefer default size_mult when given zero, 2020-01-02).
When '--split=no-merge' is given, the commit-graph machinery will never
condense an existing chain, and it will always write a new incremental.
[1]: This might occur when, for example, a server administrator running
some program after each push may want to ensure that each job runs
proportional in time to the size of the push, and does not "jump" when
the commit-graph machinery decides to trigger a merge.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-14 07:04:12 +03:00
|
|
|
if (!strcmp(arg, "no-merge"))
|
|
|
|
*flags = COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED;
|
builtin/commit-graph.c: introduce split strategy 'replace'
When using split commit-graphs, it is sometimes useful to completely
replace the commit-graph chain with a new base.
For example, consider a scenario in which a repository builds a new
commit-graph incremental for each push. Occasionally (say, after some
fixed number of pushes), they may wish to rebuild the commit-graph chain
with all reachable commits.
They can do so with
$ git commit-graph write --reachable
but this removes the chain entirely and replaces it with a single
commit-graph in 'objects/info/commit-graph'. Unfortunately, this means
that the next push will have to move this commit-graph into the first
layer of a new chain, and then write its new commits on top.
Avoid such copying entirely by allowing the caller to specify that they
wish to replace the entirety of their commit-graph chain, while also
specifying that the new commit-graph should become the basis of a fresh,
length-one chain.
This addresses the above situation by making it possible for the caller
to instead write:
$ git commit-graph write --reachable --split=replace
which writes a new length-one chain to 'objects/info/commit-graphs',
making the commit-graph incremental generated by the subsequent push
relatively cheap by avoiding the aforementioned copy.
In order to do this, remove an assumption in 'write_commit_graph_file'
that chains are always at least two incrementals long.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-14 07:04:17 +03:00
|
|
|
else if (!strcmp(arg, "replace"))
|
|
|
|
*flags = COMMIT_GRAPH_SPLIT_REPLACE;
|
builtin/commit-graph.c: introduce split strategy 'no-merge'
In the previous commit, we laid the groundwork for supporting different
splitting strategies. In this commit, we introduce the first splitting
strategy: 'no-merge'.
Passing '--split=no-merge' is useful for callers which wish to write a
new incremental commit-graph, but do not want to spend effort condensing
the incremental chain [1]. Previously, this was possible by passing
'--size-multiple=0', but this no longer the case following 63020f175f
(commit-graph: prefer default size_mult when given zero, 2020-01-02).
When '--split=no-merge' is given, the commit-graph machinery will never
condense an existing chain, and it will always write a new incremental.
[1]: This might occur when, for example, a server administrator running
some program after each push may want to ensure that each job runs
proportional in time to the size of the push, and does not "jump" when
the commit-graph machinery decides to trigger a merge.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-14 07:04:12 +03:00
|
|
|
else
|
|
|
|
die(_("unrecognized --split argument, %s"), arg);
|
2020-04-14 07:04:08 +03:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-05-14 00:59:44 +03:00
|
|
|
static int read_one_commit(struct oidset *commits, struct progress *progress,
|
|
|
|
const char *hash)
|
2020-05-18 22:27:09 +03:00
|
|
|
{
|
commit-graph: drop COMMIT_GRAPH_WRITE_CHECK_OIDS flag
Since 7c5c9b9c57 (commit-graph: error out on invalid commit oids in
'write --stdin-commits', 2019-08-05), the commit-graph builtin dies on
receiving non-commit OIDs as input to '--stdin-commits'.
This behavior can be cumbersome to work around in, say, the case of
piping 'git for-each-ref' to 'git commit-graph write --stdin-commits' if
the caller does not want to cull out non-commits themselves. In this
situation, it would be ideal if 'git commit-graph write' wrote the graph
containing the inputs that did pertain to commits, and silently ignored
the remainder of the input.
Some options have been proposed to the effect of '--[no-]check-oids'
which would allow callers to have the commit-graph builtin do just that.
After some discussion, it is difficult to imagine a caller who wouldn't
want to pass '--no-check-oids', suggesting that we should get rid of the
behavior of complaining about non-commit inputs altogether.
If callers do wish to retain this behavior, they can easily work around
this change by doing the following:
git for-each-ref --format='%(objectname) %(objecttype) %(*objecttype)' |
awk '
!/commit/ { print "not-a-commit:"$1 }
/commit/ { print $1 }
' |
git commit-graph write --stdin-commits
To make it so that valid OIDs that refer to non-existent objects are
indeed an error after loosening the error handling, perform an extra
lookup to make sure that object indeed exists before sending it to the
commit-graph internals.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-14 00:59:55 +03:00
|
|
|
struct object *result;
|
2020-05-18 22:27:09 +03:00
|
|
|
struct object_id oid;
|
|
|
|
const char *end;
|
|
|
|
|
|
|
|
if (parse_oid_hex(hash, &oid, &end))
|
|
|
|
return error(_("unexpected non-hex object ID: %s"), hash);
|
|
|
|
|
commit-graph: drop COMMIT_GRAPH_WRITE_CHECK_OIDS flag
Since 7c5c9b9c57 (commit-graph: error out on invalid commit oids in
'write --stdin-commits', 2019-08-05), the commit-graph builtin dies on
receiving non-commit OIDs as input to '--stdin-commits'.
This behavior can be cumbersome to work around in, say, the case of
piping 'git for-each-ref' to 'git commit-graph write --stdin-commits' if
the caller does not want to cull out non-commits themselves. In this
situation, it would be ideal if 'git commit-graph write' wrote the graph
containing the inputs that did pertain to commits, and silently ignored
the remainder of the input.
Some options have been proposed to the effect of '--[no-]check-oids'
which would allow callers to have the commit-graph builtin do just that.
After some discussion, it is difficult to imagine a caller who wouldn't
want to pass '--no-check-oids', suggesting that we should get rid of the
behavior of complaining about non-commit inputs altogether.
If callers do wish to retain this behavior, they can easily work around
this change by doing the following:
git for-each-ref --format='%(objectname) %(objecttype) %(*objecttype)' |
awk '
!/commit/ { print "not-a-commit:"$1 }
/commit/ { print $1 }
' |
git commit-graph write --stdin-commits
To make it so that valid OIDs that refer to non-existent objects are
indeed an error after loosening the error handling, perform an extra
lookup to make sure that object indeed exists before sending it to the
commit-graph internals.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-14 00:59:55 +03:00
|
|
|
result = deref_tag(the_repository, parse_object(the_repository, &oid),
|
|
|
|
NULL, 0);
|
|
|
|
if (!result)
|
|
|
|
return error(_("invalid object: %s"), hash);
|
2020-06-17 12:14:08 +03:00
|
|
|
else if (object_as_type(result, OBJ_COMMIT, 1))
|
commit-graph: drop COMMIT_GRAPH_WRITE_CHECK_OIDS flag
Since 7c5c9b9c57 (commit-graph: error out on invalid commit oids in
'write --stdin-commits', 2019-08-05), the commit-graph builtin dies on
receiving non-commit OIDs as input to '--stdin-commits'.
This behavior can be cumbersome to work around in, say, the case of
piping 'git for-each-ref' to 'git commit-graph write --stdin-commits' if
the caller does not want to cull out non-commits themselves. In this
situation, it would be ideal if 'git commit-graph write' wrote the graph
containing the inputs that did pertain to commits, and silently ignored
the remainder of the input.
Some options have been proposed to the effect of '--[no-]check-oids'
which would allow callers to have the commit-graph builtin do just that.
After some discussion, it is difficult to imagine a caller who wouldn't
want to pass '--no-check-oids', suggesting that we should get rid of the
behavior of complaining about non-commit inputs altogether.
If callers do wish to retain this behavior, they can easily work around
this change by doing the following:
git for-each-ref --format='%(objectname) %(objecttype) %(*objecttype)' |
awk '
!/commit/ { print "not-a-commit:"$1 }
/commit/ { print $1 }
' |
git commit-graph write --stdin-commits
To make it so that valid OIDs that refer to non-existent objects are
indeed an error after loosening the error handling, perform an extra
lookup to make sure that object indeed exists before sending it to the
commit-graph internals.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-14 00:59:55 +03:00
|
|
|
oidset_insert(commits, &result->oid);
|
2020-05-14 00:59:44 +03:00
|
|
|
|
|
|
|
display_progress(progress, oidset_size(commits));
|
|
|
|
|
2020-05-18 22:27:09 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-09-18 16:27:27 +03:00
|
|
|
static int write_option_max_new_filters(const struct option *opt,
|
|
|
|
const char *arg,
|
|
|
|
int unset)
|
|
|
|
{
|
|
|
|
int *to = opt->value;
|
|
|
|
if (unset)
|
|
|
|
*to = -1;
|
|
|
|
else {
|
|
|
|
const char *s;
|
|
|
|
*to = strtol(arg, (char **)&s, 10);
|
|
|
|
if (*s)
|
2021-10-08 22:07:43 +03:00
|
|
|
return error(_("option `%s' expects a numerical value"),
|
|
|
|
"max-new-filters");
|
2020-09-18 16:27:27 +03:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-09-18 05:59:57 +03:00
|
|
|
static int git_commit_graph_write_config(const char *var, const char *value,
|
2022-08-25 20:09:48 +03:00
|
|
|
void *cb UNUSED)
|
2020-09-18 05:59:57 +03:00
|
|
|
{
|
|
|
|
if (!strcmp(var, "commitgraph.maxnewfilters"))
|
|
|
|
write_opts.max_new_filters = git_config_int(var, value);
|
|
|
|
/*
|
|
|
|
* No need to fall-back to 'git_default_config', since this was already
|
|
|
|
* called in 'cmd_commit_graph()'.
|
|
|
|
*/
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-08-19 19:04:02 +03:00
|
|
|
static int graph_write(int argc, const char **argv, const char *prefix)
|
2018-04-02 23:34:20 +03:00
|
|
|
{
|
2022-03-04 21:32:12 +03:00
|
|
|
struct string_list pack_indexes = STRING_LIST_INIT_DUP;
|
2020-05-18 22:27:09 +03:00
|
|
|
struct strbuf buf = STRBUF_INIT;
|
2020-04-14 07:04:25 +03:00
|
|
|
struct oidset commits = OIDSET_INIT;
|
2020-02-04 08:51:50 +03:00
|
|
|
struct object_directory *odb = NULL;
|
2019-06-12 16:29:37 +03:00
|
|
|
int result = 0;
|
2019-08-26 19:29:58 +03:00
|
|
|
enum commit_graph_write_flags flags = 0;
|
2020-05-14 00:59:44 +03:00
|
|
|
struct progress *progress = NULL;
|
2018-04-10 15:56:06 +03:00
|
|
|
|
2018-04-02 23:34:20 +03:00
|
|
|
static struct option builtin_commit_graph_write_options[] = {
|
2018-06-27 16:24:45 +03:00
|
|
|
OPT_BOOL(0, "reachable", &opts.reachable,
|
|
|
|
N_("start walk at all refs")),
|
2018-04-10 15:56:06 +03:00
|
|
|
OPT_BOOL(0, "stdin-packs", &opts.stdin_packs,
|
|
|
|
N_("scan pack-indexes listed by stdin for commits")),
|
2018-04-10 15:56:07 +03:00
|
|
|
OPT_BOOL(0, "stdin-commits", &opts.stdin_commits,
|
|
|
|
N_("start walk at commits listed by stdin")),
|
2018-04-10 15:56:08 +03:00
|
|
|
OPT_BOOL(0, "append", &opts.append,
|
|
|
|
N_("include all commits already in the commit-graph file")),
|
2020-04-06 19:59:51 +03:00
|
|
|
OPT_BOOL(0, "changed-paths", &opts.enable_changed_paths,
|
|
|
|
N_("enable computation for changed paths")),
|
2020-09-18 05:59:49 +03:00
|
|
|
OPT_CALLBACK_F(0, "split", &write_opts.split_flags, NULL,
|
2020-04-14 07:04:08 +03:00
|
|
|
N_("allow writing an incremental commit-graph file"),
|
|
|
|
PARSE_OPT_OPTARG | PARSE_OPT_NONEG,
|
|
|
|
write_option_parse_split),
|
2020-09-18 05:59:49 +03:00
|
|
|
OPT_INTEGER(0, "max-commits", &write_opts.max_commits,
|
2019-06-18 21:14:32 +03:00
|
|
|
N_("maximum number of commits in a non-base split commit-graph")),
|
2020-09-18 05:59:49 +03:00
|
|
|
OPT_INTEGER(0, "size-multiple", &write_opts.size_multiple,
|
2019-06-18 21:14:32 +03:00
|
|
|
N_("maximum ratio between two levels of a split commit-graph")),
|
2020-09-18 05:59:49 +03:00
|
|
|
OPT_EXPIRY_DATE(0, "expire-time", &write_opts.expire_time,
|
2020-04-02 00:00:44 +03:00
|
|
|
N_("only expire files older than a given date-time")),
|
2020-09-18 16:27:27 +03:00
|
|
|
OPT_CALLBACK_F(0, "max-new-filters", &write_opts.max_new_filters,
|
|
|
|
NULL, N_("maximum number of changed-path Bloom filters to compute"),
|
|
|
|
0, write_option_max_new_filters),
|
2021-09-18 19:02:37 +03:00
|
|
|
OPT_BOOL(0, "progress", &opts.progress,
|
|
|
|
N_("force progress reporting")),
|
2018-04-02 23:34:20 +03:00
|
|
|
OPT_END(),
|
|
|
|
};
|
2021-08-23 15:30:17 +03:00
|
|
|
struct option *options = add_common_options(builtin_commit_graph_write_options);
|
2018-04-02 23:34:20 +03:00
|
|
|
|
2019-08-26 19:29:58 +03:00
|
|
|
opts.progress = isatty(2);
|
2020-07-01 16:27:24 +03:00
|
|
|
opts.enable_changed_paths = -1;
|
2020-09-18 05:59:49 +03:00
|
|
|
write_opts.size_multiple = 2;
|
|
|
|
write_opts.max_commits = 0;
|
|
|
|
write_opts.expire_time = 0;
|
2020-09-18 16:27:27 +03:00
|
|
|
write_opts.max_new_filters = -1;
|
2019-06-18 21:14:32 +03:00
|
|
|
|
2019-08-27 19:56:34 +03:00
|
|
|
trace2_cmd_mode("write");
|
|
|
|
|
2020-09-18 05:59:57 +03:00
|
|
|
git_config(git_commit_graph_write_config, &opts);
|
|
|
|
|
pass subcommand "prefix" arguments to parse_options()
Recent commits such as bf0a6b65fc (builtin/multi-pack-index.c: let
parse-options parse subcommands, 2022-08-19) converted a few functions
to match our usual argc/argv/prefix conventions, but the prefix argument
remains unused.
However, there is a good use for it: they should pass it to their own
parse_options() functions, where it may be used to adjust the value of
any filename options. In all but one of these functions, there's no
behavior change, since they don't use OPT_FILENAME. But this is an
actual fix for one option, which you can see by modifying the test suite
like so:
diff --git a/t/t5326-multi-pack-bitmaps.sh b/t/t5326-multi-pack-bitmaps.sh
index 4fe57414c1..d0974d4371 100755
--- a/t/t5326-multi-pack-bitmaps.sh
+++ b/t/t5326-multi-pack-bitmaps.sh
@@ -186,7 +186,11 @@ test_expect_success 'writing a bitmap with --refs-snapshot' '
# Then again, but with a refs snapshot which only sees
# refs/tags/one.
- git multi-pack-index write --bitmap --refs-snapshot=snapshot &&
+ (
+ mkdir subdir &&
+ cd subdir &&
+ git multi-pack-index write --bitmap --refs-snapshot=../snapshot
+ ) &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
I'd emphasize that this wasn't broken by bf0a6b65fc; it has been broken
all along, because the sub-function never got to see the prefix. It is
that commit which is actually enabling us to fix it (and which also
brought attention to the problem because it triggers -Wunused-parameter!)
The other functions changed here don't use OPT_FILENAME at all. In their
cases this isn't fixing anything visible, but it's following the usual
pattern and future-proofing them against somebody adding new options and
being surprised.
I didn't include a test for the one visible case above. We don't
generally test routine parse-options behavior for individual options.
The challenge here was finding the problem, and now that this has been
done, it's not likely to regress. Likewise, we could apply the patch
above to cover it "for free" but it makes reading the rest of the test
unnecessarily complicated.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-25 13:47:00 +03:00
|
|
|
argc = parse_options(argc, argv, prefix,
|
2021-08-23 15:30:17 +03:00
|
|
|
options,
|
2018-04-02 23:34:20 +03:00
|
|
|
builtin_commit_graph_write_usage, 0);
|
2021-08-23 15:30:20 +03:00
|
|
|
if (argc)
|
|
|
|
usage_with_options(builtin_commit_graph_write_usage, options);
|
2018-04-02 23:34:20 +03:00
|
|
|
|
2018-06-27 16:24:45 +03:00
|
|
|
if (opts.reachable + opts.stdin_packs + opts.stdin_commits > 1)
|
|
|
|
die(_("use at most one of --reachable, --stdin-commits, or --stdin-packs"));
|
2018-04-02 23:34:20 +03:00
|
|
|
if (!opts.obj_dir)
|
|
|
|
opts.obj_dir = get_object_directory();
|
2019-06-12 16:29:38 +03:00
|
|
|
if (opts.append)
|
2019-08-05 11:02:39 +03:00
|
|
|
flags |= COMMIT_GRAPH_WRITE_APPEND;
|
2019-06-18 21:14:28 +03:00
|
|
|
if (opts.split)
|
2019-08-05 11:02:39 +03:00
|
|
|
flags |= COMMIT_GRAPH_WRITE_SPLIT;
|
2019-08-26 19:29:58 +03:00
|
|
|
if (opts.progress)
|
|
|
|
flags |= COMMIT_GRAPH_WRITE_PROGRESS;
|
2020-07-01 16:27:24 +03:00
|
|
|
if (!opts.enable_changed_paths)
|
|
|
|
flags |= COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS;
|
|
|
|
if (opts.enable_changed_paths == 1 ||
|
2020-04-06 19:59:55 +03:00
|
|
|
git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0))
|
2020-04-06 19:59:51 +03:00
|
|
|
flags |= COMMIT_GRAPH_WRITE_BLOOM_FILTERS;
|
2018-04-02 23:34:20 +03:00
|
|
|
|
2020-02-04 08:51:50 +03:00
|
|
|
odb = find_odb(the_repository, opts.obj_dir);
|
2018-08-20 21:24:27 +03:00
|
|
|
|
2019-06-18 21:14:32 +03:00
|
|
|
if (opts.reachable) {
|
2020-09-18 05:59:49 +03:00
|
|
|
if (write_commit_graph_reachable(odb, flags, &write_opts))
|
2019-06-18 21:14:32 +03:00
|
|
|
return 1;
|
|
|
|
return 0;
|
|
|
|
}
|
2018-06-27 16:24:45 +03:00
|
|
|
|
2020-05-18 22:27:09 +03:00
|
|
|
if (opts.stdin_packs) {
|
2018-06-27 16:24:44 +03:00
|
|
|
while (strbuf_getline(&buf, stdin) != EOF)
|
2022-03-04 21:32:12 +03:00
|
|
|
string_list_append_nodup(&pack_indexes,
|
|
|
|
strbuf_detach(&buf, NULL));
|
2020-05-18 22:27:09 +03:00
|
|
|
} else if (opts.stdin_commits) {
|
|
|
|
oidset_init(&commits, 0);
|
2020-05-14 00:59:44 +03:00
|
|
|
if (opts.progress)
|
|
|
|
progress = start_delayed_progress(
|
|
|
|
_("Collecting commits from input"), 0);
|
2020-05-18 22:27:09 +03:00
|
|
|
|
|
|
|
while (strbuf_getline(&buf, stdin) != EOF) {
|
2020-05-14 00:59:44 +03:00
|
|
|
if (read_one_commit(&commits, progress, buf.buf)) {
|
2020-05-18 22:27:09 +03:00
|
|
|
result = 1;
|
|
|
|
goto cleanup;
|
2020-04-14 07:04:25 +03:00
|
|
|
}
|
2019-08-05 11:02:40 +03:00
|
|
|
}
|
2020-05-14 00:59:44 +03:00
|
|
|
|
commit-graph: fix "Collecting commits from input" progress line
To display a progress line while reading commits from standard input
and looking them up, 5b6653e523 (builtin/commit-graph.c: dereference
tags in builtin, 2020-05-13) should have added a pair of
start_delayed_progress() and stop_progress() calls around the loop
reading stdin. Alas, the stop_progress() call ended up at the wrong
place, after write_commit_graph(), which does all the commit-graph
computation and writing, and has several progress lines of its own.
Consequently, that new
Collecting commits from input: 1234
progress line is overwritten by the first progress line shown by
write_commit_graph(), and its final "done" line is shown last, after
everything is finished:
$ { sleep 3 ; git rev-list -3 HEAD ; sleep 1 ; } | ~/src/git/git commit-graph write --stdin-commits
Expanding reachable commits in commit graph: 873402, done.
Writing out commit graph in 4 passes: 100% (3493608/3493608), done.
Collecting commits from input: 3, done.
Furthermore, that stop_progress() call was added after the 'cleanup'
label, where that loop reading stdin jumps in case of an error. In
case of invalid input this then results in the "done" line shown after
the error message:
$ { sleep 3 ; git rev-list -3 HEAD ; echo junk ; } | ~/src/git/git commit-graph write --stdin-commits
error: unexpected non-hex object ID: junk
Collecting commits from input: 3, done.
Move that stop_progress() call to the right place.
While at it, drop the unnecessary 'if (progress)' condition protecting
the stop_progress() call, because that function is prepared to handle
a NULL progress struct.
Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Reviewed-by: Derrick Stolee <stolee@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-07-10 22:02:38 +03:00
|
|
|
stop_progress(&progress);
|
2018-04-10 15:56:06 +03:00
|
|
|
}
|
|
|
|
|
2020-02-04 08:51:50 +03:00
|
|
|
if (write_commit_graph(odb,
|
2020-05-18 22:27:09 +03:00
|
|
|
opts.stdin_packs ? &pack_indexes : NULL,
|
2020-04-14 07:04:25 +03:00
|
|
|
opts.stdin_commits ? &commits : NULL,
|
2019-06-18 21:14:32 +03:00
|
|
|
flags,
|
2020-09-18 05:59:49 +03:00
|
|
|
&write_opts))
|
2019-06-12 16:29:37 +03:00
|
|
|
result = 1;
|
2018-04-10 15:56:06 +03:00
|
|
|
|
2020-05-18 22:27:09 +03:00
|
|
|
cleanup:
|
2021-08-23 15:30:17 +03:00
|
|
|
FREE_AND_NULL(options);
|
2020-05-18 22:27:09 +03:00
|
|
|
string_list_clear(&pack_indexes, 0);
|
|
|
|
strbuf_release(&buf);
|
2019-06-12 16:29:37 +03:00
|
|
|
return result;
|
2018-04-02 23:34:20 +03:00
|
|
|
}
|
2018-04-02 23:34:18 +03:00
|
|
|
|
|
|
|
int cmd_commit_graph(int argc, const char **argv, const char *prefix)
|
|
|
|
{
|
2022-08-19 19:04:02 +03:00
|
|
|
parse_opt_subcommand_fn *fn = NULL;
|
|
|
|
struct option builtin_commit_graph_options[] = {
|
|
|
|
OPT_SUBCOMMAND("verify", &fn, graph_verify),
|
|
|
|
OPT_SUBCOMMAND("write", &fn, graph_write),
|
|
|
|
OPT_END(),
|
|
|
|
};
|
|
|
|
struct option *options = parse_options_concat(builtin_commit_graph_options, common_opts);
|
2018-04-02 23:34:18 +03:00
|
|
|
|
|
|
|
git_config(git_default_config, NULL);
|
|
|
|
|
2021-10-15 02:37:16 +03:00
|
|
|
read_replace_refs = 0;
|
commit-graph: turn off save_commit_buffer
The commit-graph tool may read a lot of commits, but it only cares about
parsing their metadata (parents, trees, etc) and doesn't ever show the
messages to the user. And so it should not need save_commit_buffer,
which is meant for holding onto the object data of parsed commits so
that we can show them later. In fact, it's quite harmful to do so.
According to massif, the max heap of "git commit-graph write
--reachable" in linux.git before/after this patch (removing the commit
graph file in between) goes from ~1.1GB to ~270MB.
Which isn't surprising, since the difference is about the sum of the
uncompressed sizes of all commits in the repository, and this was
equivalent to leaking them.
This obviously helps if you're under memory pressure, but even without
it, things go faster. My before/after times for that command (without
massif) went from 12.521s to 11.874s, a speedup of ~5%.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-07 08:04:40 +03:00
|
|
|
save_commit_buffer = 0;
|
|
|
|
|
2022-08-19 19:04:02 +03:00
|
|
|
argc = parse_options(argc, argv, prefix, options,
|
|
|
|
builtin_commit_graph_usage, 0);
|
|
|
|
FREE_AND_NULL(options);
|
2018-04-02 23:34:20 +03:00
|
|
|
|
2022-08-19 19:04:02 +03:00
|
|
|
return fn(argc, argv, prefix);
|
2018-04-02 23:34:18 +03:00
|
|
|
}
|