From 2fa05f31bd95eed34d6e8848b6b1e6265674f50a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2020 22:04:04 -0600 Subject: [PATCH 1/8] t/helper/test-read-graph.c: support commit-graph chains In 61df89c8e5 (commit-graph: don't early exit(1) on e.g. "git status", 2019-03-25), the former 'load_commit_graph_one' was refactored into 'open_commit_graph' and 'load_commit_graph_one_fd_st' as a means of avoiding an early-exit from non-library code. However, 'load_commit_graph_one' does not support commit-graph chains, and hence the 'read-graph' test tool does not work with them. Replace 'load_commit_graph_one' with 'read_commit_graph_one' in order to support commit-graph chains. In the spirit of 61df89c8e5, 'read_commit_graph_one' does not ever 'die()', making it a suitable replacement here. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/helper/test-read-graph.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/t/helper/test-read-graph.c b/t/helper/test-read-graph.c index f8a461767c..4846040363 100644 --- a/t/helper/test-read-graph.c +++ b/t/helper/test-read-graph.c @@ -7,26 +7,15 @@ int cmd__read_graph(int argc, const char **argv) { struct commit_graph *graph = NULL; - char *graph_name; - int open_ok; - int fd; - struct stat st; struct object_directory *odb; setup_git_directory(); odb = the_repository->objects->odb; - graph_name = get_commit_graph_filename(odb); - - open_ok = open_commit_graph(graph_name, &fd, &st); - if (!open_ok) - die_errno(_("Could not open commit-graph '%s'"), graph_name); - - graph = load_commit_graph_one_fd_st(fd, &st, odb); + graph = read_commit_graph_one(the_repository, odb); if (!graph) return 1; - FREE_AND_NULL(graph_name); printf("header: %08x %d %d %d %d\n", ntohl(*(uint32_t*)graph->data), From 4f027355f6b6b5b2ba69e23ff50cb7313d13dd70 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2020 22:04:08 -0600 Subject: [PATCH 2/8] builtin/commit-graph.c: support for '--split[=]' With '--split', the commit-graph machinery writes new commits in another incremental commit-graph which is part of the existing chain, and optionally decides to condense the chain into a single commit-graph. This is done to ensure that the asymptotic behavior of looking up a commit in an incremental chain is not dominated by the number of incrementals in that chain. It can be controlled by the '--max-commits' and '--size-multiple' options. In the next two commits, we will introduce additional splitting strategies that can exert additional control over: - when a split commit-graph is and isn't written, and - when the existing commit-graph chain is discarded completely and replaced with another graph To prepare for this, make '--split' take an optional strategy (as in '--split[=]'), and add a new enum to describe which strategy is being used. For now, no strategies are given, and the only enumerated value is 'COMMIT_GRAPH_SPLIT_UNSPECIFIED', indicating the absence of a strategy. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 11 ++++++----- builtin/commit-graph.c | 26 ++++++++++++++++++++++---- commit-graph.h | 5 +++++ 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 28d1fee505..10d757c5cc 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -57,11 +57,12 @@ or `--stdin-packs`.) With the `--append` option, include all commits that are present in the existing commit-graph file. + -With the `--split` option, write the commit-graph as a chain of multiple -commit-graph files stored in `/info/commit-graphs`. The new commits -not already in the commit-graph are added in a new "tip" file. This file -is merged with the existing file if the following merge conditions are -met: +With the `--split[=]` option, write the commit-graph as a +chain of multiple commit-graph files stored in +`/info/commit-graphs`. Commit-graph layers are merged based on the +strategy and other splitting options. The new commits not already in the +commit-graph are added in a new "tip" file. This file is merged with the +existing file if the following merge conditions are met: + * If `--size-multiple=` is not specified, let `X` equal 2. If the new tip file would have `N` commits and the previous tip has `M` commits and diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 4a70b33fb5..345fd97c61 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -9,7 +9,9 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph verify [--object-dir ] [--shallow] [--[no-]progress]"), - N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] [--[no-]progress] "), + N_("git commit-graph write [--object-dir ] [--append] " + "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " + "[--[no-]progress] "), NULL }; @@ -19,7 +21,9 @@ static const char * const builtin_commit_graph_verify_usage[] = { }; static const char * const builtin_commit_graph_write_usage[] = { - N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] [--[no-]progress] "), + N_("git commit-graph write [--object-dir ] [--append] " + "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " + "[--[no-]progress] "), NULL }; @@ -111,6 +115,18 @@ static int graph_verify(int argc, const char **argv) extern int read_replace_refs; static struct split_commit_graph_opts split_opts; +static int write_option_parse_split(const struct option *opt, const char *arg, + int unset) +{ + opts.split = 1; + if (!arg) + return 0; + + die(_("unrecognized --split argument, %s"), arg); + + return 0; +} + static int graph_write(int argc, const char **argv) { struct string_list *pack_indexes = NULL; @@ -133,8 +149,10 @@ static int graph_write(int argc, const char **argv) OPT_BOOL(0, "append", &opts.append, N_("include all commits already in the commit-graph file")), OPT_BOOL(0, "progress", &opts.progress, N_("force progress reporting")), - OPT_BOOL(0, "split", &opts.split, - N_("allow writing an incremental commit-graph file")), + OPT_CALLBACK_F(0, "split", &split_opts.flags, NULL, + N_("allow writing an incremental commit-graph file"), + PARSE_OPT_OPTARG | PARSE_OPT_NONEG, + write_option_parse_split), OPT_INTEGER(0, "max-commits", &split_opts.max_commits, N_("maximum number of commits in a non-base split commit-graph")), OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple, diff --git a/commit-graph.h b/commit-graph.h index e87a6f6360..e799008ff4 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -82,10 +82,15 @@ enum commit_graph_write_flags { COMMIT_GRAPH_WRITE_CHECK_OIDS = (1 << 3) }; +enum commit_graph_split_flags { + COMMIT_GRAPH_SPLIT_UNSPECIFIED = 0 +}; + struct split_commit_graph_opts { int size_multiple; int max_commits; timestamp_t expire_time; + enum commit_graph_split_flags flags; }; /* From fdbde82fe523374b3c5d1f0f01f3c43dcaca9465 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2020 22:04:12 -0600 Subject: [PATCH 3/8] builtin/commit-graph.c: introduce split strategy 'no-merge' In the previous commit, we laid the groundwork for supporting different splitting strategies. In this commit, we introduce the first splitting strategy: 'no-merge'. Passing '--split=no-merge' is useful for callers which wish to write a new incremental commit-graph, but do not want to spend effort condensing the incremental chain [1]. Previously, this was possible by passing '--size-multiple=0', but this no longer the case following 63020f175f (commit-graph: prefer default size_mult when given zero, 2020-01-02). When '--split=no-merge' is given, the commit-graph machinery will never condense an existing chain, and it will always write a new incremental. [1]: This might occur when, for example, a server administrator running some program after each push may want to ensure that each job runs proportional in time to the size of the push, and does not "jump" when the commit-graph machinery decides to trigger a merge. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 5 +++++ builtin/commit-graph.c | 7 ++++++- commit-graph.c | 19 ++++++++++++------- commit-graph.h | 3 ++- t/t5324-split-commit-graph.sh | 11 +++++++++++ 5 files changed, 36 insertions(+), 9 deletions(-) diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 10d757c5cc..a4c4a641e5 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -63,6 +63,11 @@ chain of multiple commit-graph files stored in strategy and other splitting options. The new commits not already in the commit-graph are added in a new "tip" file. This file is merged with the existing file if the following merge conditions are met: +* If `--split=no-merge` is specified, a merge is never performed, and +the remaining options are ignored. A bare `--split` defers to the +remaining options. (Note that merging a chain of commit graphs replaces +the existing chain with a length-1 chain where the first and only +incremental holds the entire graph). + * If `--size-multiple=` is not specified, let `X` equal 2. If the new tip file would have `N` commits and the previous tip has `M` commits and diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 345fd97c61..9234b95ecf 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -118,11 +118,16 @@ static struct split_commit_graph_opts split_opts; static int write_option_parse_split(const struct option *opt, const char *arg, int unset) { + enum commit_graph_split_flags *flags = opt->value; + opts.split = 1; if (!arg) return 0; - die(_("unrecognized --split argument, %s"), arg); + if (!strcmp(arg, "no-merge")) + *flags = COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED; + else + die(_("unrecognized --split argument, %s"), arg); return 0; } diff --git a/commit-graph.c b/commit-graph.c index 656dd647d5..71f488839c 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1529,6 +1529,7 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) { struct commit_graph *g; uint32_t num_commits; + enum commit_graph_split_flags flags = COMMIT_GRAPH_SPLIT_UNSPECIFIED; uint32_t i; int max_commits = 0; @@ -1539,21 +1540,25 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) if (ctx->split_opts->size_multiple) size_mult = ctx->split_opts->size_multiple; + + flags = ctx->split_opts->flags; } g = ctx->r->objects->commit_graph; num_commits = ctx->commits.nr; ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; - while (g && (g->num_commits <= size_mult * num_commits || - (max_commits && num_commits > max_commits))) { - if (g->odb != ctx->odb) - break; + if (flags != COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED) { + while (g && (g->num_commits <= size_mult * num_commits || + (max_commits && num_commits > max_commits))) { + if (g->odb != ctx->odb) + break; - num_commits += g->num_commits; - g = g->base_graph; + num_commits += g->num_commits; + g = g->base_graph; - ctx->num_commit_graphs_after--; + ctx->num_commit_graphs_after--; + } } ctx->new_base_graph = g; diff --git a/commit-graph.h b/commit-graph.h index e799008ff4..8752afb88d 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -83,7 +83,8 @@ enum commit_graph_write_flags { }; enum commit_graph_split_flags { - COMMIT_GRAPH_SPLIT_UNSPECIFIED = 0 + COMMIT_GRAPH_SPLIT_UNSPECIFIED = 0, + COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED = 1 }; struct split_commit_graph_opts { diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index c24823431f..29de47e834 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -344,4 +344,15 @@ test_expect_success 'split across alternate where alternate is not split' ' test_cmp commit-graph .git/objects/info/commit-graph ' +test_expect_success '--split=no-merge always writes an incremental' ' + test_when_finished rm -rf a b && + rm -rf $graphdir $infodir/commit-graph && + git reset --hard commits/2 && + git rev-list HEAD~1 >a && + git rev-list HEAD >b && + git commit-graph write --split --stdin-commits Date: Mon, 13 Apr 2020 22:04:17 -0600 Subject: [PATCH 4/8] builtin/commit-graph.c: introduce split strategy 'replace' When using split commit-graphs, it is sometimes useful to completely replace the commit-graph chain with a new base. For example, consider a scenario in which a repository builds a new commit-graph incremental for each push. Occasionally (say, after some fixed number of pushes), they may wish to rebuild the commit-graph chain with all reachable commits. They can do so with $ git commit-graph write --reachable but this removes the chain entirely and replaces it with a single commit-graph in 'objects/info/commit-graph'. Unfortunately, this means that the next push will have to move this commit-graph into the first layer of a new chain, and then write its new commits on top. Avoid such copying entirely by allowing the caller to specify that they wish to replace the entirety of their commit-graph chain, while also specifying that the new commit-graph should become the basis of a fresh, length-one chain. This addresses the above situation by making it possible for the caller to instead write: $ git commit-graph write --reachable --split=replace which writes a new length-one chain to 'objects/info/commit-graphs', making the commit-graph incremental generated by the subsequent push relatively cheap by avoiding the aforementioned copy. In order to do this, remove an assumption in 'write_commit_graph_file' that chains are always at least two incrementals long. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 7 ++-- builtin/commit-graph.c | 2 ++ commit-graph.c | 53 ++++++++++++++++++++++-------- commit-graph.h | 3 +- t/t5324-split-commit-graph.sh | 19 +++++++++++ 5 files changed, 66 insertions(+), 18 deletions(-) diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index a4c4a641e5..46f7f7c573 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -64,9 +64,10 @@ strategy and other splitting options. The new commits not already in the commit-graph are added in a new "tip" file. This file is merged with the existing file if the following merge conditions are met: * If `--split=no-merge` is specified, a merge is never performed, and -the remaining options are ignored. A bare `--split` defers to the -remaining options. (Note that merging a chain of commit graphs replaces -the existing chain with a length-1 chain where the first and only +the remaining options are ignored. `--split=replace` overwrites the +existing chain with a new one. A bare `--split` defers to the remaining +options. (Note that merging a chain of commit graphs replaces the +existing chain with a length-1 chain where the first and only incremental holds the entire graph). + * If `--size-multiple=` is not specified, let `X` equal 2. If the new diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 9234b95ecf..e0dd2876a1 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -126,6 +126,8 @@ static int write_option_parse_split(const struct option *opt, const char *arg, if (!strcmp(arg, "no-merge")) *flags = COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED; + else if (!strcmp(arg, "replace")) + *flags = COMMIT_GRAPH_SPLIT_REPLACE; else die(_("unrecognized --split argument, %s"), arg); diff --git a/commit-graph.c b/commit-graph.c index 71f488839c..36e1d6f9b3 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -866,7 +866,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, if (edge_value >= 0) edge_value += ctx->new_num_commits_in_base; - else { + else if (ctx->new_base_graph) { uint32_t pos; if (find_commit_in_graph(parent->item, ctx->new_base_graph, @@ -897,7 +897,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, if (edge_value >= 0) edge_value += ctx->new_num_commits_in_base; - else { + else if (ctx->new_base_graph) { uint32_t pos; if (find_commit_in_graph(parent->item, ctx->new_base_graph, @@ -964,7 +964,7 @@ static void write_graph_chunk_extra_edges(struct hashfile *f, if (edge_value >= 0) edge_value += ctx->new_num_commits_in_base; - else { + else if (ctx->new_base_graph) { uint32_t pos; if (find_commit_in_graph(parent->item, ctx->new_base_graph, @@ -1037,6 +1037,8 @@ static void close_reachable(struct write_commit_graph_context *ctx) { int i; struct commit *commit; + enum commit_graph_split_flags flags = ctx->split_opts ? + ctx->split_opts->flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED; if (ctx->report_progress) ctx->progress = start_delayed_progress( @@ -1066,8 +1068,9 @@ static void close_reachable(struct write_commit_graph_context *ctx) if (!commit) continue; if (ctx->split) { - if (!parse_commit(commit) && - commit->graph_pos == COMMIT_NOT_FROM_GRAPH) + if ((!parse_commit(commit) && + commit->graph_pos == COMMIT_NOT_FROM_GRAPH) || + flags == COMMIT_GRAPH_SPLIT_REPLACE) add_missing_parents(ctx, commit); } else if (!parse_commit_no_graph(commit)) add_missing_parents(ctx, commit); @@ -1287,6 +1290,8 @@ static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx) static void copy_oids_to_commits(struct write_commit_graph_context *ctx) { uint32_t i; + enum commit_graph_split_flags flags = ctx->split_opts ? + ctx->split_opts->flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED; ctx->num_extra_edges = 0; if (ctx->report_progress) @@ -1303,11 +1308,14 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx) ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, ctx->commits.alloc); ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]); - if (ctx->split && + if (ctx->split && flags != COMMIT_GRAPH_SPLIT_REPLACE && ctx->commits.list[ctx->commits.nr]->graph_pos != COMMIT_NOT_FROM_GRAPH) continue; - parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]); + if (ctx->split && flags == COMMIT_GRAPH_SPLIT_REPLACE) + parse_commit(ctx->commits.list[ctx->commits.nr]); + else + parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]); num_parents = commit_list_count(ctx->commits.list[ctx->commits.nr]->parents); if (num_parents > 2) @@ -1488,8 +1496,12 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) } if (ctx->base_graph_name) { - const char *dest = ctx->commit_graph_filenames_after[ - ctx->num_commit_graphs_after - 2]; + const char *dest; + int idx = ctx->num_commit_graphs_after - 1; + if (ctx->num_commit_graphs_after > 1) + idx--; + + dest = ctx->commit_graph_filenames_after[idx]; if (strcmp(ctx->base_graph_name, dest)) { result = rename(ctx->base_graph_name, dest); @@ -1546,9 +1558,13 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) g = ctx->r->objects->commit_graph; num_commits = ctx->commits.nr; - ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; + if (flags == COMMIT_GRAPH_SPLIT_REPLACE) + ctx->num_commit_graphs_after = 1; + else + ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; - if (flags != COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED) { + if (flags != COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED && + flags != COMMIT_GRAPH_SPLIT_REPLACE) { while (g && (g->num_commits <= size_mult * num_commits || (max_commits && num_commits > max_commits))) { if (g->odb != ctx->odb) @@ -1561,7 +1577,11 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) } } - ctx->new_base_graph = g; + if (flags != COMMIT_GRAPH_SPLIT_REPLACE) + ctx->new_base_graph = g; + else if (ctx->num_commit_graphs_after != 1) + BUG("split_graph_merge_strategy: num_commit_graphs_after " + "should be 1 with --split=replace"); if (ctx->num_commit_graphs_after == 2) { char *old_graph_name = get_commit_graph_filename(g->odb); @@ -1772,6 +1792,7 @@ int write_commit_graph(struct object_directory *odb, struct write_commit_graph_context *ctx; uint32_t i, count_distinct = 0; int res = 0; + int replace = 0; if (!commit_graph_compatible(the_repository)) return 0; @@ -1806,6 +1827,9 @@ int write_commit_graph(struct object_directory *odb, g = g->base_graph; } } + + if (ctx->split_opts) + replace = ctx->split_opts->flags & COMMIT_GRAPH_SPLIT_REPLACE; } ctx->approx_nr_objects = approximate_object_count(); @@ -1866,13 +1890,14 @@ int write_commit_graph(struct object_directory *odb, goto cleanup; } - if (!ctx->commits.nr) + if (!ctx->commits.nr && !replace) goto cleanup; if (ctx->split) { split_graph_merge_strategy(ctx); - merge_commit_graphs(ctx); + if (!replace) + merge_commit_graphs(ctx); } else ctx->num_commit_graphs_after = 1; diff --git a/commit-graph.h b/commit-graph.h index 8752afb88d..718433d79b 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -84,7 +84,8 @@ enum commit_graph_write_flags { enum commit_graph_split_flags { COMMIT_GRAPH_SPLIT_UNSPECIFIED = 0, - COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED = 1 + COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED = 1, + COMMIT_GRAPH_SPLIT_REPLACE = 2 }; struct split_commit_graph_opts { diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 29de47e834..87a9148fbe 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -355,4 +355,23 @@ test_expect_success '--split=no-merge always writes an incremental' ' test_line_count = 2 $graphdir/commit-graph-chain ' +test_expect_success '--split=replace replaces the chain' ' + rm -rf $graphdir $infodir/commit-graph && + git reset --hard commits/3 && + git rev-list -1 HEAD~2 >a && + git rev-list -1 HEAD~1 >b && + git rev-list -1 HEAD >c && + git commit-graph write --split=no-merge --stdin-commits graph-files && + test_line_count = 1 graph-files && + verify_chain_files_exist $graphdir && + graph_read_expect 2 +' + test_done From f4781068faafad5e10fb6e8ae312ebd5b0e7bb55 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2020 22:04:22 -0600 Subject: [PATCH 5/8] oidset: introduce 'oidset_size' Occasionally, it may be useful for callers to know the number of object IDs in an oidset. Right now, the only way to compute this is to call 'kh_size' on the internal 'kh_set_oid_t'. Similar to how we wrap other 'kh_*' functions over the 'oidset' type, let's allow callers to compute this value by introducing 'oidset_size'. We will add its first caller in the subsequent commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- oidset.c | 5 +++++ oidset.h | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/oidset.c b/oidset.c index f63ce818f6..15d4e18c37 100644 --- a/oidset.c +++ b/oidset.c @@ -36,6 +36,11 @@ void oidset_clear(struct oidset *set) oidset_init(set, 0); } +int oidset_size(struct oidset *set) +{ + return kh_size(&set->set); +} + void oidset_parse_file(struct oidset *set, const char *path) { FILE *fp; diff --git a/oidset.h b/oidset.h index 5346563b0b..80c3a347f9 100644 --- a/oidset.h +++ b/oidset.h @@ -55,6 +55,11 @@ int oidset_insert(struct oidset *set, const struct object_id *oid); */ int oidset_remove(struct oidset *set, const struct object_id *oid); +/** + * Returns the number of oids in the set. + */ +int oidset_size(struct oidset *set); + /** * Remove all entries from the oidset, freeing any resources associated with * it. From 6830c360777468434184f60023e2562348c9dacc Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2020 22:04:25 -0600 Subject: [PATCH 6/8] commit-graph.h: replace 'commit_hex' with 'commits' The 'write_commit_graph()' function takes in either a string list of pack indices, or a string list of hexadecimal commit OIDs. These correspond to the '--stdin-packs' and '--stdin-commits' mode(s) from 'git commit-graph write'. Using a string_list of hexadecimal commit IDs is not the most efficient use of memory, since we can instead use the 'struct oidset', which is more well-suited for this case. This has another benefit which will become apparent in the following commit. This is that we are about to disambiguate the kinds of errors we produce with '--stdin-commits' into "non-hex input" and "hex-input, but referring to a non-commit object". By having 'write_commit_graph' take in a 'struct oidset *' of commits, we place the burden on the caller (in this case, the builtin) to handle the first case, and the commit-graph machinery can handle the second case. Suggested-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 19 ++++++++++--- commit-graph.c | 59 +++++++++++++++++++++++------------------ commit-graph.h | 3 ++- t/t5318-commit-graph.sh | 2 +- 4 files changed, 52 insertions(+), 31 deletions(-) diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index e0dd2876a1..075f8f6928 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -137,7 +137,7 @@ static int write_option_parse_split(const struct option *opt, const char *arg, static int graph_write(int argc, const char **argv) { struct string_list *pack_indexes = NULL; - struct string_list *commit_hex = NULL; + struct oidset commits = OIDSET_INIT; struct object_directory *odb = NULL; struct string_list lines; int result = 0; @@ -210,7 +210,20 @@ static int graph_write(int argc, const char **argv) if (opts.stdin_packs) pack_indexes = &lines; if (opts.stdin_commits) { - commit_hex = &lines; + struct string_list_item *item; + oidset_init(&commits, lines.nr); + for_each_string_list_item(item, &lines) { + struct object_id oid; + const char *end; + + if (parse_oid_hex(item->string, &oid, &end)) { + error(_("unexpected non-hex object ID: " + "%s"), item->string); + return 1; + } + + oidset_insert(&commits, &oid); + } flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; } @@ -219,7 +232,7 @@ static int graph_write(int argc, const char **argv) if (write_commit_graph(odb, pack_indexes, - commit_hex, + opts.stdin_commits ? &commits : NULL, flags, &split_opts)) result = 1; diff --git a/commit-graph.c b/commit-graph.c index 36e1d6f9b3..bf86e4a92b 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1136,13 +1136,13 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); } -static int add_ref_to_list(const char *refname, - const struct object_id *oid, - int flags, void *cb_data) +static int add_ref_to_set(const char *refname, + const struct object_id *oid, + int flags, void *cb_data) { - struct string_list *list = (struct string_list *)cb_data; + struct oidset *commits = (struct oidset *)cb_data; - string_list_append(list, oid_to_hex(oid)); + oidset_insert(commits, oid); return 0; } @@ -1150,14 +1150,14 @@ int write_commit_graph_reachable(struct object_directory *odb, enum commit_graph_write_flags flags, const struct split_commit_graph_opts *split_opts) { - struct string_list list = STRING_LIST_INIT_DUP; + struct oidset commits = OIDSET_INIT; int result; - for_each_ref(add_ref_to_list, &list); - result = write_commit_graph(odb, NULL, &list, + for_each_ref(add_ref_to_set, &commits); + result = write_commit_graph(odb, NULL, &commits, flags, split_opts); - string_list_clear(&list, 0); + oidset_clear(&commits); return result; } @@ -1206,39 +1206,46 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx, return 0; } -static int fill_oids_from_commit_hex(struct write_commit_graph_context *ctx, - struct string_list *commit_hex) +static int fill_oids_from_commits(struct write_commit_graph_context *ctx, + struct oidset *commits) { - uint32_t i; + uint32_t i = 0; struct strbuf progress_title = STRBUF_INIT; + struct oidset_iter iter; + struct object_id *oid; + + if (!oidset_size(commits)) + return 0; if (ctx->report_progress) { strbuf_addf(&progress_title, Q_("Finding commits for commit graph from %d ref", "Finding commits for commit graph from %d refs", - commit_hex->nr), - commit_hex->nr); + oidset_size(commits)), + oidset_size(commits)); ctx->progress = start_delayed_progress( progress_title.buf, - commit_hex->nr); + oidset_size(commits)); } - for (i = 0; i < commit_hex->nr; i++) { - const char *end; - struct object_id oid; + + oidset_iter_init(commits, &iter); + while ((oid = oidset_iter_next(&iter))) { struct commit *result; - display_progress(ctx->progress, i + 1); - if (!parse_oid_hex(commit_hex->items[i].string, &oid, &end) && - (result = lookup_commit_reference_gently(ctx->r, &oid, 1))) { + display_progress(ctx->progress, ++i); + + result = lookup_commit_reference_gently(ctx->r, oid, 1); + if (result) { ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc); oidcpy(&ctx->oids.list[ctx->oids.nr], &(result->object.oid)); ctx->oids.nr++; } else if (ctx->check_oids) { error(_("invalid commit object id: %s"), - commit_hex->items[i].string); + oid_to_hex(oid)); return -1; } } + stop_progress(&ctx->progress); strbuf_release(&progress_title); @@ -1785,7 +1792,7 @@ out: int write_commit_graph(struct object_directory *odb, struct string_list *pack_indexes, - struct string_list *commit_hex, + struct oidset *commits, enum commit_graph_write_flags flags, const struct split_commit_graph_opts *split_opts) { @@ -1861,12 +1868,12 @@ int write_commit_graph(struct object_directory *odb, goto cleanup; } - if (commit_hex) { - if ((res = fill_oids_from_commit_hex(ctx, commit_hex))) + if (commits) { + if ((res = fill_oids_from_commits(ctx, commits))) goto cleanup; } - if (!pack_indexes && !commit_hex) + if (!pack_indexes && !commits) fill_oids_from_all_packs(ctx); close_reachable(ctx); diff --git a/commit-graph.h b/commit-graph.h index 718433d79b..98ef121924 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -6,6 +6,7 @@ #include "string-list.h" #include "cache.h" #include "object-store.h" +#include "oidset.h" #define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH" #define GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD" @@ -106,7 +107,7 @@ int write_commit_graph_reachable(struct object_directory *odb, const struct split_commit_graph_opts *split_opts); int write_commit_graph(struct object_directory *odb, struct string_list *pack_indexes, - struct string_list *commit_hex, + struct oidset *commits, enum commit_graph_write_flags flags, const struct split_commit_graph_opts *split_opts); diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 0bf98b56ec..69599cea7f 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -43,7 +43,7 @@ test_expect_success 'create commits and repack' ' test_expect_success 'exit with correct error on bad input to --stdin-commits' ' cd "$TRASH_DIRECTORY/full" && echo HEAD | test_expect_code 1 git commit-graph write --stdin-commits 2>stderr && - test_i18ngrep "invalid commit object id" stderr && + test_i18ngrep "unexpected non-hex object ID: HEAD" stderr && # valid tree OID, but not a commit OID git rev-parse HEAD^{tree} | test_expect_code 1 git commit-graph write --stdin-commits 2>stderr && test_i18ngrep "invalid commit object id" stderr From 7a9ce0269bc0f4ef230f930b3910b70ac3142552 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 14 Apr 2020 22:31:37 -0600 Subject: [PATCH 7/8] commit-graph.c: introduce '--[no-]check-oids' When operating on a stream of commit OIDs on stdin, 'git commit-graph write' checks that each OID refers to an object that is indeed a commit. This is convenient to make sure that the given input is well-formed, but can sometimes be undesirable. For example, server operators may wish to feed the refnames that were updated during a push to 'git commit-graph write --input=stdin-commits', and silently discard refs that don't point at commits. This can be done by combing the output of 'git for-each-ref' with '--format %(*objecttype)', but this requires opening up a potentially large number of objects. Instead, it is more convenient to feed the updated refs to the commit-graph machinery, and let it throw out refs that don't point to commits. Introduce '--[no-]check-oids' to make such a behavior possible. With '--check-oids' (the default behavior to retain backwards compatibility), 'git commit-graph write' will barf on a non-commit line in its input. With 'no-check-oids', such lines will be silently ignored, making the above possible by specifying this option. No matter which is supplied, 'git commit-graph write' retains the behavior from the previous commit of rejecting non-OID inputs like "HEAD" and "refs/heads/foo" as before. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 5 +++++ builtin/commit-graph.c | 11 ++++++++--- commit-graph.c | 2 +- t/t5318-commit-graph.sh | 28 ++++++++++++++++++++++++++++ 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 46f7f7c573..91e8027b86 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -82,6 +82,11 @@ tip with the previous tip. Finally, if `--expire-time=` is not specified, let `datetime` be the current time. After writing the split commit-graph, delete all unused commit-graph whose modified times are older than `datetime`. ++ +The `--[no-]check-oids` option decides whether or not OIDs are required +to be commits. By default, `--check-oids` is implied, generating an +error on non-commit objects. If `--no-check-oids` is given, non-commits +are silently discarded. 'verify':: diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 075f8f6928..2857153008 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -11,7 +11,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph verify [--object-dir ] [--shallow] [--[no-]progress]"), N_("git commit-graph write [--object-dir ] [--append] " "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " - "[--[no-]progress] "), + "[--[no-]progress] [--[no-]check-oids] "), NULL }; @@ -23,7 +23,7 @@ static const char * const builtin_commit_graph_verify_usage[] = { static const char * const builtin_commit_graph_write_usage[] = { N_("git commit-graph write [--object-dir ] [--append] " "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " - "[--[no-]progress] "), + "[--[no-]progress] [--[no-]check-oids] "), NULL }; @@ -36,6 +36,7 @@ static struct opts_commit_graph { int split; int shallow; int progress; + int check_oids; } opts; static struct object_directory *find_odb(struct repository *r, @@ -160,6 +161,8 @@ static int graph_write(int argc, const char **argv) N_("allow writing an incremental commit-graph file"), PARSE_OPT_OPTARG | PARSE_OPT_NONEG, write_option_parse_split), + OPT_BOOL(0, "check-oids", &opts.check_oids, + N_("require OIDs to be commits")), OPT_INTEGER(0, "max-commits", &split_opts.max_commits, N_("maximum number of commits in a non-base split commit-graph")), OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple, @@ -170,6 +173,7 @@ static int graph_write(int argc, const char **argv) }; opts.progress = isatty(2); + opts.check_oids = 1; split_opts.size_multiple = 2; split_opts.max_commits = 0; split_opts.expire_time = 0; @@ -224,7 +228,8 @@ static int graph_write(int argc, const char **argv) oidset_insert(&commits, &oid); } - flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; + if (opts.check_oids) + flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; } UNLEAK(buf); diff --git a/commit-graph.c b/commit-graph.c index bf86e4a92b..af677fc98e 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -145,7 +145,7 @@ static int verify_commit_graph_lite(struct commit_graph *g) * * There should only be very basic checks here to ensure that * we don't e.g. segfault in fill_commit_in_graph(), but - * because this is a very hot codepath nothing that e.g. loops + e because this is a very hot codepath nothing that e.g. loops * over g->num_commits, or runs a checksum on the commit-graph * itself. */ diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 69599cea7f..23c7b7e036 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -49,6 +49,34 @@ test_expect_success 'exit with correct error on bad input to --stdin-commits' ' test_i18ngrep "invalid commit object id" stderr ' +graph_expect_commits() { + test-tool read-graph >got + if ! grep "num_commits: $1" got + then + echo "graph_expect_commits: expected $1 commit(s), got:" + cat got + false + fi +} + +test_expect_success 'ignores non-commit OIDs to --input=stdin-commits with --no-check-oids' ' + test_when_finished rm -rf "$objdir/info/commit-graph" && + cd "$TRASH_DIRECTORY/full" && + # write a graph to ensure layers are/are not added appropriately + git rev-parse HEAD~1 >base && + git commit-graph write --stdin-commits bad && + test_expect_code 1 git commit-graph write --stdin-commits err && + test_i18ngrep "unexpected non-hex object ID: HEAD" err && + graph_expect_commits 2 && + # update with valid commit OID, ignore tree OID + git rev-parse HEAD HEAD^{tree} >in && + git commit-graph write --stdin-commits --no-check-oids output git -c core.commitGraph=false $1 >expect From dbd5e0a1861c5bb1446e5518173aa1760c6617b0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 29 Apr 2020 12:44:40 -0700 Subject: [PATCH 8/8] Revert "commit-graph.c: introduce '--[no-]check-oids'" This reverts commit 7a9ce0269bc0f4ef230f930b3910b70ac3142552, which has not yet gained consensus. --- Documentation/git-commit-graph.txt | 5 ----- builtin/commit-graph.c | 11 +++-------- commit-graph.c | 2 +- t/t5318-commit-graph.sh | 28 ---------------------------- 4 files changed, 4 insertions(+), 42 deletions(-) diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 91e8027b86..46f7f7c573 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -82,11 +82,6 @@ tip with the previous tip. Finally, if `--expire-time=` is not specified, let `datetime` be the current time. After writing the split commit-graph, delete all unused commit-graph whose modified times are older than `datetime`. -+ -The `--[no-]check-oids` option decides whether or not OIDs are required -to be commits. By default, `--check-oids` is implied, generating an -error on non-commit objects. If `--no-check-oids` is given, non-commits -are silently discarded. 'verify':: diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 2857153008..075f8f6928 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -11,7 +11,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph verify [--object-dir ] [--shallow] [--[no-]progress]"), N_("git commit-graph write [--object-dir ] [--append] " "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " - "[--[no-]progress] [--[no-]check-oids] "), + "[--[no-]progress] "), NULL }; @@ -23,7 +23,7 @@ static const char * const builtin_commit_graph_verify_usage[] = { static const char * const builtin_commit_graph_write_usage[] = { N_("git commit-graph write [--object-dir ] [--append] " "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " - "[--[no-]progress] [--[no-]check-oids] "), + "[--[no-]progress] "), NULL }; @@ -36,7 +36,6 @@ static struct opts_commit_graph { int split; int shallow; int progress; - int check_oids; } opts; static struct object_directory *find_odb(struct repository *r, @@ -161,8 +160,6 @@ static int graph_write(int argc, const char **argv) N_("allow writing an incremental commit-graph file"), PARSE_OPT_OPTARG | PARSE_OPT_NONEG, write_option_parse_split), - OPT_BOOL(0, "check-oids", &opts.check_oids, - N_("require OIDs to be commits")), OPT_INTEGER(0, "max-commits", &split_opts.max_commits, N_("maximum number of commits in a non-base split commit-graph")), OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple, @@ -173,7 +170,6 @@ static int graph_write(int argc, const char **argv) }; opts.progress = isatty(2); - opts.check_oids = 1; split_opts.size_multiple = 2; split_opts.max_commits = 0; split_opts.expire_time = 0; @@ -228,8 +224,7 @@ static int graph_write(int argc, const char **argv) oidset_insert(&commits, &oid); } - if (opts.check_oids) - flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; + flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; } UNLEAK(buf); diff --git a/commit-graph.c b/commit-graph.c index af677fc98e..bf86e4a92b 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -145,7 +145,7 @@ static int verify_commit_graph_lite(struct commit_graph *g) * * There should only be very basic checks here to ensure that * we don't e.g. segfault in fill_commit_in_graph(), but - e because this is a very hot codepath nothing that e.g. loops + * because this is a very hot codepath nothing that e.g. loops * over g->num_commits, or runs a checksum on the commit-graph * itself. */ diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 23c7b7e036..69599cea7f 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -49,34 +49,6 @@ test_expect_success 'exit with correct error on bad input to --stdin-commits' ' test_i18ngrep "invalid commit object id" stderr ' -graph_expect_commits() { - test-tool read-graph >got - if ! grep "num_commits: $1" got - then - echo "graph_expect_commits: expected $1 commit(s), got:" - cat got - false - fi -} - -test_expect_success 'ignores non-commit OIDs to --input=stdin-commits with --no-check-oids' ' - test_when_finished rm -rf "$objdir/info/commit-graph" && - cd "$TRASH_DIRECTORY/full" && - # write a graph to ensure layers are/are not added appropriately - git rev-parse HEAD~1 >base && - git commit-graph write --stdin-commits bad && - test_expect_code 1 git commit-graph write --stdin-commits err && - test_i18ngrep "unexpected non-hex object ID: HEAD" err && - graph_expect_commits 2 && - # update with valid commit OID, ignore tree OID - git rev-parse HEAD HEAD^{tree} >in && - git commit-graph write --stdin-commits --no-check-oids output git -c core.commitGraph=false $1 >expect