From 55abcb417bb5b769e2aacb92a1f05c6c58127cae Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:26 -0400 Subject: [PATCH 01/23] t5318-commit-graph.sh: use core.commitGraph The commit-graph tests should be checking that normal Git operations succeed and have matching output with and without the commit-graph feature enabled. However, the test was toggling 'core.graph' instead of the correct 'core.commitGraph' variable. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t5318-commit-graph.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 77d85aefe7..59d0be2877 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -28,8 +28,8 @@ test_expect_success 'create commits and repack' ' ' graph_git_two_modes() { - git -c core.graph=true $1 >output - git -c core.graph=false $1 >expect + git -c core.commitGraph=true $1 >output + git -c core.commitGraph=false $1 >expect test_cmp output expect } From 883e5c7fe90aa2ef9f6efd1ef476ca281d09e79b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:27 -0400 Subject: [PATCH 02/23] commit-graph: UNLEAK before die() Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 37420ae0fd..f0875b8bf3 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -51,8 +51,11 @@ static int graph_read(int argc, const char **argv) graph_name = get_commit_graph_filename(opts.obj_dir); graph = load_commit_graph_one(graph_name); - if (!graph) + if (!graph) { + UNLEAK(graph_name); die("graph file %s does not exist", graph_name); + } + FREE_AND_NULL(graph_name); printf("header: %08x %d %d %d %d\n", From 0e3b97cccbec2bd01eae4b3267bf00a9bfb277d8 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:28 -0400 Subject: [PATCH 03/23] commit-graph: fix GRAPH_MIN_SIZE The GRAPH_MIN_SIZE macro should be the smallest size of a parsable commit-graph file. However, the minimum number of chunks was wrong. It is possible to write a commit-graph file with zero commits, and that violates this macro's value. Rewrite the macro, and use extra macros to better explain the magic constants. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index b63a1fc85e..f83f6d2373 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -35,10 +35,11 @@ #define GRAPH_LAST_EDGE 0x80000000 +#define GRAPH_HEADER_SIZE 8 #define GRAPH_FANOUT_SIZE (4 * 256) #define GRAPH_CHUNKLOOKUP_WIDTH 12 -#define GRAPH_MIN_SIZE (5 * GRAPH_CHUNKLOOKUP_WIDTH + GRAPH_FANOUT_SIZE + \ - GRAPH_OID_LEN + 8) +#define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \ + + GRAPH_FANOUT_SIZE + GRAPH_OID_LEN) char *get_commit_graph_filename(const char *obj_dir) { From ee79705311c190e61ac35d67705d387fdeae8c21 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:29 -0400 Subject: [PATCH 04/23] commit-graph: parse commit from chosen graph Before verifying a commit-graph file against the object database, we need to parse all commits from the given commit-graph file. Create parse_commit_in_graph_one() to target a given struct commit_graph. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index f83f6d2373..e77b19971d 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -314,7 +314,7 @@ static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uin } } -int parse_commit_in_graph(struct commit *item) +static int parse_commit_in_graph_one(struct commit_graph *g, struct commit *item) { uint32_t pos; @@ -322,9 +322,21 @@ int parse_commit_in_graph(struct commit *item) return 0; if (item->object.parsed) return 1; + + if (find_commit_in_graph(item, g, &pos)) + return fill_commit_in_graph(item, g, pos); + + return 0; +} + +int parse_commit_in_graph(struct commit *item) +{ + if (!core_commit_graph) + return 0; + prepare_commit_graph(); - if (commit_graph && find_commit_in_graph(item, commit_graph, &pos)) - return fill_commit_in_graph(item, commit_graph, pos); + if (commit_graph) + return parse_commit_in_graph_one(commit_graph, item); return 0; } From 9b19adac6f160c55427bd3d74293a94670487d83 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:30 -0400 Subject: [PATCH 05/23] commit: force commit to parse from object database In anticipation of verifying commit-graph file contents against the object database, create parse_commit_internal() to allow side-stepping the commit-graph file and parse directly from the object database. Due to the use of generation numbers, this method should not be called unless the intention is explicit in avoiding commits from the commit-graph file. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit.c | 10 ++++++++-- commit.h | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/commit.c b/commit.c index 0c3b75aeff..598cf21cee 100644 --- a/commit.c +++ b/commit.c @@ -418,7 +418,7 @@ int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long s return 0; } -int parse_commit_gently(struct commit *item, int quiet_on_missing) +int parse_commit_internal(struct commit *item, int quiet_on_missing, int use_commit_graph) { enum object_type type; void *buffer; @@ -429,7 +429,7 @@ int parse_commit_gently(struct commit *item, int quiet_on_missing) return -1; if (item->object.parsed) return 0; - if (parse_commit_in_graph(item)) + if (use_commit_graph && parse_commit_in_graph(item)) return 0; buffer = read_object_file(&item->object.oid, &type, &size); if (!buffer) @@ -441,6 +441,7 @@ int parse_commit_gently(struct commit *item, int quiet_on_missing) return error("Object %s not a commit", oid_to_hex(&item->object.oid)); } + ret = parse_commit_buffer(item, buffer, size, 0); if (save_commit_buffer && !ret) { set_commit_buffer(item, buffer, size); @@ -450,6 +451,11 @@ int parse_commit_gently(struct commit *item, int quiet_on_missing) return ret; } +int parse_commit_gently(struct commit *item, int quiet_on_missing) +{ + return parse_commit_internal(item, quiet_on_missing, 1); +} + void parse_commit_or_die(struct commit *item) { if (parse_commit(item)) diff --git a/commit.h b/commit.h index 3ad07c2e3d..7e0f273720 100644 --- a/commit.h +++ b/commit.h @@ -77,6 +77,7 @@ struct commit *lookup_commit_reference_by_name(const char *name); struct commit *lookup_commit_or_die(const struct object_id *oid, const char *ref_name); int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size, int check_graph); +int parse_commit_internal(struct commit *item, int quiet_on_missing, int use_commit_graph); int parse_commit_gently(struct commit *item, int quiet_on_missing); static inline int parse_commit(struct commit *item) { From 0cbef8f8ce6ff434011ecbf3d8d2d41ba6f10c70 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:31 -0400 Subject: [PATCH 06/23] commit-graph: load a root tree from specific graph When lazy-loading a tree for a commit, it will be important to select the tree from a specific struct commit_graph. Create a new method that specifies the commit-graph file and use that in get_commit_tree_in_graph(). Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index e77b19971d..9e228d3bb5 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -362,14 +362,20 @@ static struct tree *load_tree_for_commit(struct commit_graph *g, struct commit * return c->maybe_tree; } -struct tree *get_commit_tree_in_graph(const struct commit *c) +static struct tree *get_commit_tree_in_graph_one(struct commit_graph *g, + const struct commit *c) { if (c->maybe_tree) return c->maybe_tree; if (c->graph_pos == COMMIT_NOT_FROM_GRAPH) - BUG("get_commit_tree_in_graph called from non-commit-graph commit"); + BUG("get_commit_tree_in_graph_one called from non-commit-graph commit"); - return load_tree_for_commit(commit_graph, (struct commit *)c); + return load_tree_for_commit(g, (struct commit *)c); +} + +struct tree *get_commit_tree_in_graph(const struct commit *c) +{ + return get_commit_tree_in_graph_one(commit_graph, c); } static void write_graph_chunk_fanout(struct hashfile *f, From 283e68c72f49e6cfbae53cb5547d5b399ed25d1a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:32 -0400 Subject: [PATCH 07/23] commit-graph: add 'verify' subcommand If the commit-graph file becomes corrupt, we need a way to verify that its contents match the object database. In the manner of 'git fsck' we will implement a 'git commit-graph verify' subcommand to report all issues with the file. Add the 'verify' subcommand to the 'commit-graph' builtin and its documentation. The subcommand is currently a no-op except for loading the commit-graph into memory, which may trigger run-time errors that would be caught by normal use. Add a simple test that ensures the command returns a zero error code. If no commit-graph file exists, this is an acceptable state. Do not report any errors. Helped-by: Ramsay Jones Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 6 +++++ builtin/commit-graph.c | 39 ++++++++++++++++++++++++++++++ commit-graph.c | 23 ++++++++++++++++++ commit-graph.h | 3 +++ t/t5318-commit-graph.sh | 10 ++++++++ 5 files changed, 81 insertions(+) diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 4c97b555cc..a222cfab08 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -10,6 +10,7 @@ SYNOPSIS -------- [verse] 'git commit-graph read' [--object-dir ] +'git commit-graph verify' [--object-dir ] 'git commit-graph write' [--object-dir ] @@ -52,6 +53,11 @@ existing commit-graph file. Read a graph file given by the commit-graph file and output basic details about the graph file. Used for debugging purposes. +'verify':: + +Read the commit-graph file and verify its contents against the object +database. Used to check for corrupted data. + EXAMPLES -------- diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index f0875b8bf3..9d108f43a9 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -3,15 +3,22 @@ #include "dir.h" #include "lockfile.h" #include "parse-options.h" +#include "repository.h" #include "commit-graph.h" static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph [--object-dir ]"), N_("git commit-graph read [--object-dir ]"), + N_("git commit-graph verify [--object-dir ]"), N_("git commit-graph write [--object-dir ] [--append] [--stdin-packs|--stdin-commits]"), NULL }; +static const char * const builtin_commit_graph_verify_usage[] = { + N_("git commit-graph verify [--object-dir ]"), + NULL +}; + static const char * const builtin_commit_graph_read_usage[] = { N_("git commit-graph read [--object-dir ]"), NULL @@ -29,6 +36,36 @@ static struct opts_commit_graph { int append; } opts; + +static int graph_verify(int argc, const char **argv) +{ + struct commit_graph *graph = NULL; + char *graph_name; + + static struct option builtin_commit_graph_verify_options[] = { + OPT_STRING(0, "object-dir", &opts.obj_dir, + N_("dir"), + N_("The object directory to store the graph")), + OPT_END(), + }; + + argc = parse_options(argc, argv, NULL, + builtin_commit_graph_verify_options, + builtin_commit_graph_verify_usage, 0); + + if (!opts.obj_dir) + opts.obj_dir = get_object_directory(); + + graph_name = get_commit_graph_filename(opts.obj_dir); + graph = load_commit_graph_one(graph_name); + FREE_AND_NULL(graph_name); + + if (!graph) + return 0; + + return verify_commit_graph(the_repository, graph); +} + static int graph_read(int argc, const char **argv) { struct commit_graph *graph = NULL; @@ -165,6 +202,8 @@ int cmd_commit_graph(int argc, const char **argv, const char *prefix) if (argc > 0) { if (!strcmp(argv[0], "read")) return graph_read(argc, argv); + if (!strcmp(argv[0], "verify")) + return graph_verify(argc, argv); if (!strcmp(argv[0], "write")) return graph_write(argc, argv); } diff --git a/commit-graph.c b/commit-graph.c index 9e228d3bb5..488216a736 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -827,3 +827,26 @@ void write_commit_graph(const char *obj_dir, oids.alloc = 0; oids.nr = 0; } + +static int verify_commit_graph_error; + +static void graph_report(const char *fmt, ...) +{ + va_list ap; + + verify_commit_graph_error = 1; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); +} + +int verify_commit_graph(struct repository *r, struct commit_graph *g) +{ + if (!g) { + graph_report("no commit-graph file loaded"); + return 1; + } + + return verify_commit_graph_error; +} diff --git a/commit-graph.h b/commit-graph.h index 96cccb10f3..4359812fb4 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -2,6 +2,7 @@ #define COMMIT_GRAPH_H #include "git-compat-util.h" +#include "repository.h" char *get_commit_graph_filename(const char *obj_dir); @@ -53,4 +54,6 @@ void write_commit_graph(const char *obj_dir, int nr_commits, int append); +int verify_commit_graph(struct repository *r, struct commit_graph *g); + #endif diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 59d0be2877..0830ef9fdd 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -11,6 +11,11 @@ test_expect_success 'setup full repo' ' objdir=".git/objects" ' +test_expect_success 'verify graph with no graph file' ' + cd "$TRASH_DIRECTORY/full" && + git commit-graph verify +' + test_expect_success 'write graph with no packs' ' cd "$TRASH_DIRECTORY/full" && git commit-graph write --object-dir . && @@ -230,4 +235,9 @@ test_expect_success 'perform fast-forward merge in full repo' ' test_cmp expect output ' +test_expect_success 'git commit-graph verify' ' + cd "$TRASH_DIRECTORY/full" && + git commit-graph verify >output +' + test_done From d9b9f8a6fd146ee1069e1cc4cf16eba76bdc1e08 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:33 -0400 Subject: [PATCH 08/23] commit-graph: verify catches corrupt signature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the first of several commits that add a test to check that 'git commit-graph verify' catches corruption in the commit-graph file. The first test checks that the command catches an error in the file signature. This is a check that exists in the existing commit-graph reading code. Add a helper method 'corrupt_graph_and_verify' to the test script t5318-commit-graph.sh. This helper corrupts the commit-graph file at a certain location, runs 'git commit-graph verify', and reports the output to the 'err' file. This data is filtered to remove the lines added by 'test_must_fail' when the test is run verbosely. Then, the output is checked to contain a specific error message. Most messages from 'git commit-graph verify' will not be marked for translation. There will be one exception: the message that reports an invalid checksum will be marked for translation, as that is the only message that is intended for a typical user. Helped-by: Szeder Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t5318-commit-graph.sh | 43 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 0830ef9fdd..c0c1ff09b9 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -235,9 +235,52 @@ test_expect_success 'perform fast-forward merge in full repo' ' test_cmp expect output ' +# the verify tests below expect the commit-graph to contain +# exactly the commits reachable from the commits/8 branch. +# If the file changes the set of commits in the list, then the +# offsets into the binary file will result in different edits +# and the tests will likely break. + test_expect_success 'git commit-graph verify' ' cd "$TRASH_DIRECTORY/full" && + git rev-parse commits/8 | git commit-graph write --stdin-commits && git commit-graph verify >output ' +GRAPH_BYTE_VERSION=4 +GRAPH_BYTE_HASH=5 + +# usage: corrupt_graph_and_verify +# Manipulates the commit-graph file at the position +# by inserting the data, then runs 'git commit-graph verify' +# and places the output in the file 'err'. Test 'err' for +# the given string. +corrupt_graph_and_verify() { + pos=$1 + data="${2:-\0}" + grepstr=$3 + cd "$TRASH_DIRECTORY/full" && + test_when_finished mv commit-graph-backup $objdir/info/commit-graph && + cp $objdir/info/commit-graph commit-graph-backup && + printf "$data" | dd of="$objdir/info/commit-graph" bs=1 seek="$pos" conv=notrunc && + test_must_fail git commit-graph verify 2>test_err && + grep -v "^+" test_err >err + test_i18ngrep "$grepstr" err +} + +test_expect_success 'detect bad signature' ' + corrupt_graph_and_verify 0 "\0" \ + "graph signature" +' + +test_expect_success 'detect bad version' ' + corrupt_graph_and_verify $GRAPH_BYTE_VERSION "\02" \ + "graph version" +' + +test_expect_success 'detect bad hash version' ' + corrupt_graph_and_verify $GRAPH_BYTE_HASH "\02" \ + "hash version" +' + test_done From 2bd0365f374558ca053412966b51fe01885ea3b5 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:34 -0400 Subject: [PATCH 09/23] commit-graph: verify required chunks are present The commit-graph file requires the following three chunks: * OID Fanout * OID Lookup * Commit Data If any of these are missing, then the 'verify' subcommand should report a failure. This includes the chunk IDs malformed or the chunk count is truncated. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 9 +++++++++ t/t5318-commit-graph.sh | 29 +++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/commit-graph.c b/commit-graph.c index 488216a736..26328c3b74 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -848,5 +848,14 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) return 1; } + verify_commit_graph_error = 0; + + if (!g->chunk_oid_fanout) + graph_report("commit-graph is missing the OID Fanout chunk"); + if (!g->chunk_oid_lookup) + graph_report("commit-graph is missing the OID Lookup chunk"); + if (!g->chunk_commit_data) + graph_report("commit-graph is missing the Commit Data chunk"); + return verify_commit_graph_error; } diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index c0c1ff09b9..dc16849ddd 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -249,6 +249,15 @@ test_expect_success 'git commit-graph verify' ' GRAPH_BYTE_VERSION=4 GRAPH_BYTE_HASH=5 +GRAPH_BYTE_CHUNK_COUNT=6 +GRAPH_CHUNK_LOOKUP_OFFSET=8 +GRAPH_CHUNK_LOOKUP_WIDTH=12 +GRAPH_CHUNK_LOOKUP_ROWS=5 +GRAPH_BYTE_OID_FANOUT_ID=$GRAPH_CHUNK_LOOKUP_OFFSET +GRAPH_BYTE_OID_LOOKUP_ID=$(($GRAPH_CHUNK_LOOKUP_OFFSET + \ + 1 * $GRAPH_CHUNK_LOOKUP_WIDTH)) +GRAPH_BYTE_COMMIT_DATA_ID=$(($GRAPH_CHUNK_LOOKUP_OFFSET + \ + 2 * $GRAPH_CHUNK_LOOKUP_WIDTH)) # usage: corrupt_graph_and_verify # Manipulates the commit-graph file at the position @@ -283,4 +292,24 @@ test_expect_success 'detect bad hash version' ' "hash version" ' +test_expect_success 'detect low chunk count' ' + corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\02" \ + "missing the .* chunk" +' + +test_expect_success 'detect missing OID fanout chunk' ' + corrupt_graph_and_verify $GRAPH_BYTE_OID_FANOUT_ID "\0" \ + "missing the OID Fanout chunk" +' + +test_expect_success 'detect missing OID lookup chunk' ' + corrupt_graph_and_verify $GRAPH_BYTE_OID_LOOKUP_ID "\0" \ + "missing the OID Lookup chunk" +' + +test_expect_success 'detect missing commit data chunk' ' + corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_DATA_ID "\0" \ + "missing the Commit Data chunk" +' + test_done From 9bda84678950c95e4d81cad60e5210c001fbe119 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:35 -0400 Subject: [PATCH 10/23] commit-graph: verify corrupt OID fanout and lookup In the commit-graph file, the OID fanout chunk provides an index into the OID lookup. The 'verify' subcommand should find incorrect values in the fanout. Similarly, the 'verify' subcommand should find out-of-order values in the OID lookup. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 36 ++++++++++++++++++++++++++++++++++++ t/t5318-commit-graph.sh | 22 ++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/commit-graph.c b/commit-graph.c index 26328c3b74..b34f62f277 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -843,6 +843,9 @@ static void graph_report(const char *fmt, ...) int verify_commit_graph(struct repository *r, struct commit_graph *g) { + uint32_t i, cur_fanout_pos = 0; + struct object_id prev_oid, cur_oid; + if (!g) { graph_report("no commit-graph file loaded"); return 1; @@ -857,5 +860,38 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) if (!g->chunk_commit_data) graph_report("commit-graph is missing the Commit Data chunk"); + if (verify_commit_graph_error) + return verify_commit_graph_error; + + for (i = 0; i < g->num_commits; i++) { + hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i); + + if (i && oidcmp(&prev_oid, &cur_oid) >= 0) + graph_report("commit-graph has incorrect OID order: %s then %s", + oid_to_hex(&prev_oid), + oid_to_hex(&cur_oid)); + + oidcpy(&prev_oid, &cur_oid); + + while (cur_oid.hash[0] > cur_fanout_pos) { + uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos); + + if (i != fanout_value) + graph_report("commit-graph has incorrect fanout value: fanout[%d] = %u != %u", + cur_fanout_pos, fanout_value, i); + cur_fanout_pos++; + } + } + + while (cur_fanout_pos < 256) { + uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos); + + if (g->num_commits != fanout_value) + graph_report("commit-graph has incorrect fanout value: fanout[%d] = %u != %u", + cur_fanout_pos, fanout_value, i); + + cur_fanout_pos++; + } + return verify_commit_graph_error; } diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index dc16849ddd..8ae2a49cfa 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -247,6 +247,7 @@ test_expect_success 'git commit-graph verify' ' git commit-graph verify >output ' +HASH_LEN=20 GRAPH_BYTE_VERSION=4 GRAPH_BYTE_HASH=5 GRAPH_BYTE_CHUNK_COUNT=6 @@ -258,6 +259,12 @@ GRAPH_BYTE_OID_LOOKUP_ID=$(($GRAPH_CHUNK_LOOKUP_OFFSET + \ 1 * $GRAPH_CHUNK_LOOKUP_WIDTH)) GRAPH_BYTE_COMMIT_DATA_ID=$(($GRAPH_CHUNK_LOOKUP_OFFSET + \ 2 * $GRAPH_CHUNK_LOOKUP_WIDTH)) +GRAPH_FANOUT_OFFSET=$(($GRAPH_CHUNK_LOOKUP_OFFSET + \ + $GRAPH_CHUNK_LOOKUP_WIDTH * $GRAPH_CHUNK_LOOKUP_ROWS)) +GRAPH_BYTE_FANOUT1=$(($GRAPH_FANOUT_OFFSET + 4 * 4)) +GRAPH_BYTE_FANOUT2=$(($GRAPH_FANOUT_OFFSET + 4 * 255)) +GRAPH_OID_LOOKUP_OFFSET=$(($GRAPH_FANOUT_OFFSET + 4 * 256)) +GRAPH_BYTE_OID_LOOKUP_ORDER=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * 8)) # usage: corrupt_graph_and_verify # Manipulates the commit-graph file at the position @@ -312,4 +319,19 @@ test_expect_success 'detect missing commit data chunk' ' "missing the Commit Data chunk" ' +test_expect_success 'detect incorrect fanout' ' + corrupt_graph_and_verify $GRAPH_BYTE_FANOUT1 "\01" \ + "fanout value" +' + +test_expect_success 'detect incorrect fanout final value' ' + corrupt_graph_and_verify $GRAPH_BYTE_FANOUT2 "\01" \ + "fanout value" +' + +test_expect_success 'detect incorrect OID order' ' + corrupt_graph_and_verify $GRAPH_BYTE_OID_LOOKUP_ORDER "\01" \ + "incorrect OID order" +' + test_done From 96af91d410c70ab750a9a1ecdf858c9ec46be767 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:36 -0400 Subject: [PATCH 11/23] commit-graph: verify objects exist In the 'verify' subcommand, load commits directly from the object database to ensure they exist. Parse by skipping the commit-graph. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 18 ++++++++++++++++++ t/t5318-commit-graph.sh | 7 +++++++ 2 files changed, 25 insertions(+) diff --git a/commit-graph.c b/commit-graph.c index b34f62f277..282cdb4aec 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -11,6 +11,7 @@ #include "sha1-lookup.h" #include "commit-graph.h" #include "object-store.h" +#include "alloc.h" #define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */ #define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */ @@ -242,6 +243,7 @@ static struct commit_list **insert_parent_or_die(struct commit_graph *g, { struct commit *c; struct object_id oid; + hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * pos); c = lookup_commit(&oid); if (!c) @@ -893,5 +895,21 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) cur_fanout_pos++; } + if (verify_commit_graph_error) + return verify_commit_graph_error; + + for (i = 0; i < g->num_commits; i++) { + struct commit *odb_commit; + + hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i); + + odb_commit = (struct commit *)create_object(r, cur_oid.hash, alloc_commit_node(r)); + if (parse_commit_internal(odb_commit, 0, 0)) { + graph_report("failed to parse %s from object database", + oid_to_hex(&cur_oid)); + continue; + } + } + return verify_commit_graph_error; } diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 8ae2a49cfa..a27ec97269 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -247,6 +247,7 @@ test_expect_success 'git commit-graph verify' ' git commit-graph verify >output ' +NUM_COMMITS=9 HASH_LEN=20 GRAPH_BYTE_VERSION=4 GRAPH_BYTE_HASH=5 @@ -265,6 +266,7 @@ GRAPH_BYTE_FANOUT1=$(($GRAPH_FANOUT_OFFSET + 4 * 4)) GRAPH_BYTE_FANOUT2=$(($GRAPH_FANOUT_OFFSET + 4 * 255)) GRAPH_OID_LOOKUP_OFFSET=$(($GRAPH_FANOUT_OFFSET + 4 * 256)) GRAPH_BYTE_OID_LOOKUP_ORDER=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * 8)) +GRAPH_BYTE_OID_LOOKUP_MISSING=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * 4 + 10)) # usage: corrupt_graph_and_verify # Manipulates the commit-graph file at the position @@ -334,4 +336,9 @@ test_expect_success 'detect incorrect OID order' ' "incorrect OID order" ' +test_expect_success 'detect OID not in object database' ' + corrupt_graph_and_verify $GRAPH_BYTE_OID_LOOKUP_MISSING "\01" \ + "from object database" +' + test_done From 2e3c07378f9b777a708e1078a474666a00a8be05 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:37 -0400 Subject: [PATCH 12/23] commit-graph: verify root tree OIDs The 'verify' subcommand must compare the commit content parsed from the commit-graph against the content in the object database. Use lookup_commit() and parse_commit_in_graph_one() to parse the commits from the graph and compare against a commit that is loaded separately and parsed directly from the object database. Add checks for the root tree OID. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 17 ++++++++++++++++- t/t5318-commit-graph.sh | 7 +++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/commit-graph.c b/commit-graph.c index 282cdb4aec..17624b90d7 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -866,6 +866,8 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) return verify_commit_graph_error; for (i = 0; i < g->num_commits; i++) { + struct commit *graph_commit; + hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i); if (i && oidcmp(&prev_oid, &cur_oid) >= 0) @@ -883,6 +885,11 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) cur_fanout_pos, fanout_value, i); cur_fanout_pos++; } + + graph_commit = lookup_commit(&cur_oid); + if (!parse_commit_in_graph_one(g, graph_commit)) + graph_report("failed to parse %s from commit-graph", + oid_to_hex(&cur_oid)); } while (cur_fanout_pos < 256) { @@ -899,16 +906,24 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) return verify_commit_graph_error; for (i = 0; i < g->num_commits; i++) { - struct commit *odb_commit; + struct commit *graph_commit, *odb_commit; hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i); + graph_commit = lookup_commit(&cur_oid); odb_commit = (struct commit *)create_object(r, cur_oid.hash, alloc_commit_node(r)); if (parse_commit_internal(odb_commit, 0, 0)) { graph_report("failed to parse %s from object database", oid_to_hex(&cur_oid)); continue; } + + if (oidcmp(&get_commit_tree_in_graph_one(g, graph_commit)->object.oid, + get_commit_tree_oid(odb_commit))) + graph_report("root tree OID for commit %s in commit-graph is %s != %s", + oid_to_hex(&cur_oid), + oid_to_hex(get_commit_tree_oid(graph_commit)), + oid_to_hex(get_commit_tree_oid(odb_commit))); } return verify_commit_graph_error; diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index a27ec97269..f258c6d5d0 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -267,6 +267,8 @@ GRAPH_BYTE_FANOUT2=$(($GRAPH_FANOUT_OFFSET + 4 * 255)) GRAPH_OID_LOOKUP_OFFSET=$(($GRAPH_FANOUT_OFFSET + 4 * 256)) GRAPH_BYTE_OID_LOOKUP_ORDER=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * 8)) GRAPH_BYTE_OID_LOOKUP_MISSING=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * 4 + 10)) +GRAPH_COMMIT_DATA_OFFSET=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * $NUM_COMMITS)) +GRAPH_BYTE_COMMIT_TREE=$GRAPH_COMMIT_DATA_OFFSET # usage: corrupt_graph_and_verify # Manipulates the commit-graph file at the position @@ -341,4 +343,9 @@ test_expect_success 'detect OID not in object database' ' "from object database" ' +test_expect_success 'detect incorrect tree OID' ' + corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_TREE "\01" \ + "root tree OID for commit" +' + test_done From 53614b13516ffd27bb045b696d0815b9da435cb0 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:38 -0400 Subject: [PATCH 13/23] commit-graph: verify parent list The commit-graph file stores parents in a two-column portion of the commit data chunk. If there is only one parent, then the second column stores 0xFFFFFFFF to indicate no second parent. The 'verify' subcommand checks the parent list for the commit loaded from the commit-graph and the one parsed from the object database. Test these checks for corrupt parents, too many parents, and wrong parents. Add a boundary check to insert_parent_or_die() for when the parent position value is out of range. The octopus merge will be tested in a later commit. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 28 ++++++++++++++++++++++++++++ t/t5318-commit-graph.sh | 18 ++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/commit-graph.c b/commit-graph.c index 17624b90d7..a79b06f37a 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -244,6 +244,9 @@ static struct commit_list **insert_parent_or_die(struct commit_graph *g, struct commit *c; struct object_id oid; + if (pos >= g->num_commits) + die("invalid parent position %"PRIu64, pos); + hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * pos); c = lookup_commit(&oid); if (!c) @@ -907,6 +910,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) for (i = 0; i < g->num_commits; i++) { struct commit *graph_commit, *odb_commit; + struct commit_list *graph_parents, *odb_parents; hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i); @@ -924,6 +928,30 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) oid_to_hex(&cur_oid), oid_to_hex(get_commit_tree_oid(graph_commit)), oid_to_hex(get_commit_tree_oid(odb_commit))); + + graph_parents = graph_commit->parents; + odb_parents = odb_commit->parents; + + while (graph_parents) { + if (odb_parents == NULL) { + graph_report("commit-graph parent list for commit %s is too long", + oid_to_hex(&cur_oid)); + break; + } + + if (oidcmp(&graph_parents->item->object.oid, &odb_parents->item->object.oid)) + graph_report("commit-graph parent for %s is %s != %s", + oid_to_hex(&cur_oid), + oid_to_hex(&graph_parents->item->object.oid), + oid_to_hex(&odb_parents->item->object.oid)); + + graph_parents = graph_parents->next; + odb_parents = odb_parents->next; + } + + if (odb_parents != NULL) + graph_report("commit-graph parent list for commit %s terminates early", + oid_to_hex(&cur_oid)); } return verify_commit_graph_error; diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index f258c6d5d0..b41c8f4d9b 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -269,6 +269,9 @@ GRAPH_BYTE_OID_LOOKUP_ORDER=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * 8)) GRAPH_BYTE_OID_LOOKUP_MISSING=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * 4 + 10)) GRAPH_COMMIT_DATA_OFFSET=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * $NUM_COMMITS)) GRAPH_BYTE_COMMIT_TREE=$GRAPH_COMMIT_DATA_OFFSET +GRAPH_BYTE_COMMIT_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN)) +GRAPH_BYTE_COMMIT_EXTRA_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 4)) +GRAPH_BYTE_COMMIT_WRONG_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 3)) # usage: corrupt_graph_and_verify # Manipulates the commit-graph file at the position @@ -348,4 +351,19 @@ test_expect_success 'detect incorrect tree OID' ' "root tree OID for commit" ' +test_expect_success 'detect incorrect parent int-id' ' + corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_PARENT "\01" \ + "invalid parent" +' + +test_expect_success 'detect extra parent int-id' ' + corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_EXTRA_PARENT "\00" \ + "is too long" +' + +test_expect_success 'detect wrong parent' ' + corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_WRONG_PARENT "\01" \ + "commit-graph parent for" +' + test_done From 1373e547f7d38b9444fcaae16c1de698b719aa15 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:39 -0400 Subject: [PATCH 14/23] commit-graph: verify generation number While iterating through the commit parents, perform the generation number calculation and compare against the value stored in the commit-graph. The tests demonstrate that having a different set of parents affects the generation number calculation, and this value propagates to descendants. Hence, we drop the single-line condition on the output. Since Git will ship with the commit-graph feature without generation numbers, we need to accept commit-graphs with all generation numbers equal to zero. In this case, ignore the generation number calculation. However, verify that we should never have a mix of zero and non-zero generation numbers. Create a test that sets one commit to generation zero and all following commits report a failure as they have non-zero generation in a file that contains generation number zero. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 34 ++++++++++++++++++++++++++++++++++ t/t5318-commit-graph.sh | 11 +++++++++++ 2 files changed, 45 insertions(+) diff --git a/commit-graph.c b/commit-graph.c index a79b06f37a..de656ebbaf 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -846,10 +846,14 @@ static void graph_report(const char *fmt, ...) va_end(ap); } +#define GENERATION_ZERO_EXISTS 1 +#define GENERATION_NUMBER_EXISTS 2 + int verify_commit_graph(struct repository *r, struct commit_graph *g) { uint32_t i, cur_fanout_pos = 0; struct object_id prev_oid, cur_oid; + int generation_zero = 0; if (!g) { graph_report("no commit-graph file loaded"); @@ -911,6 +915,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) for (i = 0; i < g->num_commits; i++) { struct commit *graph_commit, *odb_commit; struct commit_list *graph_parents, *odb_parents; + uint32_t max_generation = 0; hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i); @@ -945,6 +950,9 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) oid_to_hex(&graph_parents->item->object.oid), oid_to_hex(&odb_parents->item->object.oid)); + if (graph_parents->item->generation > max_generation) + max_generation = graph_parents->item->generation; + graph_parents = graph_parents->next; odb_parents = odb_parents->next; } @@ -952,6 +960,32 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) if (odb_parents != NULL) graph_report("commit-graph parent list for commit %s terminates early", oid_to_hex(&cur_oid)); + + if (!graph_commit->generation) { + if (generation_zero == GENERATION_NUMBER_EXISTS) + graph_report("commit-graph has generation number zero for commit %s, but non-zero elsewhere", + oid_to_hex(&cur_oid)); + generation_zero = GENERATION_ZERO_EXISTS; + } else if (generation_zero == GENERATION_ZERO_EXISTS) + graph_report("commit-graph has non-zero generation number for commit %s, but zero elsewhere", + oid_to_hex(&cur_oid)); + + if (generation_zero == GENERATION_ZERO_EXISTS) + continue; + + /* + * If one of our parents has generation GENERATION_NUMBER_MAX, then + * our generation is also GENERATION_NUMBER_MAX. Decrement to avoid + * extra logic in the following condition. + */ + if (max_generation == GENERATION_NUMBER_MAX) + max_generation--; + + if (graph_commit->generation != max_generation + 1) + graph_report("commit-graph generation for commit %s is %u != %u", + oid_to_hex(&cur_oid), + graph_commit->generation, + max_generation + 1); } return verify_commit_graph_error; diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index b41c8f4d9b..3128e19aef 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -272,6 +272,7 @@ GRAPH_BYTE_COMMIT_TREE=$GRAPH_COMMIT_DATA_OFFSET GRAPH_BYTE_COMMIT_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN)) GRAPH_BYTE_COMMIT_EXTRA_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 4)) GRAPH_BYTE_COMMIT_WRONG_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 3)) +GRAPH_BYTE_COMMIT_GENERATION=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 11)) # usage: corrupt_graph_and_verify # Manipulates the commit-graph file at the position @@ -366,4 +367,14 @@ test_expect_success 'detect wrong parent' ' "commit-graph parent for" ' +test_expect_success 'detect incorrect generation number' ' + corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_GENERATION "\070" \ + "generation for commit" +' + +test_expect_success 'detect incorrect generation number' ' + corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_GENERATION "\01" \ + "non-zero generation number" +' + test_done From 88968ebf86d9b4524b17f6684dd8c67f0c6df652 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:40 -0400 Subject: [PATCH 15/23] commit-graph: verify commit date Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 6 ++++++ t/t5318-commit-graph.sh | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/commit-graph.c b/commit-graph.c index de656ebbaf..3ac28b5eb5 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -986,6 +986,12 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) oid_to_hex(&cur_oid), graph_commit->generation, max_generation + 1); + + if (graph_commit->date != odb_commit->date) + graph_report("commit date for commit %s in commit-graph is %"PRItime" != %"PRItime, + oid_to_hex(&cur_oid), + graph_commit->date, + odb_commit->date); } return verify_commit_graph_error; diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 3128e19aef..2c65e6a95c 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -273,6 +273,7 @@ GRAPH_BYTE_COMMIT_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN)) GRAPH_BYTE_COMMIT_EXTRA_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 4)) GRAPH_BYTE_COMMIT_WRONG_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 3)) GRAPH_BYTE_COMMIT_GENERATION=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 11)) +GRAPH_BYTE_COMMIT_DATE=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 12)) # usage: corrupt_graph_and_verify # Manipulates the commit-graph file at the position @@ -377,4 +378,9 @@ test_expect_success 'detect incorrect generation number' ' "non-zero generation number" ' +test_expect_success 'detect incorrect commit date' ' + corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_DATE "\01" \ + "commit date" +' + test_done From 437787ae1b05ddf1d4f16e92d07573d7c320a3a4 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:41 -0400 Subject: [PATCH 16/23] commit-graph: test for corrupted octopus edge The commit-graph file has an extra chunk to store the parent int-ids for parents beyond the first parent for octopus merges. Our test repo has a single octopus merge that we can manipulate to demonstrate the 'verify' subcommand detects incorrect values in that chunk. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t5318-commit-graph.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 2c65e6a95c..a0cf1f66de 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -248,6 +248,7 @@ test_expect_success 'git commit-graph verify' ' ' NUM_COMMITS=9 +NUM_OCTOPUS_EDGES=2 HASH_LEN=20 GRAPH_BYTE_VERSION=4 GRAPH_BYTE_HASH=5 @@ -274,6 +275,10 @@ GRAPH_BYTE_COMMIT_EXTRA_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 4)) GRAPH_BYTE_COMMIT_WRONG_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 3)) GRAPH_BYTE_COMMIT_GENERATION=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 11)) GRAPH_BYTE_COMMIT_DATE=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 12)) +GRAPH_COMMIT_DATA_WIDTH=$(($HASH_LEN + 16)) +GRAPH_OCTOPUS_DATA_OFFSET=$(($GRAPH_COMMIT_DATA_OFFSET + \ + $GRAPH_COMMIT_DATA_WIDTH * $NUM_COMMITS)) +GRAPH_BYTE_OCTOPUS=$(($GRAPH_OCTOPUS_DATA_OFFSET + 4)) # usage: corrupt_graph_and_verify # Manipulates the commit-graph file at the position @@ -383,4 +388,9 @@ test_expect_success 'detect incorrect commit date' ' "commit date" ' +test_expect_success 'detect incorrect parent for octopus merge' ' + corrupt_graph_and_verify $GRAPH_BYTE_OCTOPUS "\01" \ + "invalid parent" +' + test_done From 41df0e307fede8ad01799322af41d8b59d2f6edf Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:42 -0400 Subject: [PATCH 17/23] commit-graph: verify contents match checksum The commit-graph file ends with a SHA1 hash of the previous contents. If a commit-graph file has errors but the checksum hash is correct, then we know that the problem is a bug in Git and not simply file corruption after-the-fact. Compute the checksum right away so it is the first error that appears, and make the message translatable since this error can be "corrected" by a user by simply deleting the file and recomputing. The rest of the errors are useful only to developers. Be sure to continue checking the rest of the file data if the checksum is wrong. This is important for our tests, as we break the checksum as we modify bytes of the commit-graph file. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 16 ++++++++++++++-- t/t5318-commit-graph.sh | 6 ++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 3ac28b5eb5..aca9e5e2dc 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -833,6 +833,7 @@ void write_commit_graph(const char *obj_dir, oids.nr = 0; } +#define VERIFY_COMMIT_GRAPH_ERROR_HASH 2 static int verify_commit_graph_error; static void graph_report(const char *fmt, ...) @@ -852,8 +853,10 @@ static void graph_report(const char *fmt, ...) int verify_commit_graph(struct repository *r, struct commit_graph *g) { uint32_t i, cur_fanout_pos = 0; - struct object_id prev_oid, cur_oid; + struct object_id prev_oid, cur_oid, checksum; int generation_zero = 0; + struct hashfile *f; + int devnull; if (!g) { graph_report("no commit-graph file loaded"); @@ -872,6 +875,15 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) if (verify_commit_graph_error) return verify_commit_graph_error; + devnull = open("/dev/null", O_WRONLY); + f = hashfd(devnull, NULL); + hashwrite(f, g->data, g->data_len - g->hash_len); + finalize_hashfile(f, checksum.hash, CSUM_CLOSE); + if (hashcmp(checksum.hash, g->data + g->data_len - g->hash_len)) { + graph_report(_("the commit-graph file has incorrect checksum and is likely corrupt")); + verify_commit_graph_error = VERIFY_COMMIT_GRAPH_ERROR_HASH; + } + for (i = 0; i < g->num_commits; i++) { struct commit *graph_commit; @@ -909,7 +921,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) cur_fanout_pos++; } - if (verify_commit_graph_error) + if (verify_commit_graph_error & ~VERIFY_COMMIT_GRAPH_ERROR_HASH) return verify_commit_graph_error; for (i = 0; i < g->num_commits; i++) { diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index a0cf1f66de..fed05e2f12 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -279,6 +279,7 @@ GRAPH_COMMIT_DATA_WIDTH=$(($HASH_LEN + 16)) GRAPH_OCTOPUS_DATA_OFFSET=$(($GRAPH_COMMIT_DATA_OFFSET + \ $GRAPH_COMMIT_DATA_WIDTH * $NUM_COMMITS)) GRAPH_BYTE_OCTOPUS=$(($GRAPH_OCTOPUS_DATA_OFFSET + 4)) +GRAPH_BYTE_FOOTER=$(($GRAPH_OCTOPUS_DATA_OFFSET + 4 * $NUM_OCTOPUS_EDGES)) # usage: corrupt_graph_and_verify # Manipulates the commit-graph file at the position @@ -393,4 +394,9 @@ test_expect_success 'detect incorrect parent for octopus merge' ' "invalid parent" ' +test_expect_success 'detect invalid checksum hash' ' + corrupt_graph_and_verify $GRAPH_BYTE_FOOTER "\00" \ + "incorrect checksum" +' + test_done From e0fd51e1d7d3877d4b0b4133c763d46b65d46f7a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:43 -0400 Subject: [PATCH 18/23] fsck: verify commit-graph If core.commitGraph is true, verify the contents of the commit-graph during 'git fsck' using the 'git commit-graph verify' subcommand. Run this check on all alternates, as well. We use a new process for two reasons: 1. The subcommand decouples the details of loading and verifying a commit-graph file from the other fsck details. 2. The commit-graph verification requires the commits to be loaded in a specific order to guarantee we parse from the commit-graph file for some objects and from the object database for others. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-fsck.txt | 3 +++ builtin/fsck.c | 21 +++++++++++++++++++++ t/t5318-commit-graph.sh | 8 ++++++++ 3 files changed, 32 insertions(+) diff --git a/Documentation/git-fsck.txt b/Documentation/git-fsck.txt index b9f060e3b2..ab9a93fb9b 100644 --- a/Documentation/git-fsck.txt +++ b/Documentation/git-fsck.txt @@ -110,6 +110,9 @@ Any corrupt objects you will have to find in backups or other archives (i.e., you can just remove them and do an 'rsync' with some other site in the hopes that somebody else has the object you have corrupted). +If core.commitGraph is true, the commit-graph file will also be inspected +using 'git commit-graph verify'. See linkgit:git-commit-graph[1]. + Extracted Diagnostics --------------------- diff --git a/builtin/fsck.c b/builtin/fsck.c index 3ad4f160f9..eca7900ee0 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -18,6 +18,7 @@ #include "decorate.h" #include "packfile.h" #include "object-store.h" +#include "run-command.h" #define REACHABLE 0x0001 #define SEEN 0x0002 @@ -47,6 +48,7 @@ static int name_objects; #define ERROR_REACHABLE 02 #define ERROR_PACK 04 #define ERROR_REFS 010 +#define ERROR_COMMIT_GRAPH 020 static const char *describe_object(struct object *obj) { @@ -822,5 +824,24 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) } check_connectivity(); + + if (core_commit_graph) { + struct child_process commit_graph_verify = CHILD_PROCESS_INIT; + const char *verify_argv[] = { "commit-graph", "verify", NULL, NULL, NULL }; + + commit_graph_verify.argv = verify_argv; + commit_graph_verify.git_cmd = 1; + if (run_command(&commit_graph_verify)) + errors_found |= ERROR_COMMIT_GRAPH; + + prepare_alt_odb(the_repository); + for (alt = the_repository->objects->alt_odb_list; alt; alt = alt->next) { + verify_argv[2] = "--object-dir"; + verify_argv[3] = alt->path; + if (run_command(&commit_graph_verify)) + errors_found |= ERROR_COMMIT_GRAPH; + } + } + return errors_found; } diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index fed05e2f12..a9e8c774d5 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -399,4 +399,12 @@ test_expect_success 'detect invalid checksum hash' ' "incorrect checksum" ' +test_expect_success 'git fsck (checks commit-graph)' ' + cd "$TRASH_DIRECTORY/full" && + git fsck && + corrupt_graph_and_verify $GRAPH_BYTE_FOOTER "\00" \ + "incorrect checksum" && + test_must_fail git fsck +' + test_done From d88b14b3fd691fc71c3cea5bc5bde9dd10b5e86c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:44 -0400 Subject: [PATCH 19/23] commit-graph: use string-list API for input Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 35 +++++++++++------------------------ commit-graph.c | 15 +++++++-------- commit-graph.h | 7 +++---- 3 files changed, 21 insertions(+), 36 deletions(-) diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 9d108f43a9..ea28bc311a 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -119,13 +119,9 @@ static int graph_read(int argc, const char **argv) static int graph_write(int argc, const char **argv) { - const char **pack_indexes = NULL; - int packs_nr = 0; - const char **commit_hex = NULL; - int commits_nr = 0; - const char **lines = NULL; - int lines_nr = 0; - int lines_alloc = 0; + struct string_list *pack_indexes = NULL; + struct string_list *commit_hex = NULL; + struct string_list lines; static struct option builtin_commit_graph_write_options[] = { OPT_STRING(0, "object-dir", &opts.obj_dir, @@ -149,34 +145,25 @@ static int graph_write(int argc, const char **argv) if (!opts.obj_dir) opts.obj_dir = get_object_directory(); + string_list_init(&lines, 0); if (opts.stdin_packs || opts.stdin_commits) { struct strbuf buf = STRBUF_INIT; - lines_nr = 0; - lines_alloc = 128; - ALLOC_ARRAY(lines, lines_alloc); - while (strbuf_getline(&buf, stdin) != EOF) { - ALLOC_GROW(lines, lines_nr + 1, lines_alloc); - lines[lines_nr++] = strbuf_detach(&buf, NULL); - } + while (strbuf_getline(&buf, stdin) != EOF) + string_list_append(&lines, strbuf_detach(&buf, NULL)); - if (opts.stdin_packs) { - pack_indexes = lines; - packs_nr = lines_nr; - } - if (opts.stdin_commits) { - commit_hex = lines; - commits_nr = lines_nr; - } + if (opts.stdin_packs) + pack_indexes = &lines; + if (opts.stdin_commits) + commit_hex = &lines; } write_commit_graph(opts.obj_dir, pack_indexes, - packs_nr, commit_hex, - commits_nr, opts.append); + string_list_clear(&lines, 0); return 0; } diff --git a/commit-graph.c b/commit-graph.c index aca9e5e2dc..5b7fa707a5 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -657,10 +657,8 @@ static void compute_generation_numbers(struct packed_commit_list* commits) } void write_commit_graph(const char *obj_dir, - const char **pack_indexes, - int nr_packs, - const char **commit_hex, - int nr_commits, + struct string_list *pack_indexes, + struct string_list *commit_hex, int append) { struct packed_oid_list oids; @@ -701,10 +699,10 @@ void write_commit_graph(const char *obj_dir, int dirlen; strbuf_addf(&packname, "%s/pack/", obj_dir); dirlen = packname.len; - for (i = 0; i < nr_packs; i++) { + for (i = 0; i < pack_indexes->nr; i++) { struct packed_git *p; strbuf_setlen(&packname, dirlen); - strbuf_addstr(&packname, pack_indexes[i]); + strbuf_addstr(&packname, pack_indexes->items[i].string); p = add_packed_git(packname.buf, packname.len, 1); if (!p) die("error adding pack %s", packname.buf); @@ -717,12 +715,13 @@ void write_commit_graph(const char *obj_dir, } if (commit_hex) { - for (i = 0; i < nr_commits; i++) { + for (i = 0; i < commit_hex->nr; i++) { const char *end; struct object_id oid; struct commit *result; - if (commit_hex[i] && parse_oid_hex(commit_hex[i], &oid, &end)) + if (commit_hex->items[i].string && + parse_oid_hex(commit_hex->items[i].string, &oid, &end)) continue; result = lookup_commit_reference_gently(&oid, 1); diff --git a/commit-graph.h b/commit-graph.h index 4359812fb4..a79b854470 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -3,6 +3,7 @@ #include "git-compat-util.h" #include "repository.h" +#include "string-list.h" char *get_commit_graph_filename(const char *obj_dir); @@ -48,10 +49,8 @@ struct commit_graph { struct commit_graph *load_commit_graph_one(const char *graph_file); void write_commit_graph(const char *obj_dir, - const char **pack_indexes, - int nr_packs, - const char **commit_hex, - int nr_commits, + struct string_list *pack_indexes, + struct string_list *commit_hex, int append); int verify_commit_graph(struct repository *r, struct commit_graph *g); From 59fb87701ff68eb114e54ce6834e91c4ae8f60a7 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:45 -0400 Subject: [PATCH 20/23] commit-graph: add '--reachable' option When writing commit-graph files, it can be convenient to ask for all reachable commits (starting at the ref set) in the resulting file. This is particularly helpful when writing to stdin is complicated, such as a future integration with 'git gc'. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 8 ++++++-- builtin/commit-graph.c | 16 ++++++++++++---- commit-graph.c | 20 ++++++++++++++++++++ commit-graph.h | 1 + t/t5318-commit-graph.sh | 10 ++++++++++ 5 files changed, 49 insertions(+), 6 deletions(-) diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index a222cfab08..dececb79d7 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -38,12 +38,16 @@ Write a commit graph file based on the commits found in packfiles. + With the `--stdin-packs` option, generate the new commit graph by walking objects only in the specified pack-indexes. (Cannot be combined -with --stdin-commits.) +with `--stdin-commits` or `--reachable`.) + With the `--stdin-commits` option, generate the new commit graph by walking commits starting at the commits specified in stdin as a list of OIDs in hex, one OID per line. (Cannot be combined with ---stdin-packs.) +`--stdin-packs` or `--reachable`.) ++ +With the `--reachable` option, generate the new commit graph by walking +commits starting at all refs. (Cannot be combined with `--stdin-commits` +or `--stdin-packs`.) + With the `--append` option, include all commits that are present in the existing commit-graph file. diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index ea28bc311a..c7d0db5ab4 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -10,7 +10,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph [--object-dir ]"), N_("git commit-graph read [--object-dir ]"), N_("git commit-graph verify [--object-dir ]"), - N_("git commit-graph write [--object-dir ] [--append] [--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append] [--reachable|--stdin-packs|--stdin-commits]"), NULL }; @@ -25,12 +25,13 @@ static const char * const builtin_commit_graph_read_usage[] = { }; static const char * const builtin_commit_graph_write_usage[] = { - N_("git commit-graph write [--object-dir ] [--append] [--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append] [--reachable|--stdin-packs|--stdin-commits]"), NULL }; static struct opts_commit_graph { const char *obj_dir; + int reachable; int stdin_packs; int stdin_commits; int append; @@ -127,6 +128,8 @@ static int graph_write(int argc, const char **argv) OPT_STRING(0, "object-dir", &opts.obj_dir, N_("dir"), N_("The object directory to store the graph")), + OPT_BOOL(0, "reachable", &opts.reachable, + N_("start walk at all refs")), OPT_BOOL(0, "stdin-packs", &opts.stdin_packs, N_("scan pack-indexes listed by stdin for commits")), OPT_BOOL(0, "stdin-commits", &opts.stdin_commits, @@ -140,11 +143,16 @@ static int graph_write(int argc, const char **argv) builtin_commit_graph_write_options, builtin_commit_graph_write_usage, 0); - if (opts.stdin_packs && opts.stdin_commits) - die(_("cannot use both --stdin-commits and --stdin-packs")); + if (opts.reachable + opts.stdin_packs + opts.stdin_commits > 1) + die(_("use at most one of --reachable, --stdin-commits, or --stdin-packs")); if (!opts.obj_dir) opts.obj_dir = get_object_directory(); + if (opts.reachable) { + write_commit_graph_reachable(opts.obj_dir, opts.append); + return 0; + } + string_list_init(&lines, 0); if (opts.stdin_packs || opts.stdin_commits) { struct strbuf buf = STRBUF_INIT; diff --git a/commit-graph.c b/commit-graph.c index 5b7fa707a5..212232e752 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -7,6 +7,7 @@ #include "packfile.h" #include "commit.h" #include "object.h" +#include "refs.h" #include "revision.h" #include "sha1-lookup.h" #include "commit-graph.h" @@ -656,6 +657,25 @@ static void compute_generation_numbers(struct packed_commit_list* commits) } } +static int add_ref_to_list(const char *refname, + const struct object_id *oid, + int flags, void *cb_data) +{ + struct string_list *list = (struct string_list *)cb_data; + + string_list_append(list, oid_to_hex(oid)); + return 0; +} + +void write_commit_graph_reachable(const char *obj_dir, int append) +{ + struct string_list list; + + string_list_init(&list, 1); + for_each_ref(add_ref_to_list, &list); + write_commit_graph(obj_dir, NULL, &list, append); +} + void write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, diff --git a/commit-graph.h b/commit-graph.h index a79b854470..506cb45fb1 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -48,6 +48,7 @@ struct commit_graph { struct commit_graph *load_commit_graph_one(const char *graph_file); +void write_commit_graph_reachable(const char *obj_dir, int append); void write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index a9e8c774d5..2208c266b5 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -205,6 +205,16 @@ test_expect_success 'build graph from commits with append' ' graph_git_behavior 'append graph, commit 8 vs merge 1' full commits/8 merge/1 graph_git_behavior 'append graph, commit 8 vs merge 2' full commits/8 merge/2 +test_expect_success 'build graph using --reachable' ' + cd "$TRASH_DIRECTORY/full" && + git commit-graph write --reachable && + test_path_is_file $objdir/info/commit-graph && + graph_read_expect "11" "large_edges" +' + +graph_git_behavior 'append graph, commit 8 vs merge 1' full commits/8 merge/1 +graph_git_behavior 'append graph, commit 8 vs merge 2' full commits/8 merge/2 + test_expect_success 'setup bare repo' ' cd "$TRASH_DIRECTORY" && git clone --bare --no-local full bare && From d5d5d7b64131a23a13698242797f11f6479e39e8 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:46 -0400 Subject: [PATCH 21/23] gc: automatically write commit-graph files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit-graph file is a very helpful feature for speeding up git operations. In order to make it more useful, make it possible to write the commit-graph file during standard garbage collection operations. Add a 'gc.commitGraph' config setting that triggers writing a commit-graph file after any non-trivial 'git gc' command. Defaults to false while the commit-graph feature matures. We specifically do not want to have this on by default until the commit-graph feature is fully integrated with history-modifying features like shallow clones. Helped-by: Ævar Arnfjörð Bjarmason Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/config.txt | 9 ++++++--- Documentation/git-gc.txt | 4 ++++ builtin/gc.c | 6 ++++++ t/t5318-commit-graph.sh | 14 ++++++++++++++ 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index 1cc18a828c..978deecfee 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -904,9 +904,12 @@ core.notesRef:: This setting defaults to "refs/notes/commits", and it can be overridden by the `GIT_NOTES_REF` environment variable. See linkgit:git-notes[1]. -core.commitGraph:: - Enable git commit graph feature. Allows reading from the - commit-graph file. +gc.commitGraph:: + If true, then gc will rewrite the commit-graph file when + linkgit:git-gc[1] is run. When using linkgit:git-gc[1] + '--auto' the commit-graph will be updated if housekeeping is + required. Default is false. See linkgit:git-commit-graph[1] + for details. core.sparseCheckout:: Enable "sparse checkout" feature. See section "Sparse checkout" in diff --git a/Documentation/git-gc.txt b/Documentation/git-gc.txt index 24b2dd44fe..f5bc98ccb3 100644 --- a/Documentation/git-gc.txt +++ b/Documentation/git-gc.txt @@ -136,6 +136,10 @@ The optional configuration variable `gc.packRefs` determines if it within all non-bare repos or it can be set to a boolean value. This defaults to true. +The optional configuration variable `gc.commitGraph` determines if +'git gc' should run 'git commit-graph write'. This can be set to a +boolean value. This defaults to false. + The optional configuration variable `gc.aggressiveWindow` controls how much time is spent optimizing the delta compression of the objects in the repository when the --aggressive option is specified. The larger diff --git a/builtin/gc.c b/builtin/gc.c index ccfb1ceaeb..e103f0f85d 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -20,6 +20,7 @@ #include "sigchain.h" #include "argv-array.h" #include "commit.h" +#include "commit-graph.h" #include "packfile.h" #include "object-store.h" #include "pack.h" @@ -40,6 +41,7 @@ static int aggressive_depth = 50; static int aggressive_window = 250; static int gc_auto_threshold = 6700; static int gc_auto_pack_limit = 50; +static int gc_write_commit_graph; static int detach_auto = 1; static timestamp_t gc_log_expire_time; static const char *gc_log_expire = "1.day.ago"; @@ -129,6 +131,7 @@ static void gc_config(void) git_config_get_int("gc.aggressivedepth", &aggressive_depth); git_config_get_int("gc.auto", &gc_auto_threshold); git_config_get_int("gc.autopacklimit", &gc_auto_pack_limit); + git_config_get_bool("gc.writecommitgraph", &gc_write_commit_graph); git_config_get_bool("gc.autodetach", &detach_auto); git_config_get_expiry("gc.pruneexpire", &prune_expire); git_config_get_expiry("gc.worktreepruneexpire", &prune_worktrees_expire); @@ -641,6 +644,9 @@ int cmd_gc(int argc, const char **argv, const char *prefix) if (pack_garbage.nr > 0) clean_pack_garbage(); + if (gc_write_commit_graph) + write_commit_graph_reachable(get_object_directory(), 0); + if (auto_gc && too_many_loose_objects()) warning(_("There are too many unreachable loose objects; " "run 'git prune' to remove them.")); diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 2208c266b5..5947de3d24 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -245,6 +245,20 @@ test_expect_success 'perform fast-forward merge in full repo' ' test_cmp expect output ' +test_expect_success 'check that gc computes commit-graph' ' + cd "$TRASH_DIRECTORY/full" && + git commit --allow-empty -m "blank" && + git commit-graph write --reachable && + cp $objdir/info/commit-graph commit-graph-before-gc && + git reset --hard HEAD~1 && + git config gc.writeCommitGraph true && + git gc && + cp $objdir/info/commit-graph commit-graph-after-gc && + ! test_cmp commit-graph-before-gc commit-graph-after-gc && + git commit-graph write --reachable && + test_cmp commit-graph-after-gc $objdir/info/commit-graph +' + # the verify tests below expect the commit-graph to contain # exactly the commits reachable from the commits/8 branch. # If the file changes the set of commits in the list, then the From d4f65b8d141e041eb5e558cd9e763873e29863b9 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 27 Jun 2018 09:24:47 -0400 Subject: [PATCH 22/23] commit-graph: update design document The commit-graph feature is now integrated with 'fsck' and 'gc', so remove those items from the "Future Work" section of the commit-graph design document. Also remove the section on lazy-loading trees, as that was completed in an earlier patch series. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/commit-graph.txt | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index e1a883eb46..c664acbd76 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -118,9 +118,6 @@ Future Work - The commit graph feature currently does not honor commit grafts. This can be remedied by duplicating or refactoring the current graft logic. -- The 'commit-graph' subcommand does not have a "verify" mode that is - necessary for integration with fsck. - - After computing and storing generation numbers, we must make graph walks aware of generation numbers to gain the performance benefits they enable. This will mostly be accomplished by swapping a commit-date-ordered @@ -130,25 +127,6 @@ Future Work - 'log --topo-order' - 'tag --merged' -- Currently, parse_commit_gently() requires filling in the root tree - object for a commit. This passes through lookup_tree() and consequently - lookup_object(). Also, it calls lookup_commit() when loading the parents. - These method calls check the ODB for object existence, even if the - consumer does not need the content. For example, we do not need the - tree contents when computing merge bases. Now that commit parsing is - removed from the computation time, these lookup operations are the - slowest operations keeping graph walks from being fast. Consider - loading these objects without verifying their existence in the ODB and - only loading them fully when consumers need them. Consider a method - such as "ensure_tree_loaded(commit)" that fully loads a tree before - using commit->tree. - -- The current design uses the 'commit-graph' subcommand to generate the graph. - When this feature stabilizes enough to recommend to most users, we should - add automatic graph writes to common operations that create many commits. - For example, one could compute a graph on 'clone', 'fetch', or 'repack' - commands. - - A server could provide a commit graph file as part of the network protocol to avoid extra calculations by clients. This feature is only of benefit if the user is willing to trust the file, because verifying the file is correct From b18ef13a3fc5ed6e77980f67a7f3ca89050c1715 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 13 Jul 2018 16:30:46 +0000 Subject: [PATCH 23/23] coccinelle: update commit.cocci A recent patch series renamed the get_commit_tree_from_graph method but forgot to update the coccinelle script that exempted it from rules regarding accesses to 'maybe_tree'. This fixes that oversight to bring the coccinelle scripts back to a good state. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- contrib/coccinelle/commit.cocci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/coccinelle/commit.cocci b/contrib/coccinelle/commit.cocci index a7e9215ffc..aec3345adb 100644 --- a/contrib/coccinelle/commit.cocci +++ b/contrib/coccinelle/commit.cocci @@ -12,7 +12,7 @@ expression c; // These excluded functions must access c->maybe_tree direcly. @@ -identifier f !~ "^(get_commit_tree|get_commit_tree_in_graph|load_tree_for_commit)$"; +identifier f !~ "^(get_commit_tree|get_commit_tree_in_graph_one|load_tree_for_commit)$"; expression c; @@ f(...) {...