From 0a489b0680b841d3e7714be53b263ff190c39193 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 13 Aug 2015 13:02:52 -0500 Subject: [PATCH 1/3] prepare_packed_git(): refactor garbage reporting in pack directory The hook to report "garbage" files in $GIT_OBJECT_DIRECTORY/pack/ could be generic but is too specific to count-object's needs. Move the part to produce human-readable messages to count-objects, and refine the interface to callback with the "bits" with values defined in the cache.h header file, so that other callers (e.g. prune) can later use the same mechanism to enumerate different kinds of garbage files and do something intelligent about them, other than reporting in textual messages. Signed-off-by: Junio C Hamano --- builtin/count-objects.c | 26 ++++++++++++++++++++++++-- cache.h | 7 +++++-- path.c | 2 +- sha1_file.c | 23 ++++++----------------- 4 files changed, 36 insertions(+), 22 deletions(-) diff --git a/builtin/count-objects.c b/builtin/count-objects.c index ad0c79954a..ba9291944f 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -15,9 +15,31 @@ static int verbose; static unsigned long loose, packed, packed_loose; static off_t loose_size; -static void real_report_garbage(const char *desc, const char *path) +static const char *bits_to_msg(unsigned seen_bits) +{ + switch (seen_bits) { + case 0: + return "no corresponding .idx or .pack"; + case PACKDIR_FILE_GARBAGE: + return "garbage found"; + case PACKDIR_FILE_PACK: + return "no corresponding .idx"; + case PACKDIR_FILE_IDX: + return "no corresponding .pack"; + case PACKDIR_FILE_PACK|PACKDIR_FILE_IDX: + default: + return NULL; + } +} + +static void real_report_garbage(unsigned seen_bits, const char *path) { struct stat st; + const char *desc = bits_to_msg(seen_bits); + + if (!desc) + return; + if (!stat(path, &st)) size_garbage += st.st_size; warning("%s: %s", desc, path); @@ -27,7 +49,7 @@ static void real_report_garbage(const char *desc, const char *path) static void loose_garbage(const char *path) { if (verbose) - report_garbage("garbage found", path); + report_garbage(PACKDIR_FILE_GARBAGE, path); } static int count_loose(const unsigned char *sha1, const char *path, void *data) diff --git a/cache.h b/cache.h index 4f554664c5..c6de1e9c0e 100644 --- a/cache.h +++ b/cache.h @@ -1255,8 +1255,11 @@ struct pack_entry { extern struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path); -/* A hook for count-objects to report invalid files in pack directory */ -extern void (*report_garbage)(const char *desc, const char *path); +/* A hook to report invalid files in pack directory */ +#define PACKDIR_FILE_PACK 1 +#define PACKDIR_FILE_IDX 2 +#define PACKDIR_FILE_GARBAGE 4 +extern void (*report_garbage)(unsigned seen_bits, const char *path); extern void prepare_packed_git(void); extern void reprepare_packed_git(void); diff --git a/path.c b/path.c index 10f4cbf6b7..75ec2363fd 100644 --- a/path.c +++ b/path.c @@ -143,7 +143,7 @@ void report_linked_checkout_garbage(void) strbuf_setlen(&sb, len); strbuf_addstr(&sb, path); if (file_exists(sb.buf)) - report_garbage("unused in linked checkout", sb.buf); + report_garbage(PACKDIR_FILE_GARBAGE, sb.buf); } strbuf_release(&sb); } diff --git a/sha1_file.c b/sha1_file.c index 1cee438422..0c0b652949 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1183,27 +1183,16 @@ void install_packed_git(struct packed_git *pack) packed_git = pack; } -void (*report_garbage)(const char *desc, const char *path); +void (*report_garbage)(unsigned seen_bits, const char *path); static void report_helper(const struct string_list *list, int seen_bits, int first, int last) { - const char *msg; - switch (seen_bits) { - case 0: - msg = "no corresponding .idx or .pack"; - break; - case 1: - msg = "no corresponding .idx"; - break; - case 2: - msg = "no corresponding .pack"; - break; - default: + if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX)) return; - } + for (; first < last; first++) - report_garbage(msg, list->items[first].string); + report_garbage(seen_bits, list->items[first].string); } static void report_pack_garbage(struct string_list *list) @@ -1226,7 +1215,7 @@ static void report_pack_garbage(struct string_list *list) if (baselen == -1) { const char *dot = strrchr(path, '.'); if (!dot) { - report_garbage("garbage found", path); + report_garbage(PACKDIR_FILE_GARBAGE, path); continue; } baselen = dot - path + 1; @@ -1298,7 +1287,7 @@ static void prepare_packed_git_one(char *objdir, int local) ends_with(de->d_name, ".keep")) string_list_append(&garbage, path.buf); else - report_garbage("garbage found", path.buf); + report_garbage(PACKDIR_FILE_GARBAGE, path.buf); } closedir(dir); report_pack_garbage(&garbage); From e6d65c9a47a0f245614d34246fe28782a6d2209b Mon Sep 17 00:00:00 2001 From: Doug Kelly Date: Tue, 3 Nov 2015 21:05:07 -0600 Subject: [PATCH 2/3] t5304: test cleaning pack garbage Pack garbage, noticeably stale .idx files, can be cleaned up during a garbage collection. This tests to ensure such garbage is properly cleaned up. Note that the prior test for checking pack garbage with count-objects left some stale garbage after the test exited. This has also been corrected. Signed-off-by: Doug Kelly Signed-off-by: Junio C Hamano --- t/t5304-prune.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/t/t5304-prune.sh b/t/t5304-prune.sh index 023d7c6f7b..029751544f 100755 --- a/t/t5304-prune.sh +++ b/t/t5304-prune.sh @@ -219,6 +219,7 @@ test_expect_success 'gc: prune old objects after local clone' ' test_expect_success 'garbage report in count-objects -v' ' test_when_finished "rm -f .git/objects/pack/fake*" && + test_when_finished "rm -f .git/objects/pack/foo*" && : >.git/objects/pack/foo && : >.git/objects/pack/foo.bar && : >.git/objects/pack/foo.keep && @@ -244,6 +245,26 @@ EOF test_cmp expected actual ' +test_expect_failure 'clean pack garbage with gc' ' + test_when_finished "rm -f .git/objects/pack/fake*" && + test_when_finished "rm -f .git/objects/pack/foo*" && + : >.git/objects/pack/foo.keep && + : >.git/objects/pack/foo.pack && + : >.git/objects/pack/fake.idx && + : >.git/objects/pack/fake2.keep && + : >.git/objects/pack/fake2.idx && + : >.git/objects/pack/fake3.keep && + git gc && + git count-objects -v 2>stderr && + grep "^warning:" stderr | sort >actual && + cat >expected <<\EOF && +warning: no corresponding .idx or .pack: .git/objects/pack/fake3.keep +warning: no corresponding .idx: .git/objects/pack/foo.keep +warning: no corresponding .idx: .git/objects/pack/foo.pack +EOF + test_cmp expected actual +' + test_expect_success 'prune .git/shallow' ' SHA1=`echo hi|git commit-tree HEAD^{tree}` && echo $SHA1 >.git/shallow && From 478f34d2b6ea13d5f56ecec04de7ca7ce18367c0 Mon Sep 17 00:00:00 2001 From: Doug Kelly Date: Tue, 3 Nov 2015 21:05:08 -0600 Subject: [PATCH 3/3] gc: remove garbage .idx files from pack dir Add a custom report_garbage handler to collect and remove garbage .idx files from the pack directory. Signed-off-by: Doug Kelly Signed-off-by: Junio C Hamano --- builtin/gc.c | 21 +++++++++++++++++++++ t/t5304-prune.sh | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/builtin/gc.c b/builtin/gc.c index 4957c39032..203265db76 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -57,6 +57,22 @@ static void remove_pidfile_on_signal(int signo) raise(signo); } +static struct string_list pack_garbage = STRING_LIST_INIT_DUP; + +static void clean_pack_garbage(void) +{ + int i; + for (i = 0; i < pack_garbage.nr; i++) + unlink_or_warn(pack_garbage.items[i].string); + string_list_clear(&pack_garbage, 0); +} + +static void report_pack_garbage(unsigned seen_bits, const char *path) +{ + if (seen_bits == PACKDIR_FILE_IDX) + string_list_append(&pack_garbage, path); +} + static void git_config_date_string(const char *key, const char **output) { if (git_config_get_string_const(key, output)) @@ -372,6 +388,11 @@ int cmd_gc(int argc, const char **argv, const char *prefix) if (run_command_v_opt(rerere.argv, RUN_GIT_CMD)) return error(FAILED_RUN, rerere.argv[0]); + report_garbage = report_pack_garbage; + reprepare_packed_git(); + if (pack_garbage.nr > 0) + clean_pack_garbage(); + if (auto_gc && too_many_loose_objects()) warning(_("There are too many unreachable loose objects; " "run 'git prune' to remove them.")); diff --git a/t/t5304-prune.sh b/t/t5304-prune.sh index 029751544f..def203c724 100755 --- a/t/t5304-prune.sh +++ b/t/t5304-prune.sh @@ -245,7 +245,7 @@ EOF test_cmp expected actual ' -test_expect_failure 'clean pack garbage with gc' ' +test_expect_success 'clean pack garbage with gc' ' test_when_finished "rm -f .git/objects/pack/fake*" && test_when_finished "rm -f .git/objects/pack/foo*" && : >.git/objects/pack/foo.keep &&