diff --git a/builtin/gc.c b/builtin/gc.c index 5f877b097a..8d22361fa9 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1035,6 +1035,46 @@ static int multi_pack_index_expire(struct maintenance_run_opts *opts) return 0; } +#define TWO_GIGABYTES (INT32_MAX) + +static off_t get_auto_pack_size(void) +{ + /* + * The "auto" value is special: we optimize for + * one large pack-file (i.e. from a clone) and + * expect the rest to be small and they can be + * repacked quickly. + * + * The strategy we select here is to select a + * size that is one more than the second largest + * pack-file. This ensures that we will repack + * at least two packs if there are three or more + * packs. + */ + off_t max_size = 0; + off_t second_largest_size = 0; + off_t result_size; + struct packed_git *p; + struct repository *r = the_repository; + + reprepare_packed_git(r); + for (p = get_all_packs(r); p; p = p->next) { + if (p->pack_size > max_size) { + second_largest_size = max_size; + max_size = p->pack_size; + } else if (p->pack_size > second_largest_size) + second_largest_size = p->pack_size; + } + + result_size = second_largest_size + 1; + + /* But limit ourselves to a batch size of 2g */ + if (result_size > TWO_GIGABYTES) + result_size = TWO_GIGABYTES; + + return result_size; +} + static int multi_pack_index_repack(struct maintenance_run_opts *opts) { struct child_process child = CHILD_PROCESS_INIT; @@ -1045,7 +1085,8 @@ static int multi_pack_index_repack(struct maintenance_run_opts *opts) if (opts->quiet) strvec_push(&child.args, "--no-progress"); - strvec_push(&child.args, "--batch-size=0"); + strvec_pushf(&child.args, "--batch-size=%"PRIuMAX, + (uintmax_t)get_auto_pack_size()); close_object_store(the_repository->objects); diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index a2db2291b0..9e6ea23f35 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -181,10 +181,42 @@ test_expect_success 'incremental-repack task' ' test_line_count = 4 packs-between && # the job deletes the two old packs, and does not write - # a new one because only one pack remains. + # a new one because the batch size is not high enough to + # pack the largest pack-file. git maintenance run --task=incremental-repack && ls .git/objects/pack/*.pack >packs-after && - test_line_count = 1 packs-after + test_line_count = 2 packs-after +' + +test_expect_success EXPENSIVE 'incremental-repack 2g limit' ' + for i in $(test_seq 1 5) + do + test-tool genrandom foo$i $((512 * 1024 * 1024 + 1)) >>big || + return 1 + done && + git add big && + git commit -m "Add big file (1)" && + + # ensure any possible loose objects are in a pack-file + git maintenance run --task=loose-objects && + + rm big && + for i in $(test_seq 6 10) + do + test-tool genrandom foo$i $((512 * 1024 * 1024 + 1)) >>big || + return 1 + done && + git add big && + git commit -m "Add big file (2)" && + + # ensure any possible loose objects are in a pack-file + git maintenance run --task=loose-objects && + + # Now run the incremental-repack task and check the batch-size + GIT_TRACE2_EVENT="$(pwd)/run-2g.txt" git maintenance run \ + --task=incremental-repack 2>/dev/null && + test_subcommand git multi-pack-index repack \ + --no-progress --batch-size=2147483647