From 3173a94d577e14501039e85f333e9c98cbd92667 Mon Sep 17 00:00:00 2001 From: Jiang Xin Date: Sat, 2 Feb 2019 21:30:12 +0800 Subject: [PATCH 1/6] t5323: test cases for git-pack-redundant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add test cases for git pack-redundant to validate new algorithm for git pack-redundant. Signed-off-by: Jiang Xin Reviewed-by: SZEDER Gábor Helped-by: Eric Sunshine Reviewed-by: Sun Chao Signed-off-by: Junio C Hamano --- t/t5323-pack-redundant.sh | 467 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 467 insertions(+) create mode 100755 t/t5323-pack-redundant.sh diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh new file mode 100755 index 0000000000..5929c1da49 --- /dev/null +++ b/t/t5323-pack-redundant.sh @@ -0,0 +1,467 @@ +#!/bin/sh +# +# Copyright (c) 2018 Jiang Xin +# + +test_description='Test git pack-redundant + +In order to test git-pack-redundant, we will create a number of objects and +packs in the repository `master.git`. The relationship between packs (P1-P8) +and objects (T, A-R) is showed in the following chart. Objects of a pack will +be marked with letter x, while objects of redundant packs will be marked with +exclamation point, and redundant pack itself will be marked with asterisk. + + | T A B C D E F G H I J K L M N O P Q R + ----+-------------------------------------- + P1 | x x x x x x x x + P2* | ! ! ! ! ! ! ! + P3 | x x x x x x + P4* | ! ! ! ! ! + P5 | x x x x + P6* | ! ! ! + P7 | x x + P8* | ! + ----+-------------------------------------- + ALL | x x x x x x x x x x x x x x x x x x x + +Another repository `shared.git` has unique objects (X-Z), while other objects +(marked with letter s) are shared through alt-odb (of `master.git`). The +relationship between packs and objects is as follows: + + | T A B C D E F G H I J K L M N O P Q R X Y Z + ----+---------------------------------------------- + Px1 | s s s x x x + Px2 | s s s x x x +' + +. ./test-lib.sh + +master_repo=master.git +shared_repo=shared.git + +# Create commits in and assign each commit's oid to shell variables +# given in the arguments (A, B, and C). E.g.: +# +# create_commits_in A B C +# +# NOTE: Avoid calling this function from a subshell since variable +# assignments will disappear when subshell exits. +create_commits_in () { + repo="$1" && + if ! parent=$(git -C "$repo" rev-parse HEAD^{} 2>/dev/null) + then + parent= + fi && + T=$(git -C "$repo" write-tree) && + shift && + while test $# -gt 0 + do + name=$1 && + test_tick && + if test -z "$parent" + then + oid=$(echo $name | git -C "$repo" commit-tree $T) + else + oid=$(echo $name | git -C "$repo" commit-tree -p $parent $T) + fi && + eval $name=$oid && + parent=$oid && + shift || + return 1 + done && + git -C "$repo" update-ref refs/heads/master $oid +} + +# Create pack in and assign pack id to variable given in the 2nd argument +# (). Commits in the pack will be read from stdin. E.g.: +# +# create_pack_in <<-EOF +# ... +# EOF +# +# NOTE: commits from stdin should be given using heredoc, not using pipe, and +# avoid calling this function from a subshell since variable assignments will +# disappear when subshell exits. +create_pack_in () { + repo="$1" && + name="$2" && + pack=$(git -C "$repo/objects/pack" pack-objects -q pack) && + eval $name=$pack && + eval P$pack=$name:$pack +} + +format_packfiles () { + sed \ + -e "s#.*/pack-\(.*\)\.idx#\1#" \ + -e "s#.*/pack-\(.*\)\.pack#\1#" | + sort -u | + while read p + do + if test -z "$(eval echo \${P$p})" + then + echo $p + else + eval echo "\${P$p}" + fi + done | + sort +} + +test_expect_success 'setup master repo' ' + git init --bare "$master_repo" && + create_commits_in "$master_repo" A B C D E F G H I J K L M N O P Q R +' + +############################################################################# +# Chart of packs and objects for this test case +# +# | T A B C D E F G H I J K L M N O P Q R +# ----+-------------------------------------- +# P1 | x x x x x x x x +# P2 | x x x x x x x +# P3 | x x x x x x +# ----+-------------------------------------- +# ALL | x x x x x x x x x x x x x x x +# +############################################################################# +test_expect_success 'master: no redundant for pack 1, 2, 3' ' + create_pack_in "$master_repo" P1 <<-EOF && + $T + $A + $B + $C + $D + $E + $F + $R + EOF + create_pack_in "$master_repo" P2 <<-EOF && + $B + $C + $D + $E + $G + $H + $I + EOF + create_pack_in "$master_repo" P3 <<-EOF && + $F + $I + $J + $K + $L + $M + EOF + ( + cd "$master_repo" && + git pack-redundant --all >out && + test_must_be_empty out + ) +' + +############################################################################# +# Chart of packs and objects for this test case +# +# | T A B C D E F G H I J K L M N O P Q R +# ----+-------------------------------------- +# P1 | x x x x x x x x +# P2* | ! ! ! ! ! ! ! +# P3 | x x x x x x +# P4 | x x x x x +# P5 | x x x x +# ----+-------------------------------------- +# ALL | x x x x x x x x x x x x x x x x x x +# +############################################################################# +test_expect_success 'master: one of pack-2/pack-3 is redundant' ' + create_pack_in "$master_repo" P4 <<-EOF && + $J + $K + $L + $M + $P + EOF + create_pack_in "$master_repo" P5 <<-EOF && + $G + $H + $N + $O + EOF + ( + cd "$master_repo" && + cat >expect <<-EOF && + P2:$P2 + EOF + git pack-redundant --all >out && + format_packfiles actual && + test_cmp expect actual + ) +' + +############################################################################# +# Chart of packs and objects for this test case +# +# | T A B C D E F G H I J K L M N O P Q R +# ----+-------------------------------------- +# P1 | x x x x x x x x +# P2* | ! ! ! ! ! ! ! +# P3 | x x x x x x +# P4* | ! ! ! ! ! +# P5 | x x x x +# P6* | ! ! ! +# P7 | x x +# ----+-------------------------------------- +# ALL | x x x x x x x x x x x x x x x x x x x +# +############################################################################# +test_expect_success 'master: pack 2, 4, and 6 are redundant' ' + create_pack_in "$master_repo" P6 <<-EOF && + $N + $O + $Q + EOF + create_pack_in "$master_repo" P7 <<-EOF && + $P + $Q + EOF + ( + cd "$master_repo" && + cat >expect <<-EOF && + P2:$P2 + P4:$P4 + P6:$P6 + EOF + git pack-redundant --all >out && + format_packfiles actual && + test_cmp expect actual + ) +' + +############################################################################# +# Chart of packs and objects for this test case +# +# | T A B C D E F G H I J K L M N O P Q R +# ----+-------------------------------------- +# P1 | x x x x x x x x +# P2* | ! ! ! ! ! ! ! +# P3 | x x x x x x +# P4* | ! ! ! ! ! +# P5 | x x x x +# P6* | ! ! ! +# P7 | x x +# P8* | ! +# ----+-------------------------------------- +# ALL | x x x x x x x x x x x x x x x x x x x +# +############################################################################# +test_expect_success 'master: pack-8 (subset of pack-1) is also redundant' ' + create_pack_in "$master_repo" P8 <<-EOF && + $A + EOF + ( + cd "$master_repo" && + cat >expect <<-EOF && + P2:$P2 + P4:$P4 + P6:$P6 + P8:$P8 + EOF + git pack-redundant --all >out && + format_packfiles actual && + test_cmp expect actual + ) +' + +test_expect_success 'master: clean loose objects' ' + ( + cd "$master_repo" && + git prune-packed && + find objects -type f | sed -e "/objects\/pack\//d" >out && + test_must_be_empty out + ) +' + +test_expect_success 'master: remove redundant packs and pass fsck' ' + ( + cd "$master_repo" && + git pack-redundant --all | xargs rm && + git fsck && + git pack-redundant --all >out && + test_must_be_empty out + ) +' + +# The following test cases will execute inside `shared.git`, instead of +# inside `master.git`. +test_expect_success 'setup shared.git' ' + git clone --mirror "$master_repo" "$shared_repo" && + ( + cd "$shared_repo" && + printf "../../$master_repo/objects\n" >objects/info/alternates + ) +' + +test_expect_success 'shared: all packs are redundant, but no output without --alt-odb' ' + ( + cd "$shared_repo" && + git pack-redundant --all >out && + test_must_be_empty out + ) +' + +############################################################################# +# Chart of packs and objects for this test case +# +# ================ master.git =============== +# | T A B C D E F G H I J K L M N O P Q R <----------+ +# ----+-------------------------------------- | +# P1 | x x x x x x x x | +# P3 | x x x x x x | +# P5 | x x x x | +# P7 | x x | +# ----+-------------------------------------- | +# ALL | x x x x x x x x x x x x x x x x x x x | +# | +# | +# ================ shared.git =============== | +# | T A B C D E F G H I J K L M N O P Q R +# ----+-------------------------------------- +# P1* | s s s s s s s s +# P3* | s s s s s s +# P5* | s s s s +# P7* | s s +# ----+-------------------------------------- +# ALL | x x x x x x x x x x x x x x x x x x x +# +############################################################################# +test_expect_success 'shared: show redundant packs in stderr for verbose mode' ' + ( + cd "$shared_repo" && + cat >expect <<-EOF && + P1:$P1 + P3:$P3 + P5:$P5 + P7:$P7 + EOF + git pack-redundant --all --verbose >out 2>out.err && + test_must_be_empty out && + grep "pack$" out.err | format_packfiles >actual && + test_cmp expect actual + ) +' + +test_expect_success 'shared: remove redundant packs, no packs left' ' + ( + cd "$shared_repo" && + cat >expect <<-EOF && + fatal: Zero packs found! + EOF + git pack-redundant --all --alt-odb | xargs rm && + git fsck && + test_must_fail git pack-redundant --all --alt-odb >actual 2>&1 && + test_cmp expect actual + ) +' + +test_expect_success 'shared: create new objects and packs' ' + create_commits_in "$shared_repo" X Y Z && + create_pack_in "$shared_repo" Px1 <<-EOF && + $X + $Y + $Z + $A + $B + $C + EOF + create_pack_in "$shared_repo" Px2 <<-EOF + $X + $Y + $Z + $D + $E + $F + EOF +' + +test_expect_success 'shared: no redundant without --alt-odb' ' + ( + cd "$shared_repo" && + git pack-redundant --all >out && + test_must_be_empty out + ) +' + +############################################################################# +# Chart of packs and objects for this test case +# +# ================ master.git =============== +# | T A B C D E F G H I J K L M N O P Q R <----------------+ +# ----+-------------------------------------- | +# P1 | x x x x x x x x | +# P3 | x x x x x x | +# P5 | x x x x | +# P7 | x x | +# ----+-------------------------------------- | +# ALL | x x x x x x x x x x x x x x x x x x x | +# | +# | +# ================ shared.git ======================= | +# | T A B C D E F G H I J K L M N O P Q R X Y Z +# ----+---------------------------------------------- +# Px1 | s s s x x x +# Px2*| s s s ! ! ! +# ----+---------------------------------------------- +# ALL | s s s s s s s s s s s s s s s s s s s x x x +# +############################################################################# +test_expect_success 'shared: one pack is redundant with --alt-odb' ' + ( + cd "$shared_repo" && + git pack-redundant --all --alt-odb >out && + format_packfiles actual && + test_line_count = 1 actual + ) +' + +############################################################################# +# Chart of packs and objects for this test case +# +# ================ master.git =============== +# | T A B C D E F G H I J K L M N O P Q R <----------------+ +# ----+-------------------------------------- | +# P1 | x x x x x x x x | +# P3 | x x x x x x | +# P5 | x x x x | +# P7 | x x | +# ----+-------------------------------------- | +# ALL | x x x x x x x x x x x x x x x x x x x | +# | +# | +# ================ shared.git ======================= | +# | T A B C D E F G H I J K L M N O P Q R X Y Z +# ----+---------------------------------------------- +# Px1*| s s s i i i +# Px2*| s s s i i i +# ----+---------------------------------------------- +# ALL | s s s s s s s s s s s s s s s s s s s i i i +# (ignored objects, marked with i) +# +############################################################################# +test_expect_success 'shared: ignore unique objects and all two packs are redundant' ' + ( + cd "$shared_repo" && + cat >expect <<-EOF && + Px1:$Px1 + Px2:$Px2 + EOF + git pack-redundant --all --alt-odb >out <<-EOF && + $X + $Y + $Z + EOF + format_packfiles actual && + test_cmp expect actual + ) +' + +test_done From 301117764041101ae228e8e33c12c0b232b06408 Mon Sep 17 00:00:00 2001 From: Jiang Xin Date: Sat, 2 Feb 2019 21:30:13 +0800 Subject: [PATCH 2/6] pack-redundant: delay creation of unique_objects Instead of initializing unique_objects in `add_pack()`, copy from all_objects in `cmp_two_packs()`, when unwanted objects are removed from all_objects. This will save memory (no allocate memory for alt-odb packs), and run `llist_sorted_difference_inplace()` only once when removing ignored objects and removing objects in alt-odb in `scan_alt_odb_packs()`. Signed-off-by: Jiang Xin Signed-off-by: Junio C Hamano --- builtin/pack-redundant.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c index cf9a9aabd4..f7dab0ec60 100644 --- a/builtin/pack-redundant.c +++ b/builtin/pack-redundant.c @@ -254,6 +254,11 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2) struct llist_item *p1_hint = NULL, *p2_hint = NULL; const unsigned int hashsz = the_hash_algo->rawsz; + if (!p1->unique_objects) + p1->unique_objects = llist_copy(p1->all_objects); + if (!p2->unique_objects) + p2->unique_objects = llist_copy(p2->all_objects); + p1_base = p1->pack->index_data; p2_base = p2->pack->index_data; p1_base += 256 * 4 + ((p1->pack->index_version < 2) ? 4 : 8); @@ -536,7 +541,7 @@ static void scan_alt_odb_packs(void) while (alt) { local = local_packs; while (local) { - llist_sorted_difference_inplace(local->unique_objects, + llist_sorted_difference_inplace(local->all_objects, alt->all_objects); local = local->next; } @@ -567,8 +572,7 @@ static struct pack_list * add_pack(struct packed_git *p) llist_insert_back(l.all_objects, (const struct object_id *)(base + off)); off += step; } - /* this list will be pruned in cmp_two_packs later */ - l.unique_objects = llist_copy(l.all_objects); + l.unique_objects = NULL; if (p->pack_local) return pack_list_insert(&local_packs, &l); else @@ -646,7 +650,6 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix) load_all_objects(); - cmp_local_packs(); if (alt_odb) scan_alt_odb_packs(); @@ -663,10 +666,12 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix) llist_sorted_difference_inplace(all_objects, ignore); pl = local_packs; while (pl) { - llist_sorted_difference_inplace(pl->unique_objects, ignore); + llist_sorted_difference_inplace(pl->all_objects, ignore); pl = pl->next; } + cmp_local_packs(); + minimize(&min); if (verbose) { From 8822859363a86ade287c7deb07224af345a699f4 Mon Sep 17 00:00:00 2001 From: Sun Chao Date: Sat, 2 Feb 2019 21:30:14 +0800 Subject: [PATCH 3/6] pack-redundant: delete redundant code The objects in alt-odb are removed from `all_objects` twice in `load_all_objects` and `scan_alt_odb_packs`, remove it from the later function. Signed-off-by: Sun Chao Signed-off-by: Jiang Xin Signed-off-by: Junio C Hamano --- builtin/pack-redundant.c | 1 - 1 file changed, 1 deletion(-) diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c index f7dab0ec60..4a06f057dd 100644 --- a/builtin/pack-redundant.c +++ b/builtin/pack-redundant.c @@ -545,7 +545,6 @@ static void scan_alt_odb_packs(void) alt->all_objects); local = local->next; } - llist_sorted_difference_inplace(all_objects, alt->all_objects); alt = alt->next; } } From 3084a01e5efefc57d8f52562c82fd41799379697 Mon Sep 17 00:00:00 2001 From: Sun Chao Date: Sat, 2 Feb 2019 21:30:15 +0800 Subject: [PATCH 4/6] pack-redundant: new algorithm to find min packs When calling `git pack-redundant --all`, if there are too many local packs and too many redundant objects within them, the too deep iteration of `get_permutations` will exhaust all the resources, and the process of `git pack-redundant` will be killed. The following script could create a repository with too many redundant packs, and running `git pack-redundant --all` in the `test.git` repo will die soon. #!/bin/sh repo="$(pwd)/test.git" work="$(pwd)/test" i=1 max=199 if test -d "$repo" || test -d "$work"; then echo >&2 "ERROR: '$repo' or '$work' already exist" exit 1 fi git init -q --bare "$repo" git --git-dir="$repo" config gc.auto 0 git --git-dir="$repo" config transfer.unpackLimit 0 git clone -q "$repo" "$work" 2>/dev/null while :; do cd "$work" echo "loop $i: $(date +%s)" >$i git add $i git commit -q -sm "loop $i" git push -q origin HEAD:master printf "\rCreate pack %4d/%d\t" $i $max if test $i -ge $max; then break; fi cd "$repo" git repack -q if test $(($i % 2)) -eq 0; then git repack -aq pack=$(ls -t $repo/objects/pack/*.pack | head -1) touch "${pack%.pack}.keep" fi i=$((i+1)) done printf "\ndone\n" To get the `min` unique pack list, we can replace the iteration in `minimize` function with a new algorithm, and this could solve this issue: 1. Get the unique and non_uniqe packs, add the unique packs to the `min` list. 2. Remove the objects of unique packs from non_unique packs, then each object left in the non_unique packs will have at least two copies. 3. Sort the non_unique packs by the objects' size, more objects first, and add the first non_unique pack to `min` list. 4. Drop the duplicated objects from other packs in the ordered non_unique pack list, and repeat step 3. Some test cases will fail on Mac OS X. Mark them and will resolve in later commit. Original PR and discussions: https://github.com/jiangxin/git/pull/25 Signed-off-by: Sun Chao Signed-off-by: Jiang Xin Signed-off-by: Junio C Hamano --- builtin/pack-redundant.c | 192 +++++++++++++------------------------- t/t5323-pack-redundant.sh | 12 +-- 2 files changed, 72 insertions(+), 132 deletions(-) diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c index 4a06f057dd..d6d9a66e46 100644 --- a/builtin/pack-redundant.c +++ b/builtin/pack-redundant.c @@ -35,11 +35,6 @@ static struct pack_list { struct llist *all_objects; } *local_packs = NULL, *altodb_packs = NULL; -struct pll { - struct pll *next; - struct pack_list *pl; -}; - static struct llist_item *free_nodes; static inline void llist_item_put(struct llist_item *item) @@ -63,15 +58,6 @@ static inline struct llist_item *llist_item_get(void) return new_item; } -static void llist_free(struct llist *list) -{ - while ((list->back = list->front)) { - list->front = list->front->next; - llist_item_put(list->back); - } - free(list); -} - static inline void llist_init(struct llist **list) { *list = xmalloc(sizeof(struct llist)); @@ -290,78 +276,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2) } } -static void pll_free(struct pll *l) -{ - struct pll *old; - struct pack_list *opl; - - while (l) { - old = l; - while (l->pl) { - opl = l->pl; - l->pl = opl->next; - free(opl); - } - l = l->next; - free(old); - } -} - -/* all the permutations have to be free()d at the same time, - * since they refer to each other - */ -static struct pll * get_permutations(struct pack_list *list, int n) -{ - struct pll *subset, *ret = NULL, *new_pll = NULL; - - if (list == NULL || pack_list_size(list) < n || n == 0) - return NULL; - - if (n == 1) { - while (list) { - new_pll = xmalloc(sizeof(*new_pll)); - new_pll->pl = NULL; - pack_list_insert(&new_pll->pl, list); - new_pll->next = ret; - ret = new_pll; - list = list->next; - } - return ret; - } - - while (list->next) { - subset = get_permutations(list->next, n - 1); - while (subset) { - new_pll = xmalloc(sizeof(*new_pll)); - new_pll->pl = subset->pl; - pack_list_insert(&new_pll->pl, list); - new_pll->next = ret; - ret = new_pll; - subset = subset->next; - } - list = list->next; - } - return ret; -} - -static int is_superset(struct pack_list *pl, struct llist *list) -{ - struct llist *diff; - - diff = llist_copy(list); - - while (pl) { - llist_sorted_difference_inplace(diff, pl->all_objects); - if (diff->size == 0) { /* we're done */ - llist_free(diff); - return 1; - } - pl = pl->next; - } - llist_free(diff); - return 0; -} - static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2) { size_t ret = 0; @@ -426,14 +340,52 @@ static inline off_t pack_set_bytecount(struct pack_list *pl) return ret; } +static int cmp_pack_list_reverse(const void *a, const void *b) +{ + struct pack_list *pl_a = *((struct pack_list **)a); + struct pack_list *pl_b = *((struct pack_list **)b); + size_t sz_a = pl_a->all_objects->size; + size_t sz_b = pl_b->all_objects->size; + + if (sz_a == sz_b) + return 0; + else if (sz_a < sz_b) + return 1; + else + return -1; +} + +/* Sort pack_list, greater size of all_objects first */ +static void sort_pack_list(struct pack_list **pl) +{ + struct pack_list **ary, *p; + int i; + size_t n = pack_list_size(*pl); + + if (n < 2) + return; + + /* prepare an array of packed_list for easier sorting */ + ary = xcalloc(n, sizeof(struct pack_list *)); + for (n = 0, p = *pl; p; p = p->next) + ary[n++] = p; + + QSORT(ary, n, cmp_pack_list_reverse); + + /* link them back again */ + for (i = 0; i < n - 1; i++) + ary[i]->next = ary[i + 1]; + ary[n - 1]->next = NULL; + *pl = ary[0]; + + free(ary); +} + + static void minimize(struct pack_list **min) { - struct pack_list *pl, *unique = NULL, - *non_unique = NULL, *min_perm = NULL; - struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm; - struct llist *missing; - off_t min_perm_size = 0, perm_size; - int n; + struct pack_list *pl, *unique = NULL, *non_unique = NULL; + struct llist *missing, *unique_pack_objects; pl = local_packs; while (pl) { @@ -451,49 +403,37 @@ static void minimize(struct pack_list **min) pl = pl->next; } + *min = unique; + /* return if there are no objects missing from the unique set */ if (missing->size == 0) { - *min = unique; free(missing); return; } - /* find the permutations which contain all missing objects */ - for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) { - perm_all = perm = get_permutations(non_unique, n); - while (perm) { - if (is_superset(perm->pl, missing)) { - new_perm = xmalloc(sizeof(struct pll)); - memcpy(new_perm, perm, sizeof(struct pll)); - new_perm->next = perm_ok; - perm_ok = new_perm; - } - perm = perm->next; - } - if (perm_ok) - break; - pll_free(perm_all); - } - if (perm_ok == NULL) - die("Internal error: No complete sets found!"); + unique_pack_objects = llist_copy(all_objects); + llist_sorted_difference_inplace(unique_pack_objects, missing); - /* find the permutation with the smallest size */ - perm = perm_ok; - while (perm) { - perm_size = pack_set_bytecount(perm->pl); - if (!min_perm_size || min_perm_size > perm_size) { - min_perm_size = perm_size; - min_perm = perm->pl; - } - perm = perm->next; - } - *min = min_perm; - /* add the unique packs to the list */ - pl = unique; + /* remove unique pack objects from the non_unique packs */ + pl = non_unique; while (pl) { - pack_list_insert(min, pl); + llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects); pl = pl->next; } + + while (non_unique) { + /* sort the non_unique packs, greater size of all_objects first */ + sort_pack_list(&non_unique); + if (non_unique->all_objects->size == 0) + break; + + pack_list_insert(min, non_unique); + + for (pl = non_unique->next; pl && pl->all_objects->size > 0; pl = pl->next) + llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects); + + non_unique = non_unique->next; + } } static void load_all_objects(void) @@ -606,7 +546,7 @@ static void load_all(void) int cmd_pack_redundant(int argc, const char **argv, const char *prefix) { int i; - struct pack_list *min, *red, *pl; + struct pack_list *min = NULL, *red, *pl; struct llist *ignore; struct object_id *oid; char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */ diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh index 5929c1da49..e859965489 100755 --- a/t/t5323-pack-redundant.sh +++ b/t/t5323-pack-redundant.sh @@ -173,7 +173,7 @@ test_expect_success 'master: no redundant for pack 1, 2, 3' ' # ALL | x x x x x x x x x x x x x x x x x x # ############################################################################# -test_expect_success 'master: one of pack-2/pack-3 is redundant' ' +test_expect_failure 'master: one of pack-2/pack-3 is redundant (failed on Mac)' ' create_pack_in "$master_repo" P4 <<-EOF && $J $K @@ -214,7 +214,7 @@ test_expect_success 'master: one of pack-2/pack-3 is redundant' ' # ALL | x x x x x x x x x x x x x x x x x x x # ############################################################################# -test_expect_success 'master: pack 2, 4, and 6 are redundant' ' +test_expect_failure 'master: pack 2, 4, and 6 are redundant (failed on Mac)' ' create_pack_in "$master_repo" P6 <<-EOF && $N $O @@ -254,7 +254,7 @@ test_expect_success 'master: pack 2, 4, and 6 are redundant' ' # ALL | x x x x x x x x x x x x x x x x x x x # ############################################################################# -test_expect_success 'master: pack-8 (subset of pack-1) is also redundant' ' +test_expect_failure 'master: pack-8 (subset of pack-1) is also redundant (failed on Mac)' ' create_pack_in "$master_repo" P8 <<-EOF && $A EOF @@ -281,7 +281,7 @@ test_expect_success 'master: clean loose objects' ' ) ' -test_expect_success 'master: remove redundant packs and pass fsck' ' +test_expect_failure 'master: remove redundant packs and pass fsck (failed on Mac)' ' ( cd "$master_repo" && git pack-redundant --all | xargs rm && @@ -301,7 +301,7 @@ test_expect_success 'setup shared.git' ' ) ' -test_expect_success 'shared: all packs are redundant, but no output without --alt-odb' ' +test_expect_failure 'shared: all packs are redundant, but no output without --alt-odb (failed on Mac)' ' ( cd "$shared_repo" && git pack-redundant --all >out && @@ -334,7 +334,7 @@ test_expect_success 'shared: all packs are redundant, but no output without --al # ALL | x x x x x x x x x x x x x x x x x x x # ############################################################################# -test_expect_success 'shared: show redundant packs in stderr for verbose mode' ' +test_expect_failure 'shared: show redundant packs in stderr for verbose mode (failed on Mac)' ' ( cd "$shared_repo" && cat >expect <<-EOF && From 4bc0cc12c19f00380052d0cfaf4e71e0a4630f41 Mon Sep 17 00:00:00 2001 From: Jiang Xin Date: Sat, 2 Feb 2019 21:30:16 +0800 Subject: [PATCH 5/6] pack-redundant: rename pack_list.all_objects New algorithm uses `pack_list.all_objects` to track remaining objects, so rename it to `pack_list.remaining_objects`. Signed-off-by: Jiang Xin Signed-off-by: Junio C Hamano --- builtin/pack-redundant.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c index d6d9a66e46..15cdf233c4 100644 --- a/builtin/pack-redundant.c +++ b/builtin/pack-redundant.c @@ -32,7 +32,7 @@ static struct pack_list { struct pack_list *next; struct packed_git *pack; struct llist *unique_objects; - struct llist *all_objects; + struct llist *remaining_objects; } *local_packs = NULL, *altodb_packs = NULL; static struct llist_item *free_nodes; @@ -241,9 +241,9 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2) const unsigned int hashsz = the_hash_algo->rawsz; if (!p1->unique_objects) - p1->unique_objects = llist_copy(p1->all_objects); + p1->unique_objects = llist_copy(p1->remaining_objects); if (!p2->unique_objects) - p2->unique_objects = llist_copy(p2->all_objects); + p2->unique_objects = llist_copy(p2->remaining_objects); p1_base = p1->pack->index_data; p2_base = p2->pack->index_data; @@ -344,8 +344,8 @@ static int cmp_pack_list_reverse(const void *a, const void *b) { struct pack_list *pl_a = *((struct pack_list **)a); struct pack_list *pl_b = *((struct pack_list **)b); - size_t sz_a = pl_a->all_objects->size; - size_t sz_b = pl_b->all_objects->size; + size_t sz_a = pl_a->remaining_objects->size; + size_t sz_b = pl_b->remaining_objects->size; if (sz_a == sz_b) return 0; @@ -355,7 +355,7 @@ static int cmp_pack_list_reverse(const void *a, const void *b) return -1; } -/* Sort pack_list, greater size of all_objects first */ +/* Sort pack_list, greater size of remaining_objects first */ static void sort_pack_list(struct pack_list **pl) { struct pack_list **ary, *p; @@ -399,7 +399,7 @@ static void minimize(struct pack_list **min) missing = llist_copy(all_objects); pl = unique; while (pl) { - llist_sorted_difference_inplace(missing, pl->all_objects); + llist_sorted_difference_inplace(missing, pl->remaining_objects); pl = pl->next; } @@ -417,20 +417,20 @@ static void minimize(struct pack_list **min) /* remove unique pack objects from the non_unique packs */ pl = non_unique; while (pl) { - llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects); + llist_sorted_difference_inplace(pl->remaining_objects, unique_pack_objects); pl = pl->next; } while (non_unique) { - /* sort the non_unique packs, greater size of all_objects first */ + /* sort the non_unique packs, greater size of remaining_objects first */ sort_pack_list(&non_unique); - if (non_unique->all_objects->size == 0) + if (non_unique->remaining_objects->size == 0) break; pack_list_insert(min, non_unique); - for (pl = non_unique->next; pl && pl->all_objects->size > 0; pl = pl->next) - llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects); + for (pl = non_unique->next; pl && pl->remaining_objects->size > 0; pl = pl->next) + llist_sorted_difference_inplace(pl->remaining_objects, non_unique->remaining_objects); non_unique = non_unique->next; } @@ -445,7 +445,7 @@ static void load_all_objects(void) while (pl) { hint = NULL; - l = pl->all_objects->front; + l = pl->remaining_objects->front; while (l) { hint = llist_insert_sorted_unique(all_objects, l->oid, hint); @@ -456,7 +456,7 @@ static void load_all_objects(void) /* remove objects present in remote packs */ pl = altodb_packs; while (pl) { - llist_sorted_difference_inplace(all_objects, pl->all_objects); + llist_sorted_difference_inplace(all_objects, pl->remaining_objects); pl = pl->next; } } @@ -481,8 +481,8 @@ static void scan_alt_odb_packs(void) while (alt) { local = local_packs; while (local) { - llist_sorted_difference_inplace(local->all_objects, - alt->all_objects); + llist_sorted_difference_inplace(local->remaining_objects, + alt->remaining_objects); local = local->next; } alt = alt->next; @@ -499,7 +499,7 @@ static struct pack_list * add_pack(struct packed_git *p) return NULL; l.pack = p; - llist_init(&l.all_objects); + llist_init(&l.remaining_objects); if (open_pack_index(p)) return NULL; @@ -508,7 +508,7 @@ static struct pack_list * add_pack(struct packed_git *p) base += 256 * 4 + ((p->index_version < 2) ? 4 : 8); step = the_hash_algo->rawsz + ((p->index_version < 2) ? 4 : 0); while (off < p->num_objects * step) { - llist_insert_back(l.all_objects, (const struct object_id *)(base + off)); + llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off)); off += step; } l.unique_objects = NULL; @@ -605,7 +605,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix) llist_sorted_difference_inplace(all_objects, ignore); pl = local_packs; while (pl) { - llist_sorted_difference_inplace(pl->all_objects, ignore); + llist_sorted_difference_inplace(pl->remaining_objects, ignore); pl = pl->next; } From 0e37abd2e89d475500e93a6368f1f054ca2fec55 Mon Sep 17 00:00:00 2001 From: Jiang Xin Date: Sat, 2 Feb 2019 21:30:17 +0800 Subject: [PATCH 6/6] pack-redundant: consistent sort method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SZEDER reported that test case t5323 has different test result on MacOS. This is because `cmp_pack_list_reverse` cannot give identical result when two pack being sorted has the same size of remaining_objects. Changes to the sorting function will make consistent test result for t5323. The new algorithm to find redundant packs is a trade-off to save memory resources, and the result of it may be different with old one, and may be not the best result sometimes. Update t5323 for the new algorithm. Reported-by: SZEDER Gábor Signed-off-by: Jiang Xin Signed-off-by: Junio C Hamano --- builtin/pack-redundant.c | 24 ++++++++++++++++-------- t/t5323-pack-redundant.sh | 18 +++++++++--------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c index 15cdf233c4..29ff5e99cb 100644 --- a/builtin/pack-redundant.c +++ b/builtin/pack-redundant.c @@ -33,6 +33,7 @@ static struct pack_list { struct packed_git *pack; struct llist *unique_objects; struct llist *remaining_objects; + size_t all_objects_size; } *local_packs = NULL, *altodb_packs = NULL; static struct llist_item *free_nodes; @@ -340,19 +341,25 @@ static inline off_t pack_set_bytecount(struct pack_list *pl) return ret; } -static int cmp_pack_list_reverse(const void *a, const void *b) +static int cmp_remaining_objects(const void *a, const void *b) { struct pack_list *pl_a = *((struct pack_list **)a); struct pack_list *pl_b = *((struct pack_list **)b); - size_t sz_a = pl_a->remaining_objects->size; - size_t sz_b = pl_b->remaining_objects->size; - if (sz_a == sz_b) - return 0; - else if (sz_a < sz_b) + if (pl_a->remaining_objects->size == pl_b->remaining_objects->size) { + /* have the same remaining_objects, big pack first */ + if (pl_a->all_objects_size == pl_b->all_objects_size) + return 0; + else if (pl_a->all_objects_size < pl_b->all_objects_size) + return 1; + else + return -1; + } else if (pl_a->remaining_objects->size < pl_b->remaining_objects->size) { + /* sort by remaining objects, more objects first */ return 1; - else + } else { return -1; + } } /* Sort pack_list, greater size of remaining_objects first */ @@ -370,7 +377,7 @@ static void sort_pack_list(struct pack_list **pl) for (n = 0, p = *pl; p; p = p->next) ary[n++] = p; - QSORT(ary, n, cmp_pack_list_reverse); + QSORT(ary, n, cmp_remaining_objects); /* link them back again */ for (i = 0; i < n - 1; i++) @@ -511,6 +518,7 @@ static struct pack_list * add_pack(struct packed_git *p) llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off)); off += step; } + l.all_objects_size = l.remaining_objects->size; l.unique_objects = NULL; if (p->pack_local) return pack_list_insert(&local_packs, &l); diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh index e859965489..6b4d1ca353 100755 --- a/t/t5323-pack-redundant.sh +++ b/t/t5323-pack-redundant.sh @@ -165,15 +165,15 @@ test_expect_success 'master: no redundant for pack 1, 2, 3' ' # | T A B C D E F G H I J K L M N O P Q R # ----+-------------------------------------- # P1 | x x x x x x x x -# P2* | ! ! ! ! ! ! ! -# P3 | x x x x x x +# P2 | x x x x x x x +# P3* | ! ! ! ! ! ! # P4 | x x x x x # P5 | x x x x # ----+-------------------------------------- # ALL | x x x x x x x x x x x x x x x x x x # ############################################################################# -test_expect_failure 'master: one of pack-2/pack-3 is redundant (failed on Mac)' ' +test_expect_success 'master: one of pack-2/pack-3 is redundant' ' create_pack_in "$master_repo" P4 <<-EOF && $J $K @@ -190,7 +190,7 @@ test_expect_failure 'master: one of pack-2/pack-3 is redundant (failed on Mac)' ( cd "$master_repo" && cat >expect <<-EOF && - P2:$P2 + P3:$P3 EOF git pack-redundant --all >out && format_packfiles actual && @@ -214,7 +214,7 @@ test_expect_failure 'master: one of pack-2/pack-3 is redundant (failed on Mac)' # ALL | x x x x x x x x x x x x x x x x x x x # ############################################################################# -test_expect_failure 'master: pack 2, 4, and 6 are redundant (failed on Mac)' ' +test_expect_success 'master: pack 2, 4, and 6 are redundant' ' create_pack_in "$master_repo" P6 <<-EOF && $N $O @@ -254,7 +254,7 @@ test_expect_failure 'master: pack 2, 4, and 6 are redundant (failed on Mac)' ' # ALL | x x x x x x x x x x x x x x x x x x x # ############################################################################# -test_expect_failure 'master: pack-8 (subset of pack-1) is also redundant (failed on Mac)' ' +test_expect_success 'master: pack-8 (subset of pack-1) is also redundant' ' create_pack_in "$master_repo" P8 <<-EOF && $A EOF @@ -281,7 +281,7 @@ test_expect_success 'master: clean loose objects' ' ) ' -test_expect_failure 'master: remove redundant packs and pass fsck (failed on Mac)' ' +test_expect_success 'master: remove redundant packs and pass fsck' ' ( cd "$master_repo" && git pack-redundant --all | xargs rm && @@ -301,7 +301,7 @@ test_expect_success 'setup shared.git' ' ) ' -test_expect_failure 'shared: all packs are redundant, but no output without --alt-odb (failed on Mac)' ' +test_expect_success 'shared: all packs are redundant, but no output without --alt-odb' ' ( cd "$shared_repo" && git pack-redundant --all >out && @@ -334,7 +334,7 @@ test_expect_failure 'shared: all packs are redundant, but no output without --al # ALL | x x x x x x x x x x x x x x x x x x x # ############################################################################# -test_expect_failure 'shared: show redundant packs in stderr for verbose mode (failed on Mac)' ' +test_expect_success 'shared: show redundant packs in stderr for verbose mode' ' ( cd "$shared_repo" && cat >expect <<-EOF &&