Merge branch 'sc/pack-redundant'

Update the implementation of pack-redundant for performance in a
repository with many packfiles.

* sc/pack-redundant:
  pack-redundant: consistent sort method
  pack-redundant: rename pack_list.all_objects
  pack-redundant: new algorithm to find min packs
  pack-redundant: delete redundant code
  pack-redundant: delay creation of unique_objects
  t5323: test cases for git-pack-redundant
This commit is contained in:
Junio C Hamano 2019-03-07 09:59:54 +09:00
Родитель 3710f60a80 0e37abd2e8
Коммит cf0879f7e9
2 изменённых файлов: 558 добавлений и 139 удалений

Просмотреть файл

@ -32,14 +32,10 @@ static struct pack_list {
struct pack_list *next; struct pack_list *next;
struct packed_git *pack; struct packed_git *pack;
struct llist *unique_objects; struct llist *unique_objects;
struct llist *all_objects; struct llist *remaining_objects;
size_t all_objects_size;
} *local_packs = NULL, *altodb_packs = NULL; } *local_packs = NULL, *altodb_packs = NULL;
struct pll {
struct pll *next;
struct pack_list *pl;
};
static struct llist_item *free_nodes; static struct llist_item *free_nodes;
static inline void llist_item_put(struct llist_item *item) static inline void llist_item_put(struct llist_item *item)
@ -63,15 +59,6 @@ static inline struct llist_item *llist_item_get(void)
return new_item; return new_item;
} }
static void llist_free(struct llist *list)
{
while ((list->back = list->front)) {
list->front = list->front->next;
llist_item_put(list->back);
}
free(list);
}
static inline void llist_init(struct llist **list) static inline void llist_init(struct llist **list)
{ {
*list = xmalloc(sizeof(struct llist)); *list = xmalloc(sizeof(struct llist));
@ -254,6 +241,11 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
struct llist_item *p1_hint = NULL, *p2_hint = NULL; struct llist_item *p1_hint = NULL, *p2_hint = NULL;
const unsigned int hashsz = the_hash_algo->rawsz; const unsigned int hashsz = the_hash_algo->rawsz;
if (!p1->unique_objects)
p1->unique_objects = llist_copy(p1->remaining_objects);
if (!p2->unique_objects)
p2->unique_objects = llist_copy(p2->remaining_objects);
p1_base = p1->pack->index_data; p1_base = p1->pack->index_data;
p2_base = p2->pack->index_data; p2_base = p2->pack->index_data;
p1_base += 256 * 4 + ((p1->pack->index_version < 2) ? 4 : 8); p1_base += 256 * 4 + ((p1->pack->index_version < 2) ? 4 : 8);
@ -285,78 +277,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
} }
} }
/*
 * Free a chain of "struct pll" nodes together with every pack_list
 * node reachable through each node's "pl" member.
 */
static void pll_free(struct pll *l)
{
	while (l) {
		struct pll *doomed = l;
		struct pack_list *entry;

		/* pop and free the pack_list entries one at a time */
		for (entry = l->pl; entry; entry = l->pl) {
			l->pl = entry->next;
			free(entry);
		}

		/* step forward before the current node is released */
		l = l->next;
		free(doomed);
	}
}
/*
 * Build a linked list of "struct pll" nodes, one per n-element selection
 * of packs from "list".  Returns NULL when "list" is NULL, when it holds
 * fewer than n entries, or when n is 0.
 *
 * All the permutations have to be free()d at the same time,
 * since they refer to each other.
 */
static struct pll * get_permutations(struct pack_list *list, int n)
{
	struct pll *result = NULL;

	if (list == NULL)
		return NULL;
	if (pack_list_size(list) < n)
		return NULL;
	if (n == 0)
		return NULL;

	if (n == 1) {
		/* base case: one single-pack entry per list element */
		for (; list; list = list->next) {
			struct pll *node = xmalloc(sizeof(*node));

			node->pl = NULL;
			pack_list_insert(&node->pl, list);
			node->next = result;
			result = node;
		}
		return result;
	}

	for (; list->next; list = list->next) {
		struct pll *tail;

		/*
		 * Prepend the current pack to every (n - 1)-sized selection
		 * taken from the packs that follow it.
		 */
		for (tail = get_permutations(list->next, n - 1); tail; tail = tail->next) {
			struct pll *node = xmalloc(sizeof(*node));

			/* the new entry shares the tail's pack_list chain */
			node->pl = tail->pl;
			pack_list_insert(&node->pl, list);
			node->next = result;
			result = node;
		}
	}
	return result;
}
/*
 * Return 1 if the packs in "pl" together contain every object in "list",
 * 0 otherwise.  "list" itself is left untouched; the check works on a
 * private copy that is freed before returning.
 */
static int is_superset(struct pack_list *pl, struct llist *list)
{
	struct llist *uncovered = llist_copy(list);
	int covered = 0;

	for (; pl; pl = pl->next) {
		llist_sorted_difference_inplace(uncovered, pl->all_objects);
		if (uncovered->size == 0) {
			/* nothing left to account for: we're done */
			covered = 1;
			break;
		}
	}

	llist_free(uncovered);
	return covered;
}
static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2) static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
{ {
size_t ret = 0; size_t ret = 0;
@ -421,14 +341,58 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
return ret; return ret;
} }
/*
 * qsort-style comparator over "struct pack_list *" elements.
 *
 * Orders packs by the size of their remaining_objects list, more objects
 * first; packs with equally many remaining objects are ordered by
 * all_objects_size, bigger pack first.
 */
static int cmp_remaining_objects(const void *a, const void *b)
{
	struct pack_list *lhs = *(struct pack_list *const *)a;
	struct pack_list *rhs = *(struct pack_list *const *)b;

	/* primary key: number of remaining objects, descending */
	if (lhs->remaining_objects->size != rhs->remaining_objects->size)
		return lhs->remaining_objects->size < rhs->remaining_objects->size ? 1 : -1;

	/* tie-break: total object count of the pack, descending */
	if (lhs->all_objects_size != rhs->all_objects_size)
		return lhs->all_objects_size < rhs->all_objects_size ? 1 : -1;

	return 0;
}
/*
 * Sort the linked pack_list at *pl in place using cmp_remaining_objects
 * (greater size of remaining_objects first) and update *pl to point at
 * the new head.  Lists with fewer than two entries are left untouched.
 */
static void sort_pack_list(struct pack_list **pl)
{
	struct pack_list **ary, *p;
	size_t i;
	size_t n = pack_list_size(*pl);

	if (n < 2)
		return;

	/* prepare an array of packed_list for easier sorting */
	ary = xcalloc(n, sizeof(struct pack_list *));
	for (n = 0, p = *pl; p; p = p->next)
		ary[n++] = p;

	QSORT(ary, n, cmp_remaining_objects);

	/*
	 * Link them back again.  The index has the same unsigned type as
	 * the element count, so the bound check involves no signed/unsigned
	 * conversion.
	 */
	for (i = 0; i + 1 < n; i++)
		ary[i]->next = ary[i + 1];
	ary[n - 1]->next = NULL;
	*pl = ary[0];

	free(ary);
}
static void minimize(struct pack_list **min) static void minimize(struct pack_list **min)
{ {
struct pack_list *pl, *unique = NULL, struct pack_list *pl, *unique = NULL, *non_unique = NULL;
*non_unique = NULL, *min_perm = NULL; struct llist *missing, *unique_pack_objects;
struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
struct llist *missing;
off_t min_perm_size = 0, perm_size;
int n;
pl = local_packs; pl = local_packs;
while (pl) { while (pl) {
@ -442,53 +406,41 @@ static void minimize(struct pack_list **min)
missing = llist_copy(all_objects); missing = llist_copy(all_objects);
pl = unique; pl = unique;
while (pl) { while (pl) {
llist_sorted_difference_inplace(missing, pl->all_objects); llist_sorted_difference_inplace(missing, pl->remaining_objects);
pl = pl->next; pl = pl->next;
} }
*min = unique;
/* return if there are no objects missing from the unique set */ /* return if there are no objects missing from the unique set */
if (missing->size == 0) { if (missing->size == 0) {
*min = unique;
free(missing); free(missing);
return; return;
} }
/* find the permutations which contain all missing objects */ unique_pack_objects = llist_copy(all_objects);
for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) { llist_sorted_difference_inplace(unique_pack_objects, missing);
perm_all = perm = get_permutations(non_unique, n);
while (perm) {
if (is_superset(perm->pl, missing)) {
new_perm = xmalloc(sizeof(struct pll));
memcpy(new_perm, perm, sizeof(struct pll));
new_perm->next = perm_ok;
perm_ok = new_perm;
}
perm = perm->next;
}
if (perm_ok)
break;
pll_free(perm_all);
}
if (perm_ok == NULL)
die("Internal error: No complete sets found!");
/* find the permutation with the smallest size */ /* remove unique pack objects from the non_unique packs */
perm = perm_ok; pl = non_unique;
while (perm) {
perm_size = pack_set_bytecount(perm->pl);
if (!min_perm_size || min_perm_size > perm_size) {
min_perm_size = perm_size;
min_perm = perm->pl;
}
perm = perm->next;
}
*min = min_perm;
/* add the unique packs to the list */
pl = unique;
while (pl) { while (pl) {
pack_list_insert(min, pl); llist_sorted_difference_inplace(pl->remaining_objects, unique_pack_objects);
pl = pl->next; pl = pl->next;
} }
while (non_unique) {
/* sort the non_unique packs, greater size of remaining_objects first */
sort_pack_list(&non_unique);
if (non_unique->remaining_objects->size == 0)
break;
pack_list_insert(min, non_unique);
for (pl = non_unique->next; pl && pl->remaining_objects->size > 0; pl = pl->next)
llist_sorted_difference_inplace(pl->remaining_objects, non_unique->remaining_objects);
non_unique = non_unique->next;
}
} }
static void load_all_objects(void) static void load_all_objects(void)
@ -500,7 +452,7 @@ static void load_all_objects(void)
while (pl) { while (pl) {
hint = NULL; hint = NULL;
l = pl->all_objects->front; l = pl->remaining_objects->front;
while (l) { while (l) {
hint = llist_insert_sorted_unique(all_objects, hint = llist_insert_sorted_unique(all_objects,
l->oid, hint); l->oid, hint);
@ -511,7 +463,7 @@ static void load_all_objects(void)
/* remove objects present in remote packs */ /* remove objects present in remote packs */
pl = altodb_packs; pl = altodb_packs;
while (pl) { while (pl) {
llist_sorted_difference_inplace(all_objects, pl->all_objects); llist_sorted_difference_inplace(all_objects, pl->remaining_objects);
pl = pl->next; pl = pl->next;
} }
} }
@ -536,11 +488,10 @@ static void scan_alt_odb_packs(void)
while (alt) { while (alt) {
local = local_packs; local = local_packs;
while (local) { while (local) {
llist_sorted_difference_inplace(local->unique_objects, llist_sorted_difference_inplace(local->remaining_objects,
alt->all_objects); alt->remaining_objects);
local = local->next; local = local->next;
} }
llist_sorted_difference_inplace(all_objects, alt->all_objects);
alt = alt->next; alt = alt->next;
} }
} }
@ -555,7 +506,7 @@ static struct pack_list * add_pack(struct packed_git *p)
return NULL; return NULL;
l.pack = p; l.pack = p;
llist_init(&l.all_objects); llist_init(&l.remaining_objects);
if (open_pack_index(p)) if (open_pack_index(p))
return NULL; return NULL;
@ -564,11 +515,11 @@ static struct pack_list * add_pack(struct packed_git *p)
base += 256 * 4 + ((p->index_version < 2) ? 4 : 8); base += 256 * 4 + ((p->index_version < 2) ? 4 : 8);
step = the_hash_algo->rawsz + ((p->index_version < 2) ? 4 : 0); step = the_hash_algo->rawsz + ((p->index_version < 2) ? 4 : 0);
while (off < p->num_objects * step) { while (off < p->num_objects * step) {
llist_insert_back(l.all_objects, (const struct object_id *)(base + off)); llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
off += step; off += step;
} }
/* this list will be pruned in cmp_two_packs later */ l.all_objects_size = l.remaining_objects->size;
l.unique_objects = llist_copy(l.all_objects); l.unique_objects = NULL;
if (p->pack_local) if (p->pack_local)
return pack_list_insert(&local_packs, &l); return pack_list_insert(&local_packs, &l);
else else
@ -603,7 +554,7 @@ static void load_all(void)
int cmd_pack_redundant(int argc, const char **argv, const char *prefix) int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
{ {
int i; int i;
struct pack_list *min, *red, *pl; struct pack_list *min = NULL, *red, *pl;
struct llist *ignore; struct llist *ignore;
struct object_id *oid; struct object_id *oid;
char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */ char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
@ -646,7 +597,6 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
load_all_objects(); load_all_objects();
cmp_local_packs();
if (alt_odb) if (alt_odb)
scan_alt_odb_packs(); scan_alt_odb_packs();
@ -663,10 +613,12 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
llist_sorted_difference_inplace(all_objects, ignore); llist_sorted_difference_inplace(all_objects, ignore);
pl = local_packs; pl = local_packs;
while (pl) { while (pl) {
llist_sorted_difference_inplace(pl->unique_objects, ignore); llist_sorted_difference_inplace(pl->remaining_objects, ignore);
pl = pl->next; pl = pl->next;
} }
cmp_local_packs();
minimize(&min); minimize(&min);
if (verbose) { if (verbose) {

467
t/t5323-pack-redundant.sh Executable file
Просмотреть файл

@ -0,0 +1,467 @@
#!/bin/sh
#
# Copyright (c) 2018 Jiang Xin
#
test_description='Test git pack-redundant
In order to test git-pack-redundant, we will create a number of objects and
packs in the repository `master.git`. The relationship between packs (P1-P8)
and objects (T, A-R) is showed in the following chart. Objects of a pack will
be marked with letter x, while objects of redundant packs will be marked with
exclamation point, and redundant pack itself will be marked with asterisk.
| T A B C D E F G H I J K L M N O P Q R
----+--------------------------------------
P1 | x x x x x x x x
P2* | ! ! ! ! ! ! !
P3 | x x x x x x
P4* | ! ! ! ! !
P5 | x x x x
P6* | ! ! !
P7 | x x
P8* | !
----+--------------------------------------
ALL | x x x x x x x x x x x x x x x x x x x
Another repository `shared.git` has unique objects (X-Z), while other objects
(marked with letter s) are shared through alt-odb (of `master.git`). The
relationship between packs and objects is as follows:
| T A B C D E F G H I J K L M N O P Q R X Y Z
----+----------------------------------------------
Px1 | s s s x x x
Px2 | s s s x x x
'
. ./test-lib.sh
master_repo=master.git
shared_repo=shared.git
# Create commits in <repo> and assign each commit's oid to shell variables
# given in the arguments (A, B, and C). E.g.:
#
# create_commits_in <repo> A B C
#
# NOTE: Avoid calling this function from a subshell since variable
# assignments will disappear when subshell exits.
create_commits_in () {
	repo="$1" &&
	# Start from the current HEAD commit; if rev-parse fails, clear
	# whatever was captured so the first new commit gets no parent.
	{ parent=$(git -C "$repo" rev-parse HEAD^{} 2>/dev/null) || parent=; } &&
	T=$(git -C "$repo" write-tree) &&
	shift &&
	# Each remaining argument names one commit to create; the commit
	# message is the name itself and every commit reuses tree $T.
	for name
	do
		test_tick &&
		oid=$(echo $name | git -C "$repo" commit-tree ${parent:+-p $parent} $T) &&
		# export the new commit id under the caller-supplied variable name
		eval $name=$oid &&
		parent=$oid ||
		return 1
	done &&
	git -C "$repo" update-ref refs/heads/master $oid
}
# Create pack in <repo> and assign pack id to variable given in the 2nd argument
# (<name>). Commits in the pack will be read from stdin. E.g.:
#
# create_pack_in <repo> <name> <<-EOF
# ...
# EOF
#
# NOTE: commits from stdin should be given using heredoc, not using pipe, and
# avoid calling this function from a subshell since variable assignments will
# disappear when subshell exits.
create_pack_in () {
	repo=$1 name=$2 &&
	# pack-objects reads the object ids from our stdin and prints the
	# id of the pack it wrote into <repo>/objects/pack.
	pack=$(git -C "$repo/objects/pack" pack-objects -q pack) &&
	# Set <name> to the pack id, and P<pack id> to "<name>:<pack id>"
	# so format_packfiles can map an id back to its name.
	eval "$name=\$pack && P$pack=\$name:\$pack"
}
format_packfiles () {
	# Reduce every "<dir>/pack-<id>.idx" or "<dir>/pack-<id>.pack" path
	# read from stdin to its bare <id>, dropping duplicates.
	sed -e "s#.*/pack-\(.*\)\.idx#\1#" -e "s#.*/pack-\(.*\)\.pack#\1#" |
	sort -u |
	while read pack_id
	do
		# P<id> holds "<name>:<id>" for packs registered by create_pack_in
		label=$(eval echo \${P$pack_id})
		if test -n "$label"
		then
			eval echo "\${P$pack_id}"
		else
			echo $pack_id
		fi
	done |
	sort
}
test_expect_success 'setup master repo' '
git init --bare "$master_repo" &&
create_commits_in "$master_repo" A B C D E F G H I J K L M N O P Q R
'
#############################################################################
# Chart of packs and objects for this test case
#
# | T A B C D E F G H I J K L M N O P Q R
# ----+--------------------------------------
# P1 | x x x x x x x x
# P2 | x x x x x x x
# P3 | x x x x x x
# ----+--------------------------------------
# ALL | x x x x x x x x x x x x x x x
#
#############################################################################
test_expect_success 'master: no redundant for pack 1, 2, 3' '
create_pack_in "$master_repo" P1 <<-EOF &&
$T
$A
$B
$C
$D
$E
$F
$R
EOF
create_pack_in "$master_repo" P2 <<-EOF &&
$B
$C
$D
$E
$G
$H
$I
EOF
create_pack_in "$master_repo" P3 <<-EOF &&
$F
$I
$J
$K
$L
$M
EOF
(
cd "$master_repo" &&
git pack-redundant --all >out &&
test_must_be_empty out
)
'
#############################################################################
# Chart of packs and objects for this test case
#
# | T A B C D E F G H I J K L M N O P Q R
# ----+--------------------------------------
# P1 | x x x x x x x x
# P2 | x x x x x x x
# P3* | ! ! ! ! ! !
# P4 | x x x x x
# P5 | x x x x
# ----+--------------------------------------
# ALL | x x x x x x x x x x x x x x x x x x
#
#############################################################################
test_expect_success 'master: one of pack-2/pack-3 is redundant' '
create_pack_in "$master_repo" P4 <<-EOF &&
$J
$K
$L
$M
$P
EOF
create_pack_in "$master_repo" P5 <<-EOF &&
$G
$H
$N
$O
EOF
(
cd "$master_repo" &&
cat >expect <<-EOF &&
P3:$P3
EOF
git pack-redundant --all >out &&
format_packfiles <out >actual &&
test_cmp expect actual
)
'
#############################################################################
# Chart of packs and objects for this test case
#
# | T A B C D E F G H I J K L M N O P Q R
# ----+--------------------------------------
# P1 | x x x x x x x x
# P2* | ! ! ! ! ! ! !
# P3 | x x x x x x
# P4* | ! ! ! ! !
# P5 | x x x x
# P6* | ! ! !
# P7 | x x
# ----+--------------------------------------
# ALL | x x x x x x x x x x x x x x x x x x x
#
#############################################################################
test_expect_success 'master: pack 2, 4, and 6 are redundant' '
create_pack_in "$master_repo" P6 <<-EOF &&
$N
$O
$Q
EOF
create_pack_in "$master_repo" P7 <<-EOF &&
$P
$Q
EOF
(
cd "$master_repo" &&
cat >expect <<-EOF &&
P2:$P2
P4:$P4
P6:$P6
EOF
git pack-redundant --all >out &&
format_packfiles <out >actual &&
test_cmp expect actual
)
'
#############################################################################
# Chart of packs and objects for this test case
#
# | T A B C D E F G H I J K L M N O P Q R
# ----+--------------------------------------
# P1 | x x x x x x x x
# P2* | ! ! ! ! ! ! !
# P3 | x x x x x x
# P4* | ! ! ! ! !
# P5 | x x x x
# P6* | ! ! !
# P7 | x x
# P8* | !
# ----+--------------------------------------
# ALL | x x x x x x x x x x x x x x x x x x x
#
#############################################################################
test_expect_success 'master: pack-8 (subset of pack-1) is also redundant' '
create_pack_in "$master_repo" P8 <<-EOF &&
$A
EOF
(
cd "$master_repo" &&
cat >expect <<-EOF &&
P2:$P2
P4:$P4
P6:$P6
P8:$P8
EOF
git pack-redundant --all >out &&
format_packfiles <out >actual &&
test_cmp expect actual
)
'
test_expect_success 'master: clean loose objects' '
(
cd "$master_repo" &&
git prune-packed &&
find objects -type f | sed -e "/objects\/pack\//d" >out &&
test_must_be_empty out
)
'
test_expect_success 'master: remove redundant packs and pass fsck' '
	(
		cd "$master_repo" &&
		# Capture the list in a file instead of piping it, so that a
		# failing or crashing pack-redundant is not masked by xargs.
		git pack-redundant --all >redundant &&
		xargs rm <redundant &&
		git fsck &&
		git pack-redundant --all >out &&
		test_must_be_empty out
	)
'
# The following test cases will execute inside `shared.git`, instead of
# inside `master.git`.
test_expect_success 'setup shared.git' '
git clone --mirror "$master_repo" "$shared_repo" &&
(
cd "$shared_repo" &&
printf "../../$master_repo/objects\n" >objects/info/alternates
)
'
test_expect_success 'shared: all packs are redundant, but no output without --alt-odb' '
(
cd "$shared_repo" &&
git pack-redundant --all >out &&
test_must_be_empty out
)
'
#############################################################################
# Chart of packs and objects for this test case
#
# ================ master.git ===============
# | T A B C D E F G H I J K L M N O P Q R <----------+
# ----+-------------------------------------- |
# P1 | x x x x x x x x |
# P3 | x x x x x x |
# P5 | x x x x |
# P7 | x x |
# ----+-------------------------------------- |
# ALL | x x x x x x x x x x x x x x x x x x x |
# |
# |
# ================ shared.git =============== |
# | T A B C D E F G H I J K L M N O P Q R <objects/info/alternates>
# ----+--------------------------------------
# P1* | s s s s s s s s
# P3* | s s s s s s
# P5* | s s s s
# P7* | s s
# ----+--------------------------------------
# ALL | x x x x x x x x x x x x x x x x x x x
#
#############################################################################
test_expect_success 'shared: show redundant packs in stderr for verbose mode' '
(
cd "$shared_repo" &&
cat >expect <<-EOF &&
P1:$P1
P3:$P3
P5:$P5
P7:$P7
EOF
git pack-redundant --all --verbose >out 2>out.err &&
test_must_be_empty out &&
grep "pack$" out.err | format_packfiles >actual &&
test_cmp expect actual
)
'
test_expect_success 'shared: remove redundant packs, no packs left' '
	(
		cd "$shared_repo" &&
		cat >expect <<-EOF &&
		fatal: Zero packs found!
		EOF
		# Capture the list in a file instead of piping it, so that a
		# failing or crashing pack-redundant is not masked by xargs.
		git pack-redundant --all --alt-odb >redundant &&
		xargs rm <redundant &&
		git fsck &&
		test_must_fail git pack-redundant --all --alt-odb >actual 2>&1 &&
		test_cmp expect actual
	)
'
test_expect_success 'shared: create new objects and packs' '
create_commits_in "$shared_repo" X Y Z &&
create_pack_in "$shared_repo" Px1 <<-EOF &&
$X
$Y
$Z
$A
$B
$C
EOF
create_pack_in "$shared_repo" Px2 <<-EOF
$X
$Y
$Z
$D
$E
$F
EOF
'
test_expect_success 'shared: no redundant without --alt-odb' '
(
cd "$shared_repo" &&
git pack-redundant --all >out &&
test_must_be_empty out
)
'
#############################################################################
# Chart of packs and objects for this test case
#
# ================ master.git ===============
# | T A B C D E F G H I J K L M N O P Q R <----------------+
# ----+-------------------------------------- |
# P1 | x x x x x x x x |
# P3 | x x x x x x |
# P5 | x x x x |
# P7 | x x |
# ----+-------------------------------------- |
# ALL | x x x x x x x x x x x x x x x x x x x |
# |
# |
# ================ shared.git ======================= |
# | T A B C D E F G H I J K L M N O P Q R X Y Z <objects/info/alternates>
# ----+----------------------------------------------
# Px1 | s s s x x x
# Px2*| s s s ! ! !
# ----+----------------------------------------------
# ALL | s s s s s s s s s s s s s s s s s s s x x x
#
#############################################################################
test_expect_success 'shared: one pack is redundant with --alt-odb' '
(
cd "$shared_repo" &&
git pack-redundant --all --alt-odb >out &&
format_packfiles <out >actual &&
test_line_count = 1 actual
)
'
#############################################################################
# Chart of packs and objects for this test case
#
# ================ master.git ===============
# | T A B C D E F G H I J K L M N O P Q R <----------------+
# ----+-------------------------------------- |
# P1 | x x x x x x x x |
# P3 | x x x x x x |
# P5 | x x x x |
# P7 | x x |
# ----+-------------------------------------- |
# ALL | x x x x x x x x x x x x x x x x x x x |
# |
# |
# ================ shared.git ======================= |
# | T A B C D E F G H I J K L M N O P Q R X Y Z <objects/info/alternates>
# ----+----------------------------------------------
# Px1*| s s s i i i
# Px2*| s s s i i i
# ----+----------------------------------------------
# ALL | s s s s s s s s s s s s s s s s s s s i i i
# (ignored objects, marked with i)
#
#############################################################################
test_expect_success 'shared: ignore unique objects and all two packs are redundant' '
(
cd "$shared_repo" &&
cat >expect <<-EOF &&
Px1:$Px1
Px2:$Px2
EOF
git pack-redundant --all --alt-odb >out <<-EOF &&
$X
$Y
$Z
EOF
format_packfiles <out >actual &&
test_cmp expect actual
)
'
test_done