diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index caa4cd0211..f434e1fb0c 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -41,6 +41,7 @@ #define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj) #define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj) #define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val) +#define SET_DELTA_EXT(obj, oid) oe_set_delta_ext(&to_pack, obj, oid) #define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val) #define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val) #define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val) @@ -60,6 +61,7 @@ static struct packing_data to_pack; static struct pack_idx_entry **written_list; static uint32_t nr_result, nr_written, nr_seen; +static struct bitmap_index *bitmap_git; static int non_empty; static int reuse_delta = 1, reuse_object = 1; @@ -80,6 +82,7 @@ static unsigned long pack_size_limit; static int depth = 50; static int delta_search_threads; static int pack_to_stdout; +static int thin; static int num_preferred_base; static struct progress *progress_state; @@ -1538,11 +1541,15 @@ static void check_object(struct object_entry *entry) break; } - if (base_ref && (base_entry = packlist_find(&to_pack, base_ref, NULL))) { + if (base_ref && ( + (base_entry = packlist_find(&to_pack, base_ref, NULL)) || + (thin && + bitmap_has_sha1_in_uninteresting(bitmap_git, base_ref)))) { /* * If base_ref was set above that means we wish to - * reuse delta data, and we even found that base - * in the list of objects we want to pack. Goodie! + * reuse delta data, and either we found that object in + * the list of objects we want to pack, or it's one we + * know the receiver has. * * Depth value does not matter - find_deltas() will * never consider reused delta as the base object to @@ -1551,10 +1558,16 @@ static void check_object(struct object_entry *entry) */ oe_set_type(entry, entry->in_pack_type); SET_SIZE(entry, in_pack_size); /* delta size */ - SET_DELTA(entry, base_entry); SET_DELTA_SIZE(entry, in_pack_size); - entry->delta_sibling_idx = base_entry->delta_child_idx; - SET_DELTA_CHILD(base_entry, entry); + + if (base_entry) { + SET_DELTA(entry, base_entry); + entry->delta_sibling_idx = base_entry->delta_child_idx; + SET_DELTA_CHILD(base_entry, entry); + } else { + SET_DELTA_EXT(entry, base_ref); + } + unuse_pack(&w_curs); return; } @@ -2979,7 +2992,6 @@ static int pack_options_allow_reuse(void) static int get_object_list_from_bitmap(struct rev_info *revs) { - struct bitmap_index *bitmap_git; if (!(bitmap_git = prepare_bitmap_walk(revs))) return -1; @@ -2995,7 +3007,6 @@ static int get_object_list_from_bitmap(struct rev_info *revs) } traverse_bitmap_commit_list(bitmap_git, &add_object_entry_from_bitmap); - free_bitmap_index(bitmap_git); return 0; } @@ -3143,7 +3154,6 @@ static int option_parse_unpack_unreachable(const struct option *opt, int cmd_pack_objects(int argc, const char **argv, const char *prefix) { int use_internal_rev_list = 0; - int thin = 0; int shallow = 0; int all_progress_implied = 0; struct argv_array rp = ARGV_ARRAY_INIT; diff --git a/pack-bitmap.c b/pack-bitmap.c index 4e50ab391f..c7d593c91c 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -86,6 +86,9 @@ struct bitmap_index { /* Bitmap result of the last performed walk */ struct bitmap *result; + /* "have" bitmap from the last performed walk */ + struct bitmap *haves; + /* Version of the bitmap index */ unsigned int version; @@ -759,8 +762,8 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs) bitmap_and_not(wants_bitmap, haves_bitmap); bitmap_git->result = wants_bitmap; + bitmap_git->haves = haves_bitmap; - bitmap_free(haves_bitmap); return bitmap_git; cleanup: @@ -1114,5 +1117,25 @@ void free_bitmap_index(struct bitmap_index *b) free(b->ext_index.objects); free(b->ext_index.hashes); bitmap_free(b->result); + bitmap_free(b->haves); free(b); } + +int bitmap_has_sha1_in_uninteresting(struct bitmap_index *bitmap_git, + const unsigned char *sha1) +{ + int pos; + + if (!bitmap_git) + return 0; /* no bitmap loaded */ + if (!bitmap_git->result) + BUG("failed to perform bitmap walk before querying"); + if (!bitmap_git->haves) + return 0; /* walk had no "haves" */ + + pos = bitmap_position_packfile(bitmap_git, sha1); + if (pos < 0) + return 0; + + return bitmap_get(bitmap_git->haves, pos); +} diff --git a/pack-bitmap.h b/pack-bitmap.h index 8a04741e12..c633bf5238 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -53,6 +53,13 @@ int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping khash_sha1 *reused_bitmaps, int show_progress); void free_bitmap_index(struct bitmap_index *); +/* + * After a traversal has been performed on the bitmap_index, this can be + * queried to see if a particular object was reachable from any of the + * objects flagged as UNINTERESTING. + */ +int bitmap_has_sha1_in_uninteresting(struct bitmap_index *, const unsigned char *sha1); + void bitmap_writer_show_progress(int show); void bitmap_writer_set_checksum(unsigned char *sha1); void bitmap_writer_build_type_index(struct packing_data *to_pack, diff --git a/pack-objects.c b/pack-objects.c index d04cfa8e9f..3462858772 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -181,3 +181,22 @@ struct object_entry *packlist_alloc(struct packing_data *pdata, return new_entry; } + +void oe_set_delta_ext(struct packing_data *pdata, + struct object_entry *delta, + const unsigned char *sha1) +{ + struct object_entry *base; + + ALLOC_GROW(pdata->ext_bases, pdata->nr_ext + 1, pdata->alloc_ext); + base = &pdata->ext_bases[pdata->nr_ext++]; + memset(base, 0, sizeof(*base)); + hashcpy(base->idx.oid.hash, sha1); + + /* These flags mark that we are not part of the actual pack output. */ + base->preferred_base = 1; + base->filled = 1; + + delta->ext_base = 1; + delta->delta_idx = base - pdata->ext_bases + 1; +} diff --git a/pack-objects.h b/pack-objects.h index 62806ccc39..b0c1f137c6 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -112,6 +112,7 @@ struct object_entry { unsigned filled:1; /* assigned write-order */ unsigned dfs_state:OE_DFS_STATE_BITS; unsigned depth:OE_DEPTH_BITS; + unsigned ext_base:1; /* delta_idx points outside packlist */ /* * pahole results on 64-bit linux (gcc and clang) @@ -147,6 +148,14 @@ struct packing_data { pthread_mutex_t lock; #endif + /* + * This list contains entries for bases which we know the other side + * has (e.g., via reachability bitmaps), but which aren't in our + * "objects" list. + */ + struct object_entry *ext_bases; + uint32_t nr_ext, alloc_ext; + uintmax_t oe_size_limit; uintmax_t oe_delta_size_limit; }; @@ -249,9 +258,12 @@ static inline struct object_entry *oe_delta( const struct packing_data *pack, const struct object_entry *e) { - if (e->delta_idx) + if (!e->delta_idx) + return NULL; + if (e->ext_base) + return &pack->ext_bases[e->delta_idx - 1]; + else return &pack->objects[e->delta_idx - 1]; - return NULL; } static inline void oe_set_delta(struct packing_data *pack, @@ -264,6 +276,10 @@ static inline void oe_set_delta(struct packing_data *pack, e->delta_idx = 0; } +void oe_set_delta_ext(struct packing_data *pack, + struct object_entry *e, + const unsigned char *sha1); + static inline struct object_entry *oe_delta_child( const struct packing_data *pack, const struct object_entry *e) diff --git a/t/perf/README b/t/perf/README index 21321a0f36..be12090c38 100644 --- a/t/perf/README +++ b/t/perf/README @@ -168,3 +168,28 @@ that While we have tried to make sure that it can cope with embedded whitespace and other special characters, it will not work with multi-line data. + +Rather than tracking the performance by run-time as `test_perf` does, you +may also track output size by using `test_size`. The stdout of the +function should be a single numeric value, which will be captured and +shown in the aggregated output. For example: + + test_perf 'time foo' ' + ./foo >foo.out + ' + + test_size 'output size' + wc -c ; return undef if not defined $line; close $fh or die "cannot close $name: $!"; - $line =~ /^(?:(\d+):)?(\d+):(\d+(?:\.\d+)?) (\d+(?:\.\d+)?) (\d+(?:\.\d+)?)$/ - or die "bad input line: $line"; - my $rt = ((defined $1 ? $1 : 0.0)*60+$2)*60+$3; - return ($rt, $4, $5); + # times + if ($line =~ /^(?:(\d+):)?(\d+):(\d+(?:\.\d+)?) (\d+(?:\.\d+)?) (\d+(?:\.\d+)?)$/) { + my $rt = ((defined $1 ? $1 : 0.0)*60+$2)*60+$3; + return ($rt, $4, $5); + # size + } elsif ($line =~ /^\d+$/) { + return $&; + } else { + die "bad input line: $line"; + } +} + +sub relative_change { + my ($r, $firstr) = @_; + if ($firstr > 0) { + return sprintf "%+.1f%%", 100.0*($r-$firstr)/$firstr; + } elsif ($r == 0) { + return "="; + } else { + return "+inf"; + } } sub format_times { my ($r, $u, $s, $firstr) = @_; + # no value means we did not finish the test if (!defined $r) { return ""; } - my $out = sprintf "%.2f(%.2f+%.2f)", $r, $u, $s; - if (defined $firstr) { - if ($firstr > 0) { - $out .= sprintf " %+.1f%%", 100.0*($r-$firstr)/$firstr; - } elsif ($r == 0) { - $out .= " ="; - } else { - $out .= " +inf"; - } + # a single value means we have a size, not times + if (!defined $u) { + return format_size($r, $firstr); } + # otherwise, we have real/user/system times + my $out = sprintf "%.2f(%.2f+%.2f)", $r, $u, $s; + $out .= ' ' . relative_change($r, $firstr) if defined $firstr; return $out; } @@ -51,6 +66,25 @@ EOT exit(1); } +sub human_size { + my $n = shift; + my @units = ('', qw(K M G)); + while ($n > 900 && @units > 1) { + $n /= 1000; + shift @units; + } + return $n unless length $units[0]; + return sprintf '%.1f%s', $n, $units[0]; +} + +sub format_size { + my ($size, $first) = @_; + # match the width of a time: 0.00(0.00+0.00) + my $out = sprintf '%15s', human_size($size); + $out .= ' ' . relative_change($size, $first) if defined $first; + return $out; +} + my (@dirs, %dirnames, %dirabbrevs, %prefixes, @tests, $codespeed, $sortby, $subsection, $reponame); @@ -181,7 +215,14 @@ sub print_default_results { my $firstr; for my $i (0..$#dirs) { my $d = $dirs[$i]; - $times{$prefixes{$d}.$t} = [get_times("$resultsdir/$prefixes{$d}$t.times")]; + my $base = "$resultsdir/$prefixes{$d}$t"; + $times{$prefixes{$d}.$t} = []; + foreach my $type (qw(times size)) { + if (-e "$base.$type") { + $times{$prefixes{$d}.$t} = [get_times("$base.$type")]; + last; + } + } my ($r,$u,$s) = @{$times{$prefixes{$d}.$t}}; my $w = length format_times($r,$u,$s,$firstr); $colwidth[$i] = $w if $w > $colwidth[$i]; diff --git a/t/perf/p5311-pack-bitmaps-fetch.sh b/t/perf/p5311-pack-bitmaps-fetch.sh new file mode 100755 index 0000000000..b04575951f --- /dev/null +++ b/t/perf/p5311-pack-bitmaps-fetch.sh @@ -0,0 +1,45 @@ +#!/bin/sh + +test_description='performance of fetches from bitmapped packs' +. ./perf-lib.sh + +test_perf_default_repo + +test_expect_success 'create bitmapped server repo' ' + git config pack.writebitmaps true && + git config pack.writebitmaphashcache true && + git repack -ad +' + +# simulate a fetch from a repository that last fetched N days ago, for +# various values of N. We do so by following the first-parent chain, +# and assume the first entry in the chain that is N days older than the current +# HEAD is where the HEAD would have been then. +for days in 1 2 4 8 16 32 64 128; do + title=$(printf '%10s' "($days days)") + test_expect_success "setup revs from $days days ago" ' + now=$(git log -1 --format=%ct HEAD) && + then=$(($now - ($days * 86400))) && + tip=$(git rev-list -1 --first-parent --until=$then HEAD) && + { + echo HEAD && + echo ^$tip + } >revs + ' + + test_perf "server $title" ' + git pack-objects --stdout --revs \ + --thin --delta-base-offset \ + tmp.pack + ' + + test_size "size $title" ' + wc -c &3 2>&4 return "$eval_ret" } - -test_perf () { +test_wrapper_ () { + test_wrapper_func_=$1; shift test_start_ test "$#" = 3 && { test_prereq=$1; shift; } || test_prereq= test "$#" = 2 || @@ -191,37 +191,59 @@ test_perf () { base=$(basename "$0" .sh) echo "$test_count" >>"$perf_results_dir"/$base.subtests echo "$1" >"$perf_results_dir"/$base.$test_count.descr - if test -z "$verbose"; then - printf "%s" "perf $test_count - $1:" - else - echo "perf $test_count - $1:" - fi - for i in $(test_seq 1 $GIT_PERF_REPEAT_COUNT); do - say >&3 "running: $2" - if test_run_perf_ "$2" - then - if test -z "$verbose"; then - printf " %s" "$i" - else - echo "* timing run $i/$GIT_PERF_REPEAT_COUNT:" - fi - else - test -z "$verbose" && echo - test_failure_ "$@" - break - fi - done - if test -z "$verbose"; then - echo " ok" - else - test_ok_ "$1" - fi base="$perf_results_dir"/"$perf_results_prefix$(basename "$0" .sh)"."$test_count" - "$TEST_DIRECTORY"/perf/min_time.perl test_time.* >"$base".times + "$test_wrapper_func_" "$@" fi + test_finish_ } +test_perf_ () { + if test -z "$verbose"; then + printf "%s" "perf $test_count - $1:" + else + echo "perf $test_count - $1:" + fi + for i in $(test_seq 1 $GIT_PERF_REPEAT_COUNT); do + say >&3 "running: $2" + if test_run_perf_ "$2" + then + if test -z "$verbose"; then + printf " %s" "$i" + else + echo "* timing run $i/$GIT_PERF_REPEAT_COUNT:" + fi + else + test -z "$verbose" && echo + test_failure_ "$@" + break + fi + done + if test -z "$verbose"; then + echo " ok" + else + test_ok_ "$1" + fi + "$TEST_DIRECTORY"/perf/min_time.perl test_time.* >"$base".times +} + +test_perf () { + test_wrapper_ test_perf_ "$@" +} + +test_size_ () { + say >&3 "running: $2" + if test_eval_ "$2" 3>"$base".size; then + test_ok_ "$1" + else + test_failure_ "$@" + fi +} + +test_size () { + test_wrapper_ test_size_ "$@" +} + # We extend test_done to print timings at the end (./run disables this # and does it after running everything) test_at_end_hook_ () {