Merge branch 'jk/fetch-quick-tag-following'

When fetching from a remote that has many tags that are irrelevant to the branches we are following, we used to waste way too many cycles by checking too carefully whether the object pointed at by a tag (that we are not going to fetch!) exists in our repository. * jk/fetch-quick-tag-following: fetch: use "quick" has_sha1_file for tag following
2016-10-26 13:14:47 -07:00 · 2016-10-26 13:14:47 -07:00 · 9fcd14491d
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@ -241,9 +241,10 @@ static void find_non_local_tags(struct transport *transport,
 		 * as one to ignore by setting util to NULL.
 		 */
 		if (ends_with(ref->name, "^{}")) {
-			if (item && !has_object_file(&ref->old_oid) &&
+			if (item &&
+			    !has_object_file_with_flags(&ref->old_oid, HAS_SHA1_QUICK) &&
 			    !will_fetch(head, ref->old_oid.hash) &&
-			    !has_sha1_file(item->util) &&
+			    !has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) &&
 			    !will_fetch(head, item->util))
 				item->util = NULL;
 			item = NULL;
@ -256,7 +257,8 @@ static void find_non_local_tags(struct transport *transport,
 		 * to check if it is a lightweight tag that we want to
 		 * fetch.
 		 */
-		if (item && !has_sha1_file(item->util) &&
+		if (item &&
+		    !has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) &&
 		    !will_fetch(head, item->util))
 			item->util = NULL;

@ -276,7 +278,8 @@ static void find_non_local_tags(struct transport *transport,
 	 * We may have a final lightweight tag that needs to be
 	 * checked to see if it needs fetching.
 	 */
-	if (item && !has_sha1_file(item->util) &&
+	if (item &&
+	    !has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) &&
 	    !will_fetch(head, item->util))
 		item->util = NULL;

--- a/cache.h
+++ b/cache.h
@ -1157,6 +1157,7 @@ static inline int has_sha1_file(const unsigned char *sha1)

 /* Same as the above, except for struct object_id. */
 extern int has_object_file(const struct object_id *oid);
+extern int has_object_file_with_flags(const struct object_id *oid, int flags);

 /*
 * Return true iff an alternate object database has a loose object
--- a/sha1_file.c
+++ b/sha1_file.c
@ -3335,6 +3335,11 @@ int has_object_file(const struct object_id *oid)
 	return has_sha1_file(oid->hash);
 }

+int has_object_file_with_flags(const struct object_id *oid, int flags)
+{
+	return has_sha1_file_with_flags(oid->hash, flags);
+}
+
 static void check_tree(const void *buf, size_t size)
 {
 	struct tree_desc desc;
--- a/t/perf/p5550-fetch-tags.sh
+++ b/t/perf/p5550-fetch-tags.sh
@ -0,0 +1,99 @@
+#!/bin/sh
+
+test_description='performance of tag-following with many tags
+
+This tests a fairly pathological case, so rather than rely on a real-world
+case, we will construct our own repository. The situation is roughly as
+follows.
+
+The parent repository has a large number of tags which are disconnected from
+the rest of history. That makes them candidates for tag-following, but we never
+actually grab them (and thus they will impact each subsequent fetch).
+
+The child repository is a clone of parent, without the tags, and is at least
+one commit behind the parent (meaning that we will fetch one object and then
+examine the tags to see if they need followed). Furthermore, it has a large
+number of packs.
+
+The exact values of "large" here are somewhat arbitrary; I picked values that
+start to show a noticeable performance problem on my machine, but without
+taking too long to set up and run the tests.
+'
+. ./perf-lib.sh
+
+# make a long nonsense history on branch $1, consisting of $2 commits, each
+# with a unique file pointing to the blob at $3.
+create_history () {
+	perl -le '
+		my ($branch, $n, $blob) = @ARGV;
+		for (1..$n) {
+			print "commit refs/heads/$branch";
+			print "committer nobody <nobody@example.com> now";
+			print "data 4";
+			print "foo";
+			print "M 100644 $blob $_";
+		}
+	' "$@" |
+	git fast-import --date-format=now
+}
+
+# make a series of tags, one per commit in the revision range given by $@
+create_tags () {
+	git rev-list "$@" |
+	perl -lne 'print "create refs/tags/$. $_"' |
+	git update-ref --stdin
+}
+
+# create $1 nonsense packs, each with a single blob
+create_packs () {
+	perl -le '
+		my ($n) = @ARGV;
+		for (1..$n) {
+			print "blob";
+			print "data <<EOF";
+			print "$_";
+			print "EOF";
+		}
+	' "$@" |
+	git fast-import &&
+
+	git cat-file --batch-all-objects --batch-check='%(objectname)' |
+	while read sha1
+	do
+		echo $sha1 | git pack-objects .git/objects/pack/pack
+	done
+}
+
+test_expect_success 'create parent and child' '
+	git init parent &&
+	git -C parent commit --allow-empty -m base &&
+	git clone parent child &&
+	git -C parent commit --allow-empty -m trigger-fetch
+'
+
+test_expect_success 'populate parent tags' '
+	(
+		cd parent &&
+		blob=$(echo content | git hash-object -w --stdin) &&
+		create_history cruft 3000 $blob &&
+		create_tags cruft &&
+		git branch -D cruft
+	)
+'
+
+test_expect_success 'create child packs' '
+	(
+		cd child &&
+		git config gc.auto 0 &&
+		git config gc.autopacklimit 0 &&
+		create_packs 500
+	)
+'
+
+test_perf 'fetch' '
+	# make sure there is something to fetch on each iteration
+	git -C child update-ref -d refs/remotes/origin/master &&
+	git -C child fetch
+'
+
+test_done