From 45d4bdae5906cfe6b7cb1ba1cec82fd80381e07e Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Mon, 25 Mar 2013 16:16:50 -0400
Subject: [PATCH 01/10] stream_blob_to_fd: detect errors reading from stream

We call read_istream, but never check its return value for
errors. This can lead to us looping infinitely, as we just
keep trying to write "-1" bytes (and we do not notice the
error, as we simply check that write_in_full reports the
same number of bytes we fed it, which of course is also -1).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 streaming.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/streaming.c b/streaming.c
index 4d978e54e4..f4126a7da5 100644
--- a/streaming.c
+++ b/streaming.c
@@ -514,6 +514,8 @@ int stream_blob_to_fd(int fd, unsigned const char *sha1, struct stream_filter *f
 		ssize_t wrote, holeto;
 		ssize_t readlen = read_istream(st, buf, sizeof(buf));
 
+		if (readlen < 0)
+			goto close_and_exit;
 		if (!readlen)
 			break;
 		if (can_seek && sizeof(buf) == readlen) {

From f54fac53786808130c82936e59be16000deba04a Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Mon, 25 Mar 2013 16:17:17 -0400
Subject: [PATCH 02/10] check_sha1_signature: check return value from
 read_istream

It's possible for read_istream to return an error, in which
case we just end up in an infinite loop (aside from EOF, we
do not even look at the result, but just feed it straight
into our running hash).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 sha1_file.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sha1_file.c b/sha1_file.c
index 16967d3b9a..0b99f336e6 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1266,6 +1266,10 @@ int check_sha1_signature(const unsigned char *sha1, void *map,
 		char buf[1024 * 16];
 		ssize_t readlen = read_istream(st, buf, sizeof(buf));
 
+		if (readlen < 0) {
+			close_istream(st);
+			return -1;
+		}
 		if (!readlen)
 			break;
 		git_SHA1_Update(&c, buf, readlen);

From 42e7e2a53492ed1772b7b5d8b328f8d0a66f8b33 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Mon, 25 Mar 2013 16:18:16 -0400
Subject: [PATCH 03/10] read_istream_filtered: propagate read error from
 upstream

The filter istream pulls data from an "upstream" stream,
running it through a filter function. However, we did not
properly notice when the upstream filter yielded an error,
and just returned what we had read. Instead, we should
propagate the error.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 streaming.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/streaming.c b/streaming.c
index f4126a7da5..f4ab12ba42 100644
--- a/streaming.c
+++ b/streaming.c
@@ -237,7 +237,7 @@ static read_method_decl(filtered)
 		if (!fs->input_finished) {
 			fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER);
 			if (fs->i_end < 0)
-				break;
+				return -1;
 			if (fs->i_end)
 				continue;
 		}

From 692f0bc7ae0cb818bf3c757d509773d6f2e48c68 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Mon, 25 Mar 2013 16:21:14 -0400
Subject: [PATCH 04/10] avoid infinite loop in read_istream_loose

The read_istream_loose function loops on inflating a chunk of data
from an mmap'd loose object. We end the loop when we run out
of space in our output buffer, or if we see a zlib error.

We need to treat Z_BUF_ERROR specially, though, as it is not
fatal; it is just zlib's way of telling us that we need to
either feed it more input or give it more output space. It
is perfectly normal for us to hit this when we are at the
end of our buffer.

However, we may also get Z_BUF_ERROR because we have run out
of input. In a well-formed object, this should not happen,
because we have fed the whole mmap'd contents to zlib. But
if the object is truncated or corrupt, we will loop forever,
never giving zlib any more data, but continuing to ask it to
inflate.

We can fix this by considering it an error when zlib returns
Z_BUF_ERROR but we still have output space left (which means
it must want more input, which we know is a truncation
error). It would not be sufficient to just check whether
zlib had consumed all the input at the start of the loop, as
it might still want to generate output from what is in its
internal state.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 streaming.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/streaming.c b/streaming.c
index f4ab12ba42..cabcd9d157 100644
--- a/streaming.c
+++ b/streaming.c
@@ -309,7 +309,7 @@ static read_method_decl(loose)
 			st->z_state = z_done;
 			break;
 		}
-		if (status != Z_OK && status != Z_BUF_ERROR) {
+		if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
 			git_inflate_end(&st->z);
 			st->z_state = z_error;
 			return -1;

From 7b6257b0d45f562629ac660cb1845c8a2aac7df1 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Mon, 25 Mar 2013 16:21:34 -0400
Subject: [PATCH 05/10] add test for streaming corrupt blobs

We do not have many tests for handling corrupt objects. This
new test at least checks that we detect a byte error in a
corrupt blob object while streaming it out with cat-file.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t1060-object-corruption.sh | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100755 t/t1060-object-corruption.sh

diff --git a/t/t1060-object-corruption.sh b/t/t1060-object-corruption.sh
new file mode 100755
index 0000000000..c887dd86b0
--- /dev/null
+++ b/t/t1060-object-corruption.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+
+test_description='see how we handle various forms of corruption'
+. ./test-lib.sh
+
+# convert "1234abcd" to ".git/objects/12/34abcd"
+obj_to_file() {
+	echo "$(git rev-parse --git-dir)/objects/$(git rev-parse "$1" | sed 's,..,&/,')"
+}
+
+# Convert byte at offset "$2" of object "$1" into '\0'
+corrupt_byte() {
+	obj_file=$(obj_to_file "$1") &&
+	chmod +w "$obj_file" &&
+	printf '\0' | dd of="$obj_file" bs=1 seek="$2" conv=notrunc
+}
+
+test_expect_success 'setup corrupt repo' '
+	git init bit-error &&
+	(
+		cd bit-error &&
+		test_commit content &&
+		corrupt_byte HEAD:content.t 10
+	)
+'
+
+test_expect_success 'streaming a corrupt blob fails' '
+	(
+		cd bit-error &&
+		test_must_fail git cat-file blob HEAD:content.t
+	)
+'
+
+test_done

From d9c31e14d0aafdd45a382d01fcfd66c65a5f4b95 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Mon, 25 Mar 2013 17:49:36 -0400
Subject: [PATCH 06/10] streaming_write_entry: propagate streaming errors

When we are streaming an index blob to disk, we store the
error from stream_blob_to_fd in the "result" variable, and
then immediately overwrite that with the return value of
"close". That means we catch errors on close (e.g., problems
committing the file to disk), but miss anything which
happened before then.

We can fix this by using bitwise-OR to accumulate errors in
our result variable.

While we're here, we can also simplify the error handling
with an early return, which makes it easier to see under
which circumstances we need to clean up.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 entry.c                      | 16 +++++++++-------
 t/t1060-object-corruption.sh | 25 +++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/entry.c b/entry.c
index 17a6bccec6..a20bcbc47a 100644
--- a/entry.c
+++ b/entry.c
@@ -120,16 +120,18 @@ static int streaming_write_entry(struct cache_entry *ce, char *path,
 				 const struct checkout *state, int to_tempfile,
 				 int *fstat_done, struct stat *statbuf)
 {
-	int result = -1;
+	int result = 0;
 	int fd;
 
 	fd = open_output_fd(path, ce, to_tempfile);
-	if (0 <= fd) {
-		result = stream_blob_to_fd(fd, ce->sha1, filter, 1);
-		*fstat_done = fstat_output(fd, state, statbuf);
-		result = close(fd);
-	}
-	if (result && 0 <= fd)
+	if (fd < 0)
+		return -1;
+
+	result |= stream_blob_to_fd(fd, ce->sha1, filter, 1);
+	*fstat_done = fstat_output(fd, state, statbuf);
+	result |= close(fd);
+
+	if (result)
 		unlink(path);
 	return result;
 }
diff --git a/t/t1060-object-corruption.sh b/t/t1060-object-corruption.sh
index c887dd86b0..cfd1f23a11 100755
--- a/t/t1060-object-corruption.sh
+++ b/t/t1060-object-corruption.sh
@@ -24,6 +24,15 @@ test_expect_success 'setup corrupt repo' '
 	)
 '
 
+test_expect_success 'setup repo with missing object' '
+	git init missing &&
+	(
+		cd missing &&
+		test_commit content &&
+		rm -f "$(obj_to_file HEAD:content.t)"
+	)
+'
+
 test_expect_success 'streaming a corrupt blob fails' '
 	(
 		cd bit-error &&
@@ -31,4 +40,20 @@ test_expect_success 'streaming a corrupt blob fails' '
 	)
 '
 
+test_expect_success 'read-tree -u detects bit-errors in blobs' '
+	(
+		cd bit-error &&
+		rm -f content.t &&
+		test_must_fail git read-tree --reset -u HEAD
+	)
+'
+
+test_expect_success 'read-tree -u detects missing objects' '
+	(
+		cd missing &&
+		rm -f content.t &&
+		test_must_fail git read-tree --reset -u HEAD
+	)
+'
+
 test_done

From 0e15ad9b730a1516f8a786523266707c4d26f5ab Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Mon, 25 Mar 2013 16:22:29 -0400
Subject: [PATCH 07/10] add tests for cloning corrupted repositories

We try not to let corruption pass unnoticed over fetches and
clones. For the most part, this works, but there are some
broken corner cases, including:

  1. We do not detect missing objects over git-aware
     transports. This is a little hard to test, because the
     sending side will actually complain about the missing
     object.

     To fool it, we corrupt a repository such that we have a
     "misnamed" object: it claims to be sha1 X, but is
     really Y. This lets the sender blindly transmit it, but
     it is the receiver's responsibility to verify that what
     it got is sane (and it does not).

  2. We do not detect missing or misnamed blobs during the
     checkout phase of clone.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t1060-object-corruption.sh | 41 ++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/t/t1060-object-corruption.sh b/t/t1060-object-corruption.sh
index cfd1f23a11..e29406e27a 100755
--- a/t/t1060-object-corruption.sh
+++ b/t/t1060-object-corruption.sh
@@ -33,6 +33,19 @@ test_expect_success 'setup repo with missing object' '
 	)
 '
 
+test_expect_success 'setup repo with misnamed object' '
+	git init misnamed &&
+	(
+		cd misnamed &&
+		test_commit content &&
+		good=$(obj_to_file HEAD:content.t) &&
+		blob=$(echo corrupt | git hash-object -w --stdin) &&
+		bad=$(obj_to_file $blob) &&
+		rm -f "$good" &&
+		mv "$bad" "$good"
+	)
+'
+
 test_expect_success 'streaming a corrupt blob fails' '
 	(
 		cd bit-error &&
@@ -56,4 +69,32 @@ test_expect_success 'read-tree -u detects missing objects' '
 	)
 '
 
+# We use --bare to make sure that the transport detects it, not the checkout
+# phase.
+test_expect_success 'clone --no-local --bare detects corruption' '
+	test_must_fail git clone --no-local --bare bit-error corrupt-transport
+'
+
+test_expect_success 'clone --no-local --bare detects missing object' '
+	test_must_fail git clone --no-local --bare missing missing-transport
+'
+
+test_expect_failure 'clone --no-local --bare detects misnamed object' '
+	test_must_fail git clone --no-local --bare misnamed misnamed-transport
+'
+
+# We do not expect --local to detect corruption at the transport layer,
+# so we are really checking the checkout() code path.
+test_expect_success 'clone --local detects corruption' '
+	test_must_fail git clone --local bit-error corrupt-checkout
+'
+
+test_expect_failure 'clone --local detects missing objects' '
+	test_must_fail git clone --local missing missing-checkout
+'
+
+test_expect_failure 'clone --local detects misnamed objects' '
+	test_must_fail git clone --local misnamed misnamed-checkout
+'
+
 test_done

From 0aac7bb287645dd72ad8ad6b805128b8ff7f111f Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Mon, 25 Mar 2013 16:23:59 -0400
Subject: [PATCH 08/10] clone: die on errors from unpack_trees

When clone is populating the working tree, it ignores the
return status from unpack_trees; this means we may report a
successful clone, even when the checkout fails.

When checkout fails, we may want to leave the $GIT_DIR in
place, as it might be possible to recover the data through
further use of "git checkout" (e.g., if the checkout failed
due to a transient error, disk full, etc). However, we
already die on a number of other checkout-related errors, so
this patch follows that pattern.

In addition to marking a now-passing test, we need to adjust
t5710, which blindly assumed it could make bogus clones of
very deep alternates hierarchies. By using "--bare", we can
avoid it actually touching any objects.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/clone.c              | 3 ++-
 t/t1060-object-corruption.sh | 2 +-
 t/t5710-info-alternate.sh    | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index e0aaf13583..7d48ef3b4e 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -579,7 +579,8 @@ static int checkout(void)
 	tree = parse_tree_indirect(sha1);
 	parse_tree(tree);
 	init_tree_desc(&t, tree->buffer, tree->size);
-	unpack_trees(1, &t, &opts);
+	if (unpack_trees(1, &t, &opts) < 0)
+		die(_("unable to checkout working tree"));
 
 	if (write_cache(fd, active_cache, active_nr) ||
 	    commit_locked_index(lock_file))
diff --git a/t/t1060-object-corruption.sh b/t/t1060-object-corruption.sh
index e29406e27a..4e7030e613 100755
--- a/t/t1060-object-corruption.sh
+++ b/t/t1060-object-corruption.sh
@@ -89,7 +89,7 @@ test_expect_success 'clone --local detects corruption' '
 	test_must_fail git clone --local bit-error corrupt-checkout
 '
 
-test_expect_failure 'clone --local detects missing objects' '
+test_expect_success 'clone --local detects missing objects' '
 	test_must_fail git clone --local missing missing-checkout
 '
 
diff --git a/t/t5710-info-alternate.sh b/t/t5710-info-alternate.sh
index aa045295de..5a6e49d18d 100755
--- a/t/t5710-info-alternate.sh
+++ b/t/t5710-info-alternate.sh
@@ -58,7 +58,7 @@ test_expect_success 'creating too deep nesting' \
 git clone -l -s D E &&
 git clone -l -s E F &&
 git clone -l -s F G &&
-git clone -l -s G H'
+git clone --bare -l -s G H'
 
 test_expect_success 'invalidity of deepest repository' \
 'cd H && {

From 0433ad128c59f233046b3f8a68246ca3a8a77af8 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Mon, 25 Mar 2013 16:26:27 -0400
Subject: [PATCH 09/10] clone: run check_everything_connected

When we fetch from a remote, we do a revision walk to make
sure that what we received is connected to our existing
history. We do not do the same check for clone, which should
be able to check that we received an intact history graph.

The upside of this patch is that it will make clone more
resilient against propagating repository corruption. The
downside is that we will now traverse "rev-list --objects
--all" down to the roots, which may take some time (it is
especially noticeable for a "--local --bare" clone).

Note that we need to adjust t5710, which tries to make such
a bogus clone. Rather than checking after the fact that our
clone is bogus, we can simplify it to just make sure "git
clone" reports failure.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/clone.c              | 26 ++++++++++++++++++++++++++
 t/t1060-object-corruption.sh |  2 +-
 t/t5710-info-alternate.sh    |  8 +-------
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index 7d48ef3b4e..eceaa74922 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -23,6 +23,7 @@
 #include "branch.h"
 #include "remote.h"
 #include "run-command.h"
+#include "connected.h"
 
 /*
  * Overall FIXMEs:
@@ -485,12 +486,37 @@ static void write_followtags(const struct ref *refs, const char *msg)
 	}
 }
 
+static int iterate_ref_map(void *cb_data, unsigned char sha1[20])
+{
+	struct ref **rm = cb_data;
+	struct ref *ref = *rm;
+
+	/*
+	 * Skip anything missing a peer_ref, which we are not
+	 * actually going to write a ref for.
+	 */
+	while (ref && !ref->peer_ref)
+		ref = ref->next;
+	/* Returning -1 notes "end of list" to the caller. */
+	if (!ref)
+		return -1;
+
+	hashcpy(sha1, ref->old_sha1);
+	*rm = ref->next;
+	return 0;
+}
+
 static void update_remote_refs(const struct ref *refs,
 			       const struct ref *mapped_refs,
 			       const struct ref *remote_head_points_at,
 			       const char *branch_top,
 			       const char *msg)
 {
+	const struct ref *rm = mapped_refs;
+
+	if (check_everything_connected(iterate_ref_map, 0, &rm))
+		die(_("remote did not send all necessary objects"));
+
 	if (refs) {
 		write_remote_refs(mapped_refs);
 		if (option_single_branch)
diff --git a/t/t1060-object-corruption.sh b/t/t1060-object-corruption.sh
index 4e7030e613..c65a57cd22 100755
--- a/t/t1060-object-corruption.sh
+++ b/t/t1060-object-corruption.sh
@@ -79,7 +79,7 @@ test_expect_success 'clone --no-local --bare detects missing object' '
 	test_must_fail git clone --no-local --bare missing missing-transport
 '
 
-test_expect_failure 'clone --no-local --bare detects misnamed object' '
+test_expect_success 'clone --no-local --bare detects misnamed object' '
 	test_must_fail git clone --no-local --bare misnamed misnamed-transport
 '
 
diff --git a/t/t5710-info-alternate.sh b/t/t5710-info-alternate.sh
index 5a6e49d18d..8956c21617 100755
--- a/t/t5710-info-alternate.sh
+++ b/t/t5710-info-alternate.sh
@@ -58,13 +58,7 @@ test_expect_success 'creating too deep nesting' \
 git clone -l -s D E &&
 git clone -l -s E F &&
 git clone -l -s F G &&
-git clone --bare -l -s G H'
-
-test_expect_success 'invalidity of deepest repository' \
-'cd H && {
-	test_valid_repo
-	test $? -ne 0
-}'
+test_must_fail git clone --bare -l -s G H'
 
 cd "$base_dir"
 

From d3b34622f699ff14646de4ec1b1ab9afb0bcb056 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Tue, 26 Mar 2013 18:22:09 -0400
Subject: [PATCH 10/10] clone: leave repo in place after checkout errors

If we manage to clone a remote repository but run into an
error in the checkout, it is probably sane to leave the repo
directory in place. That lets the user examine the situation
without spending time to re-clone from the remote (which may
be a lengthy process).

Rather than try to convert each die() from the checkout code
path into an error(), we simply set a flag that tells the
"remove_junk" atexit function to print a helpful message and
leave the repo in place.

Note that the test added in this patch actually passes
without the code change. The reason is that the cleanup code
is buggy; we chdir into the working tree for the checkout,
but still may use relative paths to remove the directories
(which means if you cloned into "foo", we would accidentally
remove "foo" from the working tree!).  There's no point in
fixing it now, since this patch means we will never try to
remove anything after the chdir, anyway.

[jc: replaced the message with a more succinct version from
Jonathan]

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/clone.c              | 25 ++++++++++++++++++++++++-
 t/t1060-object-corruption.sh |  4 ++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index eceaa74922..f9c380eb6c 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -377,10 +377,32 @@ static void clone_local(const char *src_repo, const char *dest_repo)
 static const char *junk_work_tree;
 static const char *junk_git_dir;
 static pid_t junk_pid;
+enum {
+	JUNK_LEAVE_NONE,
+	JUNK_LEAVE_REPO,
+	JUNK_LEAVE_ALL
+} junk_mode = JUNK_LEAVE_NONE;
+
+static const char junk_leave_repo_msg[] =
+N_("Clone succeeded, but checkout failed.\n"
+   "You can inspect what was checked out with 'git status'\n"
+   "and retry the checkout with 'git checkout -f HEAD'\n");
 
 static void remove_junk(void)
 {
 	struct strbuf sb = STRBUF_INIT;
+
+	switch (junk_mode) {
+	case JUNK_LEAVE_REPO:
+		warning("%s", _(junk_leave_repo_msg));
+		/* fall-through */
+	case JUNK_LEAVE_ALL:
+		return;
+	default:
+		/* proceed to removal */
+		break;
+	}
+
 	if (getpid() != junk_pid)
 		return;
 	if (junk_git_dir) {
@@ -925,12 +947,13 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
 	transport_unlock_pack(transport);
 	transport_disconnect(transport);
 
+	junk_mode = JUNK_LEAVE_REPO;
 	err = checkout();
 
 	strbuf_release(&reflog_msg);
 	strbuf_release(&branch_top);
 	strbuf_release(&key);
 	strbuf_release(&value);
-	junk_pid = 0;
+	junk_mode = JUNK_LEAVE_ALL;
 	return err;
 }
diff --git a/t/t1060-object-corruption.sh b/t/t1060-object-corruption.sh
index c65a57cd22..3f8705139d 100755
--- a/t/t1060-object-corruption.sh
+++ b/t/t1060-object-corruption.sh
@@ -89,6 +89,10 @@ test_expect_success 'clone --local detects corruption' '
 	test_must_fail git clone --local bit-error corrupt-checkout
 '
 
+test_expect_success 'error detected during checkout leaves repo intact' '
+	test_path_is_dir corrupt-checkout/.git
+'
+
 test_expect_success 'clone --local detects missing objects' '
 	test_must_fail git clone --local missing missing-checkout
 '