From 45d4bdae5906cfe6b7cb1ba1cec82fd80381e07e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 25 Mar 2013 16:16:50 -0400 Subject: [PATCH 01/10] stream_blob_to_fd: detect errors reading from stream We call read_istream, but never check its return value for errors. This can lead to us looping infinitely, as we just keep trying to write "-1" bytes (and we do not notice the error, as we simply check that write_in_full reports the same number of bytes we fed it, which of course is also -1). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- streaming.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/streaming.c b/streaming.c index 4d978e54e4..f4126a7da5 100644 --- a/streaming.c +++ b/streaming.c @@ -514,6 +514,8 @@ int stream_blob_to_fd(int fd, unsigned const char *sha1, struct stream_filter *f ssize_t wrote, holeto; ssize_t readlen = read_istream(st, buf, sizeof(buf)); + if (readlen < 0) + goto close_and_exit; if (!readlen) break; if (can_seek && sizeof(buf) == readlen) { From f54fac53786808130c82936e59be16000deba04a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 25 Mar 2013 16:17:17 -0400 Subject: [PATCH 02/10] check_sha1_signature: check return value from read_istream It's possible for read_istream to return an error, in which case we just end up in an infinite loop (aside from EOF, we do not even look at the result, but just feed it straight into our running hash). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sha1_file.c b/sha1_file.c index 16967d3b9a..0b99f336e6 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1266,6 +1266,10 @@ int check_sha1_signature(const unsigned char *sha1, void *map, char buf[1024 * 16]; ssize_t readlen = read_istream(st, buf, sizeof(buf)); + if (readlen < 0) { + close_istream(st); + return -1; + } if (!readlen) break; git_SHA1_Update(&c, buf, readlen); From 42e7e2a53492ed1772b7b5d8b328f8d0a66f8b33 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 25 Mar 2013 16:18:16 -0400 Subject: [PATCH 03/10] read_istream_filtered: propagate read error from upstream The filter istream pulls data from an "upstream" stream, running it through a filter function. However, we did not properly notice when the upstream filter yielded an error, and just returned what we had read. Instead, we should propagate the error. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- streaming.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streaming.c b/streaming.c index f4126a7da5..f4ab12ba42 100644 --- a/streaming.c +++ b/streaming.c @@ -237,7 +237,7 @@ static read_method_decl(filtered) if (!fs->input_finished) { fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER); if (fs->i_end < 0) - break; + return -1; if (fs->i_end) continue; } From 692f0bc7ae0cb818bf3c757d509773d6f2e48c68 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 25 Mar 2013 16:21:14 -0400 Subject: [PATCH 04/10] avoid infinite loop in read_istream_loose The read_istream_loose function loops on inflating a chunk of data from an mmap'd loose object. We end the loop when we run out of space in our output buffer, or if we see a zlib error. We need to treat Z_BUF_ERROR specially, though, as it is not fatal; it is just zlib's way of telling us that we need to either feed it more input or give it more output space. It is perfectly normal for us to hit this when we are at the end of our buffer. However, we may also get Z_BUF_ERROR because we have run out of input. In a well-formed object, this should not happen, because we have fed the whole mmap'd contents to zlib. But if the object is truncated or corrupt, we will loop forever, never giving zlib any more data, but continuing to ask it to inflate. We can fix this by considering it an error when zlib returns Z_BUF_ERROR but we still have output space left (which means it must want more input, which we know is a truncation error). It would not be sufficient to just check whether zlib had consumed all the input at the start of the loop, as it might still want to generate output from what is in its internal state. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- streaming.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streaming.c b/streaming.c index f4ab12ba42..cabcd9d157 100644 --- a/streaming.c +++ b/streaming.c @@ -309,7 +309,7 @@ static read_method_decl(loose) st->z_state = z_done; break; } - if (status != Z_OK && status != Z_BUF_ERROR) { + if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { git_inflate_end(&st->z); st->z_state = z_error; return -1; From 7b6257b0d45f562629ac660cb1845c8a2aac7df1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 25 Mar 2013 16:21:34 -0400 Subject: [PATCH 05/10] add test for streaming corrupt blobs We do not have many tests for handling corrupt objects. This new test at least checks that we detect a byte error in a corrupt blob object while streaming it out with cat-file. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t1060-object-corruption.sh | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100755 t/t1060-object-corruption.sh diff --git a/t/t1060-object-corruption.sh b/t/t1060-object-corruption.sh new file mode 100755 index 0000000000..c887dd86b0 --- /dev/null +++ b/t/t1060-object-corruption.sh @@ -0,0 +1,34 @@ +#!/bin/sh + +test_description='see how we handle various forms of corruption' +. ./test-lib.sh + +# convert "1234abcd" to ".git/objects/12/34abcd" +obj_to_file() { + echo "$(git rev-parse --git-dir)/objects/$(git rev-parse "$1" | sed 's,..,&/,')" +} + +# Convert byte at offset "$2" of object "$1" into '\0' +corrupt_byte() { + obj_file=$(obj_to_file "$1") && + chmod +w "$obj_file" && + printf '\0' | dd of="$obj_file" bs=1 seek="$2" conv=notrunc +} + +test_expect_success 'setup corrupt repo' ' + git init bit-error && + ( + cd bit-error && + test_commit content && + corrupt_byte HEAD:content.t 10 + ) +' + +test_expect_success 'streaming a corrupt blob fails' ' + ( + cd bit-error && + test_must_fail git cat-file blob HEAD:content.t + ) +' + +test_done From d9c31e14d0aafdd45a382d01fcfd66c65a5f4b95 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 25 Mar 2013 17:49:36 -0400 Subject: [PATCH 06/10] streaming_write_entry: propagate streaming errors When we are streaming an index blob to disk, we store the error from stream_blob_to_fd in the "result" variable, and then immediately overwrite that with the return value of "close". That means we catch errors on close (e.g., problems committing the file to disk), but miss anything which happened before then. We can fix this by using bitwise-OR to accumulate errors in our result variable. While we're here, we can also simplify the error handling with an early return, which makes it easier to see under which circumstances we need to clean up. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- entry.c | 16 +++++++++------- t/t1060-object-corruption.sh | 25 +++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/entry.c b/entry.c index 17a6bccec6..a20bcbc47a 100644 --- a/entry.c +++ b/entry.c @@ -120,16 +120,18 @@ static int streaming_write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile, int *fstat_done, struct stat *statbuf) { - int result = -1; + int result = 0; int fd; fd = open_output_fd(path, ce, to_tempfile); - if (0 <= fd) { - result = stream_blob_to_fd(fd, ce->sha1, filter, 1); - *fstat_done = fstat_output(fd, state, statbuf); - result = close(fd); - } - if (result && 0 <= fd) + if (fd < 0) + return -1; + + result |= stream_blob_to_fd(fd, ce->sha1, filter, 1); + *fstat_done = fstat_output(fd, state, statbuf); + result |= close(fd); + + if (result) unlink(path); return result; } diff --git a/t/t1060-object-corruption.sh b/t/t1060-object-corruption.sh index c887dd86b0..cfd1f23a11 100755 --- a/t/t1060-object-corruption.sh +++ b/t/t1060-object-corruption.sh @@ -24,6 +24,15 @@ test_expect_success 'setup corrupt repo' ' ) ' +test_expect_success 'setup repo with missing object' ' + git init missing && + ( + cd missing && + test_commit content && + rm -f "$(obj_to_file HEAD:content.t)" + ) +' + test_expect_success 'streaming a corrupt blob fails' ' ( cd bit-error && @@ -31,4 +40,20 @@ test_expect_success 'streaming a corrupt blob fails' ' ) ' +test_expect_success 'read-tree -u detects bit-errors in blobs' ' + ( + cd bit-error && + rm -f content.t && + test_must_fail git read-tree --reset -u HEAD + ) +' + +test_expect_success 'read-tree -u detects missing objects' ' + ( + cd missing && + rm -f content.t && + test_must_fail git read-tree --reset -u HEAD + ) +' + test_done From 0e15ad9b730a1516f8a786523266707c4d26f5ab Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 25 Mar 2013 16:22:29 -0400 Subject: [PATCH 07/10] add tests for cloning corrupted repositories We try not to let corruption pass unnoticed over fetches and clones. For the most part, this works, but there are some broken corner cases, including: 1. We do not detect missing objects over git-aware transports. This is a little hard to test, because the sending side will actually complain about the missing object. To fool it, we corrupt a repository such that we have a "misnamed" object: it claims to be sha1 X, but is really Y. This lets the sender blindly transmit it, but it is the receiver's responsibility to verify that what it got is sane (and it does not). 2. We do not detect missing or misnamed blobs during the checkout phase of clone. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t1060-object-corruption.sh | 41 ++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/t/t1060-object-corruption.sh b/t/t1060-object-corruption.sh index cfd1f23a11..e29406e27a 100755 --- a/t/t1060-object-corruption.sh +++ b/t/t1060-object-corruption.sh @@ -33,6 +33,19 @@ test_expect_success 'setup repo with missing object' ' ) ' +test_expect_success 'setup repo with misnamed object' ' + git init misnamed && + ( + cd misnamed && + test_commit content && + good=$(obj_to_file HEAD:content.t) && + blob=$(echo corrupt | git hash-object -w --stdin) && + bad=$(obj_to_file $blob) && + rm -f "$good" && + mv "$bad" "$good" + ) +' + test_expect_success 'streaming a corrupt blob fails' ' ( cd bit-error && @@ -56,4 +69,32 @@ test_expect_success 'read-tree -u detects missing objects' ' ) ' +# We use --bare to make sure that the transport detects it, not the checkout +# phase. +test_expect_success 'clone --no-local --bare detects corruption' ' + test_must_fail git clone --no-local --bare bit-error corrupt-transport +' + +test_expect_success 'clone --no-local --bare detects missing object' ' + test_must_fail git clone --no-local --bare missing missing-transport +' + +test_expect_failure 'clone --no-local --bare detects misnamed object' ' + test_must_fail git clone --no-local --bare misnamed misnamed-transport +' + +# We do not expect --local to detect corruption at the transport layer, +# so we are really checking the checkout() code path. +test_expect_success 'clone --local detects corruption' ' + test_must_fail git clone --local bit-error corrupt-checkout +' + +test_expect_failure 'clone --local detects missing objects' ' + test_must_fail git clone --local missing missing-checkout +' + +test_expect_failure 'clone --local detects misnamed objects' ' + test_must_fail git clone --local misnamed misnamed-checkout +' + test_done From 0aac7bb287645dd72ad8ad6b805128b8ff7f111f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 25 Mar 2013 16:23:59 -0400 Subject: [PATCH 08/10] clone: die on errors from unpack_trees When clone is populating the working tree, it ignores the return status from unpack_trees; this means we may report a successful clone, even when the checkout fails. When checkout fails, we may want to leave the $GIT_DIR in place, as it might be possible to recover the data through further use of "git checkout" (e.g., if the checkout failed due to a transient error, disk full, etc). However, we already die on a number of other checkout-related errors, so this patch follows that pattern. In addition to marking a now-passing test, we need to adjust t5710, which blindly assumed it could make bogus clones of very deep alternates hierarchies. By using "--bare", we can avoid it actually touching any objects. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/clone.c | 3 ++- t/t1060-object-corruption.sh | 2 +- t/t5710-info-alternate.sh | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index e0aaf13583..7d48ef3b4e 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -579,7 +579,8 @@ static int checkout(void) tree = parse_tree_indirect(sha1); parse_tree(tree); init_tree_desc(&t, tree->buffer, tree->size); - unpack_trees(1, &t, &opts); + if (unpack_trees(1, &t, &opts) < 0) + die(_("unable to checkout working tree")); if (write_cache(fd, active_cache, active_nr) || commit_locked_index(lock_file)) diff --git a/t/t1060-object-corruption.sh b/t/t1060-object-corruption.sh index e29406e27a..4e7030e613 100755 --- a/t/t1060-object-corruption.sh +++ b/t/t1060-object-corruption.sh @@ -89,7 +89,7 @@ test_expect_success 'clone --local detects corruption' ' test_must_fail git clone --local bit-error corrupt-checkout ' -test_expect_failure 'clone --local detects missing objects' ' +test_expect_success 'clone --local detects missing objects' ' test_must_fail git clone --local missing missing-checkout ' diff --git a/t/t5710-info-alternate.sh b/t/t5710-info-alternate.sh index aa045295de..5a6e49d18d 100755 --- a/t/t5710-info-alternate.sh +++ b/t/t5710-info-alternate.sh @@ -58,7 +58,7 @@ test_expect_success 'creating too deep nesting' \ git clone -l -s D E && git clone -l -s E F && git clone -l -s F G && -git clone -l -s G H' +git clone --bare -l -s G H' test_expect_success 'invalidity of deepest repository' \ 'cd H && { From 0433ad128c59f233046b3f8a68246ca3a8a77af8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 25 Mar 2013 16:26:27 -0400 Subject: [PATCH 09/10] clone: run check_everything_connected When we fetch from a remote, we do a revision walk to make sure that what we received is connected to our existing history. We do not do the same check for clone, which should be able to check that we received an intact history graph. The upside of this patch is that it will make clone more resilient against propagating repository corruption. The downside is that we will now traverse "rev-list --objects --all" down to the roots, which may take some time (it is especially noticeable for a "--local --bare" clone). Note that we need to adjust t5710, which tries to make such a bogus clone. Rather than checking after the fact that our clone is bogus, we can simplify it to just make sure "git clone" reports failure. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/clone.c | 26 ++++++++++++++++++++++++++ t/t1060-object-corruption.sh | 2 +- t/t5710-info-alternate.sh | 8 +------- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 7d48ef3b4e..eceaa74922 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -23,6 +23,7 @@ #include "branch.h" #include "remote.h" #include "run-command.h" +#include "connected.h" /* * Overall FIXMEs: @@ -485,12 +486,37 @@ static void write_followtags(const struct ref *refs, const char *msg) } } +static int iterate_ref_map(void *cb_data, unsigned char sha1[20]) +{ + struct ref **rm = cb_data; + struct ref *ref = *rm; + + /* + * Skip anything missing a peer_ref, which we are not + * actually going to write a ref for. + */ + while (ref && !ref->peer_ref) + ref = ref->next; + /* Returning -1 notes "end of list" to the caller. */ + if (!ref) + return -1; + + hashcpy(sha1, ref->old_sha1); + *rm = ref->next; + return 0; +} + static void update_remote_refs(const struct ref *refs, const struct ref *mapped_refs, const struct ref *remote_head_points_at, const char *branch_top, const char *msg) { + const struct ref *rm = mapped_refs; + + if (check_everything_connected(iterate_ref_map, 0, &rm)) + die(_("remote did not send all necessary objects")); + if (refs) { write_remote_refs(mapped_refs); if (option_single_branch) diff --git a/t/t1060-object-corruption.sh b/t/t1060-object-corruption.sh index 4e7030e613..c65a57cd22 100755 --- a/t/t1060-object-corruption.sh +++ b/t/t1060-object-corruption.sh @@ -79,7 +79,7 @@ test_expect_success 'clone --no-local --bare detects missing object' ' test_must_fail git clone --no-local --bare missing missing-transport ' -test_expect_failure 'clone --no-local --bare detects misnamed object' ' +test_expect_success 'clone --no-local --bare detects misnamed object' ' test_must_fail git clone --no-local --bare misnamed misnamed-transport ' diff --git a/t/t5710-info-alternate.sh b/t/t5710-info-alternate.sh index 5a6e49d18d..8956c21617 100755 --- a/t/t5710-info-alternate.sh +++ b/t/t5710-info-alternate.sh @@ -58,13 +58,7 @@ test_expect_success 'creating too deep nesting' \ git clone -l -s D E && git clone -l -s E F && git clone -l -s F G && -git clone --bare -l -s G H' - -test_expect_success 'invalidity of deepest repository' \ -'cd H && { - test_valid_repo - test $? -ne 0 -}' +test_must_fail git clone --bare -l -s G H' cd "$base_dir" From d3b34622f699ff14646de4ec1b1ab9afb0bcb056 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 26 Mar 2013 18:22:09 -0400 Subject: [PATCH 10/10] clone: leave repo in place after checkout errors If we manage to clone a remote repository but run into an error in the checkout, it is probably sane to leave the repo directory in place. That lets the user examine the situation without spending time to re-clone from the remote (which may be a lengthy process). Rather than try to convert each die() from the checkout code path into an error(), we simply set a flag that tells the "remove_junk" atexit function to print a helpful message and leave the repo in place. Note that the test added in this patch actually passes without the code change. The reason is that the cleanup code is buggy; we chdir into the working tree for the checkout, but still may use relative paths to remove the directories (which means if you cloned into "foo", we would accidentally remove "foo" from the working tree!). There's no point in fixing it now, since this patch means we will never try to remove anything after the chdir, anyway. [jc: replaced the message with a more succinct version from Jonathan] Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/clone.c | 25 ++++++++++++++++++++++++- t/t1060-object-corruption.sh | 4 ++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/builtin/clone.c b/builtin/clone.c index eceaa74922..f9c380eb6c 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -377,10 +377,32 @@ static void clone_local(const char *src_repo, const char *dest_repo) static const char *junk_work_tree; static const char *junk_git_dir; static pid_t junk_pid; +enum { + JUNK_LEAVE_NONE, + JUNK_LEAVE_REPO, + JUNK_LEAVE_ALL +} junk_mode = JUNK_LEAVE_NONE; + +static const char junk_leave_repo_msg[] = +N_("Clone succeeded, but checkout failed.\n" + "You can inspect what was checked out with 'git status'\n" + "and retry the checkout with 'git checkout -f HEAD'\n"); static void remove_junk(void) { struct strbuf sb = STRBUF_INIT; + + switch (junk_mode) { + case JUNK_LEAVE_REPO: + warning("%s", _(junk_leave_repo_msg)); + /* fall-through */ + case JUNK_LEAVE_ALL: + return; + default: + /* proceed to removal */ + break; + } + if (getpid() != junk_pid) return; if (junk_git_dir) { @@ -925,12 +947,13 @@ int cmd_clone(int argc, const char **argv, const char *prefix) transport_unlock_pack(transport); transport_disconnect(transport); + junk_mode = JUNK_LEAVE_REPO; err = checkout(); strbuf_release(&reflog_msg); strbuf_release(&branch_top); strbuf_release(&key); strbuf_release(&value); - junk_pid = 0; + junk_mode = JUNK_LEAVE_ALL; return err; } diff --git a/t/t1060-object-corruption.sh b/t/t1060-object-corruption.sh index c65a57cd22..3f8705139d 100755 --- a/t/t1060-object-corruption.sh +++ b/t/t1060-object-corruption.sh @@ -89,6 +89,10 @@ test_expect_success 'clone --local detects corruption' ' test_must_fail git clone --local bit-error corrupt-checkout ' +test_expect_success 'error detected during checkout leaves repo intact' ' + test_path_is_dir corrupt-checkout/.git +' + test_expect_success 'clone --local detects missing objects' ' test_must_fail git clone --local missing missing-checkout '