git/t/t5312-prune-corruption.sh

#!/bin/sh

test_description='
Test pruning of repositories with minor corruptions. The goal
here is that we should always be erring on the side of safety. So
if we see, for example, a ref with a bogus name, it is OK either to
bail out or to proceed using it as a reachable tip, but it is _not_
OK to proceed as if it did not exist. Otherwise we might silently
delete objects that cannot be recovered.
'
# Source the test harness from the current directory. NOTE(review): the
# test_* helpers and "verbose" used below are presumably defined there.
. ./test-lib.sh

# Turn off reflog recording and delete any reflogs already on disk.
# NOTE(review): presumably this keeps reflog entries from acting as extra
# reachability tips in the prune/repack tests below -- confirm.
test_expect_success 'disable reflogs' '
	git config core.logallrefupdates false &&
	rm -rf .git/logs
'

# Set up for the bogus-ref tests: make two empty commits, keeping the first
# one's id in $base and the second one's id in $bogus. The raw $bogus commit
# is dumped to "saved", its id is written straight into a ref file whose name
# contains "..", and the current branch is then rewound by one commit.
test_expect_success 'create history reachable only from a bogus-named ref' '
	test_tick &&
	git commit --allow-empty -m master &&
	base=$(git rev-parse HEAD) &&
	test_tick &&
	git commit --allow-empty -m bogus &&
	bogus=$(git rev-parse HEAD) &&
	git cat-file commit "$bogus" >saved &&
	echo "$bogus" >.git/refs/heads/bogus..name &&
	git reset --hard HEAD^
'

# Run an immediate-expiry prune while $bogus is referenced only through the
# badly named ref file. The prune itself may succeed or fail
# (test_might_fail); what is checked is that $bogus still exists afterwards.
# The test_when_finished hook re-writes the commit from "saved"; presumably
# so that later tests still have the object even if this test fails.
test_expect_success 'pruning does not drop bogus object' '
	test_when_finished "git hash-object -w -t commit saved" &&
	test_might_fail git prune --expire=now &&
	verbose git cat-file -e $bogus
'

# Temporarily tag $bogus, repack with -ad, delete the tag again, and check
# that the object still exists. Per the test title, the aim is to have
# $bogus stored in a pack for the test that follows.
test_expect_success 'put bogus object into pack' '
	git tag reachable $bogus &&
	git repack -ad &&
	git tag -d reachable &&
	verbose git cat-file -e $bogus
'

# Try two destructive repack invocations in turn ("-Ad
# --unpack-unreachable=now" and "-ad"). Either command is allowed to fail
# or succeed; after each one the $bogus object must still be present.
test_expect_success 'destructive repack keeps packed object' '
	test_might_fail git repack -Ad --unpack-unreachable=now &&
	verbose git cat-file -e $bogus &&
	test_might_fail git repack -ad &&
	verbose git cat-file -e $bogus
'

# Remove the ref file with the invalid name; the subsequent tests
# set up different corruptions.
test_expect_success 'clean up bogus ref' '
	rm .git/refs/heads/bogus..name
'

# Two new commits are made here, "one" and "two". The master
# branch ends up pointing at "two", whose loose object file is
# then removed, which corrupts the repository. The otherwise
# unreachable "one" must not be pruned by later tests, since it
# is the user's best bet for recovering from the corruption.
#
# HEAD is deliberately detached at $base rather than left at
# "two": the reference to "two" should be discovered by
# iterating the refs, not by resolving HEAD.
test_expect_success 'create history with missing tip commit' '
	test_tick &&
	git commit --allow-empty -m one &&
	recoverable=$(git rev-parse HEAD) &&
	git cat-file commit "$recoverable" >saved &&
	test_tick &&
	git commit --allow-empty -m two &&
	missing=$(git rev-parse HEAD) &&
	git checkout --detach "$base" &&
	loose_path=$(echo "$missing" | sed "s,..,&/,") &&
	rm ".git/objects/$loose_path" &&
	test_must_fail git cat-file -e "$missing"
'

# master now points at the commit whose object was deleted. The prune may
# fail or succeed (test_might_fail), but the "one" commit ($recoverable)
# must still exist afterwards. The test_when_finished hook re-writes the
# commit held in "saved" once the test is over.
test_expect_success 'pruning with a corrupted tip does not drop history' '
	test_when_finished "git hash-object -w -t commit saved" &&
	test_might_fail git prune --expire=now &&
	verbose git cat-file -e $recoverable
'

# Run pack-refs with --all --prune, then confirm that refs/heads/master
# still resolves to the id of the missing commit ($missing).
test_expect_success 'pack-refs does not silently delete broken loose ref' '
	git pack-refs --all --prune &&
	echo $missing >expect &&
	git rev-parse refs/heads/master >actual &&
	test_cmp expect actual
'

# The packed-refs file is written by hand instead of relying on
# pack-refs to pack the broken ref, because skipping a broken ref
# would be a perfectly reasonable thing for pack-refs to do.
test_expect_success 'create packed-refs file with broken ref' '
	rm -f .git/refs/heads/master &&
	{
		echo "$missing refs/heads/master" &&
		echo "$recoverable refs/heads/other"
	} >.git/packed-refs &&
	echo "$missing" >expect &&
	git rev-parse refs/heads/master >actual &&
	test_cmp expect actual
'

# Re-run pack-refs against the hand-written packed-refs file and check that
# refs/heads/master still resolves to the id in "expect". That file is not
# rewritten here; it still holds $missing from the previous test.
test_expect_success 'pack-refs does not silently delete broken packed ref' '
	git pack-refs --all --prune &&
	git rev-parse refs/heads/master >actual &&
	test_cmp expect actual
'

# Delete refs/heads/other (introduced via the hand-written packed-refs
# file) and check that the unrelated, broken refs/heads/master entry still
# resolves to the id in "expect", which is left over from an earlier test.
test_expect_success 'pack-refs does not drop broken refs during deletion' '
	git update-ref -d refs/heads/other &&
	git rev-parse refs/heads/master >actual &&
	test_cmp expect actual
'

test_done
t5312: test object deletion code paths in a corrupted repository When we are doing a destructive operation like "git prune", we want to be extra careful that the set of reachable tips we compute is valid. If there is any corruption or oddity, we are better off aborting the operation and letting the user figure things out rather than plowing ahead and possibly deleting some data that cannot be recovered. The tests here include: 1. Pruning objects mentioned only by refs with invalid names. This used to abort prior to d0f810f (refs.c: allow listing and deleting badly named refs, 2014-09-03), but since then we silently ignore the tip. Likewise, we test repacking that can drop objects (either "-ad", which drops anything unreachable, or "-Ad --unpack-unreachable=<time>", which tries to optimize out a loose object write that would be directly pruned). 2. Pruning objects when some refs point to missing objects. We don't know whether any dangling objects would have been reachable from the missing objects. We are better off keeping them around, as they are better than nothing for helping the user recover history. 3. Packed refs that point to missing objects can sometimes be dropped. By itself, this is more of an annoyance (you do not have the object anyway; even if you can recover it from elsewhere, all you are losing is a placeholder for your state at the time of corruption). But coupled with (2), if we drop the ref and then go on to prune, we may lose unrecoverable objects. Note that we use test_might_fail for some of the operations. In some cases, it would be appropriate to abort the operation, and in others, it might be acceptable to continue but taking the information into account. The tests don't care either way, and check only for data loss. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2015-03-20 21:43:02 +03:00			`#!/bin/sh`

			`test_description='`
			`Test pruning of repositories with minor corruptions. The goal`
			`here is that we should always be erring on the side of safety. So`
			`if we see, for example, a ref with a bogus name, it is OK either to`
			`bail out or to proceed using it as a reachable tip, but it is _not_`
			`OK to proceed as if it did not exist. Otherwise we might silently`
			`delete objects that cannot be recovered.`
			`'`
			`. ./test-lib.sh`

			`test_expect_success 'disable reflogs' '`
			`git config core.logallrefupdates false &&`
			`rm -rf .git/logs`
			`'`

			`test_expect_success 'create history reachable only from a bogus-named ref' '`
			`test_tick && git commit --allow-empty -m master &&`
			`base=$(git rev-parse HEAD) &&`
			`test_tick && git commit --allow-empty -m bogus &&`
			`bogus=$(git rev-parse HEAD) &&`
			`git cat-file commit $bogus >saved &&`
			`echo $bogus >.git/refs/heads/bogus..name &&`
			`git reset --hard HEAD^`
			`'`

prune: turn on ref_paranoia flag Prune should know about broken objects at the tips of refs, so that we can feed them to our traversal rather than ignoring them. It's better for us to abort the operation on the broken object than it is to start deleting objects with an incomplete view of the reachability namespace. Note that for missing objects, aborting is the best we can do. For a badly-named ref, we technically could use its sha1 as a reachability tip. However, the iteration code just feeds us a null sha1, so there would be a reasonable amount of code involved to pass down our wishes. It's not really worth trying to do better, because this is a case that should happen extremely rarely, and the message we provide: fatal: unable to parse object: refs/heads/bogus:name is probably enough to point the user in the right direction. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2015-03-20 21:43:09 +03:00			`test_expect_success 'pruning does not drop bogus object' '`
t5312: test object deletion code paths in a corrupted repository When we are doing a destructive operation like "git prune", we want to be extra careful that the set of reachable tips we compute is valid. If there is any corruption or oddity, we are better off aborting the operation and letting the user figure things out rather than plowing ahead and possibly deleting some data that cannot be recovered. The tests here include: 1. Pruning objects mentioned only by refs with invalid names. This used to abort prior to d0f810f (refs.c: allow listing and deleting badly named refs, 2014-09-03), but since then we silently ignore the tip. Likewise, we test repacking that can drop objects (either "-ad", which drops anything unreachable, or "-Ad --unpack-unreachable=<time>", which tries to optimize out a loose object write that would be directly pruned). 2. Pruning objects when some refs point to missing objects. We don't know whether any dangling objects would have been reachable from the missing objects. We are better off keeping them around, as they are better than nothing for helping the user recover history. 3. Packed refs that point to missing objects can sometimes be dropped. By itself, this is more of an annoyance (you do not have the object anyway; even if you can recover it from elsewhere, all you are losing is a placeholder for your state at the time of corruption). But coupled with (2), if we drop the ref and then go on to prune, we may lose unrecoverable objects. Note that we use test_might_fail for some of the operations. In some cases, it would be appropriate to abort the operation, and in others, it might be acceptable to continue but taking the information into account. The tests don't care either way, and check only for data loss. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2015-03-20 21:43:02 +03:00			`test_when_finished "git hash-object -w -t commit saved" &&`
			`test_might_fail git prune --expire=now &&`
			`verbose git cat-file -e $bogus`
			`'`

			`test_expect_success 'put bogus object into pack' '`
			`git tag reachable $bogus &&`
			`git repack -ad &&`
			`git tag -d reachable &&`
			`verbose git cat-file -e $bogus`
			`'`

repack: turn on "ref paranoia" when doing a destructive repack If we are repacking with "-ad", we will drop any unreachable objects. Likewise, using "-Ad --unpack-unreachable=<time>" will drop any old, unreachable objects. In these cases, we want to make sure the reachability we compute with "--all" is complete. We can do this by passing GIT_REF_PARANOIA=1 in the environment to pack-objects. Note that "-Ad" is safe already, because it only loosens unreachable objects. It is up to "git prune" to avoid deleting them. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2015-03-20 21:43:13 +03:00			`test_expect_success 'destructive repack keeps packed object' '`
t5312: test object deletion code paths in a corrupted repository When we are doing a destructive operation like "git prune", we want to be extra careful that the set of reachable tips we compute is valid. If there is any corruption or oddity, we are better off aborting the operation and letting the user figure things out rather than plowing ahead and possibly deleting some data that cannot be recovered. The tests here include: 1. Pruning objects mentioned only by refs with invalid names. This used to abort prior to d0f810f (refs.c: allow listing and deleting badly named refs, 2014-09-03), but since then we silently ignore the tip. Likewise, we test repacking that can drop objects (either "-ad", which drops anything unreachable, or "-Ad --unpack-unreachable=<time>", which tries to optimize out a loose object write that would be directly pruned). 2. Pruning objects when some refs point to missing objects. We don't know whether any dangling objects would have been reachable from the missing objects. We are better off keeping them around, as they are better than nothing for helping the user recover history. 3. Packed refs that point to missing objects can sometimes be dropped. By itself, this is more of an annoyance (you do not have the object anyway; even if you can recover it from elsewhere, all you are losing is a placeholder for your state at the time of corruption). But coupled with (2), if we drop the ref and then go on to prune, we may lose unrecoverable objects. Note that we use test_might_fail for some of the operations. In some cases, it would be appropriate to abort the operation, and in others, it might be acceptable to continue but taking the information into account. The tests don't care either way, and check only for data loss. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2015-03-20 21:43:02 +03:00			`test_might_fail git repack -Ad --unpack-unreachable=now &&`
			`verbose git cat-file -e $bogus &&`
			`test_might_fail git repack -ad &&`
			`verbose git cat-file -e $bogus`
			`'`

			`# subsequent tests will have different corruptions`
			`test_expect_success 'clean up bogus ref' '`
			`rm .git/refs/heads/bogus..name`
			`'`

			`# We create two new objects here, "one" and "two". Our`
			`# master branch points to "two", which is deleted,`
			`# corrupting the repository. But we'd like to make sure`
			`# that the otherwise unreachable "one" is not pruned`
			`# (since it is the user's best bet for recovering`
			`# from the corruption).`
			`#`
			`# Note that we also point HEAD somewhere besides "two",`
			`# as we want to make sure we test the case where we`
			`# pick up the reference to "two" by iterating the refs,`
			`# not by resolving HEAD.`
			`test_expect_success 'create history with missing tip commit' '`
			`test_tick && git commit --allow-empty -m one &&`
			`recoverable=$(git rev-parse HEAD) &&`
			`git cat-file commit $recoverable >saved &&`
			`test_tick && git commit --allow-empty -m two &&`
			`missing=$(git rev-parse HEAD) &&`
			`git checkout --detach $base &&`
			`rm .git/objects/$(echo $missing | sed "s,..,&/,") &&`
			`test_must_fail git cat-file -e $missing`
			`'`

prune: turn on ref_paranoia flag Prune should know about broken objects at the tips of refs, so that we can feed them to our traversal rather than ignoring them. It's better for us to abort the operation on the broken object than it is to start deleting objects with an incomplete view of the reachability namespace. Note that for missing objects, aborting is the best we can do. For a badly-named ref, we technically could use its sha1 as a reachability tip. However, the iteration code just feeds us a null sha1, so there would be a reasonable amount of code involved to pass down our wishes. It's not really worth trying to do better, because this is a case that should happen extremely rarely, and the message we provide: fatal: unable to parse object: refs/heads/bogus:name is probably enough to point the user in the right direction. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2015-03-20 21:43:09 +03:00			`test_expect_success 'pruning with a corrupted tip does not drop history' '`
t5312: test object deletion code paths in a corrupted repository When we are doing a destructive operation like "git prune", we want to be extra careful that the set of reachable tips we compute is valid. If there is any corruption or oddity, we are better off aborting the operation and letting the user figure things out rather than plowing ahead and possibly deleting some data that cannot be recovered. The tests here include: 1. Pruning objects mentioned only by refs with invalid names. This used to abort prior to d0f810f (refs.c: allow listing and deleting badly named refs, 2014-09-03), but since then we silently ignore the tip. Likewise, we test repacking that can drop objects (either "-ad", which drops anything unreachable, or "-Ad --unpack-unreachable=<time>", which tries to optimize out a loose object write that would be directly pruned). 2. Pruning objects when some refs point to missing objects. We don't know whether any dangling objects would have been reachable from the missing objects. We are better off keeping them around, as they are better than nothing for helping the user recover history. 3. Packed refs that point to missing objects can sometimes be dropped. By itself, this is more of an annoyance (you do not have the object anyway; even if you can recover it from elsewhere, all you are losing is a placeholder for your state at the time of corruption). But coupled with (2), if we drop the ref and then go on to prune, we may lose unrecoverable objects. Note that we use test_might_fail for some of the operations. In some cases, it would be appropriate to abort the operation, and in others, it might be acceptable to continue but taking the information into account. The tests don't care either way, and check only for data loss. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2015-03-20 21:43:02 +03:00			`test_when_finished "git hash-object -w -t commit saved" &&`
			`test_might_fail git prune --expire=now &&`
			`verbose git cat-file -e $recoverable`
			`'`

			`test_expect_success 'pack-refs does not silently delete broken loose ref' '`
			`git pack-refs --all --prune &&`
			`echo $missing >expect &&`
			`git rev-parse refs/heads/master >actual &&`
			`test_cmp expect actual`
			`'`

			`# we do not want to count on running pack-refs to`
			`# actually pack it, as it is perfectly reasonable to`
			`# skip processing a broken ref`
			`test_expect_success 'create packed-refs file with broken ref' '`
			`rm -f .git/refs/heads/master &&`
			`cat >.git/packed-refs <<-EOF &&`
			`$missing refs/heads/master`
			`$recoverable refs/heads/other`
			`EOF`
			`echo $missing >expect &&`
			`git rev-parse refs/heads/master >actual &&`
			`test_cmp expect actual`
			`'`

			`test_expect_success 'pack-refs does not silently delete broken packed ref' '`
			`git pack-refs --all --prune &&`
			`git rev-parse refs/heads/master >actual &&`
			`test_cmp expect actual`
			`'`

refs.c: drop curate_packed_refs When we delete a ref, we have to rewrite the entire packed-refs file. We take this opportunity to "curate" the packed-refs file and drop any entries that are crufty or broken. Dropping broken entries (e.g., with bogus names, or ones that point to missing objects) is actively a bad idea, as it means that we lose any notion that the data was there in the first place. Aside from the general hackiness that we might lose any information about ref "foo" while deleting an unrelated ref "bar", this may seriously hamper any attempts by the user at recovering from the corruption in "foo". They will lose the sha1 and name of "foo"; the exact pointer may still be useful even if they recover missing objects from a different copy of the repository. But worse, once the ref is gone, there is no trace of the corruption. A follow-up "git prune" may delete objects, even though it would otherwise bail when seeing corruption. We could just drop the "broken" bits from curate_packed_refs, and continue to drop the "crufty" bits: refs whose loose counterpart exists in the filesystem. This is not wrong to do, and it does have the advantage that we may write out a slightly smaller packed-refs file. But it has two disadvantages: 1. It is a potential source of races or mistakes with respect to these refs that are otherwise unrelated to the operation. To my knowledge, there aren't any active problems in this area, but it seems like an unnecessary risk. 2. We have to spend time looking up the matching loose refs for every item in the packed-refs file. If you have a large number of packed refs that do not change, that outweighs the benefit from writing out a smaller packed-refs file (it doesn't get smaller, and you do a bunch of directory traversal to find that out). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2015-03-20 21:43:17 +03:00			`test_expect_success 'pack-refs does not drop broken refs during deletion' '`
t5312: test object deletion code paths in a corrupted repository When we are doing a destructive operation like "git prune", we want to be extra careful that the set of reachable tips we compute is valid. If there is any corruption or oddity, we are better off aborting the operation and letting the user figure things out rather than plowing ahead and possibly deleting some data that cannot be recovered. The tests here include: 1. Pruning objects mentioned only by refs with invalid names. This used to abort prior to d0f810f (refs.c: allow listing and deleting badly named refs, 2014-09-03), but since then we silently ignore the tip. Likewise, we test repacking that can drop objects (either "-ad", which drops anything unreachable, or "-Ad --unpack-unreachable=<time>", which tries to optimize out a loose object write that would be directly pruned). 2. Pruning objects when some refs point to missing objects. We don't know whether any dangling objects would have been reachable from the missing objects. We are better off keeping them around, as they are better than nothing for helping the user recover history. 3. Packed refs that point to missing objects can sometimes be dropped. By itself, this is more of an annoyance (you do not have the object anyway; even if you can recover it from elsewhere, all you are losing is a placeholder for your state at the time of corruption). But coupled with (2), if we drop the ref and then go on to prune, we may lose unrecoverable objects. Note that we use test_might_fail for some of the operations. In some cases, it would be appropriate to abort the operation, and in others, it might be acceptable to continue but taking the information into account. The tests don't care either way, and check only for data loss. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2015-03-20 21:43:02 +03:00			`git update-ref -d refs/heads/other &&`
			`git rev-parse refs/heads/master >actual &&`
			`test_cmp expect actual`
			`'`

			`test_done`