зеркало из https://github.com/microsoft/git.git
Harden gvfs-helper to validate the packfiles in a multipart prefetch response (#571)
Teach `gvfs-helper` to ignore the optional `.idx` files that may be included in a `prefetch` response and always use `git index-pack` to create them from the `.pack` files received in the data stream. This is a little wasteful in terms of client-side compute and of the network bandwidth, but allows us to use the full packfile verification code contained within `git index-pack` to ensure that the received packfiles are valid.
This commit is contained in:
Коммит
7f191e8594
|
@ -105,6 +105,11 @@
|
|||
// The GVFS Protocol defines this value as a way to
|
||||
// request cached packfiles NEWER THAN this timestamp.
|
||||
//
|
||||
// --max-retries=<n> // defaults to "6"
|
||||
//
|
||||
// Number of retries after transient network errors.
|
||||
// Set to zero to disable such retries.
|
||||
//
|
||||
// server
|
||||
//
|
||||
// Interactive/sub-process mode. Listen for a series of commands
|
||||
|
@ -2127,7 +2132,6 @@ static void extract_packfile_from_multipack(
|
|||
{
|
||||
struct ph ph;
|
||||
struct tempfile *tempfile_pack = NULL;
|
||||
struct tempfile *tempfile_idx = NULL;
|
||||
int result = -1;
|
||||
int b_no_idx_in_multipack;
|
||||
struct object_id packfile_checksum;
|
||||
|
@ -2161,16 +2165,14 @@ static void extract_packfile_from_multipack(
|
|||
b_no_idx_in_multipack = (ph.idx_len == maximum_unsigned_value_of_type(uint64_t) ||
|
||||
ph.idx_len == 0);
|
||||
|
||||
if (b_no_idx_in_multipack) {
|
||||
my_create_tempfile(status, 0, "pack", &tempfile_pack, NULL, NULL);
|
||||
if (!tempfile_pack)
|
||||
goto done;
|
||||
} else {
|
||||
/* create a pair of tempfiles with the same basename */
|
||||
my_create_tempfile(status, 0, "pack", &tempfile_pack, "idx", &tempfile_idx);
|
||||
if (!tempfile_pack || !tempfile_idx)
|
||||
goto done;
|
||||
}
|
||||
/*
|
||||
* We are going to harden `gvfs-helper` here and ignore the .idx file
|
||||
* if it is provided and always compute it locally so that we get the
|
||||
* added verification that `git index-pack` provides.
|
||||
*/
|
||||
my_create_tempfile(status, 0, "pack", &tempfile_pack, NULL, NULL);
|
||||
if (!tempfile_pack)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* Copy the current packfile from the open stream and capture
|
||||
|
@ -2197,38 +2199,31 @@ static void extract_packfile_from_multipack(
|
|||
|
||||
oid_to_hex_r(hex_checksum, &packfile_checksum);
|
||||
|
||||
if (b_no_idx_in_multipack) {
|
||||
/*
|
||||
* The server did not send the corresponding .idx, so
|
||||
* we have to compute it ourselves.
|
||||
*/
|
||||
strbuf_addbuf(&temp_path_idx, &temp_path_pack);
|
||||
strbuf_strip_suffix(&temp_path_idx, ".pack");
|
||||
strbuf_addstr(&temp_path_idx, ".idx");
|
||||
/*
|
||||
* Always compute the .idx file from the .pack file.
|
||||
*/
|
||||
strbuf_addbuf(&temp_path_idx, &temp_path_pack);
|
||||
strbuf_strip_suffix(&temp_path_idx, ".pack");
|
||||
strbuf_addstr(&temp_path_idx, ".idx");
|
||||
|
||||
my_run_index_pack(params, status,
|
||||
&temp_path_pack, &temp_path_idx,
|
||||
NULL);
|
||||
if (status->ec != GH__ERROR_CODE__OK)
|
||||
goto done;
|
||||
my_run_index_pack(params, status,
|
||||
&temp_path_pack, &temp_path_idx,
|
||||
NULL);
|
||||
if (status->ec != GH__ERROR_CODE__OK)
|
||||
goto done;
|
||||
|
||||
} else {
|
||||
if (!b_no_idx_in_multipack) {
|
||||
/*
|
||||
* Server sent the .idx immediately after the .pack in the
|
||||
* data stream. I'm tempted to verify it, but that defeats
|
||||
* the purpose of having it cached...
|
||||
* data stream. Skip over it.
|
||||
*/
|
||||
if (my_copy_fd_len(fd_multipack, get_tempfile_fd(tempfile_idx),
|
||||
ph.idx_len) < 0) {
|
||||
if (lseek(fd_multipack, ph.idx_len, SEEK_CUR) < 0) {
|
||||
strbuf_addf(&status->error_message,
|
||||
"could not extract index[%d] in multipack",
|
||||
"could not skip index[%d] in multipack",
|
||||
k);
|
||||
status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH;
|
||||
goto done;
|
||||
}
|
||||
|
||||
strbuf_addstr(&temp_path_idx, get_tempfile_path(tempfile_idx));
|
||||
close_tempfile_gently(tempfile_idx);
|
||||
}
|
||||
|
||||
strbuf_addf(&buf_timestamp, "%u", (unsigned int)ph.timestamp);
|
||||
|
@ -2243,7 +2238,6 @@ static void extract_packfile_from_multipack(
|
|||
|
||||
done:
|
||||
delete_tempfile(&tempfile_pack);
|
||||
delete_tempfile(&tempfile_idx);
|
||||
strbuf_release(&temp_path_pack);
|
||||
strbuf_release(&temp_path_idx);
|
||||
strbuf_release(&final_path_pack);
|
||||
|
@ -3792,6 +3786,8 @@ static enum gh__error_code do_sub_cmd__prefetch(int argc, const char **argv)
|
|||
static const char *since_str;
|
||||
static struct option prefetch_options[] = {
|
||||
OPT_STRING(0, "since", &since_str, N_("since"), N_("seconds since epoch")),
|
||||
OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries,
|
||||
N_("retries for transient network errors")),
|
||||
OPT_END(),
|
||||
};
|
||||
|
||||
|
@ -3811,6 +3807,8 @@ static enum gh__error_code do_sub_cmd__prefetch(int argc, const char **argv)
|
|||
if (my_parse_since(since_str, &seconds_since_epoch))
|
||||
die("could not parse 'since' field");
|
||||
}
|
||||
if (gh__cmd_opts.max_retries < 0)
|
||||
gh__cmd_opts.max_retries = 0;
|
||||
|
||||
finish_init(1);
|
||||
|
||||
|
|
|
@ -1146,6 +1146,82 @@ static int ct_pack_sort_compare(const void *_a, const void *_b)
|
|||
return (a->ph.timestamp < b->ph.timestamp) ? -1 : (a->ph.timestamp != b->ph.timestamp);
|
||||
}
|
||||
|
||||
#define MY_MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
/*
|
||||
* Like copy.c:copy_fd(), but corrupt part of the trailing SHA (if the
|
||||
* given mayhem key is defined) as we copy it to the destination file.
|
||||
*
|
||||
* We don't know (or care) if the input file is a pack file or idx
|
||||
* file, just that the final bytes are part of a SHA that we can
|
||||
* corrupt.
|
||||
*/
|
||||
static int copy_fd_with_checksum_mayhem(int ifd, int ofd,
|
||||
const char *mayhem_key,
|
||||
ssize_t nr_wrong_bytes)
|
||||
{
|
||||
off_t in_cur, in_len;
|
||||
ssize_t bytes_to_copy;
|
||||
ssize_t bytes_remaining_to_copy;
|
||||
char buffer[8192];
|
||||
|
||||
if (!mayhem_key || !*mayhem_key || !nr_wrong_bytes ||
|
||||
!string_list_has_string(&mayhem_list, mayhem_key))
|
||||
return copy_fd(ifd, ofd);
|
||||
|
||||
in_cur = lseek(ifd, 0, SEEK_CUR);
|
||||
if (in_cur < 0)
|
||||
return in_cur;
|
||||
|
||||
in_len = lseek(ifd, 0, SEEK_END);
|
||||
if (in_len < 0)
|
||||
return in_len;
|
||||
|
||||
if (lseek(ifd, in_cur, SEEK_SET) < 0)
|
||||
return -1;
|
||||
|
||||
/* Copy the entire file except for the last few bytes. */
|
||||
|
||||
bytes_to_copy = (ssize_t)in_len - nr_wrong_bytes;
|
||||
bytes_remaining_to_copy = bytes_to_copy;
|
||||
while (bytes_remaining_to_copy) {
|
||||
ssize_t to_read = MY_MIN(sizeof(buffer), bytes_remaining_to_copy);
|
||||
ssize_t len = xread(ifd, buffer, to_read);
|
||||
|
||||
if (!len)
|
||||
return -1; /* error on unexpected EOF */
|
||||
if (len < 0)
|
||||
return -1;
|
||||
if (write_in_full(ofd, buffer, len) < 0)
|
||||
return -1;
|
||||
|
||||
bytes_remaining_to_copy -= len;
|
||||
}
|
||||
|
||||
/* Read the trailing bytes so that we can alter them before copying. */
|
||||
|
||||
while (nr_wrong_bytes) {
|
||||
ssize_t to_read = MY_MIN(sizeof(buffer), nr_wrong_bytes);
|
||||
ssize_t len = xread(ifd, buffer, to_read);
|
||||
ssize_t k;
|
||||
|
||||
if (!len)
|
||||
return -1; /* error on unexpected EOF */
|
||||
if (len < 0)
|
||||
return -1;
|
||||
|
||||
for (k = 0; k < len; k++)
|
||||
buffer[k] ^= 0xff;
|
||||
|
||||
if (write_in_full(ofd, buffer, len) < 0)
|
||||
return -1;
|
||||
|
||||
nr_wrong_bytes -= len;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static enum worker_result send_ct_item(const struct ct_pack_item *item)
|
||||
{
|
||||
struct ph ph_le;
|
||||
|
@ -1167,7 +1243,8 @@ static enum worker_result send_ct_item(const struct ct_pack_item *item)
|
|||
trace2_printf("%s: sending prefetch pack '%s'", TR2_CAT, item->path_pack.buf);
|
||||
|
||||
fd_pack = git_open_cloexec(item->path_pack.buf, O_RDONLY);
|
||||
if (fd_pack == -1 || copy_fd(fd_pack, 1)) {
|
||||
if (fd_pack == -1 ||
|
||||
copy_fd_with_checksum_mayhem(fd_pack, 1, "bad_prefetch_pack_sha", 4)) {
|
||||
logerror("could not send packfile");
|
||||
wr = WR_IO_ERROR;
|
||||
goto done;
|
||||
|
@ -1177,7 +1254,8 @@ static enum worker_result send_ct_item(const struct ct_pack_item *item)
|
|||
trace2_printf("%s: sending prefetch idx '%s'", TR2_CAT, item->path_idx.buf);
|
||||
|
||||
fd_idx = git_open_cloexec(item->path_idx.buf, O_RDONLY);
|
||||
if (fd_idx == -1 || copy_fd(fd_idx, 1)) {
|
||||
if (fd_idx == -1 ||
|
||||
copy_fd_with_checksum_mayhem(fd_idx, 1, "bad_prefetch_idx_sha", 4)) {
|
||||
logerror("could not send idx");
|
||||
wr = WR_IO_ERROR;
|
||||
goto done;
|
||||
|
|
|
@ -1299,7 +1299,7 @@ test_expect_success 'duplicate and busy: vfs- packfile' '
|
|||
# content matches the requested SHA.
|
||||
#
|
||||
test_expect_success 'catch corrupted loose object' '
|
||||
# test_when_finished "per_test_cleanup" &&
|
||||
test_when_finished "per_test_cleanup" &&
|
||||
start_gvfs_protocol_server_with_mayhem corrupt_loose &&
|
||||
|
||||
test_must_fail \
|
||||
|
@ -1322,4 +1322,67 @@ test_expect_success 'catch corrupted loose object' '
|
|||
git -C "$REPO_T1" fsck
|
||||
'
|
||||
|
||||
#################################################################
|
||||
# Ensure that we can detect when we receive a corrupted packfile
|
||||
# from the server. This is not concerned with network IO errors,
|
||||
# but rather cases when the cache or origin server generates or
|
||||
# sends an invalid packfile.
|
||||
#
|
||||
# For example, if the server throws an exception and writes the
|
||||
# stack trace to the socket rather than or in addition to the
|
||||
# packfile content.
|
||||
#
|
||||
# Or for example, if the packfile on the server's disk is corrupt
|
||||
# and it sends it correctly, but the original data was already
|
||||
# garbage, so the client still has garbage (and retrying won't
|
||||
# help).
|
||||
#################################################################
|
||||
|
||||
# Send corrupt PACK files w/o IDX files (so that `gvfs-helper`
|
||||
# must use `index-pack` to create it. (And as a side-effect,
|
||||
# validate the PACK file is not corrupt.)
|
||||
test_expect_success 'prefetch corrupt pack without idx' '
|
||||
test_when_finished "per_test_cleanup" &&
|
||||
start_gvfs_protocol_server_with_mayhem \
|
||||
bad_prefetch_pack_sha \
|
||||
no_prefetch_idx &&
|
||||
|
||||
test_must_fail \
|
||||
git -C "$REPO_T1" gvfs-helper \
|
||||
--cache-server=disable \
|
||||
--remote=origin \
|
||||
--no-progress \
|
||||
prefetch \
|
||||
--max-retries=0 \
|
||||
--since="1000000000" \
|
||||
>OUT.output 2>OUT.stderr &&
|
||||
|
||||
stop_gvfs_protocol_server &&
|
||||
|
||||
# Verify corruption detected in pack when building
|
||||
# local idx file for it.
|
||||
|
||||
grep -q "error: .* index-pack failed" <OUT.stderr
|
||||
'
|
||||
|
||||
# Send corrupt PACK files with IDX files. Since the cache server
|
||||
# sends both, `gvfs-helper` might fail to verify both of them.
|
||||
test_expect_success 'prefetch corrupt pack with corrupt idx' '
|
||||
test_when_finished "per_test_cleanup" &&
|
||||
start_gvfs_protocol_server_with_mayhem \
|
||||
bad_prefetch_pack_sha &&
|
||||
|
||||
test_must_fail \
|
||||
git -C "$REPO_T1" gvfs-helper \
|
||||
--cache-server=disable \
|
||||
--remote=origin \
|
||||
--no-progress \
|
||||
prefetch \
|
||||
--max-retries=0 \
|
||||
--since="1000000000" \
|
||||
>OUT.output 2>OUT.stderr &&
|
||||
|
||||
stop_gvfs_protocol_server
|
||||
'
|
||||
|
||||
test_done
|
||||
|
|
Загрузка…
Ссылка в новой задаче