midx: read `RIDX` chunk when present

When a MIDX contains the new `RIDX` chunk, ensure that the reverse index
is read from it instead of the on-disk .rev file. Since we need to
encode the object order in the MIDX itself for correctness reasons,
there is no point in storing the same data again outside of the MIDX.

So, this patch stops writing separate .rev files, and reads it out of
the MIDX itself. This is possible to do with relatively little new code,
since the format of the RIDX chunk is identical to the data in the .rev
file. In other words, we can implement this by pointing the
`revindex_data` field at the reverse index chunk of the MIDX instead of
the .rev file without any other changes.

Note that we have two knobs that are adjusted for the new tests:
GIT_TEST_MIDX_WRITE_REV and GIT_TEST_MIDX_READ_RIDX. The former controls
whether the MIDX .rev is written at all, and the latter controls whether
we read the MIDX's RIDX chunk.

Both are necessary to ensure that the test added at the beginning of
this series continues to work. This is because we always need to write
the RIDX chunk in the MIDX in order to change its checksum, but we want
to make sure reading the existing .rev file still works (since the RIDX
chunk takes precedence by default).

Arguably this isn't a very interesting mode to test, because the
precedence rules mean that we'll always read the RIDX chunk over the
.rev file. But it makes it impossible for a user to induce corruption in
their repository by adjusting the test knobs (since if we had an
either/or knob they could stop writing the RIDX chunk, allowing them to
tweak the MIDX's object order without changing its checksum).

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Reviewed-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Taylor Blau 2022-01-25 17:41:17 -05:00 коммит произвёл Junio C Hamano
Родитель a80f0f91b1
Коммит 7f514b7a5e
7 изменённых файлов: 54 добавлений и 7 удалений

6
midx.c
Просмотреть файл

@ -162,6 +162,9 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets);
if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex);
m->num_objects = ntohl(m->chunk_oid_fanout[255]);
CALLOC_ARRAY(m->pack_names, m->num_packs);
@ -1429,7 +1432,8 @@ static int write_midx_internal(const char *object_dir,
finalize_hashfile(f, midx_hash, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
free_chunkfile(cf);
if (flags & MIDX_WRITE_REV_INDEX)
if (flags & MIDX_WRITE_REV_INDEX &&
git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0))
write_midx_reverse_index(midx_name.buf, midx_hash, &ctx);
if (flags & MIDX_WRITE_BITMAP) {
if (write_midx_bitmap(midx_name.buf, midx_hash, &ctx,

1
midx.h
Просмотреть файл

@ -36,6 +36,7 @@ struct multi_pack_index {
const unsigned char *chunk_oid_lookup;
const unsigned char *chunk_object_offsets;
const unsigned char *chunk_large_offsets;
const unsigned char *chunk_revindex;
const char **pack_names;
struct packed_git **packs;

Просмотреть файл

@ -298,9 +298,26 @@ int load_midx_revindex(struct multi_pack_index *m)
{
struct strbuf revindex_name = STRBUF_INIT;
int ret;
if (m->revindex_data)
return 0;
if (m->chunk_revindex) {
/*
* If the MIDX `m` has a `RIDX` chunk, then use its contents for
* the reverse index instead of trying to load a separate `.rev`
* file.
*
* Note that we do *not* set `m->revindex_map` here, since we do
* not want to accidentally call munmap() in the middle of the
* MIDX.
*/
trace2_data_string("load_midx_revindex", the_repository,
"source", "midx");
m->revindex_data = (const uint32_t *)m->chunk_revindex;
return 0;
}
trace2_data_string("load_midx_revindex", the_repository,
"source", "rev");

Просмотреть файл

@ -290,7 +290,7 @@ test_rev_exists () {
}
midx_bitmap_core () {
rev_kind="${1:-rev}"
rev_kind="${1:-midx}"
setup_bitmap_history
@ -434,7 +434,7 @@ midx_bitmap_core () {
}
midx_bitmap_partial_tests () {
rev_kind="${1:-rev}"
rev_kind="${1:-midx}"
test_expect_success 'setup partial bitmaps' '
test_commit packed &&

Просмотреть файл

@ -9,6 +9,12 @@ test_description='exercise basic multi-pack bitmap functionality'
GIT_TEST_MULTI_PACK_INDEX=0
GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0
# This test exercise multi-pack bitmap functionality where the object order is
# stored and read from a special chunk within the MIDX, so use the default
# behavior here.
sane_unset GIT_TEST_MIDX_WRITE_REV
sane_unset GIT_TEST_MIDX_READ_RIDX
midx_bitmap_core
bitmap_reuse_tests() {

Просмотреть файл

@ -0,0 +1,23 @@
#!/bin/sh
test_description='exercise basic multi-pack bitmap functionality (.rev files)'
. ./test-lib.sh
. "${TEST_DIRECTORY}/lib-bitmap.sh"
# We'll be writing our own midx and bitmaps, so avoid getting confused by the
# automatic ones.
GIT_TEST_MULTI_PACK_INDEX=0
GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0
# Unlike t5326, this test exercise multi-pack bitmap functionality where the
# object order is stored in a separate .rev file.
GIT_TEST_MIDX_WRITE_REV=1
GIT_TEST_MIDX_READ_RIDX=0
export GIT_TEST_MIDX_WRITE_REV
export GIT_TEST_MIDX_READ_RIDX
midx_bitmap_core rev
midx_bitmap_partial_tests rev
test_done

Просмотреть файл

@ -311,16 +311,13 @@ test_expect_success 'cleans up MIDX when appropriate' '
checksum=$(midx_checksum $objdir) &&
test_path_is_file $midx &&
test_path_is_file $midx-$checksum.bitmap &&
test_path_is_file $midx-$checksum.rev &&
test_commit repack-3 &&
GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adb --write-midx &&
test_path_is_file $midx &&
test_path_is_missing $midx-$checksum.bitmap &&
test_path_is_missing $midx-$checksum.rev &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
test_path_is_file $midx-$(midx_checksum $objdir).rev &&
test_commit repack-4 &&
GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adb &&
@ -353,7 +350,6 @@ test_expect_success '--write-midx with preferred bitmap tips' '
test_line_count = 1 before &&
rm -fr $midx-$(midx_checksum $objdir).bitmap &&
rm -fr $midx-$(midx_checksum $objdir).rev &&
rm -fr $midx &&
# instead of constructing the snapshot ourselves (c.f., the test