зеркало из https://github.com/microsoft/git.git
Merge branch 'jt/pack-objects-prefetch-in-batch'
While packing many objects in a repository with a promisor remote, lazily fetching missing objects from the promisor remote one by one may be inefficient; the code now attempts to fetch all the missing objects in a single batch. (Obviously this won't work for a lazy clone that lazily fetches tree objects, as you cannot even enumerate which blobs are missing until you learn which trees are missing.) * jt/pack-objects-prefetch-in-batch: (1) pack-objects: prefetch objects to be packed; (2) pack-objects: refactor to oid_object_info_extended
This commit is contained in:
Коммит
5c454b3825
|
@ -35,6 +35,7 @@
|
|||
#include "midx.h"
|
||||
#include "trace2.h"
|
||||
#include "shallow.h"
|
||||
#include "promisor-remote.h"
|
||||
|
||||
#define IN_PACK(obj) oe_in_pack(&to_pack, obj)
|
||||
#define SIZE(obj) oe_size(&to_pack, obj)
|
||||
|
@ -1704,9 +1705,30 @@ static int can_reuse_delta(const struct object_id *base_oid,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void check_object(struct object_entry *entry)
|
||||
/*
 * Batch-request objects that are about to be packed.
 *
 * Walks to_pack.objects from index object_index_start up to
 * to_pack.nr_objects, collects the object IDs for which
 * oid_object_info_extended() with OBJECT_INFO_FOR_PREFETCH returns
 * non-zero, and hands the whole list to promisor_remote_get_direct()
 * in a single call.
 *
 * The result of promisor_remote_get_direct() is not checked here; the
 * caller (check_object) repeats its own lookup after this returns and
 * deals with anything that is still unavailable.
 *
 * NOTE(review): the caller visible in this change passes an index into
 * sorted_by_offset[], while this loop indexes to_pack.objects directly.
 * Confirm that starting from that index in the unsorted array is
 * intended.
 */
static void prefetch_to_pack(uint32_t object_index_start) {
|
||||
struct oid_array to_fetch = OID_ARRAY_INIT;
|
||||
uint32_t i;
|
||||
|
||||
for (i = object_index_start; i < to_pack.nr_objects; i++) {
|
||||
struct object_entry *entry = to_pack.objects + i;
|
||||
|
||||
/*
 * A zero return means the lookup succeeded, so nothing needs to be
 * requested for this entry (presumably: the object is already
 * available locally -- confirm against OBJECT_INFO_FOR_PREFETCH).
 */
if (!oid_object_info_extended(the_repository,
|
||||
&entry->idx.oid,
|
||||
NULL,
|
||||
OBJECT_INFO_FOR_PREFETCH))
|
||||
continue;
|
||||
oid_array_append(&to_fetch, &entry->idx.oid);
|
||||
}
|
||||
/* One request for every collected object ID, then release the list. */
promisor_remote_get_direct(the_repository,
|
||||
to_fetch.oid, to_fetch.nr);
|
||||
oid_array_clear(&to_fetch);
|
||||
}
|
||||
|
||||
static void check_object(struct object_entry *entry, uint32_t object_index)
|
||||
{
|
||||
unsigned long canonical_size;
|
||||
enum object_type type;
|
||||
struct object_info oi = {.typep = &type, .sizep = &canonical_size};
|
||||
|
||||
if (IN_PACK(entry)) {
|
||||
struct packed_git *p = IN_PACK(entry);
|
||||
|
@ -1840,8 +1862,18 @@ static void check_object(struct object_entry *entry)
|
|||
unuse_pack(&w_curs);
|
||||
}
|
||||
|
||||
oe_set_type(entry,
|
||||
oid_object_info(the_repository, &entry->idx.oid, &canonical_size));
|
||||
if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
|
||||
OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) {
|
||||
if (has_promisor_remote()) {
|
||||
prefetch_to_pack(object_index);
|
||||
if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
|
||||
OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0)
|
||||
type = -1;
|
||||
} else {
|
||||
type = -1;
|
||||
}
|
||||
}
|
||||
oe_set_type(entry, type);
|
||||
if (entry->type_valid) {
|
||||
SET_SIZE(entry, canonical_size);
|
||||
} else {
|
||||
|
@ -2061,7 +2093,7 @@ static void get_object_details(void)
|
|||
|
||||
for (i = 0; i < to_pack.nr_objects; i++) {
|
||||
struct object_entry *entry = sorted_by_offset[i];
|
||||
check_object(entry);
|
||||
check_object(entry, i);
|
||||
if (entry->type_valid &&
|
||||
oe_size_greater_than(&to_pack, entry, big_file_threshold))
|
||||
entry->no_try_delta = 1;
|
||||
|
|
|
@ -497,4 +497,40 @@ test_expect_success 'make sure index-pack detects the SHA1 collision (large blob
|
|||
)
|
||||
'
|
||||
|
||||
# Regression test for batched prefetching during pack-objects.
#
# Sets up a server repository with three commits ("one", "two", "three"),
# makes a blob-less single-branch clone of one_branch, fetches the second
# commit (also without blobs), and pushes that commit back to the server
# with GIT_TRACE_PACKET enabled. The test passes only if the packet trace
# contains exactly one "git> done" line.
test_expect_success 'prefetch objects' '
|
||||
rm -rf server client &&
|
||||
|
||||
git init server &&
|
||||
test_config -C server uploadpack.allowanysha1inwant 1 &&
|
||||
test_config -C server uploadpack.allowfilter 1 &&
|
||||
test_config -C server protocol.version 2 &&
|
||||
|
||||
echo one >server/one &&
|
||||
git -C server add one &&
|
||||
git -C server commit -m one &&
|
||||
git -C server branch one_branch &&
|
||||
|
||||
echo two_a >server/two_a &&
|
||||
echo two_b >server/two_b &&
|
||||
git -C server add two_a two_b &&
|
||||
git -C server commit -m two &&
|
||||
|
||||
echo three >server/three &&
|
||||
git -C server add three &&
|
||||
git -C server commit -m three &&
|
||||
git -C server branch three_branch &&
|
||||
|
||||
# Clone, fetch "two" with blobs excluded, and re-push it. This requires
|
||||
# the client to have the blobs of "two" - verify that these are
|
||||
# prefetched in one batch.
|
||||
git clone --filter=blob:none --single-branch -b one_branch \
|
||||
"file://$(pwd)/server" client &&
|
||||
test_config -C client protocol.version 2 &&
|
||||
TWO=$(git -C server rev-parse three_branch^) &&
|
||||
git -C client fetch --filter=blob:none origin "$TWO" &&
|
||||
GIT_TRACE_PACKET=$(pwd)/trace git -C client push origin "$TWO":refs/heads/two_branch &&
|
||||
grep "git> done" trace >donelines &&
|
||||
test_line_count = 1 donelines
|
||||
'
|
||||
|
||||
test_done
|
||||
|
|
Загрузка…
Ссылка в новой задаче