From b2896d27391afcbc990439e63972bb33693a7d6b Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Fri, 23 Jul 2021 11:52:22 -0700 Subject: [PATCH 1/2] unpack-trees: refactor prefetching code Refactor the prefetching code in unpack-trees.c into its own function, because it will be used elsewhere in a subsequent commit. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- cache.h | 9 +++++++++ read-cache.c | 23 +++++++++++++++++++++++ unpack-trees.c | 27 ++++++++------------------- 3 files changed, 40 insertions(+), 19 deletions(-) diff --git a/cache.h b/cache.h index ba04ff8bd3..6f952e22c6 100644 --- a/cache.h +++ b/cache.h @@ -410,6 +410,15 @@ struct cache_entry *dup_cache_entry(const struct cache_entry *ce, struct index_s */ void validate_cache_entries(const struct index_state *istate); +/* + * Bulk prefetch all missing cache entries that are not GITLINKs and that match + * the given predicate. This function should only be called if + * has_promisor_remote() returns true. + */ +typedef int (*must_prefetch_predicate)(const struct cache_entry *); +void prefetch_cache_entries(const struct index_state *istate, + must_prefetch_predicate must_prefetch); + #ifdef USE_THE_INDEX_COMPATIBILITY_MACROS extern struct index_state the_index; diff --git a/read-cache.c b/read-cache.c index ba2b012a6c..4e396bf17f 100644 --- a/read-cache.c +++ b/read-cache.c @@ -27,6 +27,7 @@ #include "progress.h" #include "sparse-index.h" #include "csum-file.h" +#include "promisor-remote.h" /* Mask for the name length in ce_flags in the on-disk index */ @@ -3657,3 +3658,25 @@ static void write_ieot_extension(struct strbuf *sb, struct index_entry_offset_ta strbuf_add(sb, &buffer, sizeof(uint32_t)); } } + +void prefetch_cache_entries(const struct index_state *istate, + must_prefetch_predicate must_prefetch) +{ + int i; + struct oid_array to_fetch = OID_ARRAY_INIT; + + for (i = 0; i < istate->cache_nr; i++) { + struct cache_entry *ce = istate->cache[i]; + + if (S_ISGITLINK(ce->ce_mode) || !must_prefetch(ce)) + continue; + if (!oid_object_info_extended(the_repository, &ce->oid, + NULL, + OBJECT_INFO_FOR_PREFETCH)) + continue; + oid_array_append(&to_fetch, &ce->oid); + } + promisor_remote_get_direct(the_repository, + to_fetch.oid, to_fetch.nr); + oid_array_clear(&to_fetch); +} diff --git a/unpack-trees.c b/unpack-trees.c index f88a69f8e7..ed92794032 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -392,6 +392,11 @@ static void report_collided_checkout(struct index_state *index) string_list_clear(&list, 0); } +static int must_checkout(const struct cache_entry *ce) +{ + return ce->ce_flags & CE_UPDATE; +} + static int check_updates(struct unpack_trees_options *o, struct index_state *index) { @@ -442,28 +447,12 @@ static int check_updates(struct unpack_trees_options *o, if (should_update_submodules()) load_gitmodules_file(index, &state); - if (has_promisor_remote()) { + if (has_promisor_remote()) /* * Prefetch the objects that are to be checked out in the loop * below. */ - struct oid_array to_fetch = OID_ARRAY_INIT; - for (i = 0; i < index->cache_nr; i++) { - struct cache_entry *ce = index->cache[i]; - - if (!(ce->ce_flags & CE_UPDATE) || - S_ISGITLINK(ce->ce_mode)) - continue; - if (!oid_object_info_extended(the_repository, &ce->oid, - NULL, - OBJECT_INFO_FOR_PREFETCH)) - continue; - oid_array_append(&to_fetch, &ce->oid); - } - promisor_remote_get_direct(the_repository, - to_fetch.oid, to_fetch.nr); - oid_array_clear(&to_fetch); - } + prefetch_cache_entries(index, must_checkout); get_parallel_checkout_configs(&pc_workers, &pc_threshold); @@ -473,7 +462,7 @@ static int check_updates(struct unpack_trees_options *o, for (i = 0; i < index->cache_nr; i++) { struct cache_entry *ce = index->cache[i]; - if (ce->ce_flags & CE_UPDATE) { + if (must_checkout(ce)) { size_t last_pc_queue_size = pc_queue_size(); if (ce->ce_flags & CE_WT_REMOVE) From d3da223f2214ebc1527ccf66428aa975de916682 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Fri, 23 Jul 2021 11:52:23 -0700 Subject: [PATCH 2/2] cache-tree: prefetch in partial clone read-tree "git read-tree" checks the existence of the blobs referenced by the given tree, but does not bulk prefetch them. Add a bulk prefetch. The lack of prefetch here was noticed at $DAYJOB during a merge involving some specific commits, but I couldn't find a minimal merge that didn't also trigger the prefetch in check_updates() in unpack-trees.c (and in all these cases, the lack of prefetch in cache-tree.c didn't matter because all the relevant blobs would have already been prefetched by then). This is why I used read-tree here to exercise this code path. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- cache-tree.c | 11 ++++++++-- t/t1022-read-tree-partial-clone.sh | 33 ++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) create mode 100755 t/t1022-read-tree-partial-clone.sh diff --git a/cache-tree.c b/cache-tree.c index 45e58666af..9ba2c7c6b2 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -237,6 +237,11 @@ int cache_tree_fully_valid(struct cache_tree *it) return 1; } +static int must_check_existence(const struct cache_entry *ce) +{ + return !(has_promisor_remote() && ce_skip_worktree(ce)); +} + static int update_one(struct cache_tree *it, struct cache_entry **cache, int entries, @@ -378,8 +383,7 @@ static int update_one(struct cache_tree *it, } ce_missing_ok = mode == S_IFGITLINK || missing_ok || - (has_promisor_remote() && - ce_skip_worktree(ce)); + !must_check_existence(ce); if (is_null_oid(oid) || (!ce_missing_ok && !has_object_file(oid))) { strbuf_release(&buffer); @@ -466,6 +470,9 @@ int cache_tree_update(struct index_state *istate, int flags) if (!istate->cache_tree) istate->cache_tree = cache_tree(); + if (!(flags & WRITE_TREE_MISSING_OK) && has_promisor_remote()) + prefetch_cache_entries(istate, must_check_existence); + trace_performance_enter(); trace2_region_enter("cache_tree", "update", the_repository); i = update_one(istate->cache_tree, istate->cache, istate->cache_nr, diff --git a/t/t1022-read-tree-partial-clone.sh b/t/t1022-read-tree-partial-clone.sh new file mode 100755 index 0000000000..a763e27c7d --- /dev/null +++ b/t/t1022-read-tree-partial-clone.sh @@ -0,0 +1,33 @@ +#!/bin/sh + +test_description='git read-tree in partial clones' + +TEST_NO_CREATE_REPO=1 + +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME + +. ./test-lib.sh + +test_expect_success 'read-tree in partial clone prefetches in one batch' ' + test_when_finished "rm -rf server client trace" && + + git init server && + echo foo >server/one && + echo bar >server/two && + git -C server add one two && + git -C server commit -m "initial commit" && + TREE=$(git -C server rev-parse HEAD^{tree}) && + + git -C server config uploadpack.allowfilter 1 && + git -C server config uploadpack.allowanysha1inwant 1 && + git clone --bare --filter=blob:none "file://$(pwd)/server" client && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client read-tree $TREE && + + # "done" marks the end of negotiation (once per fetch). Expect that + # only one fetch occurs. + grep "fetch> done" trace >donelines && + test_line_count = 1 donelines +' + +test_done