From 1836836593e90b515269b564b384dcf091a4493d Mon Sep 17 00:00:00 2001 From: Robert Coup Date: Mon, 28 Mar 2022 14:02:05 +0000 Subject: [PATCH 1/7] fetch-negotiator: add specific noop initializer Add a specific initializer for the noop fetch negotiator. This is introduced to support allowing partial clones to skip commit negotiation when performing a "refetch". Signed-off-by: Robert Coup Signed-off-by: Junio C Hamano --- fetch-negotiator.c | 5 +++++ fetch-negotiator.h | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/fetch-negotiator.c b/fetch-negotiator.c index 874797d767..be383367f5 100644 --- a/fetch-negotiator.c +++ b/fetch-negotiator.c @@ -23,3 +23,8 @@ void fetch_negotiator_init(struct repository *r, return; } } + +void fetch_negotiator_init_noop(struct fetch_negotiator *negotiator) +{ + noop_negotiator_init(negotiator); +} diff --git a/fetch-negotiator.h b/fetch-negotiator.h index ea78868504..e348905a1f 100644 --- a/fetch-negotiator.h +++ b/fetch-negotiator.h @@ -53,7 +53,15 @@ struct fetch_negotiator { void *data; }; +/* + * Initialize a negotiator based on the repository settings. + */ void fetch_negotiator_init(struct repository *r, struct fetch_negotiator *negotiator); +/* + * Initialize a noop negotiator. + */ +void fetch_negotiator_init_noop(struct fetch_negotiator *negotiator); + #endif From 4dfd0925cbba78cc737e3af29faa5774bbc7b6a3 Mon Sep 17 00:00:00 2001 From: Robert Coup Date: Mon, 28 Mar 2022 14:02:06 +0000 Subject: [PATCH 2/7] fetch-pack: add refetch Allow a "refetch" where the contents of the local object store are ignored and a full fetch is performed, not attempting to find or negotiate common commits with the remote. A key use case is to apply a new partial clone blob/tree filter and refetch all the associated matching content, which would otherwise not be transferred when the commit objects are already present locally. 
Signed-off-by: Robert Coup Signed-off-by: Junio C Hamano --- fetch-pack.c | 46 +++++++++++++++++++++++++++++----------------- fetch-pack.h | 1 + 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/fetch-pack.c b/fetch-pack.c index 87657907e7..4e1e88eea0 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -312,19 +312,21 @@ static int find_common(struct fetch_negotiator *negotiator, const char *remote_hex; struct object *o; - /* - * If that object is complete (i.e. it is an ancestor of a - * local ref), we tell them we have it but do not have to - * tell them about its ancestors, which they already know - * about. - * - * We use lookup_object here because we are only - * interested in the case we *know* the object is - * reachable and we have already scanned it. - */ - if (((o = lookup_object(the_repository, remote)) != NULL) && - (o->flags & COMPLETE)) { - continue; + if (!args->refetch) { + /* + * If that object is complete (i.e. it is an ancestor of a + * local ref), we tell them we have it but do not have to + * tell them about its ancestors, which they already know + * about. + * + * We use lookup_object here because we are only + * interested in the case we *know* the object is + * reachable and we have already scanned it. 
+ */ + if (((o = lookup_object(the_repository, remote)) != NULL) && + (o->flags & COMPLETE)) { + continue; + } } remote_hex = oid_to_hex(remote); @@ -692,6 +694,9 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, int old_save_commit_buffer = save_commit_buffer; timestamp_t cutoff = 0; + if (args->refetch) + return; + save_commit_buffer = 0; trace2_region_enter("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); @@ -1028,7 +1033,11 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, struct fetch_negotiator *negotiator; negotiator = &negotiator_alloc; - fetch_negotiator_init(r, negotiator); + if (args->refetch) { + fetch_negotiator_init_noop(negotiator); + } else { + fetch_negotiator_init(r, negotiator); + } sort_ref_list(&ref, ref_compare_name); QSORT(sought, nr_sought, cmp_ref_by_name); @@ -1121,7 +1130,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, mark_complete_and_common_ref(negotiator, args, &ref); filter_refs(args, &ref, sought, nr_sought); - if (everything_local(args, &ref)) { + if (!args->refetch && everything_local(args, &ref)) { packet_flush(fd[1]); goto all_done; } @@ -1587,7 +1596,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, struct strvec index_pack_args = STRVEC_INIT; negotiator = &negotiator_alloc; - fetch_negotiator_init(r, negotiator); + if (args->refetch) + fetch_negotiator_init_noop(negotiator); + else + fetch_negotiator_init(r, negotiator); packet_reader_init(&reader, fd[0], NULL, 0, PACKET_READ_CHOMP_NEWLINE | @@ -1613,7 +1625,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, /* Filter 'ref' by 'sought' and those that aren't local */ mark_complete_and_common_ref(negotiator, args, &ref); filter_refs(args, &ref, sought, nr_sought); - if (everything_local(args, &ref)) + if (!args->refetch && everything_local(args, &ref)) state = FETCH_DONE; else state = FETCH_SEND_REQUEST; diff --git a/fetch-pack.h b/fetch-pack.h index 
7f94a2a583..8c7752fc82 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -42,6 +42,7 @@ struct fetch_pack_args { unsigned update_shallow:1; unsigned reject_shallow_remote:1; unsigned deepen:1; + unsigned refetch:1; /* * Indicate that the remote of this request is a promisor remote. The From 869a0eb4ebddad9ea758464526524ed06f5a13a9 Mon Sep 17 00:00:00 2001 From: Robert Coup Date: Mon, 28 Mar 2022 14:02:07 +0000 Subject: [PATCH 3/7] builtin/fetch-pack: add --refetch option Add a refetch option to fetch-pack to force a full fetch. Use when applying a new partial clone filter to refetch all matching objects. Signed-off-by: Robert Coup Signed-off-by: Junio C Hamano --- Documentation/git-fetch-pack.txt | 4 ++++ builtin/fetch-pack.c | 4 ++++ remote-curl.c | 6 ++++++ 3 files changed, 14 insertions(+) diff --git a/Documentation/git-fetch-pack.txt b/Documentation/git-fetch-pack.txt index c975884793..46747d5f42 100644 --- a/Documentation/git-fetch-pack.txt +++ b/Documentation/git-fetch-pack.txt @@ -101,6 +101,10 @@ be in a separate packet, and the list must end with a flush packet. current shallow boundary instead of from the tip of each remote branch history. +--refetch:: + Skips negotiating commits with the server in order to fetch all matching + objects. Use to reapply a new partial clone blob/tree filter. + --no-progress:: Do not show the progress. 
diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index c2d96f4c89..1f8aec97d4 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -153,6 +153,10 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) args.from_promisor = 1; continue; } + if (!strcmp("--refetch", arg)) { + args.refetch = 1; + continue; + } if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) { parse_list_objects_filter(&args.filter_options, arg); continue; diff --git a/remote-curl.c b/remote-curl.c index 0dabef2dd7..fc75600d4c 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -43,6 +43,7 @@ struct options { /* see documentation of corresponding flag in fetch-pack.h */ from_promisor : 1, + refetch : 1, atomic : 1, object_format : 1, force_if_includes : 1; @@ -198,6 +199,9 @@ static int set_option(const char *name, const char *value) } else if (!strcmp(name, "from-promisor")) { options.from_promisor = 1; return 0; + } else if (!strcmp(name, "refetch")) { + options.refetch = 1; + return 0; } else if (!strcmp(name, "filter")) { options.filter = xstrdup(value); return 0; @@ -1182,6 +1186,8 @@ static int fetch_git(struct discovery *heads, strvec_push(&args, "--deepen-relative"); if (options.from_promisor) strvec_push(&args, "--from-promisor"); + if (options.refetch) + strvec_push(&args, "--refetch"); if (options.filter) strvec_pushf(&args, "--filter=%s", options.filter); strvec_push(&args, url.buf); From 3c7bab06e12922fbcb375187eb60ac426fc72a3a Mon Sep 17 00:00:00 2001 From: Robert Coup Date: Mon, 28 Mar 2022 14:02:08 +0000 Subject: [PATCH 4/7] fetch: add --refetch option Teach fetch and transports the --refetch option to force a full fetch without negotiating common commits with the remote. Use when applying a new partial clone filter to refetch all matching objects. 
Signed-off-by: Robert Coup Signed-off-by: Junio C Hamano --- Documentation/fetch-options.txt | 9 +++++++++ builtin/fetch.c | 15 ++++++++++++++- transport-helper.c | 3 +++ transport.c | 4 ++++ transport.h | 4 ++++ 5 files changed, 34 insertions(+), 1 deletion(-) diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt index f903683189..21a247abfa 100644 --- a/Documentation/fetch-options.txt +++ b/Documentation/fetch-options.txt @@ -163,6 +163,15 @@ endif::git-pull[] behavior for a remote may be specified with the remote.<name>.tagOpt setting. See linkgit:git-config[1]. +ifndef::git-pull[] +--refetch:: + Instead of negotiating with the server to avoid transferring commits and + associated objects that are already present locally, this option fetches + all objects as a fresh clone would. Use this to reapply a partial clone + filter from configuration or using `--filter=` when the filter + definition has changed. +endif::git-pull[] + --refmap=<refspec>:: When fetching refs listed on the command line, use the specified refspec (can be given more than once) to map the diff --git a/builtin/fetch.c b/builtin/fetch.c index 79ee959185..aa53ada58a 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -59,7 +59,7 @@ static int prune_tags = -1; /* unspecified */ static int all, append, dry_run, force, keep, multiple, update_head_ok; static int write_fetch_head = 1; -static int verbosity, deepen_relative, set_upstream; +static int verbosity, deepen_relative, set_upstream, refetch; static int progress = -1; static int enable_auto_gc = 1; static int tags = TAGS_DEFAULT, unshallow, update_shallow, deepen; @@ -190,6 +190,9 @@ static struct option builtin_fetch_options[] = { OPT_SET_INT_F(0, "unshallow", &unshallow, N_("convert to a complete repository"), 1, PARSE_OPT_NONEG), + OPT_SET_INT_F(0, "refetch", &refetch, + N_("re-fetch without negotiating common commits"), + 1, PARSE_OPT_NONEG), { OPTION_STRING, 0, "submodule-prefix", &submodule_prefix, N_("dir"), N_("prepend this 
to submodule path output"), PARSE_OPT_HIDDEN }, OPT_CALLBACK_F(0, "recurse-submodules-default", @@ -1296,6 +1299,14 @@ static int check_exist_and_connected(struct ref *ref_map) if (deepen) return -1; + /* + * Similarly, if we need to refetch, we always want to perform a full + * fetch ignoring existing objects. + */ + if (refetch) + return -1; + + /* * check_connected() allows objects to merely be promised, but * we need all direct targets to exist. @@ -1492,6 +1503,8 @@ static struct transport *prepare_transport(struct remote *remote, int deepen) set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, "yes"); if (update_shallow) set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes"); + if (refetch) + set_option(transport, TRANS_OPT_REFETCH, "yes"); if (filter_options.choice) { const char *spec = expand_list_objects_filter_spec(&filter_options); diff --git a/transport-helper.c b/transport-helper.c index a0297b0986..b4dbbabb0c 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -715,6 +715,9 @@ static int fetch_refs(struct transport *transport, if (data->transport_options.update_shallow) set_helper_option(transport, "update-shallow", "true"); + if (data->transport_options.refetch) + set_helper_option(transport, "refetch", "true"); + if (data->transport_options.filter_options.choice) { const char *spec = expand_list_objects_filter_spec( &data->transport_options.filter_options); diff --git a/transport.c b/transport.c index 253d6671b1..e2817b7a71 100644 --- a/transport.c +++ b/transport.c @@ -243,6 +243,9 @@ static int set_git_option(struct git_transport_options *opts, list_objects_filter_die_if_populated(&opts->filter_options); parse_list_objects_filter(&opts->filter_options, value); return 0; + } else if (!strcmp(name, TRANS_OPT_REFETCH)) { + opts->refetch = !!value; + return 0; } else if (!strcmp(name, TRANS_OPT_REJECT_SHALLOW)) { opts->reject_shallow = !!value; return 0; @@ -377,6 +380,7 @@ static int fetch_refs_via_pack(struct transport *transport, 
args.update_shallow = data->options.update_shallow; args.from_promisor = data->options.from_promisor; args.filter_options = data->options.filter_options; + args.refetch = data->options.refetch; args.stateless_rpc = transport->stateless_rpc; args.server_options = transport->server_options; args.negotiation_tips = data->options.negotiation_tips; diff --git a/transport.h b/transport.h index a0bc6a1e9e..12bc08fc33 100644 --- a/transport.h +++ b/transport.h @@ -16,6 +16,7 @@ struct git_transport_options { unsigned update_shallow : 1; unsigned reject_shallow : 1; unsigned deepen_relative : 1; + unsigned refetch : 1; /* see documentation of corresponding flag in fetch-pack.h */ unsigned from_promisor : 1; @@ -216,6 +217,9 @@ void transport_check_allowed(const char *type); /* Filter objects for partial clone and fetch */ #define TRANS_OPT_LIST_OBJECTS_FILTER "filter" +/* Refetch all objects without negotiating */ +#define TRANS_OPT_REFETCH "refetch" + /* Request atomic (all-or-nothing) updates when pushing */ #define TRANS_OPT_ATOMIC "atomic" From 011b7757279e52043a0398b0a3b1d497d8417daf Mon Sep 17 00:00:00 2001 From: Robert Coup Date: Mon, 28 Mar 2022 14:02:09 +0000 Subject: [PATCH 5/7] t5616-partial-clone: add test for fetch --refetch Add a test for doing a refetch to apply a changed partial clone filter under protocol v0 and v2. Signed-off-by: Robert Coup Signed-off-by: Junio C Hamano --- t/t5616-partial-clone.sh | 52 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 34469b6ac1..87ebf4b0b1 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -166,6 +166,56 @@ test_expect_success 'manual prefetch of missing objects' ' test_line_count = 0 observed.oids ' +# create new commits in "src" repo to establish a history on file.4.txt +# and push to "srv.bare". 
+test_expect_success 'push new commits to server for file.4.txt' ' + for x in a b c d e f + do + echo "Mod file.4.txt $x" >src/file.4.txt && + if list_contains "a,b" "$x"; then + printf "%10000s" X >>src/file.4.txt + fi && + if list_contains "c,d" "$x"; then + printf "%20000s" X >>src/file.4.txt + fi && + git -C src add file.4.txt && + git -C src commit -m "mod $x" || return 1 + done && + git -C src push -u srv main +' + +# Do partial fetch to fetch smaller files; then verify that without --refetch +# applying a new filter does not refetch missing large objects. Then use +# --refetch to apply the new filter on existing commits. Test it under both +# protocol v2 & v0. +test_expect_success 'apply a different filter using --refetch' ' + git -C pc1 fetch --filter=blob:limit=999 origin && + git -C pc1 rev-list --quiet --objects --missing=print \ + main..origin/main >observed && + test_line_count = 4 observed && + + git -C pc1 fetch --filter=blob:limit=19999 --refetch origin && + git -C pc1 rev-list --quiet --objects --missing=print \ + main..origin/main >observed && + test_line_count = 2 observed && + + git -c protocol.version=0 -C pc1 fetch --filter=blob:limit=29999 \ + --refetch origin && + git -C pc1 rev-list --quiet --objects --missing=print \ + main..origin/main >observed && + test_line_count = 0 observed +' + +test_expect_success 'fetch --refetch works with a shallow clone' ' + git clone --no-checkout --depth=1 --filter=blob:none "file://$(pwd)/srv.bare" pc1s && + git -C pc1s rev-list --objects --missing=print HEAD >observed && + test_line_count = 6 observed && + + GIT_TRACE=1 git -C pc1s fetch --filter=blob:limit=999 --refetch origin && + git -C pc1s rev-list --objects --missing=print HEAD >observed && + test_line_count = 6 observed +' + test_expect_success 'partial clone with transfer.fsckobjects=1 works with submodules' ' test_create_repo submodule && test_commit -C submodule mycommit && @@ -225,7 +275,7 @@ test_expect_success 'use fsck before and after 
manually fetching a missing subtr # Auto-fetch all remaining trees and blobs with --missing=error git -C dst rev-list --missing=error --objects main >fetched_objects && - test_line_count = 70 fetched_objects && + test_line_count = 88 fetched_objects && awk -f print_1.awk fetched_objects | xargs -n1 git -C dst cat-file -t >fetched_types && From 7390f05a3c674e354ba2f52632046fa0a5c3e501 Mon Sep 17 00:00:00 2001 From: Robert Coup Date: Mon, 28 Mar 2022 14:02:10 +0000 Subject: [PATCH 6/7] fetch: after refetch, encourage auto gc repacking After invoking `fetch --refetch`, the object db will likely contain many duplicate objects. If auto-maintenance is enabled, invoke it with appropriate settings to encourage repacking/consolidation. * gc.autoPackLimit: unless this is set to 0 (disabled), override the value to 1 to force pack consolidation. * maintenance.incremental-repack.auto: unless this is set to 0, override the value to -1 to force incremental repacking. Signed-off-by: Robert Coup Signed-off-by: Junio C Hamano --- Documentation/fetch-options.txt | 3 ++- builtin/fetch.c | 19 ++++++++++++++++++- t/t5616-partial-clone.sh | 29 +++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt index 21a247abfa..49ae48dca3 100644 --- a/Documentation/fetch-options.txt +++ b/Documentation/fetch-options.txt @@ -169,7 +169,8 @@ ifndef::git-pull[] associated objects that are already present locally, this option fetches all objects as a fresh clone would. Use this to reapply a partial clone filter from configuration or using `--filter=` when the filter - definition has changed. + definition has changed. Automatic post-fetch maintenance will perform + object database pack consolidation to remove any duplicate objects. 
endif::git-pull[] --refmap=:: diff --git a/builtin/fetch.c b/builtin/fetch.c index aa53ada58a..a47b696b89 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -2226,8 +2226,25 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) NULL); } - if (enable_auto_gc) + if (enable_auto_gc) { + if (refetch) { + /* + * Hint auto-maintenance strongly to encourage repacking, + * but respect config settings disabling it. + */ + int opt_val; + if (git_config_get_int("gc.autopacklimit", &opt_val)) + opt_val = -1; + if (opt_val != 0) + git_config_push_parameter("gc.autoPackLimit=1"); + + if (git_config_get_int("maintenance.incremental-repack.auto", &opt_val)) + opt_val = -1; + if (opt_val != 0) + git_config_push_parameter("maintenance.incremental-repack.auto=-1"); + } run_auto_maintenance(verbosity < 0); + } cleanup: string_list_clear(&list, 0); diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 87ebf4b0b1..4a3778d04a 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -216,6 +216,35 @@ test_expect_success 'fetch --refetch works with a shallow clone' ' test_line_count = 6 observed ' +test_expect_success 'fetch --refetch triggers repacking' ' + GIT_TRACE2_CONFIG_PARAMS=gc.autoPackLimit,maintenance.incremental-repack.auto && + export GIT_TRACE2_CONFIG_PARAMS && + + GIT_TRACE2_EVENT="$PWD/trace1.event" \ + git -C pc1 fetch --refetch origin && + test_subcommand git maintenance run --auto --no-quiet Date: Mon, 28 Mar 2022 14:02:11 +0000 Subject: [PATCH 7/7] docs: mention --refetch fetch option Document it for partial clones as a means to apply a new filter, and reference it from the remote..partialclonefilter config parameter. 
Signed-off-by: Robert Coup Signed-off-by: Junio C Hamano --- Documentation/config/remote.txt | 6 ++++-- Documentation/technical/partial-clone.txt | 3 +++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt index a8e6437a90..0678b4bcfe 100644 --- a/Documentation/config/remote.txt +++ b/Documentation/config/remote.txt @@ -82,5 +82,7 @@ remote.<name>.promisor:: objects. remote.<name>.partialclonefilter:: - The filter that will be applied when fetching from this - promisor remote. + The filter that will be applied when fetching from this promisor remote. + Changing or clearing this value will only affect fetches for new commits. + To fetch associated objects for commits already present in the local object + database, use the `--refetch` option of linkgit:git-fetch[1]. diff --git a/Documentation/technical/partial-clone.txt b/Documentation/technical/partial-clone.txt index a0dd7c66f2..99f0eb3040 100644 --- a/Documentation/technical/partial-clone.txt +++ b/Documentation/technical/partial-clone.txt @@ -181,6 +181,9 @@ Fetching Missing Objects currently fetches all objects referred to by the requested objects, even though they are not necessary. +- Fetching with `--refetch` will request a complete new filtered packfile from + the remote, which can be used to change a filter without needing to + dynamically fetch missing objects. Using many promisor remotes ---------------------------