2008-04-27 21:39:30 +04:00
|
|
|
/*
|
|
|
|
* Builtin "git clone"
|
|
|
|
*
|
|
|
|
* Copyright (c) 2007 Kristian Høgsberg <krh@redhat.com>,
|
|
|
|
* 2008 Daniel Barkalow <barkalow@iabervon.org>
|
|
|
|
* Based on git-commit.sh by Junio C Hamano and Linus Torvalds
|
|
|
|
*
|
|
|
|
* Clone a repository into a different directory that does not yet exist.
|
|
|
|
*/
|
|
|
|
|
2022-11-19 16:07:38 +03:00
|
|
|
#define USE_THE_INDEX_VARIABLE
|
Fix sparse warnings
Fix warnings from 'make check'.
- These files don't include 'builtin.h' causing sparse to complain that
cmd_* isn't declared:
builtin/clone.c:364, builtin/fetch-pack.c:797,
builtin/fmt-merge-msg.c:34, builtin/hash-object.c:78,
builtin/merge-index.c:69, builtin/merge-recursive.c:22
builtin/merge-tree.c:341, builtin/mktag.c:156, builtin/notes.c:426
builtin/notes.c:822, builtin/pack-redundant.c:596,
builtin/pack-refs.c:10, builtin/patch-id.c:60, builtin/patch-id.c:149,
builtin/remote.c:1512, builtin/remote-ext.c:240,
builtin/remote-fd.c:53, builtin/reset.c:236, builtin/send-pack.c:384,
builtin/unpack-file.c:25, builtin/var.c:75
- These files have symbols which should be marked static since they're
only file scope:
submodule.c:12, diff.c:631, replace_object.c:92, submodule.c:13,
submodule.c:14, trace.c:78, transport.c:195, transport-helper.c:79,
unpack-trees.c:19, url.c:3, url.c:18, url.c:104, url.c:117, url.c:123,
url.c:129, url.c:136, thread-utils.c:21, thread-utils.c:48
- These files redeclare symbols to be different types:
builtin/index-pack.c:210, parse-options.c:564, parse-options.c:571,
usage.c:49, usage.c:58, usage.c:63, usage.c:72
- These files use a literal integer 0 when they really should use a NULL
pointer:
daemon.c:663, fast-import.c:2942, imap-send.c:1072, notes-merge.c:362
While we're in the area, clean up some unused #includes in builtin files
(mostly exec_cmd.h).
Signed-off-by: Stephen Boyd <bebarino@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-22 10:51:05 +03:00
|
|
|
#include "builtin.h"
|
2023-03-21 09:25:58 +03:00
|
|
|
#include "abspath.h"
|
2023-04-11 06:00:39 +03:00
|
|
|
#include "advice.h"
|
2017-06-14 21:07:36 +03:00
|
|
|
#include "config.h"
|
2023-04-22 23:17:12 +03:00
|
|
|
#include "copy.h"
|
2023-03-21 09:26:03 +03:00
|
|
|
#include "environment.h"
|
2023-03-21 09:25:54 +03:00
|
|
|
#include "gettext.h"
|
2023-02-24 03:09:27 +03:00
|
|
|
#include "hex.h"
|
2014-10-01 14:28:42 +04:00
|
|
|
#include "lockfile.h"
|
2008-04-27 21:39:30 +04:00
|
|
|
#include "parse-options.h"
|
|
|
|
#include "refs.h"
|
2018-05-17 01:57:48 +03:00
|
|
|
#include "refspec.h"
|
2023-04-11 10:41:53 +03:00
|
|
|
#include "object-file.h"
|
2023-05-16 09:34:06 +03:00
|
|
|
#include "object-store-ll.h"
|
2008-04-27 21:39:30 +04:00
|
|
|
#include "tree.h"
|
|
|
|
#include "tree-walk.h"
|
|
|
|
#include "unpack-trees.h"
|
|
|
|
#include "transport.h"
|
|
|
|
#include "strbuf.h"
|
|
|
|
#include "dir.h"
|
2019-07-11 02:59:03 +03:00
|
|
|
#include "dir-iterator.h"
|
|
|
|
#include "iterator.h"
|
chain kill signals for cleanup functions
If a piece of code wanted to do some cleanup before exiting
(e.g., cleaning up a lockfile or a tempfile), our usual
strategy was to install a signal handler that did something
like this:
do_cleanup(); /* actual work */
signal(signo, SIG_DFL); /* restore previous behavior */
raise(signo); /* deliver signal, killing ourselves */
For a single handler, this works fine. However, if we want
to clean up two _different_ things, we run into a problem.
The most recently installed handler will run, but when it
removes itself as a handler, it doesn't put back the first
handler.
This patch introduces sigchain, a tiny library for handling
a stack of signal handlers. You sigchain_push each handler,
and use sigchain_pop to restore whoever was before you in
the stack.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-01-22 09:02:35 +03:00
|
|
|
#include "sigchain.h"
|
2009-03-04 09:29:55 +03:00
|
|
|
#include "branch.h"
|
2009-02-25 11:32:13 +03:00
|
|
|
#include "remote.h"
|
2009-03-03 08:37:51 +03:00
|
|
|
#include "run-command.h"
|
2023-03-21 09:26:05 +03:00
|
|
|
#include "setup.h"
|
2013-03-26 00:26:27 +04:00
|
|
|
#include "connected.h"
|
2017-08-19 01:20:21 +03:00
|
|
|
#include "packfile.h"
|
2023-05-16 09:33:59 +03:00
|
|
|
#include "path.h"
|
2023-04-22 23:17:14 +03:00
|
|
|
#include "pkt-line.h"
|
2017-12-08 18:58:46 +03:00
|
|
|
#include "list-objects-filter-options.h"
|
2021-12-22 06:59:35 +03:00
|
|
|
#include "hook.h"
|
2022-03-09 19:01:43 +03:00
|
|
|
#include "bundle.h"
|
2022-08-09 16:11:41 +03:00
|
|
|
#include "bundle-uri.h"
|
2008-04-27 21:39:30 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Overall FIXMEs:
|
|
|
|
* - respect DB_ENVIRONMENT for .git/objects.
|
|
|
|
*
|
|
|
|
* Implementation notes:
|
|
|
|
* - dropping use-separate-remote and no-separate-remote compatibility
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
/* usage string shown by "git clone -h" and on option errors */
static const char * const builtin_clone_usage[] = {
	N_("git clone [<options>] [--] <repo> [<dir>]"),
	NULL
};
|
|
|
|
|
2012-01-07 18:45:59 +04:00
|
|
|
/*
 * Command-line option state.  A value of -1 generally means "not given
 * on the command line", so that configuration or built-in defaults can
 * be consulted later.
 */
static int option_no_checkout, option_bare, option_mirror, option_single_branch = -1;
static int option_local = -1, option_no_hardlinks, option_shared;
static int option_no_tags;
static int option_shallow_submodules;
static int option_reject_shallow = -1;    /* unspecified */
static int config_reject_shallow = -1;    /* unspecified */
static int deepen;
static char *option_template, *option_depth, *option_since;
static char *option_origin = NULL;
static char *remote_name = NULL;
static char *option_branch = NULL;
static struct string_list option_not = STRING_LIST_INIT_NODUP;
static const char *real_git_dir;
static const char *ref_format;
static char *option_upload_pack = "git-upload-pack";
static int option_verbosity;
static int option_progress = -1;
static int option_sparse_checkout;
static enum transport_family family;
/* "-c key=value" settings to install in the new repository */
static struct string_list option_config = STRING_LIST_INIT_NODUP;
/* "--reference" repos: failure to use one is fatal */
static struct string_list option_required_reference = STRING_LIST_INIT_NODUP;
/* "--reference-if-able" repos: failure only warns */
static struct string_list option_optional_reference = STRING_LIST_INIT_NODUP;
static int option_dissociate;
static int max_jobs = -1;
static struct string_list option_recurse_submodules = STRING_LIST_INIT_NODUP;
static struct list_objects_filter_options filter_options = LIST_OBJECTS_FILTER_INIT;
static int option_filter_submodules = -1;    /* unspecified */
static int config_filter_submodules = -1;    /* unspecified */
static struct string_list server_options = STRING_LIST_INIT_NODUP;
static int option_remote_submodules;
static const char *bundle_uri;
|
2017-03-18 01:38:03 +03:00
|
|
|
|
|
|
|
static int recurse_submodules_cb(const struct option *opt,
|
|
|
|
const char *arg, int unset)
|
|
|
|
{
|
|
|
|
if (unset)
|
|
|
|
string_list_clear((struct string_list *)opt->value, 0);
|
|
|
|
else if (arg)
|
|
|
|
string_list_append((struct string_list *)opt->value, arg);
|
|
|
|
else
|
|
|
|
string_list_append((struct string_list *)opt->value,
|
|
|
|
(const char *)opt->defval);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2011-08-23 05:05:15 +04:00
|
|
|
|
2008-04-27 21:39:30 +04:00
|
|
|
/*
 * Option table for "git clone"; each entry stores into one of the
 * file-scope option_* variables above.
 */
static struct option builtin_clone_options[] = {
	OPT__VERBOSITY(&option_verbosity),
	OPT_BOOL(0, "progress", &option_progress,
		 N_("force progress reporting")),
	OPT_BOOL(0, "reject-shallow", &option_reject_shallow,
		 N_("don't clone shallow repository")),
	OPT_BOOL('n', "no-checkout", &option_no_checkout,
		 N_("don't create a checkout")),
	OPT_BOOL(0, "bare", &option_bare, N_("create a bare repository")),
	/* "--naked" is a hidden historical synonym for "--bare" */
	OPT_HIDDEN_BOOL(0, "naked", &option_bare,
			N_("create a bare repository")),
	OPT_BOOL(0, "mirror", &option_mirror,
		 N_("create a mirror repository (implies --bare)")),
	OPT_BOOL('l', "local", &option_local,
		 N_("to clone from a local repository")),
	OPT_BOOL(0, "no-hardlinks", &option_no_hardlinks,
		 N_("don't use local hardlinks, always copy")),
	OPT_BOOL('s', "shared", &option_shared,
		 N_("setup as shared repository")),
	/* optional pathspec argument; defaults to "." when bare */
	{ OPTION_CALLBACK, 0, "recurse-submodules", &option_recurse_submodules,
	  N_("pathspec"), N_("initialize submodules in the clone"),
	  PARSE_OPT_OPTARG, recurse_submodules_cb, (intptr_t)"." },
	OPT_ALIAS(0, "recursive", "recurse-submodules"),
	OPT_INTEGER('j', "jobs", &max_jobs,
		    N_("number of submodules cloned in parallel")),
	OPT_STRING(0, "template", &option_template, N_("template-directory"),
		   N_("directory from which templates will be used")),
	OPT_STRING_LIST(0, "reference", &option_required_reference, N_("repo"),
			N_("reference repository")),
	OPT_STRING_LIST(0, "reference-if-able", &option_optional_reference,
			N_("repo"), N_("reference repository")),
	OPT_BOOL(0, "dissociate", &option_dissociate,
		 N_("use --reference only while cloning")),
	OPT_STRING('o', "origin", &option_origin, N_("name"),
		   N_("use <name> instead of 'origin' to track upstream")),
	OPT_STRING('b', "branch", &option_branch, N_("branch"),
		   N_("checkout <branch> instead of the remote's HEAD")),
	OPT_STRING('u', "upload-pack", &option_upload_pack, N_("path"),
		   N_("path to git-upload-pack on the remote")),
	OPT_STRING(0, "depth", &option_depth, N_("depth"),
		   N_("create a shallow clone of that depth")),
	OPT_STRING(0, "shallow-since", &option_since, N_("time"),
		   N_("create a shallow clone since a specific time")),
	OPT_STRING_LIST(0, "shallow-exclude", &option_not, N_("revision"),
			N_("deepen history of shallow clone, excluding rev")),
	OPT_BOOL(0, "single-branch", &option_single_branch,
		 N_("clone only one branch, HEAD or --branch")),
	OPT_BOOL(0, "no-tags", &option_no_tags,
		 N_("don't clone any tags, and make later fetches not to follow them")),
	OPT_BOOL(0, "shallow-submodules", &option_shallow_submodules,
		 N_("any cloned submodules will be shallow")),
	OPT_STRING(0, "separate-git-dir", &real_git_dir, N_("gitdir"),
		   N_("separate git dir from working tree")),
	OPT_STRING(0, "ref-format", &ref_format, N_("format"),
		   N_("specify the reference format to use")),
	OPT_STRING_LIST('c', "config", &option_config, N_("key=value"),
			N_("set config inside the new repository")),
	OPT_STRING_LIST(0, "server-option", &server_options,
			N_("server-specific"), N_("option to transmit")),
	OPT_IPVERSION(&family),
	OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
	OPT_BOOL(0, "also-filter-submodules", &option_filter_submodules,
		 N_("apply partial clone filters to submodules")),
	OPT_BOOL(0, "remote-submodules", &option_remote_submodules,
		 N_("any cloned submodules will use their remote-tracking branch")),
	OPT_BOOL(0, "sparse", &option_sparse_checkout,
		 N_("initialize sparse-checkout file to include only files at root")),
	OPT_STRING(0, "bundle-uri", &bundle_uri,
		   N_("uri"), N_("a URI for downloading bundles before fetching from origin remote")),
	OPT_END()
};
|
|
|
|
|
2015-08-10 12:37:55 +03:00
|
|
|
/*
 * Try the standard suffixes on the candidate path in "path" to locate
 * either a local repository directory or a bundle file.  "path" is
 * mutated in place while probing.  On success *is_bundle is set and a
 * pointer into path->buf (or, for a gitfile, the buffer returned by
 * read_gitfile()) is returned; the caller must copy it before "path"
 * is modified or released.  Returns NULL when nothing matches.
 */
static const char *get_repo_path_1(struct strbuf *path, int *is_bundle)
{
	/* directory suffixes: prefer a working tree's ".git" over bare */
	static char *suffix[] = { "/.git", "", ".git/.git", ".git" };
	static char *bundle_suffix[] = { ".bundle", "" };
	size_t baselen = path->len;
	struct stat st;
	int i;

	/* first pass: look for a repository directory or gitfile */
	for (i = 0; i < ARRAY_SIZE(suffix); i++) {
		strbuf_setlen(path, baselen);
		strbuf_addstr(path, suffix[i]);
		if (stat(path->buf, &st))
			continue;
		if (S_ISDIR(st.st_mode) && is_git_directory(path->buf)) {
			*is_bundle = 0;
			return path->buf;
		} else if (S_ISREG(st.st_mode) && st.st_size > 8) {
			/* Is it a "gitfile"? */
			char signature[8];
			const char *dst;
			int len, fd = open(path->buf, O_RDONLY);
			if (fd < 0)
				continue;
			/* a gitfile starts with the literal "gitdir: " */
			len = read_in_full(fd, signature, 8);
			close(fd);
			if (len != 8 || strncmp(signature, "gitdir: ", 8))
				continue;
			dst = read_gitfile(path->buf);
			if (dst) {
				*is_bundle = 0;
				return dst;
			}
		}
	}

	/* second pass: look for a bundle file instead */
	for (i = 0; i < ARRAY_SIZE(bundle_suffix); i++) {
		strbuf_setlen(path, baselen);
		strbuf_addstr(path, bundle_suffix[i]);
		if (!stat(path->buf, &st) && S_ISREG(st.st_mode)) {
			*is_bundle = 1;
			return path->buf;
		}
	}

	return NULL;
}
|
|
|
|
|
2015-08-10 12:37:55 +03:00
|
|
|
static char *get_repo_path(const char *repo, int *is_bundle)
|
|
|
|
{
|
|
|
|
struct strbuf path = STRBUF_INIT;
|
|
|
|
const char *raw;
|
|
|
|
char *canon;
|
|
|
|
|
|
|
|
strbuf_addstr(&path, repo);
|
|
|
|
raw = get_repo_path_1(&path, is_bundle);
|
2017-01-26 20:54:23 +03:00
|
|
|
canon = raw ? absolute_pathdup(raw) : NULL;
|
2015-08-10 12:37:55 +03:00
|
|
|
strbuf_release(&path);
|
|
|
|
return canon;
|
|
|
|
}
|
|
|
|
|
2011-08-23 05:05:15 +04:00
|
|
|
static int add_one_reference(struct string_list_item *item, void *cb_data)
|
2008-04-27 21:39:30 +04:00
|
|
|
{
|
2016-08-16 00:53:24 +03:00
|
|
|
struct strbuf err = STRBUF_INIT;
|
2016-08-16 00:53:26 +03:00
|
|
|
int *required = cb_data;
|
2016-08-16 00:53:24 +03:00
|
|
|
char *ref_git = compute_alternate_path(item->string, &err);
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2016-08-16 00:53:26 +03:00
|
|
|
if (!ref_git) {
|
|
|
|
if (*required)
|
|
|
|
die("%s", err.buf);
|
|
|
|
else
|
|
|
|
fprintf(stderr,
|
|
|
|
_("info: Could not add alternate for '%s': %s\n"),
|
|
|
|
item->string, err.buf);
|
|
|
|
} else {
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
strbuf_addf(&sb, "%s/objects", ref_git);
|
|
|
|
add_to_alternates_file(sb.buf);
|
|
|
|
strbuf_release(&sb);
|
|
|
|
}
|
2013-12-05 17:02:31 +04:00
|
|
|
|
2016-08-16 00:53:24 +03:00
|
|
|
strbuf_release(&err);
|
2016-08-16 00:53:26 +03:00
|
|
|
free(ref_git);
|
2011-08-23 05:05:15 +04:00
|
|
|
return 0;
|
|
|
|
}
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2011-08-23 05:05:15 +04:00
|
|
|
static void setup_reference(void)
|
|
|
|
{
|
2016-08-16 00:53:26 +03:00
|
|
|
int required = 1;
|
|
|
|
for_each_string_list(&option_required_reference,
|
|
|
|
add_one_reference, &required);
|
|
|
|
required = 0;
|
|
|
|
for_each_string_list(&option_optional_reference,
|
|
|
|
add_one_reference, &required);
|
2008-04-27 21:39:30 +04:00
|
|
|
}
|
|
|
|
|
2019-05-10 00:29:22 +03:00
|
|
|
static void copy_alternates(struct strbuf *src, const char *src_repo)
|
2011-08-23 05:05:16 +04:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Read from the source objects/info/alternates file
|
|
|
|
* and copy the entries to corresponding file in the
|
|
|
|
* destination repository with add_to_alternates_file().
|
|
|
|
* Both src and dst have "$path/objects/info/alternates".
|
|
|
|
*
|
|
|
|
* Instead of copying bit-for-bit from the original,
|
|
|
|
* we need to append to existing one so that the already
|
|
|
|
* created entry via "clone -s" is not lost, and also
|
|
|
|
* to turn entries with paths relative to the original
|
|
|
|
* absolute, so that they can be used in the new repository.
|
|
|
|
*/
|
2017-05-03 13:16:47 +03:00
|
|
|
FILE *in = xfopen(src->buf, "r");
|
2011-08-23 05:05:16 +04:00
|
|
|
struct strbuf line = STRBUF_INIT;
|
|
|
|
|
2015-10-28 23:29:24 +03:00
|
|
|
while (strbuf_getline(&line, in) != EOF) {
|
2014-11-30 11:24:27 +03:00
|
|
|
char *abs_path;
|
2011-08-23 05:05:16 +04:00
|
|
|
if (!line.len || line.buf[0] == '#')
|
|
|
|
continue;
|
|
|
|
if (is_absolute_path(line.buf)) {
|
|
|
|
add_to_alternates_file(line.buf);
|
|
|
|
continue;
|
|
|
|
}
|
2014-11-30 11:24:27 +03:00
|
|
|
abs_path = mkpathdup("%s/objects/%s", src_repo, line.buf);
|
clone: detect errors in normalize_path_copy
When we are copying the alternates from the source
repository, if we find a relative path that is too deep for
the source (e.g., "../../../objects" from "/repo.git/objects"),
then normalize_path_copy will report an error and leave
trash in the buffer, which we will add to our new alternates
file. Instead, let's detect the error, print a warning, and
skip copying that alternate.
There's no need to die. The relative path is probably just
broken cruft in the source repo. If it turns out to have
been important for accessing some objects, we rely on other
parts of the clone to detect that, just as they would with a
missing object in the source repo itself (though note that
clones with "-s" are inherently local, which may do fewer
object-quality checks in the first place).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-10-05 17:29:29 +03:00
|
|
|
if (!normalize_path_copy(abs_path, abs_path))
|
|
|
|
add_to_alternates_file(abs_path);
|
|
|
|
else
|
|
|
|
warning("skipping invalid relative alternate: %s/%s",
|
|
|
|
src_repo, line.buf);
|
2014-11-30 11:24:27 +03:00
|
|
|
free(abs_path);
|
2011-08-23 05:05:16 +04:00
|
|
|
}
|
|
|
|
strbuf_release(&line);
|
|
|
|
fclose(in);
|
|
|
|
}
|
|
|
|
|
2019-07-11 02:59:02 +03:00
|
|
|
/*
 * Create the directory "pathname" with the given mode, tolerating the
 * case where a directory by that name already exists.  Any other
 * failure, or "pathname" existing as a non-directory, is fatal.
 */
static void mkdir_if_missing(const char *pathname, mode_t mode)
{
	struct stat sb;

	if (!mkdir(pathname, mode))
		return;		/* freshly created */

	if (errno != EEXIST)
		die_errno(_("failed to create directory '%s'"), pathname);

	/* something already exists there; it must be a directory */
	if (stat(pathname, &sb))
		die_errno(_("failed to stat '%s'"), pathname);
	if (!S_ISDIR(sb.st_mode))
		die(_("%s exists and is not a directory"), pathname);
}
|
|
|
|
|
2011-08-23 05:05:16 +04:00
|
|
|
static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
|
2019-07-11 02:59:03 +03:00
|
|
|
const char *src_repo)
|
2008-04-27 21:39:30 +04:00
|
|
|
{
|
|
|
|
int src_len, dest_len;
|
2019-07-11 02:59:03 +03:00
|
|
|
struct dir_iterator *iter;
|
|
|
|
int iter_status;
|
2020-03-10 16:11:22 +03:00
|
|
|
struct strbuf realpath = STRBUF_INIT;
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2019-07-11 02:59:02 +03:00
|
|
|
mkdir_if_missing(dest->buf, 0777);
|
2008-04-27 21:39:30 +04:00
|
|
|
|
builtin/clone.c: disallow `--local` clones with symlinks
When cloning a repository with `--local`, Git relies on either making a
hardlink or copy to every file in the "objects" directory of the source
repository. This is done through the callpath `cmd_clone()` ->
`clone_local()` -> `copy_or_link_directory()`.
The way this optimization works is by enumerating every file and
directory recursively in the source repository's `$GIT_DIR/objects`
directory, and then either making a copy or hardlink of each file. The
only exception to this rule is when copying the "alternates" file, in
which case paths are rewritten to be absolute before writing a new
"alternates" file in the destination repo.
One quirk of this implementation is that it dereferences symlinks when
cloning. This behavior was most recently modified in 36596fd2df (clone:
better handle symlinked files at .git/objects/, 2019-07-10), which
attempted to support `--local` clones of repositories with symlinks in
their objects directory in a platform-independent way.
Unfortunately, this behavior of dereferencing symlinks (that is,
creating a hardlink or copy of the source's link target in the
destination repository) can be used as a component in attacking a
victim by inadvertently exposing the contents of file stored outside of
the repository.
Take, for example, a repository that stores a Dockerfile and is used to
build Docker images. When building an image, Docker copies the directory
contents into the VM, and then instructs the VM to execute the
Dockerfile at the root of the copied directory. This protects against
directory traversal attacks by copying symbolic links as-is without
dereferencing them.
That is, if a user has a symlink pointing at their private key material
(where the symlink is present in the same directory as the Dockerfile,
but the key itself is present outside of that directory), the key is
unreadable to a Docker image, since the link will appear broken from the
container's point of view.
This behavior enables an attack whereby a victim is convinced to clone a
repository containing an embedded submodule (with a URL like
"file:///proc/self/cwd/path/to/submodule") which has a symlink pointing
at a path containing sensitive information on the victim's machine. If a
user is tricked into doing this, the contents at the destination of
those symbolic links are exposed to the Docker image at runtime.
One approach to preventing this behavior is to recreate symlinks in the
destination repository. But this is problematic, since symlinking the
objects directory are not well-supported. (One potential problem is that
when sharing, e.g. a "pack" directory via symlinks, different writers
performing garbage collection may consider different sets of objects to
be reachable, enabling a situation whereby garbage collecting one
repository may remove reachable objects in another repository).
Instead, prohibit the local clone optimization when any symlinks are
present in the `$GIT_DIR/objects` directory of the source repository.
Users may clone the repository again by prepending the "file://" scheme
to their clone URL, or by adding the `--no-local` option to their `git
clone` invocation.
The directory iterator used by `copy_or_link_directory()` must no longer
dereference symlinks (i.e., it *must* call `lstat()` instead of `stat()`
in order to discover whether or not there are symlinks present). This has
no bearing on the overall behavior, since we will immediately `die()` on
encounter a symlink.
Note that t5604.33 suggests that we do support local clones with
symbolic links in the source repository's objects directory, but this
was likely unintentional, or at least did not take into consideration
the problem with sharing parts of the objects directory with symbolic
links at the time. Update this test to reflect which options are and
aren't supported.
Helped-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
2022-07-29 00:35:17 +03:00
|
|
|
iter = dir_iterator_begin(src->buf, DIR_ITERATOR_PEDANTIC);
|
2019-07-11 02:59:03 +03:00
|
|
|
|
clone: error specifically with --local and symlinked objects
6f054f9fb3 (builtin/clone.c: disallow --local clones with
symlinks, 2022-07-28) gives a good error message when "git clone
--local" fails when the repo to clone has symlinks in
"$GIT_DIR/objects". In bffc762f87 (dir-iterator: prevent top-level
symlinks without FOLLOW_SYMLINKS, 2023-01-24), we later extended this
restriction to the case where "$GIT_DIR/objects" is itself a symlink,
but we didn't update the error message then - bffc762f87's tests show
that we print a generic "failed to start iterator over" message.
This is exacerbated by the fact that Documentation/git-clone.txt
mentions neither restriction, so users are left wondering if this is
intentional behavior or not.
Fix this by adding a check to builtin/clone.c: when doing a local clone,
perform an extra check to see if "$GIT_DIR/objects" is a symlink, and if
so, assume that that was the reason for the failure and report the
relevant information. Ideally, dir_iterator_begin() would tell us that
the real failure reason is the presence of the symlink, but (as far as I
can tell) there isn't an appropriate errno value for that.
Also, update Documentation/git-clone.txt to reflect that this
restriction exists.
Signed-off-by: Glen Choo <chooglen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-04-11 01:18:50 +03:00
|
|
|
if (!iter) {
|
|
|
|
if (errno == ENOTDIR) {
|
|
|
|
int saved_errno = errno;
|
|
|
|
struct stat st;
|
|
|
|
|
|
|
|
if (!lstat(src->buf, &st) && S_ISLNK(st.st_mode))
|
|
|
|
die(_("'%s' is a symlink, refusing to clone with --local"),
|
|
|
|
src->buf);
|
|
|
|
errno = saved_errno;
|
|
|
|
}
|
2019-07-11 02:59:03 +03:00
|
|
|
die_errno(_("failed to start iterator over '%s'"), src->buf);
|
clone: error specifically with --local and symlinked objects
6f054f9fb3 (builtin/clone.c: disallow --local clones with
symlinks, 2022-07-28) gives a good error message when "git clone
--local" fails when the repo to clone has symlinks in
"$GIT_DIR/objects". In bffc762f87 (dir-iterator: prevent top-level
symlinks without FOLLOW_SYMLINKS, 2023-01-24), we later extended this
restriction to the case where "$GIT_DIR/objects" is itself a symlink,
but we didn't update the error message then - bffc762f87's tests show
that we print a generic "failed to start iterator over" message.
This is exacerbated by the fact that Documentation/git-clone.txt
mentions neither restriction, so users are left wondering if this is
intentional behavior or not.
Fix this by adding a check to builtin/clone.c: when doing a local clone,
perform an extra check to see if "$GIT_DIR/objects" is a symlink, and if
so, assume that that was the reason for the failure and report the
relevant information. Ideally, dir_iterator_begin() would tell us that
the real failure reason is the presence of the symlink, but (as far as I
can tell) there isn't an appropriate errno value for that.
Also, update Documentation/git-clone.txt to reflect that this
restriction exists.
Signed-off-by: Glen Choo <chooglen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-04-11 01:18:50 +03:00
|
|
|
}
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2008-11-21 03:45:00 +03:00
|
|
|
strbuf_addch(src, '/');
|
|
|
|
src_len = src->len;
|
|
|
|
strbuf_addch(dest, '/');
|
|
|
|
dest_len = dest->len;
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2019-07-11 02:59:03 +03:00
|
|
|
while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
|
2008-11-21 03:45:00 +03:00
|
|
|
strbuf_setlen(src, src_len);
|
2019-07-11 02:59:03 +03:00
|
|
|
strbuf_addstr(src, iter->relative_path);
|
2008-11-21 03:45:00 +03:00
|
|
|
strbuf_setlen(dest, dest_len);
|
2019-07-11 02:59:03 +03:00
|
|
|
strbuf_addstr(dest, iter->relative_path);
|
|
|
|
|
builtin/clone.c: disallow `--local` clones with symlinks
When cloning a repository with `--local`, Git relies on either making a
hardlink or copy to every file in the "objects" directory of the source
repository. This is done through the callpath `cmd_clone()` ->
`clone_local()` -> `copy_or_link_directory()`.
The way this optimization works is by enumerating every file and
directory recursively in the source repository's `$GIT_DIR/objects`
directory, and then either making a copy or hardlink of each file. The
only exception to this rule is when copying the "alternates" file, in
which case paths are rewritten to be absolute before writing a new
"alternates" file in the destination repo.
One quirk of this implementation is that it dereferences symlinks when
cloning. This behavior was most recently modified in 36596fd2df (clone:
better handle symlinked files at .git/objects/, 2019-07-10), which
attempted to support `--local` clones of repositories with symlinks in
their objects directory in a platform-independent way.
Unfortunately, this behavior of dereferencing symlinks (that is,
creating a hardlink or copy of the source's link target in the
destination repository) can be used as a component in attacking a
victim by inadvertently exposing the contents of files stored outside of
the repository.
Take, for example, a repository that stores a Dockerfile and is used to
build Docker images. When building an image, Docker copies the directory
contents into the VM, and then instructs the VM to execute the
Dockerfile at the root of the copied directory. This protects against
directory traversal attacks by copying symbolic links as-is without
dereferencing them.
That is, if a user has a symlink pointing at their private key material
(where the symlink is present in the same directory as the Dockerfile,
but the key itself is present outside of that directory), the key is
unreadable to a Docker image, since the link will appear broken from the
container's point of view.
This behavior enables an attack whereby a victim is convinced to clone a
repository containing an embedded submodule (with a URL like
"file:///proc/self/cwd/path/to/submodule") which has a symlink pointing
at a path containing sensitive information on the victim's machine. If a
user is tricked into doing this, the contents at the destination of
those symbolic links are exposed to the Docker image at runtime.
One approach to preventing this behavior is to recreate symlinks in the
destination repository. But this is problematic, since symlinking the
objects directory is not well-supported. (One potential problem is that
when sharing, e.g. a "pack" directory via symlinks, different writers
performing garbage collection may consider different sets of objects to
be reachable, enabling a situation whereby garbage collecting one
repository may remove reachable objects in another repository).
Instead, prohibit the local clone optimization when any symlinks are
present in the `$GIT_DIR/objects` directory of the source repository.
Users may clone the repository again by prepending the "file://" scheme
to their clone URL, or by adding the `--no-local` option to their `git
clone` invocation.
The directory iterator used by `copy_or_link_directory()` must no longer
dereference symlinks (i.e., it *must* call `lstat()` instead of `stat()`
in order to discover whether or not there are symlinks present). This has
no bearing on the overall behavior, since we will immediately `die()` upon
encountering a symlink.
Note that t5604.33 suggests that we do support local clones with
symbolic links in the source repository's objects directory, but this
was likely unintentional, or at least did not take into consideration
the problem with sharing parts of the objects directory with symbolic
links at the time. Update this test to reflect which options are and
aren't supported.
Helped-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
2022-07-29 00:35:17 +03:00
|
|
|
if (S_ISLNK(iter->st.st_mode))
|
|
|
|
die(_("symlink '%s' exists, refusing to clone with --local"),
|
|
|
|
iter->relative_path);
|
2019-07-11 02:59:03 +03:00
|
|
|
|
|
|
|
if (S_ISDIR(iter->st.st_mode)) {
|
|
|
|
mkdir_if_missing(dest->buf, 0777);
|
2011-08-23 05:05:16 +04:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Files that cannot be copied bit-for-bit... */
|
2019-07-11 02:59:04 +03:00
|
|
|
if (!fspathcmp(iter->relative_path, "info/alternates")) {
|
2019-05-10 00:29:22 +03:00
|
|
|
copy_alternates(src, src_repo);
|
2008-04-27 21:39:30 +04:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2008-11-21 03:45:00 +03:00
|
|
|
if (unlink(dest->buf) && errno != ENOENT)
|
2011-02-23 02:41:26 +03:00
|
|
|
die_errno(_("failed to unlink '%s'"), dest->buf);
|
2008-05-20 22:15:14 +04:00
|
|
|
if (!option_no_hardlinks) {
|
2020-03-10 16:11:22 +03:00
|
|
|
strbuf_realpath(&realpath, src->buf, 1);
|
|
|
|
if (!link(realpath.buf, dest->buf))
|
2008-05-20 22:15:14 +04:00
|
|
|
continue;
|
2012-05-30 15:10:16 +04:00
|
|
|
if (option_local > 0)
|
2011-02-23 02:41:26 +03:00
|
|
|
die_errno(_("failed to create link '%s'"), dest->buf);
|
2008-05-20 22:15:14 +04:00
|
|
|
option_no_hardlinks = 1;
|
2008-04-27 21:39:30 +04:00
|
|
|
}
|
2009-09-12 13:03:48 +04:00
|
|
|
if (copy_file_with_time(dest->buf, src->buf, 0666))
|
2011-02-23 02:41:26 +03:00
|
|
|
die_errno(_("failed to copy file to '%s'"), dest->buf);
|
2008-04-27 21:39:30 +04:00
|
|
|
}
|
2019-07-11 02:59:03 +03:00
|
|
|
|
|
|
|
if (iter_status != ITER_DONE) {
|
|
|
|
strbuf_setlen(src, src_len);
|
|
|
|
die(_("failed to iterate over '%s'"), src->buf);
|
|
|
|
}
|
2020-03-10 16:11:22 +03:00
|
|
|
|
|
|
|
strbuf_release(&realpath);
|
2008-04-27 21:39:30 +04:00
|
|
|
}
|
|
|
|
|
2012-01-16 13:46:12 +04:00
|
|
|
static void clone_local(const char *src_repo, const char *dest_repo)
|
2008-04-27 21:39:30 +04:00
|
|
|
{
|
2011-08-23 05:05:16 +04:00
|
|
|
if (option_shared) {
|
|
|
|
struct strbuf alt = STRBUF_INIT;
|
2017-12-12 02:16:12 +03:00
|
|
|
get_common_dir(&alt, src_repo);
|
|
|
|
strbuf_addstr(&alt, "/objects");
|
2011-08-23 05:05:16 +04:00
|
|
|
add_to_alternates_file(alt.buf);
|
|
|
|
strbuf_release(&alt);
|
|
|
|
} else {
|
|
|
|
struct strbuf src = STRBUF_INIT;
|
|
|
|
struct strbuf dest = STRBUF_INIT;
|
2015-09-28 16:06:15 +03:00
|
|
|
get_common_dir(&src, src_repo);
|
|
|
|
get_common_dir(&dest, dest_repo);
|
|
|
|
strbuf_addstr(&src, "/objects");
|
|
|
|
strbuf_addstr(&dest, "/objects");
|
2019-07-11 02:59:03 +03:00
|
|
|
copy_or_link_directory(&src, &dest, src_repo);
|
2008-11-21 03:45:00 +03:00
|
|
|
strbuf_release(&src);
|
|
|
|
strbuf_release(&dest);
|
2008-04-27 21:39:30 +04:00
|
|
|
}
|
|
|
|
|
2010-04-23 16:37:22 +04:00
|
|
|
if (0 <= option_verbosity)
|
2013-09-19 00:05:13 +04:00
|
|
|
fprintf(stderr, _("done.\n"));
|
2008-04-27 21:39:30 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Partially-created clone state scheduled for removal on abort; see
 * remove_junk().  The *_flags values are passed through to
 * remove_dir_recursively().
 */
static const char *junk_work_tree;
static int junk_work_tree_flags;
static const char *junk_git_dir;
static int junk_git_dir_flags;
static enum {
	JUNK_LEAVE_NONE,	/* remove both the git dir and the work tree */
	JUNK_LEAVE_REPO,	/* checkout failed: keep everything, warn the user */
	JUNK_LEAVE_ALL		/* keep everything, silently */
} junk_mode = JUNK_LEAVE_NONE;

/* Warning shown when the clone succeeded but the checkout did not. */
static const char junk_leave_repo_msg[] =
	N_("Clone succeeded, but checkout failed.\n"
	   "You can inspect what was checked out with 'git status'\n"
	   "and retry with 'git restore --source=HEAD :/'\n");
|
2008-04-27 21:39:30 +04:00
|
|
|
|
|
|
|
static void remove_junk(void)
|
|
|
|
{
|
2008-10-09 23:12:12 +04:00
|
|
|
struct strbuf sb = STRBUF_INIT;
|
2013-03-27 02:22:09 +04:00
|
|
|
|
|
|
|
switch (junk_mode) {
|
|
|
|
case JUNK_LEAVE_REPO:
|
|
|
|
warning("%s", _(junk_leave_repo_msg));
|
|
|
|
/* fall-through */
|
|
|
|
case JUNK_LEAVE_ALL:
|
|
|
|
return;
|
|
|
|
default:
|
|
|
|
/* proceed to removal */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2008-04-27 21:39:30 +04:00
|
|
|
if (junk_git_dir) {
|
|
|
|
strbuf_addstr(&sb, junk_git_dir);
|
2018-01-03 00:11:39 +03:00
|
|
|
remove_dir_recursively(&sb, junk_git_dir_flags);
|
2008-04-27 21:39:30 +04:00
|
|
|
strbuf_reset(&sb);
|
|
|
|
}
|
|
|
|
if (junk_work_tree) {
|
|
|
|
strbuf_addstr(&sb, junk_work_tree);
|
2018-01-03 00:11:39 +03:00
|
|
|
remove_dir_recursively(&sb, junk_work_tree_flags);
|
2008-04-27 21:39:30 +04:00
|
|
|
}
|
2017-08-30 20:49:37 +03:00
|
|
|
strbuf_release(&sb);
|
2008-04-27 21:39:30 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Signal handler: remove the partial clone, then restore the previous
 * handler for this signal (via sigchain) and re-raise it so the process
 * still terminates with the expected signal disposition.
 */
static void remove_junk_on_signal(int signo)
{
	remove_junk();
	sigchain_pop(signo);
	raise(signo);
}
|
|
|
|
|
2012-01-16 13:46:13 +04:00
|
|
|
static struct ref *find_remote_branch(const struct ref *refs, const char *branch)
|
|
|
|
{
|
|
|
|
struct ref *ref;
|
|
|
|
struct strbuf head = STRBUF_INIT;
|
|
|
|
strbuf_addstr(&head, "refs/heads/");
|
|
|
|
strbuf_addstr(&head, branch);
|
|
|
|
ref = find_ref_by_name(refs, head.buf);
|
|
|
|
strbuf_release(&head);
|
2012-01-16 13:46:15 +04:00
|
|
|
|
|
|
|
if (ref)
|
|
|
|
return ref;
|
|
|
|
|
|
|
|
strbuf_addstr(&head, "refs/tags/");
|
|
|
|
strbuf_addstr(&head, branch);
|
|
|
|
ref = find_ref_by_name(refs, head.buf);
|
|
|
|
strbuf_release(&head);
|
|
|
|
|
2012-01-16 13:46:13 +04:00
|
|
|
return ref;
|
|
|
|
}
|
|
|
|
|
2009-09-26 07:54:42 +04:00
|
|
|
/*
 * Compute the list of remote refs we want to fetch, each mapped to its
 * local counterpart via the given refspecs.  Returns a newly allocated
 * ref list; when present, the remote HEAD heads the list.
 */
static struct ref *wanted_peer_refs(const struct ref *refs,
				    struct refspec *refspec)
{
	/* Always ask for HEAD explicitly (may be NULL if not advertised). */
	struct ref *head = copy_ref(find_ref_by_name(refs, "HEAD"));
	struct ref *local_refs = head;
	struct ref **tail = head ? &head->next : &local_refs;

	if (option_single_branch) {
		struct ref *remote_head = NULL;

		if (!option_branch)
			remote_head = guess_remote_head(head, refs, 0);
		else {
			/*
			 * --branch was given: restart the list without HEAD
			 * and resolve the requested branch (or tag) instead.
			 */
			local_refs = NULL;
			tail = &local_refs;
			remote_head = copy_ref(find_remote_branch(refs, option_branch));
		}

		if (!remote_head && option_branch)
			warning(_("Could not find remote branch %s to clone."),
				option_branch);
		else {
			/* Map the single chosen ref through every refspec. */
			int i;
			for (i = 0; i < refspec->nr; i++)
				get_fetch_map(remote_head, &refspec->items[i],
					      &tail, 0);

			/* if --branch=tag, pull the requested tag explicitly */
			get_fetch_map(remote_head, tag_refspec, &tail, 0);
		}
		free_refs(remote_head);
	} else {
		/* Map every advertised ref through each configured refspec. */
		int i;
		for (i = 0; i < refspec->nr; i++)
			get_fetch_map(refs, &refspec->items[i], &tail, 0);
	}

	/* Follow tags unless --mirror, --single-branch, or --no-tags. */
	if (!option_mirror && !option_single_branch && !option_no_tags)
		get_fetch_map(refs, tag_refspec, &tail, 0);

	return local_refs;
}
|
|
|
|
|
|
|
|
/*
 * Create the local counterpart (peer_ref) of every mapped remote ref
 * in a single ref transaction.  Dies on any transaction failure.
 */
static void write_remote_refs(const struct ref *local_refs)
{
	const struct ref *r;

	struct ref_transaction *t;
	struct strbuf err = STRBUF_INIT;

	t = ref_transaction_begin(&err);
	if (!t)
		die("%s", err.buf);

	for (r = local_refs; r; r = r->next) {
		/* Entries without a peer_ref get no local ref. */
		if (!r->peer_ref)
			continue;
		if (ref_transaction_create(t, r->peer_ref->name, &r->old_oid,
					   0, NULL, &err))
			die("%s", err.buf);
	}

	/* Commit all the ref creations at once. */
	if (initial_ref_transaction_commit(t, &err))
		die("%s", err.buf);

	strbuf_release(&err);
	ref_transaction_free(t);
}
|
|
|
|
|
2012-01-07 18:45:59 +04:00
|
|
|
static void write_followtags(const struct ref *refs, const char *msg)
|
|
|
|
{
|
|
|
|
const struct ref *ref;
|
|
|
|
for (ref = refs; ref; ref = ref->next) {
|
2013-12-01 00:55:40 +04:00
|
|
|
if (!starts_with(ref->name, "refs/tags/"))
|
2012-01-07 18:45:59 +04:00
|
|
|
continue;
|
2013-12-01 00:55:40 +04:00
|
|
|
if (ends_with(ref->name, "^{}"))
|
2012-01-07 18:45:59 +04:00
|
|
|
continue;
|
2023-03-28 16:58:50 +03:00
|
|
|
if (!repo_has_object_file_with_flags(the_repository, &ref->old_oid,
|
|
|
|
OBJECT_INFO_QUICK |
|
|
|
|
OBJECT_INFO_SKIP_FETCH_OBJECT))
|
2012-01-07 18:45:59 +04:00
|
|
|
continue;
|
2017-10-16 01:06:51 +03:00
|
|
|
update_ref(msg, ref->name, &ref->old_oid, NULL, 0,
|
|
|
|
UPDATE_REFS_DIE_ON_ERR);
|
2012-01-07 18:45:59 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-01 16:09:50 +03:00
|
|
|
static const struct object_id *iterate_ref_map(void *cb_data)
|
2013-03-26 00:26:27 +04:00
|
|
|
{
|
|
|
|
struct ref **rm = cb_data;
|
|
|
|
struct ref *ref = *rm;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Skip anything missing a peer_ref, which we are not
|
|
|
|
* actually going to write a ref for.
|
|
|
|
*/
|
|
|
|
while (ref && !ref->peer_ref)
|
|
|
|
ref = ref->next;
|
|
|
|
if (!ref)
|
2021-09-01 16:09:50 +03:00
|
|
|
return NULL;
|
2013-03-26 00:26:27 +04:00
|
|
|
|
|
|
|
*rm = ref->next;
|
2021-09-01 16:09:50 +03:00
|
|
|
return &ref->old_oid;
|
2013-03-26 00:26:27 +04:00
|
|
|
}
|
|
|
|
|
2012-01-16 13:46:11 +04:00
|
|
|
static void update_remote_refs(const struct ref *refs,
|
|
|
|
const struct ref *mapped_refs,
|
|
|
|
const struct ref *remote_head_points_at,
|
|
|
|
const char *branch_top,
|
2013-05-26 05:16:17 +04:00
|
|
|
const char *msg,
|
2013-07-18 23:48:28 +04:00
|
|
|
struct transport *transport,
|
connected: always use partial clone optimization
With 50033772d5 ("connected: verify promisor-ness of partial clone",
2020-01-30), the fast path (checking promisor packs) in
check_connected() now passes a subset of the slow path (rev-list) - if
all objects to be checked are found in promisor packs, both the fast
path and the slow path will pass; otherwise, the fast path will
definitely not pass. This means that we can always attempt the fast path
whenever we need to do the slow path.
The fast path is currently guarded by a flag; therefore, remove that
flag. Also, make the fast path fallback to the slow path - if the fast
path fails, the failing OID and all remaining OIDs will be passed to
rev-list.
The main user-visible benefit is the performance of fetch from a partial
clone - specifically, the speedup of the connectivity check done before
the fetch. In particular, a no-op fetch into a partial clone on my
computer was sped up from 7 seconds to 0.01 seconds. This is a
complement to the work in 2df1aa239c ("fetch: forgo full
connectivity check if --filter", 2020-01-30), which is the child of the
aforementioned 50033772d5. In that commit, the connectivity check
*after* the fetch was sped up.
The addition of the fast path might cause performance reductions in
these cases:
- If a partial clone or a fetch into a partial clone fails, Git will
fruitlessly run rev-list (it is expected that everything fetched
would go into promisor packs, so if that didn't happen, it is most
likely that rev-list will fail too).
- Any connectivity checks done by receive-pack, in the (in my opinion,
unlikely) event that a partial clone serves receive-pack.
I think that these cases are rare enough, and the performance reduction
in this case minor enough (additional object DB access), that the
benefit of avoiding a flag outweighs these.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Reviewed-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-03-21 01:00:45 +03:00
|
|
|
int check_connectivity)
|
2012-01-16 13:46:11 +04:00
|
|
|
{
|
2013-03-26 00:26:27 +04:00
|
|
|
const struct ref *rm = mapped_refs;
|
|
|
|
|
clone: drop connectivity check for local clones
Commit 0433ad1 (clone: run check_everything_connected,
2013-03-25) added the same connectivity check to clone that
we use for fetching. The intent was to provide enough safety
checks that "git clone git://..." could be counted on to
detect bit errors and other repo corruption, and not
silently propagate them to the clone.
For local clones, this turns out to be a bad idea, for two
reasons:
1. Local clones use hard linking (or even shared object
stores), and so complete far more quickly. The time
spent on the connectivity check is therefore
proportionally much more painful.
2. Local clones do not actually meet our safety guarantee
anyway. The connectivity check makes sure we have all
of the objects we claim to, but it does not check for
bit errors. We will notice bit errors in commits and
trees, but we do not load blob objects at all. Whereas
over the pack transport, we actually recompute the sha1
of each object in the incoming packfile; bit errors
change the sha1 of the object, which is then caught by
the connectivity check.
This patch drops the connectivity check in the local case.
Note that we have to revert the changes from 0433ad1 to
t5710, as we no longer notice the corruption during clone.
We could go a step further and provide a "verify even local
clones" option, but it is probably not worthwhile. You can
already spell that as "cd foo.git && git fsck && git clone ."
or as "git clone --no-local foo.git".
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-08 11:30:41 +04:00
|
|
|
if (check_connectivity) {
|
check_everything_connected: use a struct with named options
The number of variants of check_everything_connected has
grown over the years, so that the "real" function takes
several possibly-zero, possibly-NULL arguments. We hid the
complexity behind some wrapper functions, but this doesn't
scale well when we want to add new options.
If we add more wrapper variants to handle the new options,
then we can get a combinatorial explosion when those options
might be used together (right now nobody wants to use both
"shallow" and "transport" together, so we get by with just a
few wrappers).
If instead we add new parameters to each function, each of
which can have a default value, then callers who want the
defaults end up with confusing invocations like:
check_everything_connected(fn, 0, data, -1, 0, NULL);
where it is unclear which parameter is which (and every
caller needs updated when we add new options).
Instead, let's add a struct to hold all of the optional
parameters. This is a little more verbose for the callers
(who have to declare the struct and fill it in), but it
makes their code much easier to follow, because every option
is named as it is set (and unused options do not have to be
mentioned at all).
Note that we could also stick the iteration function and its
callback data into the option struct, too. But since those
are required for each call, by avoiding doing so, we can let
very simple callers just pass "NULL" for the options and not
worry about the struct at all.
While we're touching each site, let's also rename the
function to check_connected(). The existing name was quite
long, and not all of the wrappers even used the full name.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-15 13:30:40 +03:00
|
|
|
struct check_connected_options opt = CHECK_CONNECTED_INIT;
|
|
|
|
|
|
|
|
opt.transport = transport;
|
2016-07-15 13:33:18 +03:00
|
|
|
opt.progress = transport->progress;
|
check_everything_connected: use a struct with named options
The number of variants of check_everything_connected has
grown over the years, so that the "real" function takes
several possibly-zero, possibly-NULL arguments. We hid the
complexity behind some wrapper functions, but this doesn't
scale well when we want to add new options.
If we add more wrapper variants to handle the new options,
then we can get a combinatorial explosion when those options
might be used together (right now nobody wants to use both
"shallow" and "transport" together, so we get by with just a
few wrappers).
If instead we add new parameters to each function, each of
which can have a default value, then callers who want the
defaults end up with confusing invocations like:
check_everything_connected(fn, 0, data, -1, 0, NULL);
where it is unclear which parameter is which (and every
caller needs updated when we add new options).
Instead, let's add a struct to hold all of the optional
parameters. This is a little more verbose for the callers
(who have to declare the struct and fill it in), but it
makes their code much easier to follow, because every option
is named as it is set (and unused options do not have to be
mentioned at all).
Note that we could also stick the iteration function and its
callback data into the option struct, too. But since those
are required for each call, by avoiding doing so, we can let
very simple callers just pass "NULL" for the options and not
worry about the struct at all.
While we're touching each site, let's also rename the
function to check_connected(). The existing name was quite
long, and not all of the wrappers even used the full name.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-15 13:30:40 +03:00
|
|
|
|
|
|
|
if (check_connected(iterate_ref_map, &rm, &opt))
|
clone: drop connectivity check for local clones
Commit 0433ad1 (clone: run check_everything_connected,
2013-03-25) added the same connectivity check to clone that
we use for fetching. The intent was to provide enough safety
checks that "git clone git://..." could be counted on to
detect bit errors and other repo corruption, and not
silently propagate them to the clone.
For local clones, this turns out to be a bad idea, for two
reasons:
1. Local clones use hard linking (or even shared object
stores), and so complete far more quickly. The time
spent on the connectivity check is therefore
proportionally much more painful.
2. Local clones do not actually meet our safety guarantee
anyway. The connectivity check makes sure we have all
of the objects we claim to, but it does not check for
bit errors. We will notice bit errors in commits and
trees, but we do not load blob objects at all. Whereas
over the pack transport, we actually recompute the sha1
of each object in the incoming packfile; bit errors
change the sha1 of the object, which is then caught by
the connectivity check.
This patch drops the connectivity check in the local case.
Note that we have to revert the changes from 0433ad1 to
t5710, as we no longer notice the corruption during clone.
We could go a step further and provide a "verify even local
clones" option, but it is probably not worthwhile. You can
already spell that as "cd foo.git && git fsck && git clone ."
or as "git clone --no-local foo.git".
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-08 11:30:41 +04:00
|
|
|
die(_("remote did not send all necessary objects"));
|
|
|
|
}
|
2013-03-26 00:26:27 +04:00
|
|
|
|
2012-01-16 13:46:11 +04:00
|
|
|
if (refs) {
|
|
|
|
write_remote_refs(mapped_refs);
|
2017-04-27 02:12:33 +03:00
|
|
|
if (option_single_branch && !option_no_tags)
|
2012-01-16 13:46:11 +04:00
|
|
|
write_followtags(refs, msg);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (remote_head_points_at && !option_bare) {
|
|
|
|
struct strbuf head_ref = STRBUF_INIT;
|
|
|
|
strbuf_addstr(&head_ref, branch_top);
|
|
|
|
strbuf_addstr(&head_ref, "HEAD");
|
2016-01-12 12:57:34 +03:00
|
|
|
if (create_symref(head_ref.buf,
|
|
|
|
remote_head_points_at->peer_ref->name,
|
|
|
|
msg) < 0)
|
2016-02-27 09:41:55 +03:00
|
|
|
die(_("unable to update %s"), head_ref.buf);
|
2016-01-12 12:57:34 +03:00
|
|
|
strbuf_release(&head_ref);
|
2012-01-16 13:46:11 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-01-16 13:46:10 +04:00
|
|
|
/*
 * Point HEAD of the new clone at the right place and, for non-bare
 * clones of a branch, set up the branch configuration.
 *
 * "our" is the local ref we created for the branch being checked out
 * (if any), "remote" is the remote's HEAD ref (used when we could not
 * match it to a local branch), "unborn" is the target of the remote's
 * unborn HEAD (if the remote reported one), and "msg" is the reflog
 * message to use for any ref updates.
 *
 * Dies if HEAD cannot be written.
 */
static void update_head(const struct ref *our, const struct ref *remote,
			const char *unborn, const char *msg)
{
	const char *head;

	if (our && skip_prefix(our->name, "refs/heads/", &head)) {
		/* Local default branch link */
		if (create_symref("HEAD", our->name, NULL) < 0)
			die(_("unable to update HEAD"));
		if (!option_bare) {
			/*
			 * Record the commit in the branch's reflog and wire up
			 * branch.<name>.remote/merge for the checked-out branch.
			 */
			update_ref(msg, "HEAD", &our->old_oid, NULL, 0,
				   UPDATE_REFS_DIE_ON_ERR);
			install_branch_config(0, head, remote_name, our->name);
		}
	} else if (our) {
		struct commit *c = lookup_commit_reference(the_repository,
							   &our->old_oid);
		/* --branch specifies a non-branch (i.e. tags), detach HEAD */
		update_ref(msg, "HEAD", &c->object.oid, NULL, REF_NO_DEREF,
			   UPDATE_REFS_DIE_ON_ERR);
	} else if (remote) {
		/*
		 * We know remote HEAD points to a non-branch, or
		 * HEAD points to a branch but we don't know which one.
		 * Detach HEAD in all these cases.
		 */
		update_ref(msg, "HEAD", &remote->old_oid, NULL, REF_NO_DEREF,
			   UPDATE_REFS_DIE_ON_ERR);
	} else if (unborn && skip_prefix(unborn, "refs/heads/", &head)) {
		/*
		 * Unborn head from remote; same as "our" case above except
		 * that we have no ref to update.
		 */
		if (create_symref("HEAD", unborn, NULL) < 0)
			die(_("unable to update HEAD"));
		if (!option_bare)
			install_branch_config(0, head, remote_name, unborn);
	}
}
|
|
|
|
|
2019-11-22 01:04:35 +03:00
|
|
|
static int git_sparse_checkout_init(const char *repo)
|
|
|
|
{
|
2022-10-30 14:51:14 +03:00
|
|
|
struct child_process cmd = CHILD_PROCESS_INIT;
|
2019-11-22 01:04:35 +03:00
|
|
|
int result = 0;
|
2022-10-30 14:51:14 +03:00
|
|
|
strvec_pushl(&cmd.args, "-C", repo, "sparse-checkout", "set", NULL);
|
2019-11-22 01:04:35 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We must apply the setting in the current process
|
|
|
|
* for the later checkout to use the sparse-checkout file.
|
|
|
|
*/
|
|
|
|
core_apply_sparse_checkout = 1;
|
|
|
|
|
2022-10-30 14:51:14 +03:00
|
|
|
cmd.git_cmd = 1;
|
|
|
|
if (run_command(&cmd)) {
|
2019-11-22 01:04:35 +03:00
|
|
|
error(_("failed to initialize sparse-checkout"));
|
|
|
|
result = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
clone, submodule: pass partial clone filters to submodules
When cloning a repo with a --filter and with --recurse-submodules
enabled, the partial clone filter only applies to the top-level repo.
This can lead to unexpected bandwidth and disk usage for projects which
include large submodules. For example, a user might wish to make a
partial clone of Gerrit and would run:
`git clone --recurse-submodules --filter=blob:5k https://gerrit.googlesource.com/gerrit`.
However, only the superproject would be a partial clone; all the
submodules would have all blobs downloaded regardless of their size.
With this change, the same filter can also be applied to submodules,
meaning the expected bandwidth and disk savings apply consistently.
To avoid changing default behavior, add a new clone flag,
`--also-filter-submodules`. When this is set along with `--filter` and
`--recurse-submodules`, the filter spec is passed along to git-submodule
and git-submodule--helper, such that submodule clones also have the
filter applied.
This applies the same filter to the superproject and all submodules.
Users who need to customize the filter per-submodule would need to clone
with `--no-recurse-submodules` and then manually initialize each
submodule with the proper filter.
Applying filters to submodules should be safe thanks to Jonathan Tan's
recent work [1, 2, 3] eliminating the use of alternates as a method of
accessing submodule objects, so any submodule object access now triggers
a lazy fetch from the submodule's promisor remote if the accessed object
is missing. This patch is a reworked version of [4], which was created
prior to Jonathan Tan's work.
[1]: 8721e2e (Merge branch 'jt/partial-clone-submodule-1', 2021-07-16)
[2]: 11e5d0a (Merge branch 'jt/grep-wo-submodule-odb-as-alternate',
2021-09-20)
[3]: 162a13b (Merge branch 'jt/no-abuse-alternate-odb-for-submodules',
2021-10-25)
[4]: https://lore.kernel.org/git/52bf9d45b8e2b72ff32aa773f2415bf7b2b86da2.1563322192.git.steadmon@google.com/
Signed-off-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-05 08:00:49 +03:00
|
|
|
/*
 * Populate the working tree of the new clone from HEAD and, if
 * requested, recursively clone submodules.
 *
 * submodule_progress: propagate "--progress" to the submodule update.
 * filter_submodules:  pass the partial-clone filter spec down to
 *                     submodule clones as well.
 *
 * Returns 0 on success; non-zero if the post-checkout hook or the
 * submodule update failed. Dies on checkout/index errors.
 */
static int checkout(int submodule_progress, int filter_submodules)
{
	struct object_id oid;
	char *head;
	struct lock_file lock_file = LOCK_INIT;
	struct unpack_trees_options opts;
	struct tree *tree;
	struct tree_desc t;
	int err = 0;

	if (option_no_checkout)
		return 0;

	head = resolve_refdup("HEAD", RESOLVE_REF_READING, &oid, NULL);
	if (!head) {
		warning(_("remote HEAD refers to nonexistent ref, "
			  "unable to checkout"));
		return 0;
	}
	if (!strcmp(head, "HEAD")) {
		/* Detached HEAD: advise the user unless advice is disabled. */
		if (advice_enabled(ADVICE_DETACHED_HEAD))
			detach_advice(oid_to_hex(&oid));
		FREE_AND_NULL(head);
	} else {
		if (!starts_with(head, "refs/heads/"))
			die(_("HEAD not found below refs/heads!"));
	}

	/* We need to be in the new work tree for the checkout */
	setup_work_tree();

	repo_hold_locked_index(the_repository, &lock_file, LOCK_DIE_ON_ERROR);

	/* One-way merge of HEAD's tree into the (empty) index and worktree. */
	memset(&opts, 0, sizeof opts);
	opts.update = 1;
	opts.merge = 1;
	opts.clone = 1;
	opts.preserve_ignored = 0;
	opts.fn = oneway_merge;
	opts.verbose_update = (option_verbosity >= 0);
	opts.src_index = &the_index;
	opts.dst_index = &the_index;
	init_checkout_metadata(&opts.meta, head, &oid, NULL);

	tree = parse_tree_indirect(&oid);
	if (!tree)
		die(_("unable to parse commit %s"), oid_to_hex(&oid));
	if (parse_tree(tree) < 0)
		exit(128);
	init_tree_desc(&t, &tree->object.oid, tree->buffer, tree->size);
	if (unpack_trees(1, &t, &opts) < 0)
		die(_("unable to checkout working tree"));

	free(head);

	if (write_locked_index(&the_index, &lock_file, COMMIT_LOCK))
		die(_("unable to write new index file"));

	/* Old HEAD is the null OID: this is the initial checkout. */
	err |= run_hooks_l("post-checkout", oid_to_hex(null_oid()),
			   oid_to_hex(&oid), "1", NULL);

	if (!err && (option_recurse_submodules.nr > 0)) {
		struct child_process cmd = CHILD_PROCESS_INIT;

		strvec_pushl(&cmd.args, "submodule", "update", "--require-init",
			     "--recursive", NULL);

		if (option_shallow_submodules == 1)
			strvec_push(&cmd.args, "--depth=1");

		if (max_jobs != -1)
			strvec_pushf(&cmd.args, "--jobs=%d", max_jobs);

		/*
		 * The children never see a tty on stderr, so progress must be
		 * requested explicitly when the parent clone showed it.
		 */
		if (submodule_progress)
			strvec_push(&cmd.args, "--progress");

		if (option_verbosity < 0)
			strvec_push(&cmd.args, "--quiet");

		if (option_remote_submodules) {
			strvec_push(&cmd.args, "--remote");
			strvec_push(&cmd.args, "--no-fetch");
		}

		/* Apply the superproject's partial-clone filter to submodules. */
		if (filter_submodules && filter_options.choice)
			strvec_pushf(&cmd.args, "--filter=%s",
				     expand_list_objects_filter_spec(&filter_options));

		if (option_single_branch >= 0)
			strvec_push(&cmd.args, option_single_branch ?
				    "--single-branch" :
				    "--no-single-branch");

		cmd.git_cmd = 1;
		err = run_command(&cmd);
	}

	return err;
}
|
|
|
|
|
config: add ctx arg to config_fn_t
Add a new "const struct config_context *ctx" arg to config_fn_t to hold
additional information about the config iteration operation.
config_context has a "struct key_value_info kvi" member that holds
metadata about the config source being read (e.g. what kind of config
source it is, the filename, etc). In this series, we're only interested
in .kvi, so we could have just used "struct key_value_info" as an arg,
but config_context makes it possible to add/adjust members in the future
without changing the config_fn_t signature. We could also consider other
ways of organizing the args (e.g. moving the config name and value into
config_context or key_value_info), but in my experiments, the
incremental benefit doesn't justify the added complexity (e.g. a
config_fn_t will sometimes invoke another config_fn_t but with a
different config value).
In subsequent commits, the .kvi member will replace the global "struct
config_reader" in config.c, making config iteration a global-free
operation. It requires much more work for the machinery to provide
meaningful values of .kvi, so for now, merely change the signature and
call sites, pass NULL as a placeholder value, and don't rely on the arg
in any meaningful way.
Most of the changes are performed by
contrib/coccinelle/config_fn_ctx.pending.cocci, which, for every
config_fn_t:
- Modifies the signature to accept "const struct config_context *ctx"
- Passes "ctx" to any inner config_fn_t, if needed
- Adds UNUSED attributes to "ctx", if needed
Most config_fn_t instances are easily identified by seeing if they are
called by the various config functions. Most of the remaining ones are
manually named in the .cocci patch. Manual cleanups are still needed,
but the majority of it is trivial; it's either adjusting config_fn_t
that the .cocci patch didn't catch, or adding forward declarations of
"struct config_context ctx" to make the signatures make sense.
The non-trivial changes are in cases where we are invoking a config_fn_t
outside of config machinery, and we now need to decide what value of
"ctx" to pass. These cases are:
- trace2/tr2_cfg.c:tr2_cfg_set_fl()
This is indirectly called by git_config_set() so that the trace2
machinery can notice the new config values and update its settings
using the tr2 config parsing function, i.e. tr2_cfg_cb().
- builtin/checkout.c:checkout_main()
This calls git_xmerge_config() as a shorthand for parsing a CLI arg.
This might be worth refactoring away in the future, since
git_xmerge_config() can call git_default_config(), which can do much
more than just parsing.
Handle them by creating a KVI_INIT macro that initializes "struct
key_value_info" to a reasonable default, and use that to construct the
"ctx" arg.
Signed-off-by: Glen Choo <chooglen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-28 22:26:22 +03:00
|
|
|
static int git_clone_config(const char *k, const char *v,
|
|
|
|
const struct config_context *ctx, void *cb)
|
2020-10-01 06:46:11 +03:00
|
|
|
{
|
2020-10-01 06:46:16 +03:00
|
|
|
if (!strcmp(k, "clone.defaultremotename")) {
|
2023-12-07 10:11:14 +03:00
|
|
|
if (!v)
|
|
|
|
return config_error_nonbool(k);
|
2020-10-01 06:46:16 +03:00
|
|
|
free(remote_name);
|
|
|
|
remote_name = xstrdup(v);
|
|
|
|
}
|
2021-04-01 13:46:59 +03:00
|
|
|
if (!strcmp(k, "clone.rejectshallow"))
|
|
|
|
config_reject_shallow = git_config_bool(k, v);
|
clone, submodule: pass partial clone filters to submodules
When cloning a repo with a --filter and with --recurse-submodules
enabled, the partial clone filter only applies to the top-level repo.
This can lead to unexpected bandwidth and disk usage for projects which
include large submodules. For example, a user might wish to make a
partial clone of Gerrit and would run:
`git clone --recurse-submodules --filter=blob:5k https://gerrit.googlesource.com/gerrit`.
However, only the superproject would be a partial clone; all the
submodules would have all blobs downloaded regardless of their size.
With this change, the same filter can also be applied to submodules,
meaning the expected bandwidth and disk savings apply consistently.
To avoid changing default behavior, add a new clone flag,
`--also-filter-submodules`. When this is set along with `--filter` and
`--recurse-submodules`, the filter spec is passed along to git-submodule
and git-submodule--helper, such that submodule clones also have the
filter applied.
This applies the same filter to the superproject and all submodules.
Users who need to customize the filter per-submodule would need to clone
with `--no-recurse-submodules` and then manually initialize each
submodule with the proper filter.
Applying filters to submodules should be safe thanks to Jonathan Tan's
recent work [1, 2, 3] eliminating the use of alternates as a method of
accessing submodule objects, so any submodule object access now triggers
a lazy fetch from the submodule's promisor remote if the accessed object
is missing. This patch is a reworked version of [4], which was created
prior to Jonathan Tan's work.
[1]: 8721e2e (Merge branch 'jt/partial-clone-submodule-1', 2021-07-16)
[2]: 11e5d0a (Merge branch 'jt/grep-wo-submodule-odb-as-alternate',
2021-09-20)
[3]: 162a13b (Merge branch 'jt/no-abuse-alternate-odb-for-submodules',
2021-10-25)
[4]: https://lore.kernel.org/git/52bf9d45b8e2b72ff32aa773f2415bf7b2b86da2.1563322192.git.steadmon@google.com/
Signed-off-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-05 08:00:49 +03:00
|
|
|
if (!strcmp(k, "clone.filtersubmodules"))
|
|
|
|
config_filter_submodules = git_config_bool(k, v);
|
2021-04-01 13:46:59 +03:00
|
|
|
|
config: add ctx arg to config_fn_t
Add a new "const struct config_context *ctx" arg to config_fn_t to hold
additional information about the config iteration operation.
config_context has a "struct key_value_info kvi" member that holds
metadata about the config source being read (e.g. what kind of config
source it is, the filename, etc). In this series, we're only interested
in .kvi, so we could have just used "struct key_value_info" as an arg,
but config_context makes it possible to add/adjust members in the future
without changing the config_fn_t signature. We could also consider other
ways of organizing the args (e.g. moving the config name and value into
config_context or key_value_info), but in my experiments, the
incremental benefit doesn't justify the added complexity (e.g. a
config_fn_t will sometimes invoke another config_fn_t but with a
different config value).
In subsequent commits, the .kvi member will replace the global "struct
config_reader" in config.c, making config iteration a global-free
operation. It requires much more work for the machinery to provide
meaningful values of .kvi, so for now, merely change the signature and
call sites, pass NULL as a placeholder value, and don't rely on the arg
in any meaningful way.
Most of the changes are performed by
contrib/coccinelle/config_fn_ctx.pending.cocci, which, for every
config_fn_t:
- Modifies the signature to accept "const struct config_context *ctx"
- Passes "ctx" to any inner config_fn_t, if needed
- Adds UNUSED attributes to "ctx", if needed
Most config_fn_t instances are easily identified by seeing if they are
called by the various config functions. Most of the remaining ones are
manually named in the .cocci patch. Manual cleanups are still needed,
but the majority of it is trivial; it's either adjusting config_fn_t
that the .cocci patch didn't catch, or adding forward declarations of
"struct config_context ctx" to make the signatures make sense.
The non-trivial changes are in cases where we are invoking a config_fn_t
outside of config machinery, and we now need to decide what value of
"ctx" to pass. These cases are:
- trace2/tr2_cfg.c:tr2_cfg_set_fl()
This is indirectly called by git_config_set() so that the trace2
machinery can notice the new config values and update its settings
using the tr2 config parsing function, i.e. tr2_cfg_cb().
- builtin/checkout.c:checkout_main()
This calls git_xmerge_config() as a shorthand for parsing a CLI arg.
This might be worth refactoring away in the future, since
git_xmerge_config() can call git_default_config(), which can do much
more than just parsing.
Handle them by creating a KVI_INIT macro that initializes "struct
key_value_info" to a reasonable default, and use that to construct the
"ctx" arg.
Signed-off-by: Glen Choo <chooglen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-28 22:26:22 +03:00
|
|
|
return git_default_config(k, v, ctx, cb);
|
2020-10-01 06:46:11 +03:00
|
|
|
}
|
|
|
|
|
config: add ctx arg to config_fn_t
Add a new "const struct config_context *ctx" arg to config_fn_t to hold
additional information about the config iteration operation.
config_context has a "struct key_value_info kvi" member that holds
metadata about the config source being read (e.g. what kind of config
source it is, the filename, etc). In this series, we're only interested
in .kvi, so we could have just used "struct key_value_info" as an arg,
but config_context makes it possible to add/adjust members in the future
without changing the config_fn_t signature. We could also consider other
ways of organizing the args (e.g. moving the config name and value into
config_context or key_value_info), but in my experiments, the
incremental benefit doesn't justify the added complexity (e.g. a
config_fn_t will sometimes invoke another config_fn_t but with a
different config value).
In subsequent commits, the .kvi member will replace the global "struct
config_reader" in config.c, making config iteration a global-free
operation. It requires much more work for the machinery to provide
meaningful values of .kvi, so for now, merely change the signature and
call sites, pass NULL as a placeholder value, and don't rely on the arg
in any meaningful way.
Most of the changes are performed by
contrib/coccinelle/config_fn_ctx.pending.cocci, which, for every
config_fn_t:
- Modifies the signature to accept "const struct config_context *ctx"
- Passes "ctx" to any inner config_fn_t, if needed
- Adds UNUSED attributes to "ctx", if needed
Most config_fn_t instances are easily identified by seeing if they are
called by the various config functions. Most of the remaining ones are
manually named in the .cocci patch. Manual cleanups are still needed,
but the majority of it is trivial; it's either adjusting config_fn_t
that the .cocci patch didn't catch, or adding forward declarations of
"struct config_context ctx" to make the signatures make sense.
The non-trivial changes are in cases where we are invoking a config_fn_t
outside of config machinery, and we now need to decide what value of
"ctx" to pass. These cases are:
- trace2/tr2_cfg.c:tr2_cfg_set_fl()
This is indirectly called by git_config_set() so that the trace2
machinery can notice the new config values and update its settings
using the tr2 config parsing function, i.e. tr2_cfg_cb().
- builtin/checkout.c:checkout_main()
This calls git_xmerge_config() as a shorthand for parsing a CLI arg.
This might be worth refactoring away in the future, since
git_xmerge_config() can call git_default_config(), which can do much
more than just parsing.
Handle them by creating a KVI_INIT macro that initializes "struct
key_value_info" to a reasonable default, and use that to construct the
"ctx" arg.
Signed-off-by: Glen Choo <chooglen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-28 22:26:22 +03:00
|
|
|
static int write_one_config(const char *key, const char *value,
|
|
|
|
const struct config_context *ctx,
|
|
|
|
void *data)
|
2011-06-10 00:56:19 +04:00
|
|
|
{
|
2020-10-01 06:46:11 +03:00
|
|
|
/*
|
|
|
|
* give git_clone_config a chance to write config values back to the
|
|
|
|
* environment, since git_config_set_multivar_gently only deals with
|
|
|
|
* config-file writes
|
|
|
|
*/
|
config: add ctx arg to config_fn_t
Add a new "const struct config_context *ctx" arg to config_fn_t to hold
additional information about the config iteration operation.
config_context has a "struct key_value_info kvi" member that holds
metadata about the config source being read (e.g. what kind of config
source it is, the filename, etc). In this series, we're only interested
in .kvi, so we could have just used "struct key_value_info" as an arg,
but config_context makes it possible to add/adjust members in the future
without changing the config_fn_t signature. We could also consider other
ways of organizing the args (e.g. moving the config name and value into
config_context or key_value_info), but in my experiments, the
incremental benefit doesn't justify the added complexity (e.g. a
config_fn_t will sometimes invoke another config_fn_t but with a
different config value).
In subsequent commits, the .kvi member will replace the global "struct
config_reader" in config.c, making config iteration a global-free
operation. It requires much more work for the machinery to provide
meaningful values of .kvi, so for now, merely change the signature and
call sites, pass NULL as a placeholder value, and don't rely on the arg
in any meaningful way.
Most of the changes are performed by
contrib/coccinelle/config_fn_ctx.pending.cocci, which, for every
config_fn_t:
- Modifies the signature to accept "const struct config_context *ctx"
- Passes "ctx" to any inner config_fn_t, if needed
- Adds UNUSED attributes to "ctx", if needed
Most config_fn_t instances are easily identified by seeing if they are
called by the various config functions. Most of the remaining ones are
manually named in the .cocci patch. Manual cleanups are still needed,
but the majority of it is trivial; it's either adjusting config_fn_t
that the .cocci patch didn't catch, or adding forward declarations of
"struct config_context ctx" to make the signatures make sense.
The non-trivial changes are in cases where we are invoking a config_fn_t
outside of config machinery, and we now need to decide what value of
"ctx" to pass. These cases are:
- trace2/tr2_cfg.c:tr2_cfg_set_fl()
This is indirectly called by git_config_set() so that the trace2
machinery can notice the new config values and update its settings
using the tr2 config parsing function, i.e. tr2_cfg_cb().
- builtin/checkout.c:checkout_main()
This calls git_xmerge_config() as a shorthand for parsing a CLI arg.
This might be worth refactoring away in the future, since
git_xmerge_config() can call git_default_config(), which can do much
more than just parsing.
Handle them by creating a KVI_INIT macro that initializes "struct
key_value_info" to a reasonable default, and use that to construct the
"ctx" arg.
Signed-off-by: Glen Choo <chooglen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-28 22:26:22 +03:00
|
|
|
int apply_failed = git_clone_config(key, value, ctx, data);
|
2020-10-01 06:46:11 +03:00
|
|
|
if (apply_failed)
|
|
|
|
return apply_failed;
|
|
|
|
|
2017-05-02 03:05:15 +03:00
|
|
|
return git_config_set_multivar_gently(key,
|
|
|
|
value ? value : "true",
|
|
|
|
CONFIG_REGEX_NONE, 0);
|
2011-06-10 00:56:19 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static void write_config(struct string_list *config)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < config->nr; i++) {
|
|
|
|
if (git_config_parse_parameter(config->items[i].string,
|
|
|
|
write_one_config, NULL) < 0)
|
2016-02-27 09:41:55 +03:00
|
|
|
die(_("unable to write parameters to config file"));
|
2011-06-10 00:56:19 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-09-01 00:11:31 +04:00
|
|
|
static void write_refspec_config(const char *src_ref_prefix,
|
|
|
|
const struct ref *our_head_points_at,
|
|
|
|
const struct ref *remote_head_points_at,
|
|
|
|
struct strbuf *branch_top)
|
2012-09-20 22:04:08 +04:00
|
|
|
{
|
|
|
|
struct strbuf key = STRBUF_INIT;
|
|
|
|
struct strbuf value = STRBUF_INIT;
|
|
|
|
|
|
|
|
if (option_mirror || !option_bare) {
|
|
|
|
if (option_single_branch && !option_mirror) {
|
|
|
|
if (option_branch) {
|
2014-06-24 01:27:36 +04:00
|
|
|
if (starts_with(our_head_points_at->name, "refs/tags/"))
|
2012-09-20 22:04:08 +04:00
|
|
|
strbuf_addf(&value, "+%s:%s", our_head_points_at->name,
|
|
|
|
our_head_points_at->name);
|
|
|
|
else
|
|
|
|
strbuf_addf(&value, "+%s:%s%s", our_head_points_at->name,
|
|
|
|
branch_top->buf, option_branch);
|
|
|
|
} else if (remote_head_points_at) {
|
refactor skip_prefix to return a boolean
The skip_prefix() function returns a pointer to the content
past the prefix, or NULL if the prefix was not found. While
this is nice and simple, in practice it makes it hard to use
for two reasons:
1. When you want to conditionally skip or keep the string
as-is, you have to introduce a temporary variable.
For example:
tmp = skip_prefix(buf, "foo");
if (tmp)
buf = tmp;
2. It is verbose to check the outcome in a conditional, as
you need extra parentheses to silence compiler
warnings. For example:
if ((cp = skip_prefix(buf, "foo"))
/* do something with cp */
Both of these make it harder to use for long if-chains, and
we tend to use starts_with() instead. However, the first line
of "do something" is often to then skip forward in buf past
the prefix, either using a magic constant or with an extra
strlen(3) (which is generally computed at compile time, but
means we are repeating ourselves).
This patch refactors skip_prefix() to return a simple boolean,
and to provide the pointer value as an out-parameter. If the
prefix is not found, the out-parameter is untouched. This
lets you write:
if (skip_prefix(arg, "foo ", &arg))
do_foo(arg);
else if (skip_prefix(arg, "bar ", &arg))
do_bar(arg);
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-06-18 23:44:19 +04:00
|
|
|
const char *head = remote_head_points_at->name;
|
|
|
|
if (!skip_prefix(head, "refs/heads/", &head))
|
2018-05-02 12:38:39 +03:00
|
|
|
BUG("remote HEAD points at non-head?");
|
refactor skip_prefix to return a boolean
The skip_prefix() function returns a pointer to the content
past the prefix, or NULL if the prefix was not found. While
this is nice and simple, in practice it makes it hard to use
for two reasons:
1. When you want to conditionally skip or keep the string
as-is, you have to introduce a temporary variable.
For example:
tmp = skip_prefix(buf, "foo");
if (tmp)
buf = tmp;
2. It is verbose to check the outcome in a conditional, as
you need extra parentheses to silence compiler
warnings. For example:
if ((cp = skip_prefix(buf, "foo"))
/* do something with cp */
Both of these make it harder to use for long if-chains, and
we tend to use starts_with() instead. However, the first line
of "do something" is often to then skip forward in buf past
the prefix, either using a magic constant or with an extra
strlen(3) (which is generally computed at compile time, but
means we are repeating ourselves).
This patch refactors skip_prefix() to return a simple boolean,
and to provide the pointer value as an out-parameter. If the
prefix is not found, the out-parameter is untouched. This
lets you write:
if (skip_prefix(arg, "foo ", &arg))
do_foo(arg);
else if (skip_prefix(arg, "bar ", &arg))
do_bar(arg);
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-06-18 23:44:19 +04:00
|
|
|
|
2012-09-20 22:04:08 +04:00
|
|
|
strbuf_addf(&value, "+%s:%s%s", remote_head_points_at->name,
|
refactor skip_prefix to return a boolean
The skip_prefix() function returns a pointer to the content
past the prefix, or NULL if the prefix was not found. While
this is nice and simple, in practice it makes it hard to use
for two reasons:
1. When you want to conditionally skip or keep the string
as-is, you have to introduce a temporary variable.
For example:
tmp = skip_prefix(buf, "foo");
if (tmp)
buf = tmp;
2. It is verbose to check the outcome in a conditional, as
you need extra parentheses to silence compiler
warnings. For example:
if ((cp = skip_prefix(buf, "foo"))
/* do something with cp */
Both of these make it harder to use for long if-chains, and
we tend to use starts_with() instead. However, the first line
of "do something" is often to then skip forward in buf past
the prefix, either using a magic constant or with an extra
strlen(3) (which is generally computed at compile time, but
means we are repeating ourselves).
This patch refactors skip_prefix() to return a simple boolean,
and to provide the pointer value as an out-parameter. If the
prefix is not found, the out-parameter is untouched. This
lets you write:
if (skip_prefix(arg, "foo ", &arg))
do_foo(arg);
else if (skip_prefix(arg, "bar ", &arg))
do_bar(arg);
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-06-18 23:44:19 +04:00
|
|
|
branch_top->buf, head);
|
2012-09-20 22:04:08 +04:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* otherwise, the next "git fetch" will
|
|
|
|
* simply fetch from HEAD without updating
|
2013-07-03 13:12:34 +04:00
|
|
|
* any remote-tracking branch, which is what
|
2012-09-20 22:04:08 +04:00
|
|
|
* we want.
|
|
|
|
*/
|
|
|
|
} else {
|
|
|
|
strbuf_addf(&value, "+%s*:%s*", src_ref_prefix, branch_top->buf);
|
|
|
|
}
|
|
|
|
/* Configure the remote */
|
|
|
|
if (value.len) {
|
2020-10-01 06:46:15 +03:00
|
|
|
strbuf_addf(&key, "remote.%s.fetch", remote_name);
|
2012-09-20 22:04:08 +04:00
|
|
|
git_config_set_multivar(key.buf, value.buf, "^$", 0);
|
|
|
|
strbuf_reset(&key);
|
|
|
|
|
|
|
|
if (option_mirror) {
|
2020-10-01 06:46:15 +03:00
|
|
|
strbuf_addf(&key, "remote.%s.mirror", remote_name);
|
2012-09-20 22:04:08 +04:00
|
|
|
git_config_set(key.buf, "true");
|
|
|
|
strbuf_reset(&key);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_release(&key);
|
|
|
|
strbuf_release(&value);
|
|
|
|
}
|
|
|
|
|
2014-10-14 23:38:52 +04:00
|
|
|
static void dissociate_from_references(void)
|
|
|
|
{
|
2015-10-22 19:41:17 +03:00
|
|
|
char *alternates = git_pathdup("objects/info/alternates");
|
2014-10-14 23:38:52 +04:00
|
|
|
|
2015-10-22 19:41:17 +03:00
|
|
|
if (!access(alternates, F_OK)) {
|
2022-10-30 14:55:06 +03:00
|
|
|
struct child_process cmd = CHILD_PROCESS_INIT;
|
|
|
|
|
|
|
|
cmd.git_cmd = 1;
|
|
|
|
cmd.no_stdin = 1;
|
|
|
|
strvec_pushl(&cmd.args, "repack", "-a", "-d", NULL);
|
|
|
|
if (run_command(&cmd))
|
2015-10-22 19:41:17 +03:00
|
|
|
die(_("cannot repack to clean up"));
|
|
|
|
if (unlink(alternates) && errno != ENOENT)
|
|
|
|
die_errno(_("cannot unlink temporary alternates file"));
|
|
|
|
}
|
|
|
|
free(alternates);
|
2014-10-14 23:38:52 +04:00
|
|
|
}
|
|
|
|
|
2019-10-28 19:55:23 +03:00
|
|
|
/*
 * Return 1 if "path" names anything stat()-able (file, directory,
 * symlink target, ...), 0 otherwise.
 */
static int path_exists(const char *path)
{
	struct stat st;

	return stat(path, &st) == 0;
}
|
|
|
|
|
2008-04-27 21:39:30 +04:00
|
|
|
int cmd_clone(int argc, const char **argv, const char *prefix)
|
|
|
|
{
|
2010-08-23 16:08:22 +04:00
|
|
|
int is_bundle = 0, is_local;
|
2021-04-01 13:46:59 +03:00
|
|
|
int reject_shallow = 0;
|
2008-04-27 21:39:30 +04:00
|
|
|
const char *repo_name, *repo, *work_tree, *git_dir;
|
2023-02-07 02:07:39 +03:00
|
|
|
char *repo_to_free = NULL;
|
2021-03-14 21:47:36 +03:00
|
|
|
char *path = NULL, *dir, *display_repo = NULL;
|
2020-07-10 11:47:32 +03:00
|
|
|
int dest_exists, real_dest_exists = 0;
|
2009-11-18 04:42:24 +03:00
|
|
|
const struct ref *refs, *remote_head;
|
2021-03-14 21:47:36 +03:00
|
|
|
struct ref *remote_head_points_at = NULL;
|
2009-08-26 23:05:08 +04:00
|
|
|
const struct ref *our_head_points_at;
|
2022-07-11 12:21:52 +03:00
|
|
|
char *unborn_head = NULL;
|
2022-01-24 21:09:09 +03:00
|
|
|
struct ref *mapped_refs = NULL;
|
2012-01-24 15:10:38 +04:00
|
|
|
const struct ref *ref;
|
2018-11-14 13:46:18 +03:00
|
|
|
struct strbuf key = STRBUF_INIT;
|
2024-02-27 17:27:44 +03:00
|
|
|
struct strbuf buf = STRBUF_INIT;
|
2008-11-21 03:45:01 +03:00
|
|
|
struct strbuf branch_top = STRBUF_INIT, reflog_msg = STRBUF_INIT;
|
2008-07-08 08:46:06 +04:00
|
|
|
struct transport *transport = NULL;
|
2012-01-16 13:46:13 +04:00
|
|
|
const char *src_ref_prefix = "refs/heads/";
|
2012-01-16 13:46:12 +04:00
|
|
|
struct remote *remote;
|
2012-01-24 15:10:38 +04:00
|
|
|
int err = 0, complete_refs_before_fetch = 1;
|
clone: pass --progress decision to recursive submodules
When cloning with "--recursive", we'd generally expect
submodules to show progress reports if the main clone did,
too.
In older versions of git, this mostly worked out of the
box. Since we show progress by default when stderr is a tty,
and since the child clones inherit the parent stderr, then
both processes would come to the same decision by default.
If the parent clone was asked for "--quiet", we passed down
"--quiet" to the child. However, if stderr was not a tty and
the user specified "--progress", we did not propagate this
to the child.
That's a minor bug, but things got much worse when we
switched recently to submodule--helper's update_clone
command. With that change, the stderr of the child clones
are always connected to a pipe, and we never output
progress at all.
This patch teaches git-submodule and git-submodule--helper
how to pass down an explicit "--progress" flag when cloning.
The clone command then decides to propagate that flag based
on the cloning decision made earlier (which takes into
account isatty(2) of the parent process, existing --progress
or --quiet flags, etc). Since the child processes always run
without a tty on stderr, we don't have to worry about
passing an explicit "--no-progress"; it's the default for
them.
This fixes the recent loss of progress during recursive
clones. And as a bonus, it makes:
git clone --recursive --progress ... 2>&1 | cat
work by triggering progress explicitly in the children.
Signed-off-by: Jeff King <peff@peff.net>
Acked-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-22 08:24:46 +03:00
|
|
|
int submodule_progress;
|
clone, submodule: pass partial clone filters to submodules
When cloning a repo with a --filter and with --recurse-submodules
enabled, the partial clone filter only applies to the top-level repo.
This can lead to unexpected bandwidth and disk usage for projects which
include large submodules. For example, a user might wish to make a
partial clone of Gerrit and would run:
`git clone --recurse-submodules --filter=blob:5k https://gerrit.googlesource.com/gerrit`.
However, only the superproject would be a partial clone; all the
submodules would have all blobs downloaded regardless of their size.
With this change, the same filter can also be applied to submodules,
meaning the expected bandwidth and disk savings apply consistently.
To avoid changing default behavior, add a new clone flag,
`--also-filter-submodules`. When this is set along with `--filter` and
`--recurse-submodules`, the filter spec is passed along to git-submodule
and git-submodule--helper, such that submodule clones also have the
filter applied.
This applies the same filter to the superproject and all submodules.
Users who need to customize the filter per-submodule would need to clone
with `--no-recurse-submodules` and then manually initialize each
submodule with the proper filter.
Applying filters to submodules should be safe thanks to Jonathan Tan's
recent work [1, 2, 3] eliminating the use of alternates as a method of
accessing submodule objects, so any submodule object access now triggers
a lazy fetch from the submodule's promisor remote if the accessed object
is missing. This patch is a reworked version of [4], which was created
prior to Jonathan Tan's work.
[1]: 8721e2e (Merge branch 'jt/partial-clone-submodule-1', 2021-07-16)
[2]: 11e5d0a (Merge branch 'jt/grep-wo-submodule-odb-as-alternate',
2021-09-20)
[3]: 162a13b (Merge branch 'jt/no-abuse-alternate-odb-for-submodules',
2021-10-25)
[4]: https://lore.kernel.org/git/52bf9d45b8e2b72ff32aa773f2415bf7b2b86da2.1563322192.git.steadmon@google.com/
Signed-off-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-05 08:00:49 +03:00
|
|
|
int filter_submodules = 0;
|
2023-04-06 00:15:33 +03:00
|
|
|
int hash_algo;
|
2023-12-29 10:27:09 +03:00
|
|
|
unsigned int ref_storage_format = REF_STORAGE_FORMAT_UNKNOWN;
|
2023-05-16 09:33:43 +03:00
|
|
|
const int do_not_override_repo_unix_permissions = -1;
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2021-02-05 23:48:48 +03:00
|
|
|
struct transport_ls_refs_options transport_ls_refs_options =
|
|
|
|
TRANSPORT_LS_REFS_OPTIONS_INIT;
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2011-02-24 17:30:19 +03:00
|
|
|
packet_trace_identity("clone");
|
2020-10-01 06:46:11 +03:00
|
|
|
|
|
|
|
git_config(git_clone_config, NULL);
|
|
|
|
|
2009-05-23 22:53:12 +04:00
|
|
|
argc = parse_options(argc, argv, prefix, builtin_clone_options,
|
2008-04-27 21:39:30 +04:00
|
|
|
builtin_clone_usage, 0);
|
|
|
|
|
2009-10-29 11:10:30 +03:00
|
|
|
if (argc > 2)
|
2011-02-23 02:41:26 +03:00
|
|
|
usage_msg_opt(_("Too many arguments."),
|
2009-10-29 11:10:30 +03:00
|
|
|
builtin_clone_usage, builtin_clone_options);
|
|
|
|
|
2008-04-27 21:39:30 +04:00
|
|
|
if (argc == 0)
|
2011-02-23 02:41:26 +03:00
|
|
|
usage_msg_opt(_("You must specify a repository to clone."),
|
2009-10-29 11:10:30 +03:00
|
|
|
builtin_clone_usage, builtin_clone_options);
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2016-06-12 13:54:05 +03:00
|
|
|
if (option_depth || option_since || option_not.nr)
|
2016-06-12 13:54:00 +03:00
|
|
|
deepen = 1;
|
2012-01-07 18:45:59 +04:00
|
|
|
if (option_single_branch == -1)
|
2016-06-12 13:54:00 +03:00
|
|
|
option_single_branch = deepen ? 1 : 0;
|
2012-01-07 18:45:59 +04:00
|
|
|
|
2023-12-29 10:27:09 +03:00
|
|
|
if (ref_format) {
|
|
|
|
ref_storage_format = ref_storage_format_by_name(ref_format);
|
|
|
|
if (ref_storage_format == REF_STORAGE_FORMAT_UNKNOWN)
|
|
|
|
die(_("unknown ref storage format '%s'"), ref_format);
|
|
|
|
}
|
|
|
|
|
2008-08-02 23:38:56 +04:00
|
|
|
if (option_mirror)
|
|
|
|
option_bare = 1;
|
|
|
|
|
2008-04-27 21:39:30 +04:00
|
|
|
if (option_bare) {
|
2013-01-11 07:09:59 +04:00
|
|
|
if (real_git_dir)
|
2022-01-05 23:02:16 +03:00
|
|
|
die(_("options '%s' and '%s' cannot be used together"), "--bare", "--separate-git-dir");
|
2008-04-27 21:39:30 +04:00
|
|
|
option_no_checkout = 1;
|
|
|
|
}
|
|
|
|
|
2022-08-09 16:11:43 +03:00
|
|
|
if (bundle_uri && deepen)
|
2023-11-26 14:57:43 +03:00
|
|
|
die(_("options '%s' and '%s' cannot be used together"),
|
|
|
|
"--bundle-uri",
|
|
|
|
"--depth/--shallow-since/--shallow-exclude");
|
2022-08-09 16:11:43 +03:00
|
|
|
|
2008-04-27 21:39:30 +04:00
|
|
|
repo_name = argv[0];
|
|
|
|
|
|
|
|
path = get_repo_path(repo_name, &is_bundle);
|
2021-03-14 21:47:36 +03:00
|
|
|
if (path) {
|
|
|
|
FREE_AND_NULL(path);
|
2023-02-07 02:07:39 +03:00
|
|
|
repo = repo_to_free = absolute_pathdup(repo_name);
|
2021-03-14 21:47:36 +03:00
|
|
|
} else if (strchr(repo_name, ':')) {
|
2008-04-27 21:39:30 +04:00
|
|
|
repo = repo_name;
|
2020-06-04 23:08:29 +03:00
|
|
|
display_repo = transport_anonymize_url(repo);
|
|
|
|
} else
|
|
|
|
die(_("repository '%s' does not exist"), repo_name);
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2013-12-05 07:31:11 +04:00
|
|
|
/* no need to be strict, transport_set_option() will validate it again */
|
|
|
|
if (option_depth && atoi(option_depth) < 1)
|
|
|
|
die(_("depth %s is not a positive number"), option_depth);
|
|
|
|
|
2008-04-27 21:39:30 +04:00
|
|
|
if (argc == 2)
|
|
|
|
dir = xstrdup(argv[1]);
|
|
|
|
else
|
2021-08-10 14:46:36 +03:00
|
|
|
dir = git_url_basename(repo_name, is_bundle, option_bare);
|
|
|
|
strip_dir_trailing_slashes(dir);
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2019-10-28 19:55:23 +03:00
|
|
|
dest_exists = path_exists(dir);
|
2009-01-11 15:19:12 +03:00
|
|
|
if (dest_exists && !is_empty_dir(dir))
|
2011-02-23 02:41:26 +03:00
|
|
|
die(_("destination path '%s' already exists and is not "
|
|
|
|
"an empty directory."), dir);
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2020-07-10 11:47:32 +03:00
|
|
|
if (real_git_dir) {
|
|
|
|
real_dest_exists = path_exists(real_git_dir);
|
|
|
|
if (real_dest_exists && !is_empty_dir(real_git_dir))
|
|
|
|
die(_("repository path '%s' already exists and is not "
|
|
|
|
"an empty directory."), real_git_dir);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-06-04 23:08:29 +03:00
|
|
|
strbuf_addf(&reflog_msg, "clone: from %s",
|
|
|
|
display_repo ? display_repo : repo);
|
|
|
|
free(display_repo);
|
2008-04-27 21:39:30 +04:00
|
|
|
|
|
|
|
if (option_bare)
|
|
|
|
work_tree = NULL;
|
|
|
|
else {
|
|
|
|
work_tree = getenv("GIT_WORK_TREE");
|
2019-10-28 19:55:23 +03:00
|
|
|
if (work_tree && path_exists(work_tree))
|
2011-02-23 02:41:26 +03:00
|
|
|
die(_("working tree '%s' already exists."), work_tree);
|
2008-04-27 21:39:30 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
if (option_bare || work_tree)
|
|
|
|
git_dir = xstrdup(dir);
|
|
|
|
else {
|
|
|
|
work_tree = dir;
|
2012-09-04 21:31:14 +04:00
|
|
|
git_dir = mkpathdup("%s/.git", dir);
|
2008-04-27 21:39:30 +04:00
|
|
|
}
|
|
|
|
|
clone: initialize atexit cleanup handler earlier
If clone fails, we generally try to clean up any directories
we've created. We do this by installing an atexit handler,
so that we don't have to manually trigger cleanup. However,
since we install this after touching the filesystem, any
errors between our initial mkdir() and our atexit() call
will result in us leaving a crufty directory around.
We can fix this by moving our atexit() call earlier. It's OK
to do it before the junk_work_tree variable is set, because
remove_junk makes sure the variable is initialized. This
means we "activate" the handler by assigning to the
junk_work_tree variable, which we now bump down to just
after we call mkdir(). We probably do not want to do it
before, because a plausible reason for mkdir() to fail is
EEXIST (i.e., we are racing with another "git init"), and we
would not want to remove their work.
OTOH, this is probably not that big a deal; we will allow
cloning into an empty directory (and skip the mkdir), which
is already racy (i.e., one clone may see the other's empty
dir and start writing into it). Still, it does not hurt to
err on the side of caution here.
Note that writing into junk_work_tree and junk_git_dir after
installing the handler is also technically racy, as we call
our handler on an async signal. Depending on the platform,
we could see a sheared write to the variables. Traditionally
we have not worried about this, and indeed we already do
this later in the function. If we want to address that, it
can come as a separate topic.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-18 21:55:32 +03:00
|
|
|
atexit(remove_junk);
|
|
|
|
sigchain_push_common(remove_junk_on_signal);
|
|
|
|
|
2008-04-27 21:39:30 +04:00
|
|
|
if (!option_bare) {
|
2008-06-25 09:41:34 +04:00
|
|
|
if (safe_create_leading_directories_const(work_tree) < 0)
|
2011-02-23 02:41:26 +03:00
|
|
|
die_errno(_("could not create leading directories of '%s'"),
|
2009-06-27 19:58:46 +04:00
|
|
|
work_tree);
|
2018-01-03 00:11:39 +03:00
|
|
|
if (dest_exists)
|
|
|
|
junk_work_tree_flags |= REMOVE_DIR_KEEP_TOPLEVEL;
|
|
|
|
else if (mkdir(work_tree, 0777))
|
2015-03-18 22:02:01 +03:00
|
|
|
die_errno(_("could not create work tree dir '%s'"),
|
2009-06-27 19:58:46 +04:00
|
|
|
work_tree);
|
clone: initialize atexit cleanup handler earlier
If clone fails, we generally try to clean up any directories
we've created. We do this by installing an atexit handler,
so that we don't have to manually trigger cleanup. However,
since we install this after touching the filesystem, any
errors between our initial mkdir() and our atexit() call
will result in us leaving a crufty directory around.
We can fix this by moving our atexit() call earlier. It's OK
to do it before the junk_work_tree variable is set, because
remove_junk makes sure the variable is initialized. This
means we "activate" the handler by assigning to the
junk_work_tree variable, which we now bump down to just
after we call mkdir(). We probably do not want to do it
before, because a plausible reason for mkdir() to fail is
EEXIST (i.e., we are racing with another "git init"), and we
would not want to remove their work.
OTOH, this is probably not that big a deal; we will allow
cloning into an empty directory (and skip the mkdir), which
is already racy (i.e., one clone may see the other's empty
dir and start writing into it). Still, it does not hurt to
err on the side of caution here.
Note that writing into junk_work_tree and junk_git_dir after
installing the handler is also technically racy, as we call
our handler on an async signal. Depending on the platform,
we could see a sheared write to the variables. Traditionally
we have not worried about this, and indeed we already do
this later in the function. If we want to address that, it
can come as a separate topic.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-18 21:55:32 +03:00
|
|
|
junk_work_tree = work_tree;
|
2008-04-27 21:39:30 +04:00
|
|
|
set_git_work_tree(work_tree);
|
|
|
|
}
|
|
|
|
|
2018-01-03 00:11:39 +03:00
|
|
|
if (real_git_dir) {
|
2020-07-10 11:47:32 +03:00
|
|
|
if (real_dest_exists)
|
2018-01-03 00:11:39 +03:00
|
|
|
junk_git_dir_flags |= REMOVE_DIR_KEEP_TOPLEVEL;
|
|
|
|
junk_git_dir = real_git_dir;
|
|
|
|
} else {
|
|
|
|
if (dest_exists)
|
|
|
|
junk_git_dir_flags |= REMOVE_DIR_KEEP_TOPLEVEL;
|
|
|
|
junk_git_dir = git_dir;
|
|
|
|
}
|
2008-06-25 09:41:34 +04:00
|
|
|
if (safe_create_leading_directories_const(git_dir) < 0)
|
2011-02-23 02:41:26 +03:00
|
|
|
die(_("could not create leading directories of '%s'"), git_dir);
|
2011-03-19 18:16:56 +03:00
|
|
|
|
2011-02-23 02:41:27 +03:00
|
|
|
if (0 <= option_verbosity) {
|
|
|
|
if (option_bare)
|
2013-09-19 00:05:13 +04:00
|
|
|
fprintf(stderr, _("Cloning into bare repository '%s'...\n"), dir);
|
2011-02-23 02:41:27 +03:00
|
|
|
else
|
2013-09-19 00:05:13 +04:00
|
|
|
fprintf(stderr, _("Cloning into '%s'...\n"), dir);
|
2011-02-23 02:41:27 +03:00
|
|
|
}
|
2016-08-18 01:45:35 +03:00
|
|
|
|
2017-03-18 01:38:03 +03:00
|
|
|
if (option_recurse_submodules.nr > 0) {
|
|
|
|
struct string_list_item *item;
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
2021-08-14 04:09:56 +03:00
|
|
|
int val;
|
2017-03-18 01:38:03 +03:00
|
|
|
|
|
|
|
/* remove duplicates */
|
|
|
|
string_list_sort(&option_recurse_submodules);
|
|
|
|
string_list_remove_duplicates(&option_recurse_submodules, 0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* NEEDSWORK: In a multi-working-tree world, this needs to be
|
|
|
|
* set in the per-worktree config.
|
|
|
|
*/
|
|
|
|
for_each_string_list_item(item, &option_recurse_submodules) {
|
|
|
|
strbuf_addf(&sb, "submodule.active=%s",
|
|
|
|
item->string);
|
|
|
|
string_list_append(&option_config,
|
|
|
|
strbuf_detach(&sb, NULL));
|
|
|
|
}
|
|
|
|
|
2021-08-14 04:09:56 +03:00
|
|
|
if (!git_config_get_bool("submodule.stickyRecursiveClone", &val) &&
|
|
|
|
val)
|
|
|
|
string_list_append(&option_config, "submodule.recurse=true");
|
|
|
|
|
2016-08-18 01:45:35 +03:00
|
|
|
if (option_required_reference.nr &&
|
|
|
|
option_optional_reference.nr)
|
|
|
|
die(_("clone --recursive is not compatible with "
|
|
|
|
"both --reference and --reference-if-able"));
|
|
|
|
else if (option_required_reference.nr) {
|
|
|
|
string_list_append(&option_config,
|
|
|
|
"submodule.alternateLocation=superproject");
|
|
|
|
string_list_append(&option_config,
|
|
|
|
"submodule.alternateErrorStrategy=die");
|
|
|
|
} else if (option_optional_reference.nr) {
|
|
|
|
string_list_append(&option_config,
|
|
|
|
"submodule.alternateLocation=superproject");
|
|
|
|
string_list_append(&option_config,
|
|
|
|
"submodule.alternateErrorStrategy=info");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-12-12 10:01:07 +03:00
|
|
|
/*
|
|
|
|
* Initialize the repository, but skip initializing the reference
|
|
|
|
* database. We do not yet know about the object format of the
|
|
|
|
* repository, and reference backends may persist that information into
|
|
|
|
* their on-disk data structures.
|
|
|
|
*/
|
2023-12-29 10:26:39 +03:00
|
|
|
init_db(git_dir, real_git_dir, option_template, GIT_HASH_UNKNOWN,
|
2023-12-29 10:27:09 +03:00
|
|
|
ref_storage_format, NULL,
|
2023-12-12 10:01:07 +03:00
|
|
|
do_not_override_repo_unix_permissions, INIT_DB_QUIET | INIT_DB_SKIP_REFDB);
|
2016-09-25 06:14:37 +03:00
|
|
|
|
2021-10-22 11:55:42 +03:00
|
|
|
if (real_git_dir) {
|
|
|
|
free((char *)git_dir);
|
2016-09-25 06:14:37 +03:00
|
|
|
git_dir = real_git_dir;
|
2021-10-22 11:55:42 +03:00
|
|
|
}
|
2016-09-25 06:14:37 +03:00
|
|
|
|
2024-02-27 17:27:44 +03:00
|
|
|
/*
|
|
|
|
* We have a chicken-and-egg situation between initializing the refdb
|
|
|
|
* and spawning transport helpers:
|
|
|
|
*
|
|
|
|
* - Initializing the refdb requires us to know about the object
|
|
|
|
* format. We thus have to spawn the transport helper to learn
|
|
|
|
* about it.
|
|
|
|
*
|
|
|
|
* - The transport helper may want to access the Git repository. But
|
|
|
|
* because the refdb has not been initialized, we don't have "HEAD"
|
|
|
|
* or "refs/". Thus, the helper cannot find the Git repository.
|
|
|
|
*
|
|
|
|
* Ideally, we would have structured the helper protocol such that it's
|
|
|
|
* mandatory for the helper to first announce its capabilities without
|
|
|
|
* yet assuming a fully initialized repository. Like that, we could
|
|
|
|
* have added a "lazy-refdb-init" capability that announces whether the
|
|
|
|
* helper is ready to handle not-yet-initialized refdbs. If any helper
|
|
|
|
* didn't support them, we would have fully initialized the refdb with
|
|
|
|
* the SHA1 object format, but later on bailed out if we found out that
|
|
|
|
* the remote repository used a different object format.
|
|
|
|
*
|
|
|
|
* But we didn't, and thus we use the following workaround to partially
|
|
|
|
* initialize the repository's refdb such that it can be discovered by
|
|
|
|
* Git commands. To do so, we:
|
|
|
|
*
|
|
|
|
* - Create an invalid HEAD ref pointing at "refs/heads/.invalid".
|
|
|
|
*
|
|
|
|
* - Create the "refs/" directory.
|
|
|
|
*
|
|
|
|
* - Set up the ref storage format and repository version as
|
|
|
|
* required.
|
|
|
|
*
|
|
|
|
* This is sufficient for Git commands to discover the Git directory.
|
|
|
|
*/
|
|
|
|
initialize_repository_version(GIT_HASH_UNKNOWN,
|
|
|
|
the_repository->ref_storage_format, 1);
|
|
|
|
|
|
|
|
strbuf_addf(&buf, "%s/HEAD", git_dir);
|
|
|
|
write_file(buf.buf, "ref: refs/heads/.invalid");
|
|
|
|
|
|
|
|
strbuf_reset(&buf);
|
|
|
|
strbuf_addf(&buf, "%s/refs", git_dir);
|
|
|
|
safe_create_dir(buf.buf, 1);
|
|
|
|
|
2020-10-01 06:46:11 +03:00
|
|
|
/*
|
|
|
|
* additional config can be injected with -c, make sure it's included
|
|
|
|
* after init_db, which clears the entire config environment.
|
|
|
|
*/
|
2011-06-10 00:56:19 +04:00
|
|
|
write_config(&option_config);
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2020-10-01 06:46:11 +03:00
|
|
|
/*
|
|
|
|
* re-read config after init_db and write_config to pick up any config
|
|
|
|
* injected by --template and --config, respectively.
|
|
|
|
*/
|
|
|
|
git_config(git_clone_config, NULL);
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2021-04-01 13:46:59 +03:00
|
|
|
/*
|
|
|
|
* If option_reject_shallow is specified from CLI option,
|
|
|
|
* ignore config_reject_shallow from git_clone_config.
|
|
|
|
*/
|
|
|
|
if (config_reject_shallow != -1)
|
|
|
|
reject_shallow = config_reject_shallow;
|
|
|
|
if (option_reject_shallow != -1)
|
|
|
|
reject_shallow = option_reject_shallow;
|
|
|
|
|
clone, submodule: pass partial clone filters to submodules
When cloning a repo with a --filter and with --recurse-submodules
enabled, the partial clone filter only applies to the top-level repo.
This can lead to unexpected bandwidth and disk usage for projects which
include large submodules. For example, a user might wish to make a
partial clone of Gerrit and would run:
`git clone --recurse-submodules --filter=blob:5k https://gerrit.googlesource.com/gerrit`.
However, only the superproject would be a partial clone; all the
submodules would have all blobs downloaded regardless of their size.
With this change, the same filter can also be applied to submodules,
meaning the expected bandwidth and disk savings apply consistently.
To avoid changing default behavior, add a new clone flag,
`--also-filter-submodules`. When this is set along with `--filter` and
`--recurse-submodules`, the filter spec is passed along to git-submodule
and git-submodule--helper, such that submodule clones also have the
filter applied.
This applies the same filter to the superproject and all submodules.
Users who need to customize the filter per-submodule would need to clone
with `--no-recurse-submodules` and then manually initialize each
submodule with the proper filter.
Applying filters to submodules should be safe thanks to Jonathan Tan's
recent work [1, 2, 3] eliminating the use of alternates as a method of
accessing submodule objects, so any submodule object access now triggers
a lazy fetch from the submodule's promisor remote if the accessed object
is missing. This patch is a reworked version of [4], which was created
prior to Jonathan Tan's work.
[1]: 8721e2e (Merge branch 'jt/partial-clone-submodule-1', 2021-07-16)
[2]: 11e5d0a (Merge branch 'jt/grep-wo-submodule-odb-as-alternate',
2021-09-20)
[3]: 162a13b (Merge branch 'jt/no-abuse-alternate-odb-for-submodules',
2021-10-25)
[4]: https://lore.kernel.org/git/52bf9d45b8e2b72ff32aa773f2415bf7b2b86da2.1563322192.git.steadmon@google.com/
Signed-off-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-05 08:00:49 +03:00
|
|
|
/*
|
|
|
|
* If option_filter_submodules is specified from CLI option,
|
|
|
|
* ignore config_filter_submodules from git_clone_config.
|
|
|
|
*/
|
|
|
|
if (config_filter_submodules != -1)
|
|
|
|
filter_submodules = config_filter_submodules;
|
|
|
|
if (option_filter_submodules != -1)
|
|
|
|
filter_submodules = option_filter_submodules;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Exit if the user seems to be doing something silly with submodule
|
|
|
|
* filter flags (but not with filter configs, as those should be
|
|
|
|
* set-and-forget).
|
|
|
|
*/
|
|
|
|
if (option_filter_submodules > 0 && !filter_options.choice)
|
|
|
|
die(_("the option '%s' requires '%s'"),
|
|
|
|
"--also-filter-submodules", "--filter");
|
|
|
|
if (option_filter_submodules > 0 && !option_recurse_submodules.nr)
|
|
|
|
die(_("the option '%s' requires '%s'"),
|
|
|
|
"--also-filter-submodules", "--recurse-submodules");
|
|
|
|
|
2020-10-01 06:46:16 +03:00
|
|
|
/*
|
|
|
|
* apply the remote name provided by --origin only after this second
|
|
|
|
* call to git_config, to ensure it overrides all config-based values.
|
|
|
|
*/
|
2022-05-21 01:26:59 +03:00
|
|
|
if (option_origin) {
|
2022-05-01 08:17:15 +03:00
|
|
|
free(remote_name);
|
2020-10-01 06:46:16 +03:00
|
|
|
remote_name = xstrdup(option_origin);
|
2022-05-01 08:17:15 +03:00
|
|
|
}
|
2020-10-01 06:46:16 +03:00
|
|
|
|
2022-05-02 19:50:37 +03:00
|
|
|
if (!remote_name)
|
2020-10-01 06:46:16 +03:00
|
|
|
remote_name = xstrdup("origin");
|
|
|
|
|
|
|
|
if (!valid_remote_name(remote_name))
|
|
|
|
die(_("'%s' is not a valid remote name"), remote_name);
|
2008-04-27 21:39:30 +04:00
|
|
|
|
|
|
|
if (option_bare) {
|
2008-08-02 23:38:56 +04:00
|
|
|
if (option_mirror)
|
|
|
|
src_ref_prefix = "refs/";
|
2008-11-21 03:45:01 +03:00
|
|
|
strbuf_addstr(&branch_top, src_ref_prefix);
|
2008-04-27 21:39:30 +04:00
|
|
|
|
|
|
|
git_config_set("core.bare", "true");
|
|
|
|
} else {
|
2020-10-01 06:46:15 +03:00
|
|
|
strbuf_addf(&branch_top, "refs/remotes/%s/", remote_name);
|
2008-08-02 23:38:56 +04:00
|
|
|
}
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2020-10-01 06:46:15 +03:00
|
|
|
strbuf_addf(&key, "remote.%s.url", remote_name);
|
2010-03-29 20:48:24 +04:00
|
|
|
git_config_set(key.buf, repo);
|
|
|
|
strbuf_reset(&key);
|
|
|
|
|
2017-04-27 02:12:33 +03:00
|
|
|
if (option_no_tags) {
|
2020-10-01 06:46:15 +03:00
|
|
|
strbuf_addf(&key, "remote.%s.tagOpt", remote_name);
|
2017-04-27 02:12:33 +03:00
|
|
|
git_config_set(key.buf, "--no-tags");
|
|
|
|
strbuf_reset(&key);
|
|
|
|
}
|
|
|
|
|
2016-08-16 00:53:26 +03:00
|
|
|
if (option_required_reference.nr || option_optional_reference.nr)
|
2011-08-23 05:05:15 +04:00
|
|
|
setup_reference();
|
2010-03-29 20:48:23 +04:00
|
|
|
|
2023-12-12 10:01:03 +03:00
|
|
|
remote = remote_get_early(remote_name);
|
2009-03-06 07:56:16 +03:00
|
|
|
|
2020-09-05 17:49:30 +03:00
|
|
|
refspec_appendf(&remote->fetch, "+%s*:%s*", src_ref_prefix,
|
|
|
|
branch_top.buf);
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2014-07-17 11:09:32 +04:00
|
|
|
path = get_repo_path(remote->url[0], &is_bundle);
|
|
|
|
is_local = option_local != 0 && path && !is_bundle;
|
|
|
|
if (is_local) {
|
|
|
|
if (option_depth)
|
|
|
|
warning(_("--depth is ignored in local clones; use file:// instead."));
|
2016-06-12 13:54:00 +03:00
|
|
|
if (option_since)
|
|
|
|
warning(_("--shallow-since is ignored in local clones; use file:// instead."));
|
2016-06-12 13:54:05 +03:00
|
|
|
if (option_not.nr)
|
|
|
|
warning(_("--shallow-exclude is ignored in local clones; use file:// instead."));
|
2017-12-08 18:58:46 +03:00
|
|
|
if (filter_options.choice)
|
|
|
|
warning(_("--filter is ignored in local clones; use file:// instead."));
|
2014-07-17 11:09:32 +04:00
|
|
|
if (!access(mkpath("%s/shallow", path), F_OK)) {
|
2021-04-01 13:46:59 +03:00
|
|
|
if (reject_shallow)
|
|
|
|
die(_("source repository is shallow, reject to clone."));
|
2014-07-17 11:09:32 +04:00
|
|
|
if (option_local > 0)
|
|
|
|
warning(_("source repository is shallow, ignoring --local"));
|
|
|
|
is_local = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (option_local > 0 && !is_local)
|
|
|
|
warning(_("--local is ignored"));
|
clone: delay picking a transport until after get_repo_path()
In the previous commit, t5619 demonstrates an issue where two calls to
`get_repo_path()` could trick Git into using its local clone mechanism
in conjunction with a non-local transport.
That sequence is:
- the starting state is that the local path https:/example.com/foo is a
symlink that points to ../../../.git/modules/foo. So it's dangling.
- get_repo_path() sees that no such path exists (because it's
dangling), and thus we do not canonicalize it into an absolute path
- because we're using --separate-git-dir, we create .git/modules/foo.
Now our symlink is no longer dangling!
- we pass the url to transport_get(), which sees it as an https URL.
- we call get_repo_path() again, on the url. This second call was
introduced by f38aa83f9a (use local cloning if insteadOf makes a
local URL, 2014-07-17). The idea is that we want to pull the url
fresh from the remote.c API, because it will apply any aliases.
And of course now it sees that there is a local file, which is a
mismatch with the transport we already selected.
The issue in the above sequence is calling `transport_get()` before
deciding whether or not the repository is indeed local, and not passing
in an absolute path if it is local.
This is reminiscent of a similar bug report in [1], where it was
suggested to perform the `insteadOf` lookup earlier. Taking that
approach may not be as straightforward, since the intent is to store the
original URL in the config, but to actually fetch from the insteadOf
one, so conflating the two early on is a non-starter.
Note: we pass the path returned by `get_repo_path(remote->url[0])`,
which should be the same as `repo_name` (aside from any `insteadOf`
rewrites).
We *could* pass `absolute_pathdup()` of the same argument, which
86521acaca (Bring local clone's origin URL in line with that of a remote
clone, 2008-09-01) indicates may differ depending on the presence of
".git/" for a non-bare repo. That matters for forming relative submodule
paths, but doesn't matter for the second call, since we're just feeding
it to the transport code, which is fine either way.
[1]: https://lore.kernel.org/git/CAMoD=Bi41mB3QRn3JdZL-FGHs4w3C2jGpnJB-CqSndO7FMtfzA@mail.gmail.com/
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-01-25 03:43:48 +03:00
|
|
|
|
|
|
|
transport = transport_get(remote, path ? path : remote->url[0]);
|
|
|
|
transport_set_verbosity(transport, option_verbosity, option_progress);
|
|
|
|
transport->family = family;
|
2013-12-05 17:02:39 +04:00
|
|
|
transport->cloning = 1;
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2022-03-09 19:01:43 +03:00
|
|
|
if (is_bundle) {
|
|
|
|
struct bundle_header header = BUNDLE_HEADER_INIT;
|
|
|
|
int fd = read_bundle_header(path, &header);
|
|
|
|
int has_filter = header.filter.choice != LOFC_DISABLED;
|
|
|
|
|
|
|
|
if (fd > 0)
|
|
|
|
close(fd);
|
|
|
|
bundle_header_release(&header);
|
|
|
|
if (has_filter)
|
|
|
|
die(_("cannot clone from filtered bundle"));
|
|
|
|
}
|
|
|
|
|
clone: always set transport options
A clone will always create a transport struct, whether we
are cloning locally or using an actual protocol. In the
local case, we only use the transport to get the list of
refs, and then transfer the objects out-of-band.
However, there are many options that we do not bother
setting up in the local case. For the most part, these are
noops, because they only affect the object-fetching stage
(e.g., the --depth option). However, some options do have a
visible impact. For example, giving the path to upload-pack
via "-u" does not currently work for a local clone, even
though we need upload-pack to get the ref list.
We can just drop the conditional entirely and set these
options for both local and non-local clones. Rather than
keep track of which options impact the object versus the ref
fetching stage, we can simply let the noops be noops (and
the cost of setting the options in the first place is not
high).
The one exception is that we also check that the transport
provides both a "get_refs_list" and a "fetch" method. We
will now be checking the former for both cases (which is
good, since a transport that cannot fetch refs would not
work for a local clone), and we tweak the conditional to
check for a "fetch" only when we are non-local.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-09-19 00:35:13 +04:00
|
|
|
transport_set_option(transport, TRANS_OPT_KEEP, "yes");
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2021-04-01 13:46:59 +03:00
|
|
|
if (reject_shallow)
|
|
|
|
transport_set_option(transport, TRANS_OPT_REJECT_SHALLOW, "1");
|
clone: always set transport options
A clone will always create a transport struct, whether we
are cloning locally or using an actual protocol. In the
local case, we only use the transport to get the list of
refs, and then transfer the objects out-of-band.
However, there are many options that we do not bother
setting up in the local case. For the most part, these are
noops, because they only affect the object-fetching stage
(e.g., the --depth option). However, some options do have a
visible impact. For example, giving the path to upload-pack
via "-u" does not currently work for a local clone, even
though we need upload-pack to get the ref list.
We can just drop the conditional entirely and set these
options for both local and non-local clones. Rather than
keep track of which options impact the object versus the ref
fetching stage, we can simply let the noops be noops (and
the cost of setting the options in the first place is not
high).
The one exception is that we also check that the transport
provides both a "get_refs_list" and a "fetch" method. We
will now be checking the former for both cases (which is
good, since a transport that cannot fetch refs would not
work for a local clone), and we tweak the conditional to
check for a "fetch" only when we are non-local.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-09-19 00:35:13 +04:00
|
|
|
if (option_depth)
|
|
|
|
transport_set_option(transport, TRANS_OPT_DEPTH,
|
|
|
|
option_depth);
|
2016-06-12 13:54:00 +03:00
|
|
|
if (option_since)
|
|
|
|
transport_set_option(transport, TRANS_OPT_DEEPEN_SINCE,
|
|
|
|
option_since);
|
2016-06-12 13:54:05 +03:00
|
|
|
if (option_not.nr)
|
|
|
|
transport_set_option(transport, TRANS_OPT_DEEPEN_NOT,
|
|
|
|
(const char *)&option_not);
|
clone: always set transport options
A clone will always create a transport struct, whether we
are cloning locally or using an actual protocol. In the
local case, we only use the transport to get the list of
refs, and then transfer the objects out-of-band.
However, there are many options that we do not bother
setting up in the local case. For the most part, these are
noops, because they only affect the object-fetching stage
(e.g., the --depth option). However, some options do have a
visible impact. For example, giving the path to upload-pack
via "-u" does not currently work for a local clone, even
though we need upload-pack to get the ref list.
We can just drop the conditional entirely and set these
options for both local and non-local clones. Rather than
keep track of which options impact the object versus the ref
fetching stage, we can simply let the noops be noops (and
the cost of setting the options in the first place is not
high).
The one exception is that we also check that the transport
provides both a "get_refs_list" and a "fetch" method. We
will now be checking the former for both cases (which is
good, since a transport that cannot fetch refs would not
work for a local clone), and we tweak the conditional to
check for a "fetch" only when we are non-local.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-09-19 00:35:13 +04:00
|
|
|
if (option_single_branch)
|
|
|
|
transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, "1");
|
2008-04-27 21:39:30 +04:00
|
|
|
|
clone: always set transport options
A clone will always create a transport struct, whether we
are cloning locally or using an actual protocol. In the
local case, we only use the transport to get the list of
refs, and then transfer the objects out-of-band.
However, there are many options that we do not bother
setting up in the local case. For the most part, these are
noops, because they only affect the object-fetching stage
(e.g., the --depth option). However, some options do have a
visible impact. For example, giving the path to upload-pack
via "-u" does not currently work for a local clone, even
though we need upload-pack to get the ref list.
We can just drop the conditional entirely and set these
options for both local and non-local clones. Rather than
keep track of which options impact the object versus the ref
fetching stage, we can simply let the noops be noops (and
the cost of setting the options in the first place is not
high).
The one exception is that we also check that the transport
provides both a "get_refs_list" and a "fetch" method. We
will now be checking the former for both cases (which is
good, since a transport that cannot fetch refs would not
work for a local clone), and we tweak the conditional to
check for a "fetch" only when we are non-local.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-09-19 00:35:13 +04:00
|
|
|
if (option_upload_pack)
|
|
|
|
transport_set_option(transport, TRANS_OPT_UPLOADPACK,
|
|
|
|
option_upload_pack);
|
2013-05-26 05:16:17 +04:00
|
|
|
|
2019-04-12 22:51:22 +03:00
|
|
|
if (server_options.nr)
|
|
|
|
transport->server_options = &server_options;
|
|
|
|
|
2017-12-08 18:58:46 +03:00
|
|
|
if (filter_options.choice) {
|
2019-06-28 01:54:10 +03:00
|
|
|
const char *spec =
|
|
|
|
expand_list_objects_filter_spec(&filter_options);
|
2017-12-08 18:58:46 +03:00
|
|
|
transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
|
2019-06-28 01:54:10 +03:00
|
|
|
spec);
|
2017-12-08 18:58:46 +03:00
|
|
|
transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (transport->smart_options && !deepen && !filter_options.choice)
|
clone: always set transport options
A clone will always create a transport struct, whether we
are cloning locally or using an actual protocol. In the
local case, we only use the transport to get the list of
refs, and then transfer the objects out-of-band.
However, there are many options that we do not bother
setting up in the local case. For the most part, these are
noops, because they only affect the object-fetching stage
(e.g., the --depth option). However, some options do have a
visible impact. For example, giving the path to upload-pack
via "-u" does not currently work for a local clone, even
though we need upload-pack to get the ref list.
We can just drop the conditional entirely and set these
options for both local and non-local clones. Rather than
keep track of which options impact the object versus the ref
fetching stage, we can simply let the noops be noops (and
the cost of setting the options in the first place is not
high).
The one exception is that we also check that the transport
provides both a "get_refs_list" and a "fetch" method. We
will now be checking the former for both cases (which is
good, since a transport that cannot fetch refs would not
work for a local clone), and we tweak the conditional to
check for a "fetch" only when we are non-local.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-09-19 00:35:13 +04:00
|
|
|
transport->smart_options->check_self_contained_and_connected = 1;
|
2008-04-27 21:39:30 +04:00
|
|
|
|
builtin/clone: fix bundle URIs with mismatching object formats
We create the reference database in git-clone(1) quite early before
connecting to the remote repository. Given that we do not yet know about
the object format that the remote repository uses at that point in time
the consequence is that the refdb may be initialized with the wrong
object format.
This is not a problem in the context of the files backend as we do not
encode the object format anywhere, and furthermore the only reference
that we write between initializing the refdb and learning about the
object format is the "HEAD" symref. It will become a problem though once
we land the reftable backend, which indeed does require to know about
the proper object format at the time of creation. We thus need to
rearrange the logic in git-clone(1) so that we only initialize the refdb
once we have learned about the actual object format.
As a first step, move listing of remote references to happen earlier,
which also allows us to set up the hash algorithm of the repository
earlier now. While we aim to execute this logic as late as possible
until after most of the setup has happened already, detection of the
object format and thus later the setup of the reference database must
happen before any other logic that may spawn Git commands or otherwise
these Git commands may not recognize the repository as such.
The first Git step where we expect the repository to be fully initialized
is when we fetch bundles via bundle URIs. Funny enough, the comments
there also state that "the_repository must match the cloned repo", which
is indeed not necessarily the case for the hash algorithm right now. So
in practice it is the right thing to detect the remote's object format
before downloading bundle URIs anyway, and not doing so causes clones
with bundle URIs to fail when the local default object format does not
match the remote repository's format.
Unfortunately though, this creates a new issue: downloading bundles may
take a long time, so if we list refs beforehand they might've grown
stale meanwhile. It is not clear how to solve this issue except for a
second reference listing though after we have downloaded the bundles,
which may be an expensive thing to do.
Arguably though, it's preferable to have a staleness issue compared to
being unable to clone a repository altogether.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-12-12 10:00:54 +03:00
|
|
|
strvec_push(&transport_ls_refs_options.ref_prefixes, "HEAD");
|
|
|
|
refspec_ref_prefixes(&remote->fetch,
|
|
|
|
&transport_ls_refs_options.ref_prefixes);
|
|
|
|
if (option_branch)
|
|
|
|
expand_ref_prefix(&transport_ls_refs_options.ref_prefixes,
|
|
|
|
option_branch);
|
|
|
|
if (!option_no_tags)
|
|
|
|
strvec_push(&transport_ls_refs_options.ref_prefixes,
|
|
|
|
"refs/tags/");
|
|
|
|
|
|
|
|
refs = transport_get_remote_refs(transport, &transport_ls_refs_options);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now that we know what algorithm the remote side is using, let's set
|
|
|
|
* ours to the same thing.
|
|
|
|
*/
|
|
|
|
hash_algo = hash_algo_by_ptr(transport_get_hash_algo(transport));
|
setup: introduce "extensions.refStorage" extension
Introduce a new "extensions.refStorage" extension that allows us to
specify the ref storage format used by a repository. For now, the only
supported format is the "files" format, but this list will likely soon
be extended to also support the upcoming "reftable" format.
There have been discussions on the Git mailing list in the past around
how exactly this extension should look like. One alternative [1] that
was discussed was whether it would make sense to model the extension in
such a way that backends are arbitrarily stackable. This would allow for
a combined value of e.g. "loose,packed-refs" or "loose,reftable", which
indicates that new refs would be written via "loose" files backend and
compressed into "packed-refs" or "reftable" backends, respectively.
It is arguable though whether this flexibility and the complexity that
it brings with it is really required for now. It is not foreseeable that
there will be a proliferation of backends in the near-term future, and
the current set of existing formats and formats which are on the horizon
can easily be configured with the much simpler proposal where we have a
single value, only.
Furthermore, if we ever see that we indeed want to gain the ability to
arbitrarily stack the ref formats, then we can adapt the current
extension rather easily. Given that Git clients will refuse any unknown
value for the "extensions.refStorage" extension they would also know to
ignore a stacked "loose,packed-refs" in the future.
So let's stick with the easy proposal for the time being and wire up the
extension.
[1]: <pull.1408.git.1667846164.gitgitgadget@gmail.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-12-29 10:26:47 +03:00
|
|
|
initialize_repository_version(hash_algo, the_repository->ref_storage_format, 1);
|
builtin/clone: fix bundle URIs with mismatching object formats
We create the reference database in git-clone(1) quite early before
connecting to the remote repository. Given that we do not yet know about
the object format that the remote repository uses at that point in time
the consequence is that the refdb may be initialized with the wrong
object format.
This is not a problem in the context of the files backend as we do not
encode the object format anywhere, and furthermore the only reference
that we write between initializing the refdb and learning about the
object format is the "HEAD" symref. It will become a problem though once
we land the reftable backend, which indeed does require to know about
the proper object format at the time of creation. We thus need to
rearrange the logic in git-clone(1) so that we only initialize the refdb
once we have learned about the actual object format.
As a first step, move listing of remote references to happen earlier,
which also allows us to set up the hash algorithm of the repository
earlier now. While we aim to execute this logic as late as possible
until after most of the setup has happened already, detection of the
object format and thus later the setup of the reference database must
happen before any other logic that may spawn Git commands or otherwise
these Git commands may not recognize the repository as such.
The first Git step where we expect the repository to be fully initialized
is when we fetch bundles via bundle URIs. Funny enough, the comments
there also state that "the_repository must match the cloned repo", which
is indeed not necessarily the case for the hash algorithm right now. So
in practice it is the right thing to detect the remote's object format
before downloading bundle URIs anyway, and not doing so causes clones
with bundle URIs to fail when the local default object format does not
match the remote repository's format.
Unfortunately though, this creates a new issue: downloading bundles may
take a long time, so if we list refs beforehand they might've grown
stale meanwhile. It is not clear how to solve this issue except for a
second reference listing though after we have downloaded the bundles,
which may be an expensive thing to do.
Arguably though, it's preferable to have a staleness issue compared to
being unable to clone a repository altogether.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-12-12 10:00:54 +03:00
|
|
|
repo_set_hash_algo(the_repository, hash_algo);
|
2023-12-29 10:26:39 +03:00
|
|
|
create_reference_database(the_repository->ref_storage_format, NULL, 1);
|
builtin/clone: fix bundle URIs with mismatching object formats
We create the reference database in git-clone(1) quite early before
connecting to the remote repository. Given that we do not yet know about
the object format that the remote repository uses at that point in time
the consequence is that the refdb may be initialized with the wrong
object format.
This is not a problem in the context of the files backend as we do not
encode the object format anywhere, and furthermore the only reference
that we write between initializing the refdb and learning about the
object format is the "HEAD" symref. It will become a problem though once
we land the reftable backend, which indeed does require to know about
the proper object format at the time of creation. We thus need to
rearrange the logic in git-clone(1) so that we only initialize the refdb
once we have learned about the actual object format.
As a first step, move listing of remote references to happen earlier,
which also allow us to set up the hash algorithm of the repository
earlier now. While we aim to execute this logic as late as possible
until after most of the setup has happened already, detection of the
object format and thus later the setup of the reference database must
happen before any other logic that may spawn Git commands or otherwise
these Git commands may not recognize the repository as such.
The first Git step where we expect the repository to be fully initalized
is when we fetch bundles via bundle URIs. Funny enough, the comments
there also state that "the_repository must match the cloned repo", which
is indeed not necessarily the case for the hash algorithm right now. So
in practice it is the right thing to detect the remote's object format
before downloading bundle URIs anyway, and not doing so causes clones
with bundle URIs to fail when the local default object format does not
match the remote repository's format.
Unfortunately though, this creates a new issue: downloading bundles may
take a long time, so if we list refs beforehand they might've grown
stale meanwhile. It is not clear how to solve this issue except for a
second reference listing though after we have downloaded the bundles,
which may be an expensive thing to do.
Arguably though, it's preferable to have a staleness issue compared to
being unable to clone a repository altogether.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-12-12 10:00:54 +03:00
|
|
|
|
2022-08-09 16:11:41 +03:00
|
|
|
/*
|
|
|
|
* Before fetching from the remote, download and install bundle
|
|
|
|
* data from the --bundle-uri option.
|
|
|
|
*/
|
|
|
|
if (bundle_uri) {
|
2023-01-31 16:29:15 +03:00
|
|
|
int has_heuristic = 0;
|
|
|
|
|
2022-08-09 16:11:41 +03:00
|
|
|
/* At this point, we need the_repository to match the cloned repo. */
|
2022-08-23 17:05:13 +03:00
|
|
|
if (repo_init(the_repository, git_dir, work_tree))
|
|
|
|
warning(_("failed to initialize the repo, skipping bundle URI"));
|
2023-01-31 16:29:15 +03:00
|
|
|
else if (fetch_bundle_uri(the_repository, bundle_uri, &has_heuristic))
|
2022-08-09 16:11:41 +03:00
|
|
|
warning(_("failed to fetch objects from bundle URI '%s'"),
|
|
|
|
bundle_uri);
|
2023-01-31 16:29:15 +03:00
|
|
|
else if (has_heuristic)
|
|
|
|
git_config_set_gently("fetch.bundleuri", bundle_uri);
|
builtin/clone: fix bundle URIs with mismatching object formats
We create the reference database in git-clone(1) quite early before
connecting to the remote repository. Given that we do not yet know about
the object format that the remote repository uses at that point in time
the consequence is that the refdb may be initialized with the wrong
object format.
This is not a problem in the context of the files backend as we do not
encode the object format anywhere, and furthermore the only reference
that we write between initializing the refdb and learning about the
object format is the "HEAD" symref. It will become a problem though once
we land the reftable backend, which indeed does require to know about
the proper object format at the time of creation. We thus need to
rearrange the logic in git-clone(1) so that we only initialize the refdb
once we have learned about the actual object format.
As a first step, move listing of remote references to happen earlier,
which also allows us to set up the hash algorithm of the repository
earlier now. While we aim to execute this logic as late as possible
until after most of the setup has happened already, detection of the
object format and thus later the setup of the reference database must
happen before any other logic that may spawn Git commands or otherwise
these Git commands may not recognize the repository as such.
The first Git step where we expect the repository to be fully initialized
is when we fetch bundles via bundle URIs. Funny enough, the comments
there also state that "the_repository must match the cloned repo", which
is indeed not necessarily the case for the hash algorithm right now. So
in practice it is the right thing to detect the remote's object format
before downloading bundle URIs anyway, and not doing so causes clones
with bundle URIs to fail when the local default object format does not
match the remote repository's format.
Unfortunately though, this creates a new issue: downloading bundles may
take a long time, so if we list refs beforehand they might've grown
stale meanwhile. It is not clear how to solve this issue except for a
second reference listing though after we have downloaded the bundles,
which may be an expensive thing to do.
Arguably though, it's preferable to have a staleness issue compared to
being unable to clone a repository altogether.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-12-12 10:00:54 +03:00
|
|
|
} else {
|
2022-12-22 18:14:17 +03:00
|
|
|
/*
|
|
|
|
* Populate transport->got_remote_bundle_uri and
|
|
|
|
* transport->bundle_uri. We might get nothing.
|
|
|
|
*/
|
|
|
|
transport_get_remote_bundle_uri(transport);
|
|
|
|
|
|
|
|
if (transport->bundles &&
|
|
|
|
hashmap_get_size(&transport->bundles->bundles)) {
|
|
|
|
/* At this point, we need the_repository to match the cloned repo. */
|
|
|
|
if (repo_init(the_repository, git_dir, work_tree))
|
|
|
|
warning(_("failed to initialize the repo, skipping bundle URI"));
|
|
|
|
else if (fetch_bundle_list(the_repository,
|
|
|
|
transport->bundles))
|
|
|
|
warning(_("failed to fetch advertised bundles"));
|
|
|
|
} else {
|
|
|
|
clear_bundle_list(transport->bundles);
|
|
|
|
FREE_AND_NULL(transport->bundles);
|
|
|
|
}
|
|
|
|
}
|
2022-12-22 18:14:09 +03:00
|
|
|
|
builtin/clone: fix bundle URIs with mismatching object formats
We create the reference database in git-clone(1) quite early before
connecting to the remote repository. Given that we do not yet know about
the object format that the remote repository uses at that point in time
the consequence is that the refdb may be initialized with the wrong
object format.
This is not a problem in the context of the files backend as we do not
encode the object format anywhere, and furthermore the only reference
that we write between initializing the refdb and learning about the
object format is the "HEAD" symref. It will become a problem though once
we land the reftable backend, which indeed does require to know about
the proper object format at the time of creation. We thus need to
rearrange the logic in git-clone(1) so that we only initialize the refdb
once we have learned about the actual object format.
As a first step, move listing of remote references to happen earlier,
which also allows us to set up the hash algorithm of the repository
earlier now. While we aim to execute this logic as late as possible
until after most of the setup has happened already, detection of the
object format and thus later the setup of the reference database must
happen before any other logic that may spawn Git commands or otherwise
these Git commands may not recognize the repository as such.
The first Git step where we expect the repository to be fully initialized
is when we fetch bundles via bundle URIs. Funny enough, the comments
there also state that "the_repository must match the cloned repo", which
is indeed not necessarily the case for the hash algorithm right now. So
in practice it is the right thing to detect the remote's object format
before downloading bundle URIs anyway, and not doing so causes clones
with bundle URIs to fail when the local default object format does not
match the remote repository's format.
Unfortunately though, this creates a new issue: downloading bundles may
take a long time, so if we list refs beforehand they might've grown
stale meanwhile. It is not clear how to solve this issue except for a
second reference listing though after we have downloaded the bundles,
which may be an expensive thing to do.
Arguably though, it's preferable to have a staleness issue compared to
being unable to clone a repository altogether.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-12-12 10:00:54 +03:00
|
|
|
if (refs)
|
|
|
|
mapped_refs = wanted_peer_refs(refs, &remote->fetch);
|
2023-04-06 00:15:33 +03:00
|
|
|
|
|
|
|
if (mapped_refs) {
|
2012-02-11 10:20:56 +04:00
|
|
|
/*
|
|
|
|
* transport_get_remote_refs() may return refs with null sha-1
|
|
|
|
* in mapped_refs (see struct transport->get_refs_list
|
|
|
|
 * comment). In that case we need to fetch it early because
|
|
|
|
* remote_head code below relies on it.
|
|
|
|
*
|
|
|
|
* for normal clones, transport_get_remote_refs() should
|
|
|
|
* return reliable ref set, we can delay cloning until after
|
|
|
|
* remote HEAD check.
|
|
|
|
*/
|
|
|
|
for (ref = refs; ref; ref = ref->next)
|
2015-11-10 05:22:20 +03:00
|
|
|
if (is_null_oid(&ref->old_oid)) {
|
2012-02-11 10:20:56 +04:00
|
|
|
complete_refs_before_fetch = 0;
|
|
|
|
break;
|
|
|
|
}
|
2012-01-24 15:10:38 +04:00
|
|
|
|
2020-12-03 21:55:13 +03:00
|
|
|
if (!is_local && !complete_refs_before_fetch) {
|
clone: clean up directory after transport_fetch_refs() failure
git-clone started respecting errors from the transport subsystem in
aab179d937 (builtin/clone.c: don't ignore transport_fetch_refs() errors,
2020-12-03). However, that commit didn't handle the cleanup of the
filesystem quite right.
The cleanup of the directory that cmd_clone() creates is done by an
atexit() handler, which we control with a flag. It starts as
JUNK_LEAVE_NONE ("clean up everything"), then progresses to
JUNK_LEAVE_REPO when we know we have a valid repo but not working tree,
and then finally JUNK_LEAVE_ALL when we have a successful checkout.
Most errors cause us to die(), which then triggers the handler to do the
right thing based on how far into cmd_clone() we got. But the checks
added by aab179d937 instead set the "err" variable and then jump to a
new "cleanup" label, which then returns our non-zero status. However,
the code after the cleanup label includes setting the flag to
JUNK_LEAVE_ALL, and so we accidentally leave the repository and working
tree in place.
One obvious option to fix this is to reorder the end of the function to
set the flag first, before cleanup code, and put the label between them.
But we can observe another small bug: the error return from
transport_fetch_refs() is generally "-1", and we propagate that to the
return value of cmd_clone(), which ultimately becomes the exit code of
the process. And we try to avoid transmitting negative values via exit
codes (only the low 8 bits are passed along as an unsigned value, though
in practice for "-1" this at least retains the property that it's
non-zero).
Instead, let's just die(). That makes us consistent with rest of the
code in the function. It does add a new "fatal:" line to the output, but
I'd argue that's a good thing:
- in the rare case that the transport code didn't say anything, now
the user gets _some_ error message
- even if the transport code said something like "error: ssh died of
signal 9", it's nice to also say "fatal" to indicate that we
considered that to be a show-stopper.
Triggering this in the test suite turns out to be surprisingly
difficult. Almost every error we'd encounter, including ones deep inside
the transport code, cause us to just die() right there! However, one way
is to put a fake wrapper around git-upload-pack that sends the complete
packfile but exits with a failure code.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-19 14:17:15 +03:00
|
|
|
if (transport_fetch_refs(transport, mapped_refs))
|
|
|
|
die(_("remote transport reported error"));
|
2020-12-03 21:55:13 +03:00
|
|
|
}
|
2009-01-23 03:07:32 +03:00
|
|
|
}
|
clone: handle unborn branch in bare repos
When cloning a repository with an unborn HEAD, we'll set the local HEAD
to match it only if the local repository is non-bare. This is
inconsistent with all other combinations:
remote HEAD | local repo | local HEAD
-----------------------------------------------
points to commit | non-bare | same as remote
points to commit | bare | same as remote
unborn | non-bare | same as remote
unborn | bare | local default
So I don't think this is some clever or subtle behavior, but just a bug
in 4f37d45706 (clone: respect remote unborn HEAD, 2021-02-05). And it's
easy to see how we ended up there. Before that commit, the code to set
up the HEAD for an empty repo was guarded by "if (!option_bare)". That's
because the only thing it did was call install_branch_config(), and we
don't want to do so for a bare repository (unborn HEAD or not).
That commit put the handling of unborn HEADs into the same block, since
those also need to call install_branch_config(). But the unborn case has
an additional side effect of calling create_symref(), and we want that
to happen whether we are bare or not.
This patch just pulls all of the "figure out the default branch" code
out of the "!option_bare" block. Only the actual config installation is
kept there.
Note that this does mean we might allocate "ref" and not use it (if the
remote is empty but did not advertise an unborn HEAD). But that's not
really a big deal since this isn't a hot code path, and it keeps the
code simple. The alternative would be handling unborn_head_target
separately, but that gets confusing since its memory ownership is
tangled up with the "ref" variable.
There's just one new test, for the case we're fixing. The other ones in
the table are handled elsewhere (the unborn non-bare case just above,
and the actually-born cases in t5601, t5606, and t5609, as they do not
require v2's "unborn" protocol extension).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-09-20 22:04:10 +03:00
|
|
|
|
clone: propagate empty remote HEAD even with other branches
Unless "--branch" was given, clone generally tries to match the local
HEAD to the remote one. For most repositories, this is easy: the remote
tells us which branch HEAD was pointing to, and we call our local
checkout() function on that branch.
When cloning an empty repository, it's a little more tricky: we have
special code that checks the transport's "unborn" extension, or falls back
to our local idea of what the default branch should be. In either case,
we point the new HEAD to that, and set up the branch.* config.
But that leaves one case unhandled: when the remote repository _isn't_
empty, but its HEAD is unborn. The checkout() function is smart enough
to realize we didn't fetch the remote HEAD and it bails with a warning.
But we'll have ignored any information the remote gave us via the unborn
extension. This leads to nonsense outcomes:
- If the remote has its HEAD pointing to an unborn "foo" and contains
another branch "bar", cloning will get branch "bar" but leave the
local HEAD pointing at "master" (or whatever our local default is),
which is useless. The project does not use "master" as a branch.
- Worse, if the other branch "bar" is instead called "master" (but
again, the remote HEAD is not pointing to it), then we end up with a
local unborn branch "master", which is not connected to the remote
"master" (it shares no history, and there's no branch.* config).
Instead, we should try to use the remote's HEAD, even if it's unborn, to
be consistent with the other cases.
The reason this case was missed is that cmd_clone() handles empty and
non-empty repositories on two different sides of a conditional:
if (we have any refs) {
fetch refs;
check for --branch;
otherwise, try to point our head at remote head;
otherwise, our head is NULL;
} else {
check for --branch;
otherwise, try to use "unborn" extension;
otherwise, fall back to our default branch name;
}
So the smallest change would be to repeat the "unborn" logic at the end
of the first block. But we can note some other overlaps and
inconsistencies:
- both sides have to handle --branch (though note that it's always an
error for the empty repo case, since an empty repo by definition
does not have a matching branch)
- the fall back to the default name is much more explicit in the
empty-repo case. The non-empty case eventually ends up bailing
from checkout() with a warning, which produces a similar result, but
fails to set up the branch config we do in the empty case.
So let's pull the HEAD setup out of this conditional entirely. This
de-duplicates some of the code and the result is easy to follow, because
helper functions like find_ref_by_name() do the right thing even in the
empty-repo case (i.e., by returning NULL).
There are two subtleties:
- for a remote with a detached HEAD, it will advertise an oid for HEAD
(which we store in our "remote_head" variable), but we won't find a
matching refname (so our "remote_head_points_at" is NULL). In this
case we make a local detached HEAD to match. Right now this happens
implicitly by reaching update_head() with a non-NULL remote_head
(since we skip all of the unborn-fallback). We'll now need to
account for it explicitly before doing the fallback.
- for an empty repo, we issue a warning to the user that they've
cloned an empty repo. The text of that warning doesn't make sense
for a non-empty repo with an unborn HEAD, so we'll have to
differentiate the two cases there. We could just use different text,
but instead let's allow the code to continue down to checkout(),
which will issue an appropriate warning, like:
remote HEAD refers to nonexistent ref, unable to checkout
Continuing down to checkout() will make it easier to do more fixes
on top (see below).
Note that this patch fixes the case where the other side reports an
unborn head to us using the protocol extension. It _doesn't_ fix the
case where the other side doesn't tell us, we locally guess "master",
and the other side happens to have a "master" to which its HEAD doesn't
point. But it doesn't make anything worse there, and it should actually
make it easier to fix that problem on top.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-08 02:57:45 +03:00
|
|
|
remote_head = find_ref_by_name(refs, "HEAD");
|
|
|
|
remote_head_points_at = guess_remote_head(remote_head, mapped_refs, 0);
|
2013-10-11 20:49:02 +04:00
|
|
|
|
clone: propagate empty remote HEAD even with other branches
Unless "--branch" was given, clone generally tries to match the local
HEAD to the remote one. For most repositories, this is easy: the remote
tells us which branch HEAD was pointing to, and we call our local
checkout() function on that branch.
When cloning an empty repository, it's a little more tricky: we have
special code that checks the transport's "unborn" extension, or falls back
to our local idea of what the default branch should be. In either case,
we point the new HEAD to that, and set up the branch.* config.
But that leaves one case unhandled: when the remote repository _isn't_
empty, but its HEAD is unborn. The checkout() function is smart enough
to realize we didn't fetch the remote HEAD and it bails with a warning.
But we'll have ignored any information the remote gave us via the unborn
extension. This leads to nonsense outcomes:
- If the remote has its HEAD pointing to an unborn "foo" and contains
another branch "bar", cloning will get branch "bar" but leave the
local HEAD pointing at "master" (or whatever our local default is),
which is useless. The project does not use "master" as a branch.
- Worse, if the other branch "bar" is instead called "master" (but
again, the remote HEAD is not pointing to it), then we end up with a
local unborn branch "master", which is not connected to the remote
"master" (it shares no history, and there's no branch.* config).
Instead, we should try to use the remote's HEAD, even if it's unborn, to
be consistent with the other cases.
The reason this case was missed is that cmd_clone() handles empty and
non-empty repositories on two different sides of a conditional:
if (we have any refs) {
fetch refs;
check for --branch;
otherwise, try to point our head at remote head;
otherwise, our head is NULL;
} else {
check for --branch;
otherwise, try to use "unborn" extension;
otherwise, fall back to our default branch name;
}
So the smallest change would be to repeat the "unborn" logic at the end
of the first block. But we can note some other overlaps and
inconsistencies:
- both sides have to handle --branch (though note that it's always an
error for the empty repo case, since an empty repo by definition
does not have a matching branch)
- the fall back to the default name is much more explicit in the
empty-repo case. The non-empty case eventually ends up bailing
from checkout() with a warning, which produces a similar result, but
fails to set up the branch config we do in the empty case.
So let's pull the HEAD setup out of this conditional entirely. This
de-duplicates some of the code and the result is easy to follow, because
helper functions like find_ref_by_name() do the right thing even in the
empty-repo case (i.e., by returning NULL).
There are two subtleties:
- for a remote with a detached HEAD, it will advertise an oid for HEAD
(which we store in our "remote_head" variable), but we won't find a
matching refname (so our "remote_head_points_at" is NULL). In this
case we make a local detached HEAD to match. Right now this happens
implicitly by reaching update_head() with a non-NULL remote_head
(since we skip all of the unborn-fallback). We'll now need to
account for it explicitly before doing the fallback.
- for an empty repo, we issue a warning to the user that they've
cloned an empty repo. The text of that warning doesn't make sense
for a non-empty repo with an unborn HEAD, so we'll have to
differentiate the two cases there. We could just use different text,
but instead let's allow the code to continue down to checkout(),
which will issue an appropriate warning, like:
remote HEAD refers to nonexistent ref, unable to checkout
Continuing down to checkout() will make it easier to do more fixes
on top (see below).
Note that this patch fixes the case where the other side reports an
unborn head to us using the protocol extension. It _doesn't_ fix the
case where the other side doesn't tell us, we locally guess "master",
and the other side happens to have a "master" to which its HEAD doesn't
point. But it doesn't make anything worse there, and it should actually
make it easier to fix that problem on top.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-08 02:57:45 +03:00
|
|
|
if (option_branch) {
|
|
|
|
our_head_points_at = find_remote_branch(mapped_refs, option_branch);
|
|
|
|
if (!our_head_points_at)
|
|
|
|
die(_("Remote branch %s not found in upstream %s"),
|
|
|
|
option_branch, remote_name);
|
|
|
|
} else if (remote_head_points_at) {
|
|
|
|
our_head_points_at = remote_head_points_at;
|
|
|
|
} else if (remote_head) {
|
2009-08-26 23:05:08 +04:00
|
|
|
our_head_points_at = NULL;
|
clone: propagate empty remote HEAD even with other branches
Unless "--branch" was given, clone generally tries to match the local
HEAD to the remote one. For most repositories, this is easy: the remote
tells us which branch HEAD was pointing to, and we call our local
checkout() function on that branch.
When cloning an empty repository, it's a little more tricky: we have
special code that checks the transport's "unborn" extension, or falls back
to our local idea of what the default branch should be. In either case,
we point the new HEAD to that, and set up the branch.* config.
But that leaves one case unhandled: when the remote repository _isn't_
empty, but its HEAD is unborn. The checkout() function is smart enough
to realize we didn't fetch the remote HEAD and it bails with a warning.
But we'll have ignored any information the remote gave us via the unborn
extension. This leads to nonsense outcomes:
- If the remote has its HEAD pointing to an unborn "foo" and contains
another branch "bar", cloning will get branch "bar" but leave the
local HEAD pointing at "master" (or whatever our local default is),
which is useless. The project does not use "master" as a branch.
- Worse, if the other branch "bar" is instead called "master" (but
again, the remote HEAD is not pointing to it), then we end up with a
local unborn branch "master", which is not connected to the remote
"master" (it shares no history, and there's no branch.* config).
Instead, we should try to use the remote's HEAD, even if it's unborn, to
be consistent with the other cases.
The reason this case was missed is that cmd_clone() handles empty and
non-empty repositories on two different sides of a conditional:
if (we have any refs) {
fetch refs;
check for --branch;
otherwise, try to point our head at remote head;
otherwise, our head is NULL;
} else {
check for --branch;
otherwise, try to use "unborn" extension;
otherwise, fall back to our default branch name;
}
So the smallest change would be to repeat the "unborn" logic at the end
of the first block. But we can note some other overlaps and
inconsistencies:
- both sides have to handle --branch (though note that it's always an
error for the empty repo case, since an empty repo by definition
does not have a matching branch)
- the fall back to the default name is much more explicit in the
empty-repo case. The non-empty case eventually ends up bailing
from checkout() with a warning, which produces a similar result, but
fails to set up the branch config we do in the empty case.
So let's pull the HEAD setup out of this conditional entirely. This
de-duplicates some of the code and the result is easy to follow, because
helper functions like find_ref_by_name() do the right thing even in the
empty-repo case (i.e., by returning NULL).
There are two subtleties:
- for a remote with a detached HEAD, it will advertise an oid for HEAD
(which we store in our "remote_head" variable), but we won't find a
matching refname (so our "remote_head_points_at" is NULL). In this
case we make a local detached HEAD to match. Right now this happens
implicitly by reaching update_head() with a non-NULL remote_head
(since we skip all of the unborn-fallback). We'll now need to
account for it explicitly before doing the fallback.
- for an empty repo, we issue a warning to the user that they've
cloned an empty repo. The text of that warning doesn't make sense
for a non-empty repo with an unborn HEAD, so we'll have to
differentiate the two cases there. We could just use different text,
but instead let's allow the code to continue down to checkout(),
which will issue an appropriate warning, like:
remote HEAD refers to nonexistent ref, unable to checkout
Continuing down to checkout() will make it easier to do more fixes
on top (see below).
Note that this patch fixes the case where the other side reports an
unborn head to us using the protocol extension. It _doesn't_ fix the
case where the other side doesn't tell us, we locally guess "master",
and the other side happens to have a "master" to which its HEAD doesn't
point. But it doesn't make anything worse there, and it should actually
make it easier to fix that problem on top.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-08 02:57:45 +03:00
|
|
|
} else {
|
clone: handle unborn branch in bare repos
When cloning a repository with an unborn HEAD, we'll set the local HEAD
to match it only if the local repository is non-bare. This is
inconsistent with all other combinations:
remote HEAD | local repo | local HEAD
-----------------------------------------------
points to commit | non-bare | same as remote
points to commit | bare | same as remote
unborn | non-bare | same as remote
unborn | bare | local default
So I don't think this is some clever or subtle behavior, but just a bug
in 4f37d45706 (clone: respect remote unborn HEAD, 2021-02-05). And it's
easy to see how we ended up there. Before that commit, the code to set
up the HEAD for an empty repo was guarded by "if (!option_bare)". That's
because the only thing it did was call install_branch_config(), and we
don't want to do so for a bare repository (unborn HEAD or not).
That commit put the handling of unborn HEADs into the same block, since
those also need to call install_branch_config(). But the unborn case has
an additional side effect of calling create_symref(), and we want that
to happen whether we are bare or not.
This patch just pulls all of the "figure out the default branch" code
out of the "!option_bare" block. Only the actual config installation is
kept there.
Note that this does mean we might allocate "ref" and not use it (if the
remote is empty but did not advertise an unborn HEAD). But that's not
really a big deal since this isn't a hot code path, and it keeps the
code simple. The alternative would be handling unborn_head_target
separately, but that gets confusing since its memory ownership is
tangled up with the "ref" variable.
There's just one new test, for the case we're fixing. The other ones in
the table are handled elsewhere (the unborn non-bare case just above,
and the actually-born cases in t5601, t5606, and t5609, as they do not
require v2's "unborn" protocol extension).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-09-20 22:04:10 +03:00
|
|
|
const char *branch;
|
|
|
|
|
clone: propagate empty remote HEAD even with other branches
Unless "--branch" was given, clone generally tries to match the local
HEAD to the remote one. For most repositories, this is easy: the remote
tells us which branch HEAD was pointing to, and we call our local
checkout() function on that branch.
When cloning an empty repository, it's a little more tricky: we have
special code that checks the transport's "unborn" extension, or falls back
to our local idea of what the default branch should be. In either case,
we point the new HEAD to that, and set up the branch.* config.
But that leaves one case unhandled: when the remote repository _isn't_
empty, but its HEAD is unborn. The checkout() function is smart enough
to realize we didn't fetch the remote HEAD and it bails with a warning.
But we'll have ignored any information the remote gave us via the unborn
extension. This leads to nonsense outcomes:
- If the remote has its HEAD pointing to an unborn "foo" and contains
another branch "bar", cloning will get branch "bar" but leave the
local HEAD pointing at "master" (or whatever our local default is),
which is useless. The project does not use "master" as a branch.
- Worse, if the other branch "bar" is instead called "master" (but
again, the remote HEAD is not pointing to it), then we end up with a
local unborn branch "master", which is not connected to the remote
"master" (it shares no history, and there's no branch.* config).
Instead, we should try to use the remote's HEAD, even if it's unborn, to
be consistent with the other cases.
The reason this case was missed is that cmd_clone() handles empty and
non-empty repositories on two different sides of a conditional:
if (we have any refs) {
fetch refs;
check for --branch;
otherwise, try to point our head at remote head;
otherwise, our head is NULL;
} else {
check for --branch;
otherwise, try to use "unborn" extension;
otherwise, fall back to our default branch name;
}
So the smallest change would be to repeat the "unborn" logic at the end
of the first block. But we can note some other overlaps and
inconsistencies:
- both sides have to handle --branch (though note that it's always an
error for the empty repo case, since an empty repo by definition
does not have a matching branch)
- the fall back to the default name is much more explicit in the
empty-repo case. The non-empty case eventually ends up bailing
from checkout() with a warning, which produces a similar result, but
fails to set up the branch config we do in the empty case.
So let's pull the HEAD setup out of this conditional entirely. This
de-duplicates some of the code and the result is easy to follow, because
helper functions like find_ref_by_name() do the right thing even in the
empty-repo case (i.e., by returning NULL).
There are two subtleties:
- for a remote with a detached HEAD, it will advertise an oid for HEAD
(which we store in our "remote_head" variable), but we won't find a
matching refname (so our "remote_head_points_at" is NULL). In this
case we make a local detached HEAD to match. Right now this happens
implicitly by reaching update_head() with a non-NULL remote_head
(since we skip all of the unborn-fallback). We'll now need to
account for it explicitly before doing the fallback.
- for an empty repo, we issue a warning to the user that they've
cloned an empty repo. The text of that warning doesn't make sense
for a non-empty repo with an unborn HEAD, so we'll have to
differentiate the two cases there. We could just use different text,
but instead let's allow the code to continue down to checkout(),
which will issue an appropriate warning, like:
remote HEAD refers to nonexistent ref, unable to checkout
Continuing down to checkout() will make it easier to do more fixes
on top (see below).
Note that this patch fixes the case where the other side reports an
unborn head to us using the protocol extension. It _doesn't_ fix the
case where the other side doesn't tell us, we locally guess "master",
and the other side happens to have a "master" which its HEAD doesn't
point to. But it doesn't make anything worse there, and it should actually
make it easier to fix that problem on top.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-08 02:57:45 +03:00
|
|
|
if (!mapped_refs) {
|
|
|
|
warning(_("You appear to have cloned an empty repository."));
|
|
|
|
option_no_checkout = 1;
|
|
|
|
}
|
2020-06-24 17:46:34 +03:00
|
|
|
|
clone: handle unborn branch in bare repos
When cloning a repository with an unborn HEAD, we'll set the local HEAD
to match it only if the local repository is non-bare. This is
inconsistent with all other combinations:
remote HEAD | local repo | local HEAD
-----------------------------------------------
points to commit | non-bare | same as remote
points to commit | bare | same as remote
unborn | non-bare | same as remote
unborn | bare | local default
So I don't think this is some clever or subtle behavior, but just a bug
in 4f37d45706 (clone: respect remote unborn HEAD, 2021-02-05). And it's
easy to see how we ended up there. Before that commit, the code to set
up the HEAD for an empty repo was guarded by "if (!option_bare)". That's
because the only thing it did was call install_branch_config(), and we
don't want to do so for a bare repository (unborn HEAD or not).
That commit put the handling of unborn HEADs into the same block, since
those also need to call install_branch_config(). But the unborn case has
an additional side effect of calling create_symref(), and we want that
to happen whether we are bare or not.
This patch just pulls all of the "figure out the default branch" code
out of the "!option_bare" block. Only the actual config installation is
kept there.
Note that this does mean we might allocate "ref" and not use it (if the
remote is empty but did not advertise an unborn HEAD). But that's not
really a big deal since this isn't a hot code path, and it keeps the
code simple. The alternative would be handling unborn_head_target
separately, but that gets confusing since its memory ownership is
tangled up with the "ref" variable.
There's just one new test, for the case we're fixing. The other ones in
the table are handled elsewhere (the unborn non-bare case just above,
and the actually-born cases in t5601, t5606, and t5609, as they do not
require v2's "unborn" protocol extension).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-09-20 22:04:10 +03:00
|
|
|
if (transport_ls_refs_options.unborn_head_target &&
|
|
|
|
skip_prefix(transport_ls_refs_options.unborn_head_target,
|
|
|
|
"refs/heads/", &branch)) {
|
2022-07-11 12:21:52 +03:00
|
|
|
unborn_head = xstrdup(transport_ls_refs_options.unborn_head_target);
|
clone: handle unborn branch in bare repos
When cloning a repository with an unborn HEAD, we'll set the local HEAD
to match it only if the local repository is non-bare. This is
inconsistent with all other combinations:
remote HEAD | local repo | local HEAD
-----------------------------------------------
points to commit | non-bare | same as remote
points to commit | bare | same as remote
unborn | non-bare | same as remote
unborn | bare | local default
So I don't think this is some clever or subtle behavior, but just a bug
in 4f37d45706 (clone: respect remote unborn HEAD, 2021-02-05). And it's
easy to see how we ended up there. Before that commit, the code to set
up the HEAD for an empty repo was guarded by "if (!option_bare)". That's
because the only thing it did was call install_branch_config(), and we
don't want to do so for a bare repository (unborn HEAD or not).
That commit put the handling of unborn HEADs into the same block, since
those also need to call install_branch_config(). But the unborn case has
an additional side effect of calling create_symref(), and we want that
to happen whether we are bare or not.
This patch just pulls all of the "figure out the default branch" code
out of the "!option_bare" block. Only the actual config installation is
kept there.
Note that this does mean we might allocate "ref" and not use it (if the
remote is empty but did not advertise an unborn HEAD). But that's not
really a big deal since this isn't a hot code path, and it keeps the
code simple. The alternative would be handling unborn_head_target
separately, but that gets confusing since its memory ownership is
tangled up with the "ref" variable.
There's just one new test, for the case we're fixing. The other ones in
the table are handled elsewhere (the unborn non-bare case just above,
and the actually-born cases in t5601, t5606, and t5609, as they do not
require v2's "unborn" protocol extension).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-09-20 22:04:10 +03:00
|
|
|
} else {
|
|
|
|
branch = git_default_branch_name(0);
|
2022-07-11 12:21:52 +03:00
|
|
|
unborn_head = xstrfmt("refs/heads/%s", branch);
|
2020-06-24 17:46:34 +03:00
|
|
|
}
|
clone: handle unborn branch in bare repos
When cloning a repository with an unborn HEAD, we'll set the local HEAD
to match it only if the local repository is non-bare. This is
inconsistent with all other combinations:
remote HEAD | local repo | local HEAD
-----------------------------------------------
points to commit | non-bare | same as remote
points to commit | bare | same as remote
unborn | non-bare | same as remote
unborn | bare | local default
So I don't think this is some clever or subtle behavior, but just a bug
in 4f37d45706 (clone: respect remote unborn HEAD, 2021-02-05). And it's
easy to see how we ended up there. Before that commit, the code to set
up the HEAD for an empty repo was guarded by "if (!option_bare)". That's
because the only thing it did was call install_branch_config(), and we
don't want to do so for a bare repository (unborn HEAD or not).
That commit put the handling of unborn HEADs into the same block, since
those also need to call install_branch_config(). But the unborn case has
an additional side effect of calling create_symref(), and we want that
to happen whether we are bare or not.
This patch just pulls all of the "figure out the default branch" code
out of the "!option_bare" block. Only the actual config installation is
kept there.
Note that this does mean we might allocate "ref" and not use it (if the
remote is empty but did not advertise an unborn HEAD). But that's not
really a big deal since this isn't a hot code path, and it keeps the
code simple. The alternative would be handling unborn_head_target
separately, but that gets confusing since its memory ownership is
tangled up with the "ref" variable.
There's just one new test, for the case we're fixing. The other ones in
the table are handled elsewhere (the unborn non-bare case just above,
and the actually-born cases in t5601, t5606, and t5609, as they do not
require v2's "unborn" protocol extension).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-09-20 22:04:10 +03:00
|
|
|
|
clone: use remote branch if it matches default HEAD
Usually clone tries to use the same local HEAD as the remote (unless the
user has given --branch explicitly). Even if the remote HEAD is detached
or unborn, we can detect those situations with modern versions of Git.
If the remote is too old to support the "unborn" extension (or it has
been disabled via config), then we can't know the name of the remote's
unborn HEAD, and we fall back to whatever the local default branch name is
configured to be.
But that leads to one weird corner case. It's rare because it needs a
number of factors:
- the remote has an unborn HEAD
- the remote is too old to support "unborn", or has disabled it
- the remote has another branch "foo"
- the local default branch name is "foo"
In that case you end up with a local clone on an unborn "foo" branch,
disconnected completely from the remote's "foo". This is rare in
practice, but the result is quite confusing.
When choosing "foo", we can double check whether the remote has such a
name, and if so, start our local "foo" at the same spot, rather than
making it unborn.
Note that this causes a test failure in t5605, which is cloning from a
bundle that doesn't contain HEAD (so it behaves like a remote that
doesn't support "unborn"), but has a single "main" branch. That test
expects that we end up in the weird "unborn main" case, where we don't
actually check out the remote branch of the same name. Even though we
have to update the test, this seems like an argument in favor of this
patch: checking out main is what I'd expect from such a bundle.
So this patch updates the test for the new behavior and adds an adjacent
one that checks what the original was going for: if there's no HEAD and
the bundle _doesn't_ have a branch that matches our local default name,
then we end up with nothing checked out.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-08 02:59:35 +03:00
|
|
|
/*
|
|
|
|
* We may have selected a local default branch name "foo",
|
|
|
|
* and even though the remote's HEAD does not point there,
|
|
|
|
* it may still have a "foo" branch. If so, set it up so
|
|
|
|
* that we can follow the usual checkout code later.
|
|
|
|
*
|
|
|
|
* Note that for an empty repo we'll already have set
|
|
|
|
* option_no_checkout above, which would work against us here.
|
|
|
|
* But for an empty repo, find_remote_branch() can never find
|
|
|
|
* a match.
|
|
|
|
*/
|
|
|
|
our_head_points_at = find_remote_branch(mapped_refs, branch);
|
2009-01-23 03:07:32 +03:00
|
|
|
}
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2012-09-20 22:04:08 +04:00
|
|
|
write_refspec_config(src_ref_prefix, our_head_points_at,
|
|
|
|
remote_head_points_at, &branch_top);
|
|
|
|
|
2017-12-08 18:58:46 +03:00
|
|
|
if (filter_options.choice)
|
2020-10-01 06:46:15 +03:00
|
|
|
partial_clone_register(remote_name, &filter_options);
|
2017-12-08 18:58:46 +03:00
|
|
|
|
2012-01-16 13:46:12 +04:00
|
|
|
if (is_local)
|
|
|
|
clone_local(path, git_dir);
|
2022-01-24 21:09:09 +03:00
|
|
|
else if (mapped_refs && complete_refs_before_fetch) {
|
clone: clean up directory after transport_fetch_refs() failure
git-clone started respecting errors from the transport subsystem in
aab179d937 (builtin/clone.c: don't ignore transport_fetch_refs() errors,
2020-12-03). However, that commit didn't handle the cleanup of the
filesystem quite right.
The cleanup of the directory that cmd_clone() creates is done by an
atexit() handler, which we control with a flag. It starts as
JUNK_LEAVE_NONE ("clean up everything"), then progresses to
JUNK_LEAVE_REPO when we know we have a valid repo but not working tree,
and then finally JUNK_LEAVE_ALL when we have a successful checkout.
Most errors cause us to die(), which then triggers the handler to do the
right thing based on how far into cmd_clone() we got. But the checks
added by aab179d937 instead set the "err" variable and then jump to a
new "cleanup" label, which then returns our non-zero status. However,
the code after the cleanup label includes setting the flag to
JUNK_LEAVE_ALL, and so we accidentally leave the repository and working
tree in place.
One obvious option to fix this is to reorder the end of the function to
set the flag first, before cleanup code, and put the label between them.
But we can observe another small bug: the error return from
transport_fetch_refs() is generally "-1", and we propagate that to the
return value of cmd_clone(), which ultimately becomes the exit code of
the process. And we try to avoid transmitting negative values via exit
codes (only the low 8 bits are passed along as an unsigned value, though
in practice for "-1" this at least retains the property that it's
non-zero).
Instead, let's just die(). That makes us consistent with the rest of the
code in the function. It does add a new "fatal:" line to the output, but
I'd argue that's a good thing:
- in the rare case that the transport code didn't say anything, now
the user gets _some_ error message
- even if the transport code said something like "error: ssh died of
signal 9", it's nice to also say "fatal" to indicate that we
considered that to be a show-stopper.
Triggering this in the test suite turns out to be surprisingly
difficult. Almost every error we'd encounter, including ones deep inside
the transport code, cause us to just die() right there! However, one way
is to put a fake wrapper around git-upload-pack that sends the complete
packfile but exits with a failure code.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-19 14:17:15 +03:00
|
|
|
if (transport_fetch_refs(transport, mapped_refs))
|
|
|
|
die(_("remote transport reported error"));
|
2020-12-03 21:55:13 +03:00
|
|
|
}
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2012-01-16 13:46:11 +04:00
|
|
|
update_remote_refs(refs, mapped_refs, remote_head_points_at,
|
2017-12-08 18:58:46 +03:00
|
|
|
branch_top.buf, reflog_msg.buf, transport,
|
connected: always use partial clone optimization
With 50033772d5 ("connected: verify promisor-ness of partial clone",
2020-01-30), the fast path (checking promisor packs) in
check_connected() now passes a subset of the slow path (rev-list) - if
all objects to be checked are found in promisor packs, both the fast
path and the slow path will pass; otherwise, the fast path will
definitely not pass. This means that we can always attempt the fast path
whenever we need to do the slow path.
The fast path is currently guarded by a flag; therefore, remove that
flag. Also, make the fast path fallback to the slow path - if the fast
path fails, the failing OID and all remaining OIDs will be passed to
rev-list.
The main user-visible benefit is the performance of fetch from a partial
clone - specifically, the speedup of the connectivity check done before
the fetch. In particular, a no-op fetch into a partial clone on my
computer was sped up from 7 seconds to 0.01 seconds. This is a
complement to the work in 2df1aa239c ("fetch: forgo full
connectivity check if --filter", 2020-01-30), which is the child of the
aforementioned 50033772d5. In that commit, the connectivity check
*after* the fetch was sped up.
The addition of the fast path might cause performance reductions in
these cases:
- If a partial clone or a fetch into a partial clone fails, Git will
fruitlessly run rev-list (it is expected that everything fetched
would go into promisor packs, so if that didn't happen, it is most
likely that rev-list will fail too).
- Any connectivity checks done by receive-pack, in the (in my opinion,
unlikely) event that a partial clone serves receive-pack.
I think that these cases are rare enough, and the performance reduction
in this case minor enough (additional object DB access), that the
benefit of avoiding a flag outweighs these.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Reviewed-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-03-21 01:00:45 +03:00
|
|
|
!is_local);
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2022-07-11 12:21:52 +03:00
|
|
|
update_head(our_head_points_at, remote_head, unborn_head, reflog_msg.buf);
|
2008-07-08 08:46:06 +04:00
|
|
|
|
clone: pass --progress decision to recursive submodules
When cloning with "--recursive", we'd generally expect
submodules to show progress reports if the main clone did,
too.
In older versions of git, this mostly worked out of the
box. Since we show progress by default when stderr is a tty,
and since the child clones inherit the parent stderr, then
both processes would come to the same decision by default.
If the parent clone was asked for "--quiet", we passed down
"--quiet" to the child. However, if stderr was not a tty and
the user specified "--progress", we did not propagate this
to the child.
That's a minor bug, but things got much worse when we
switched recently to submodule--helper's update_clone
command. With that change, the stderr of the child clones
are always connected to a pipe, and we never output
progress at all.
This patch teaches git-submodule and git-submodule--helper
how to pass down an explicit "--progress" flag when cloning.
The clone command then decides to propagate that flag based
on the cloning decision made earlier (which takes into
account isatty(2) of the parent process, existing --progress
or --quiet flags, etc). Since the child processes always run
without a tty on stderr, we don't have to worry about
passing an explicit "--no-progress"; it's the default for
them.
This fixes the recent loss of progress during recursive
clones. And as a bonus, it makes:
git clone --recursive --progress ... 2>&1 | cat
work by triggering progress explicitly in the children.
Signed-off-by: Jeff King <peff@peff.net>
Acked-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-22 08:24:46 +03:00
|
|
|
/*
|
|
|
|
* We want to show progress for recursive submodule clones iff
|
|
|
|
* we did so for the main clone. But only the transport knows
|
|
|
|
* the final decision for this flag, so we need to rescue the value
|
|
|
|
* before we free the transport.
|
|
|
|
*/
|
|
|
|
submodule_progress = transport->progress;
|
|
|
|
|
fetch: fix deadlock when cleaning up lockfiles in async signals
When fetching packfiles, we write a bunch of lockfiles for the packfiles
we're writing into the repository. In order to not leave behind any
cruft in case we exit or receive a signal, we register both an exit
handler as well as signal handlers for common signals like SIGINT. These
handlers will then unlink the locks and free the data structure tracking
them. We have observed a deadlock in this logic though:
(gdb) bt
#0 __lll_lock_wait_private () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
#1 0x00007f4932bea2cd in _int_free (av=0x7f4932f2eb20 <main_arena>, p=0x3e3e4200, have_lock=0) at malloc.c:3969
#2 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#3 0x0000000000662ab1 in string_list_clear ()
#4 0x000000000044f5bc in unlock_pack_on_signal ()
#5 <signal handler called>
#6 _int_free (av=0x7f4932f2eb20 <main_arena>, p=<optimized out>, have_lock=0) at malloc.c:4024
#7 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#8 0x000000000065afd5 in strbuf_release ()
#9 0x000000000066ddb9 in delete_tempfile ()
#10 0x0000000000610d0b in files_transaction_cleanup.isra ()
#11 0x0000000000611718 in files_transaction_abort ()
#12 0x000000000060d2ef in ref_transaction_abort ()
#13 0x000000000060d441 in ref_transaction_prepare ()
#14 0x000000000060e0b5 in ref_transaction_commit ()
#15 0x00000000004511c2 in fetch_and_consume_refs ()
#16 0x000000000045279a in cmd_fetch ()
#17 0x0000000000407c48 in handle_builtin ()
#18 0x0000000000408df2 in cmd_main ()
#19 0x00000000004078b5 in main ()
The process was killed with a signal, which caused the signal handler to
kick in and try to free the data structures after we have unlinked the
locks. It then deadlocks while calling free(3P).
The root cause of this is that it is not allowed to call certain
functions in async-signal handlers, as specified by signal-safety(7).
Next to most I/O functions, this list of disallowed functions also
includes memory-handling functions like malloc(3P) and free(3P) because
they may not be reentrant. As a result, if we execute such functions in
the signal handler, then they may operate on inconsistent state and fail
in unexpected ways.
Fix this bug by not calling non-async-signal-safe functions when running
in the signal handler. We're about to re-raise the signal anyway and
will thus exit, so it's not much of a problem to keep the string list of
lockfiles untouched. Note that it's fine though to call unlink(2), so
we'll still clean up the lockfiles correctly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Reviewed-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-07 13:55:47 +03:00
|
|
|
transport_unlock_pack(transport, 0);
|
2012-01-16 13:46:12 +04:00
|
|
|
transport_disconnect(transport);
|
2008-07-08 08:46:06 +04:00
|
|
|
|
2015-10-06 16:18:47 +03:00
|
|
|
if (option_dissociate) {
|
2019-05-17 21:41:49 +03:00
|
|
|
close_object_store(the_repository->objects);
|
2014-10-14 23:38:52 +04:00
|
|
|
dissociate_from_references();
|
2015-10-06 16:18:47 +03:00
|
|
|
}
|
2014-10-14 23:38:52 +04:00
|
|
|
|
2023-12-12 10:00:59 +03:00
|
|
|
if (option_sparse_checkout && git_sparse_checkout_init(dir))
|
|
|
|
return 1;
|
|
|
|
|
2013-03-27 02:22:09 +04:00
|
|
|
junk_mode = JUNK_LEAVE_REPO;
|
clone, submodule: pass partial clone filters to submodules
When cloning a repo with a --filter and with --recurse-submodules
enabled, the partial clone filter only applies to the top-level repo.
This can lead to unexpected bandwidth and disk usage for projects which
include large submodules. For example, a user might wish to make a
partial clone of Gerrit and would run:
`git clone --recurse-submodules --filter=blob:5k https://gerrit.googlesource.com/gerrit`.
However, only the superproject would be a partial clone; all the
submodules would have all blobs downloaded regardless of their size.
With this change, the same filter can also be applied to submodules,
meaning the expected bandwidth and disk savings apply consistently.
To avoid changing default behavior, add a new clone flag,
`--also-filter-submodules`. When this is set along with `--filter` and
`--recurse-submodules`, the filter spec is passed along to git-submodule
and git-submodule--helper, such that submodule clones also have the
filter applied.
This applies the same filter to the superproject and all submodules.
Users who need to customize the filter per-submodule would need to clone
with `--no-recurse-submodules` and then manually initialize each
submodule with the proper filter.
Applying filters to submodules should be safe thanks to Jonathan Tan's
recent work [1, 2, 3] eliminating the use of alternates as a method of
accessing submodule objects, so any submodule object access now triggers
a lazy fetch from the submodule's promisor remote if the accessed object
is missing. This patch is a reworked version of [4], which was created
prior to Jonathan Tan's work.
[1]: 8721e2e (Merge branch 'jt/partial-clone-submodule-1', 2021-07-16)
[2]: 11e5d0a (Merge branch 'jt/grep-wo-submodule-odb-as-alternate',
2021-09-20)
[3]: 162a13b (Merge branch 'jt/no-abuse-alternate-odb-for-submodules',
2021-10-25)
[4]: https://lore.kernel.org/git/52bf9d45b8e2b72ff32aa773f2415bf7b2b86da2.1563322192.git.steadmon@google.com/
Signed-off-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-05 08:00:49 +03:00
|
|
|
err = checkout(submodule_progress, filter_submodules);
|
2008-04-27 21:39:30 +04:00
|
|
|
|
2020-10-01 06:46:16 +03:00
|
|
|
free(remote_name);
|
2008-04-27 21:39:30 +04:00
|
|
|
strbuf_release(&reflog_msg);
|
2008-11-21 03:45:01 +03:00
|
|
|
strbuf_release(&branch_top);
|
2024-02-27 17:27:44 +03:00
|
|
|
strbuf_release(&buf);
|
2008-11-21 03:45:01 +03:00
|
|
|
strbuf_release(&key);
|
2021-03-14 21:47:36 +03:00
|
|
|
free_refs(mapped_refs);
|
|
|
|
free_refs(remote_head_points_at);
|
2022-07-11 12:21:52 +03:00
|
|
|
free(unborn_head);
|
2021-03-14 21:47:36 +03:00
|
|
|
free(dir);
|
|
|
|
free(path);
|
2023-02-07 02:07:39 +03:00
|
|
|
free(repo_to_free);
|
2013-03-27 02:22:09 +04:00
|
|
|
junk_mode = JUNK_LEAVE_ALL;
|
2014-08-10 17:57:56 +04:00
|
|
|
|
2022-02-05 03:08:14 +03:00
|
|
|
transport_ls_refs_options_release(&transport_ls_refs_options);
|
2009-03-03 08:37:51 +03:00
|
|
|
return err;
|
2008-04-27 21:39:30 +04:00
|
|
|
}
|