2018-03-15 20:31:20 +03:00
|
|
|
#include "cache.h"
|
|
|
|
#include "repository.h"
|
|
|
|
#include "refs.h"
|
|
|
|
#include "remote.h"
|
2020-07-28 23:23:39 +03:00
|
|
|
#include "strvec.h"
|
2018-03-15 20:31:20 +03:00
|
|
|
#include "ls-refs.h"
|
|
|
|
#include "pkt-line.h"
|
2018-12-18 15:47:50 +03:00
|
|
|
#include "config.h"
|
2018-03-15 20:31:20 +03:00
|
|
|
|
ls-refs: report unborn targets of symrefs
When cloning, we choose the default branch based on the remote HEAD.
But if there is no remote HEAD reported (which could happen if the
target of the remote HEAD is unborn), we'll fall back to using our local
init.defaultBranch. Traditionally this hasn't been a big deal, because
most repos used "master" as the default. But these days it is likely to
cause confusion if the server and client implementations choose
different values (e.g., if the remote started with "main", we may choose
"master" locally, create commits there, and then the user is surprised
when they push to "master" and not "main").
To solve this, the remote needs to communicate the target of the HEAD
symref, even if it is unborn, and "git clone" needs to use this
information.
Currently, symrefs that have unborn targets (such as in this case) are
not communicated by the protocol. Teach Git to advertise and support the
"unborn" feature in "ls-refs" (by default, this is advertised, but
server administrators may turn this off through the lsrefs.unborn
config). This feature indicates that "ls-refs" supports the "unborn"
argument; when it is specified, "ls-refs" will send the HEAD symref with
the name of its unborn target.
This change is only for protocol v2. A similar change for protocol v0
would require independent protocol design (there being no analogous
position to signal support for "unborn") and client-side plumbing of the
data required, so the scope of this patch set is limited to protocol v2.
The client side will be updated to use this in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-02-05 23:48:47 +03:00
|
|
|
static int config_read;
|
|
|
|
static int advertise_unborn;
|
|
|
|
static int allow_unborn;
|
|
|
|
|
|
|
|
static void ensure_config_read(void)
|
|
|
|
{
|
|
|
|
const char *str = NULL;
|
|
|
|
|
|
|
|
if (config_read)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (repo_config_get_string_tmp(the_repository, "lsrefs.unborn", &str)) {
|
|
|
|
/*
|
|
|
|
* If there is no such config, advertise and allow it by
|
|
|
|
* default.
|
|
|
|
*/
|
|
|
|
advertise_unborn = 1;
|
|
|
|
allow_unborn = 1;
|
|
|
|
} else {
|
|
|
|
if (!strcmp(str, "advertise")) {
|
|
|
|
advertise_unborn = 1;
|
|
|
|
allow_unborn = 1;
|
|
|
|
} else if (!strcmp(str, "allow")) {
|
|
|
|
allow_unborn = 1;
|
|
|
|
} else if (!strcmp(str, "ignore")) {
|
|
|
|
/* do nothing */
|
|
|
|
} else {
|
|
|
|
die(_("invalid value '%s' for lsrefs.unborn"), str);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
config_read = 1;
|
|
|
|
}
|
|
|
|
|
ls-refs: ignore very long ref-prefix counts
Because each "ref-prefix" capability from the client comes in its own
pkt-line, there's no limit to the number of them that a misbehaving
client may send. We read them all into a strvec, which means the client
can waste arbitrary amounts of our memory by just sending us "ref-prefix
foo" over and over.
One possible solution is to just drop the connection when the limit is
reached. If we set it high enough, then only misbehaving or malicious
clients would hit it. But "high enough" is vague, and it's unfriendly if
we guess wrong and a legitimate client hits this.
But we can do better. Since supporting the ref-prefix capability is
optional anyway, the client has to further cull the response based on
their own patterns. So we can simply ignore the patterns once we cross a
certain threshold. Note that we have to ignore _all_ patterns, not just
the ones past our limit (since otherwise we'd send too little data).
The limit here is fairly arbitrary, and probably much higher than anyone
would need in practice. It might be worth limiting it further, if only
because we check it linearly (so with "m" local refs and "n" patterns,
we do "m * n" string comparisons). But if we care about optimizing this,
an even better solution may be a more advanced data structure anyway.
I didn't bother making the limit configurable, since it's so high and
since Git should behave correctly in either case. It wouldn't be too
hard to do, but it makes both the code and documentation more complex.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-09-15 21:35:31 +03:00
|
|
|
/*
 * Upper bound on the number of "ref-prefix" lines we are willing to collect
 * from a client. Once a request reaches this count it is treated as having
 * "too many" prefixes and the prefix feature is not used for it at all.
 */
#define TOO_MANY_PREFIXES 65536
|
|
|
|
|
2018-03-15 20:31:20 +03:00
|
|
|
/*
|
|
|
|
* Check if one of the prefixes is a prefix of the ref.
|
|
|
|
* If no prefixes were provided, all refs match.
|
|
|
|
*/
|
2020-07-28 23:24:53 +03:00
|
|
|
static int ref_match(const struct strvec *prefixes, const char *refname)
|
2018-03-15 20:31:20 +03:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2020-07-29 03:37:20 +03:00
|
|
|
if (!prefixes->nr)
|
2018-03-15 20:31:20 +03:00
|
|
|
return 1; /* no restriction */
|
|
|
|
|
2020-07-29 03:37:20 +03:00
|
|
|
for (i = 0; i < prefixes->nr; i++) {
|
|
|
|
const char *prefix = prefixes->v[i];
|
2018-03-15 20:31:20 +03:00
|
|
|
|
|
|
|
if (starts_with(refname, prefix))
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct ls_refs_data {
|
|
|
|
unsigned peel;
|
|
|
|
unsigned symrefs;
|
2020-07-28 23:24:53 +03:00
|
|
|
struct strvec prefixes;
|
2021-08-25 16:49:51 +03:00
|
|
|
struct strbuf buf;
|
ls-refs: report unborn targets of symrefs
When cloning, we choose the default branch based on the remote HEAD.
But if there is no remote HEAD reported (which could happen if the
target of the remote HEAD is unborn), we'll fall back to using our local
init.defaultBranch. Traditionally this hasn't been a big deal, because
most repos used "master" as the default. But these days it is likely to
cause confusion if the server and client implementations choose
different values (e.g., if the remote started with "main", we may choose
"master" locally, create commits there, and then the user is surprised
when they push to "master" and not "main").
To solve this, the remote needs to communicate the target of the HEAD
symref, even if it is unborn, and "git clone" needs to use this
information.
Currently, symrefs that have unborn targets (such as in this case) are
not communicated by the protocol. Teach Git to advertise and support the
"unborn" feature in "ls-refs" (by default, this is advertised, but
server administrators may turn this off through the lsrefs.unborn
config). This feature indicates that "ls-refs" supports the "unborn"
argument; when it is specified, "ls-refs" will send the HEAD symref with
the name of its unborn target.
This change is only for protocol v2. A similar change for protocol v0
would require independent protocol design (there being no analogous
position to signal support for "unborn") and client-side plumbing of the
data required, so the scope of this patch set is limited to protocol v2.
The client side will be updated to use this in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-02-05 23:48:47 +03:00
|
|
|
unsigned unborn : 1;
|
2018-03-15 20:31:20 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
static int send_ref(const char *refname, const struct object_id *oid,
|
|
|
|
int flag, void *cb_data)
|
|
|
|
{
|
|
|
|
struct ls_refs_data *data = cb_data;
|
|
|
|
const char *refname_nons = strip_namespace(refname);
|
2021-08-25 16:49:51 +03:00
|
|
|
|
|
|
|
strbuf_reset(&data->buf);
|
2018-03-15 20:31:20 +03:00
|
|
|
|
2018-12-18 15:47:50 +03:00
|
|
|
if (ref_is_hidden(refname_nons, refname))
|
|
|
|
return 0;
|
|
|
|
|
2019-01-18 02:33:05 +03:00
|
|
|
if (!ref_match(&data->prefixes, refname_nons))
|
2018-03-15 20:31:20 +03:00
|
|
|
return 0;
|
|
|
|
|
ls-refs: report unborn targets of symrefs
When cloning, we choose the default branch based on the remote HEAD.
But if there is no remote HEAD reported (which could happen if the
target of the remote HEAD is unborn), we'll fall back to using our local
init.defaultBranch. Traditionally this hasn't been a big deal, because
most repos used "master" as the default. But these days it is likely to
cause confusion if the server and client implementations choose
different values (e.g., if the remote started with "main", we may choose
"master" locally, create commits there, and then the user is surprised
when they push to "master" and not "main").
To solve this, the remote needs to communicate the target of the HEAD
symref, even if it is unborn, and "git clone" needs to use this
information.
Currently, symrefs that have unborn targets (such as in this case) are
not communicated by the protocol. Teach Git to advertise and support the
"unborn" feature in "ls-refs" (by default, this is advertised, but
server administrators may turn this off through the lsrefs.unborn
config). This feature indicates that "ls-refs" supports the "unborn"
argument; when it is specified, "ls-refs" will send the HEAD symref with
the name of its unborn target.
This change is only for protocol v2. A similar change for protocol v0
would require independent protocol design (there being no analogous
position to signal support for "unborn") and client-side plumbing of the
data required, so the scope of this patch set is limited to protocol v2.
The client side will be updated to use this in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-02-05 23:48:47 +03:00
|
|
|
if (oid)
|
2021-08-25 16:49:51 +03:00
|
|
|
strbuf_addf(&data->buf, "%s %s", oid_to_hex(oid), refname_nons);
|
ls-refs: report unborn targets of symrefs
When cloning, we choose the default branch based on the remote HEAD.
But if there is no remote HEAD reported (which could happen if the
target of the remote HEAD is unborn), we'll fall back to using our local
init.defaultBranch. Traditionally this hasn't been a big deal, because
most repos used "master" as the default. But these days it is likely to
cause confusion if the server and client implementations choose
different values (e.g., if the remote started with "main", we may choose
"master" locally, create commits there, and then the user is surprised
when they push to "master" and not "main").
To solve this, the remote needs to communicate the target of the HEAD
symref, even if it is unborn, and "git clone" needs to use this
information.
Currently, symrefs that have unborn targets (such as in this case) are
not communicated by the protocol. Teach Git to advertise and support the
"unborn" feature in "ls-refs" (by default, this is advertised, but
server administrators may turn this off through the lsrefs.unborn
config). This feature indicates that "ls-refs" supports the "unborn"
argument; when it is specified, "ls-refs" will send the HEAD symref with
the name of its unborn target.
This change is only for protocol v2. A similar change for protocol v0
would require independent protocol design (there being no analogous
position to signal support for "unborn") and client-side plumbing of the
data required, so the scope of this patch set is limited to protocol v2.
The client side will be updated to use this in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-02-05 23:48:47 +03:00
|
|
|
else
|
2021-08-25 16:49:51 +03:00
|
|
|
strbuf_addf(&data->buf, "unborn %s", refname_nons);
|
2018-03-15 20:31:20 +03:00
|
|
|
if (data->symrefs && flag & REF_ISSYMREF) {
|
|
|
|
struct object_id unused;
|
|
|
|
const char *symref_target = resolve_ref_unsafe(refname, 0,
|
|
|
|
&unused,
|
|
|
|
&flag);
|
|
|
|
|
|
|
|
if (!symref_target)
|
|
|
|
die("'%s' is a symref but it is not?", refname);
|
|
|
|
|
2021-08-25 16:49:51 +03:00
|
|
|
strbuf_addf(&data->buf, " symref-target:%s",
|
upload-pack: strip namespace from symref data
Since 7171d8c15f (upload-pack: send symbolic ref information as
capability, 2013-09-17), we've sent cloning and fetching clients special
information about which branch HEAD is pointing to, so that they don't
have to guess based on matching up commit ids.
However, this feature has never worked properly with the GIT_NAMESPACE
feature. Because upload-pack uses head_ref_namespaced(find_symref), we
do find and report on refs/namespaces/foo/HEAD instead of the actual
HEAD of the repo. This makes sense, since the branch pointed to by the
top-level HEAD may not be advertised at all. But we do two things wrong:
1. We report the full name refs/namespaces/foo/HEAD, instead of just
HEAD. Meaning no client is going to bother doing anything with that
symref, since we're not otherwise advertising it.
2. We report the symref destination using its full name (e.g.,
refs/namespaces/foo/refs/heads/master). That's similarly useless to
the client, who only saw "refs/heads/master" in the advertisement.
We should be stripping the namespace prefix off of both places (which
this patch fixes).
Likely nobody noticed because we tend to do the right thing anyway. Bug
(1) means that we said nothing about HEAD (just refs/namespace/foo/HEAD).
And so the client half of the code, from a45b5f0552 (connect: annotate
refs with their symref information in get_remote_head(), 2013-09-17),
does not annotate HEAD, and we use the fallback in guess_remote_head(),
matching refs by object id. Which is usually right. It only falls down
in ambiguous cases, like the one laid out in the included test.
This also means that we don't have to worry about breaking anybody who
was putting pre-stripped names into their namespace symrefs when we fix
bug (2). Because of bug (1), nobody would have been using the symref we
advertised in the first place (not to mention that those symrefs would
have appeared broken for any non-namespaced access).
Note that we have separate fixes here for the v0 and v2 protocols. The
symref advertisement moved in v2 to be a part of the ls-refs command.
This actually gets part (1) right, since the symref annotation
piggy-backs on the existing ref advertisement, which is properly
stripped. But it still needs a fix for part (2). The included tests
cover both protocols.
Reported-by: Bryan Turner <bturner@atlassian.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-23 09:11:21 +03:00
|
|
|
strip_namespace(symref_target));
|
2018-03-15 20:31:20 +03:00
|
|
|
}
|
|
|
|
|
ls-refs: report unborn targets of symrefs
When cloning, we choose the default branch based on the remote HEAD.
But if there is no remote HEAD reported (which could happen if the
target of the remote HEAD is unborn), we'll fall back to using our local
init.defaultBranch. Traditionally this hasn't been a big deal, because
most repos used "master" as the default. But these days it is likely to
cause confusion if the server and client implementations choose
different values (e.g., if the remote started with "main", we may choose
"master" locally, create commits there, and then the user is surprised
when they push to "master" and not "main").
To solve this, the remote needs to communicate the target of the HEAD
symref, even if it is unborn, and "git clone" needs to use this
information.
Currently, symrefs that have unborn targets (such as in this case) are
not communicated by the protocol. Teach Git to advertise and support the
"unborn" feature in "ls-refs" (by default, this is advertised, but
server administrators may turn this off through the lsrefs.unborn
config). This feature indicates that "ls-refs" supports the "unborn"
argument; when it is specified, "ls-refs" will send the HEAD symref with
the name of its unborn target.
This change is only for protocol v2. A similar change for protocol v0
would require independent protocol design (there being no analogous
position to signal support for "unborn") and client-side plumbing of the
data required, so the scope of this patch set is limited to protocol v2.
The client side will be updated to use this in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-02-05 23:48:47 +03:00
|
|
|
if (data->peel && oid) {
|
2018-03-15 20:31:20 +03:00
|
|
|
struct object_id peeled;
|
refs: switch peel_ref() to peel_iterated_oid()
The peel_ref() interface is confusing and error-prone:
- it's typically used by ref iteration callbacks that have both a
refname and oid. But since they pass only the refname, we may load
the ref value from the filesystem again. This is inefficient, but
also means we are open to a race if somebody simultaneously updates
the ref. E.g., this:
int some_ref_cb(const char *refname, const struct object_id *oid, ...)
{
if (!peel_ref(refname, &peeled))
printf("%s peels to %s",
oid_to_hex(oid), oid_to_hex(&peeled);
}
could print nonsense. It is correct to say "refname peels to..."
(you may see the "before" value or the "after" value, either of
which is consistent), but mentioning both oids may be mixing
before/after values.
Worse, whether this is possible depends on whether the optimization
to read from the current iterator value kicks in. So it is actually
not possible with:
for_each_ref(some_ref_cb);
but it _is_ possible with:
head_ref(some_ref_cb);
which does not use the iterator mechanism (though in practice, HEAD
should never peel to anything, so this may not be triggerable).
- it must take a fully-qualified refname for the read_ref_full() code
path to work. Yet we routinely pass it partial refnames from
callbacks to for_each_tag_ref(), etc. This happens to work when
iterating because there we do not call read_ref_full() at all, and
only use the passed refname to check if it is the same as the
iterator. But the requirements for the function parameters are quite
unclear.
Instead of taking a refname, let's instead take an oid. That fixes both
problems. It's a little funny for a "ref" function not to involve refs
at all. The key thing is that it's optimizing under the hood based on
having access to the ref iterator. So let's change the name to make it
clear why you'd want this function versus just peel_object().
There are two other directions I considered but rejected:
- we could pass the peel information into the each_ref_fn callback.
However, we don't know if the caller actually wants it or not. For
packed-refs, providing it is essentially free. But for loose refs,
we actually have to peel the object, which would be wasteful in most
cases. We could likewise pass in a flag to the callback indicating
whether the peeled information is known, but that complicates those
callbacks, as they then have to decide whether to manually peel
themselves. Plus it requires changing the interface of every
callback, whether they care about peeling or not, and there are many
of them.
- we could make a function to return the peeled value of the current
iterated ref (computing it if necessary), and BUG() otherwise. I.e.:
int peel_current_iterated_ref(struct object_id *out);
Each of the current callers is an each_ref_fn callback, so they'd
mostly be happy. But:
- we use those callbacks with functions like head_ref(), which do
not use the iteration code. So we'd need to handle the fallback
case there, anyway.
- it's possible that a caller would want to call into generic code
that sometimes is used during iteration and sometimes not. This
encapsulates the logic to do the fast thing when possible, and
fallback when necessary.
The implementation is mostly obvious, but I want to call out a few
things in the patch:
- the test-tool coverage for peel_ref() is now meaningless, as it all
collapses to a single peel_object() call (arguably they were pretty
uninteresting before; the tricky part of that function is the
fast-path we see during iteration, but these calls didn't trigger
that). I've just dropped it entirely, though note that some other
tests relied on the tags we created; I've moved that creation to the
tests where it matters.
- we no longer need to take a ref_store parameter, since we'd never
look up a ref now. We do still rely on a global "current iterator"
variable which _could_ be kept per-ref-store. But in practice this
is only useful if there are multiple recursive iterations, at which
point the more appropriate solution is probably a stack of
iterators. No caller used the actual ref-store parameter anyway
(they all call the wrapper that passes the_repository).
- the original only kicked in the optimization when the "refname"
pointer matched (i.e., not string comparison). We do likewise with
the "oid" parameter here, but fall back to doing an actual oideq()
call. This in theory lets us kick in the optimization more often,
though in practice no current caller cares. It should never be
wrong, though (peeling is a property of an object, so two refs
pointing to the same object would peel identically).
- the original took care not to touch the peeled out-parameter unless
we found something to put in it. But no caller cares about this, and
anyway, it is enforced by peel_object() itself (and even in the
optimized iterator case, that's where we eventually end up). We can
shorten the code and avoid an extra copy by just passing the
out-parameter through the stack.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-20 22:44:43 +03:00
|
|
|
if (!peel_iterated_oid(oid, &peeled))
|
2021-08-25 16:49:51 +03:00
|
|
|
strbuf_addf(&data->buf, " peeled:%s", oid_to_hex(&peeled));
|
2018-03-15 20:31:20 +03:00
|
|
|
}
|
|
|
|
|
2021-08-25 16:49:51 +03:00
|
|
|
strbuf_addch(&data->buf, '\n');
|
2021-09-21 01:20:40 +03:00
|
|
|
packet_fwrite(stdout, data->buf.buf, data->buf.len);
|
2018-03-15 20:31:20 +03:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
ls-refs: report unborn targets of symrefs
When cloning, we choose the default branch based on the remote HEAD.
But if there is no remote HEAD reported (which could happen if the
target of the remote HEAD is unborn), we'll fall back to using our local
init.defaultBranch. Traditionally this hasn't been a big deal, because
most repos used "master" as the default. But these days it is likely to
cause confusion if the server and client implementations choose
different values (e.g., if the remote started with "main", we may choose
"master" locally, create commits there, and then the user is surprised
when they push to "master" and not "main").
To solve this, the remote needs to communicate the target of the HEAD
symref, even if it is unborn, and "git clone" needs to use this
information.
Currently, symrefs that have unborn targets (such as in this case) are
not communicated by the protocol. Teach Git to advertise and support the
"unborn" feature in "ls-refs" (by default, this is advertised, but
server administrators may turn this off through the lsrefs.unborn
config). This feature indicates that "ls-refs" supports the "unborn"
argument; when it is specified, "ls-refs" will send the HEAD symref with
the name of its unborn target.
This change is only for protocol v2. A similar change for protocol v0
would require independent protocol design (there being no analogous
position to signal support for "unborn") and client-side plumbing of the
data required, so the scope of this patch set is limited to protocol v2.
The client side will be updated to use this in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-02-05 23:48:47 +03:00
|
|
|
static void send_possibly_unborn_head(struct ls_refs_data *data)
|
|
|
|
{
|
|
|
|
struct strbuf namespaced = STRBUF_INIT;
|
|
|
|
struct object_id oid;
|
|
|
|
int flag;
|
|
|
|
int oid_is_null;
|
|
|
|
|
|
|
|
strbuf_addf(&namespaced, "%sHEAD", get_git_namespace());
|
|
|
|
if (!resolve_ref_unsafe(namespaced.buf, 0, &oid, &flag))
|
|
|
|
return; /* bad ref */
|
|
|
|
oid_is_null = is_null_oid(&oid);
|
|
|
|
if (!oid_is_null ||
|
|
|
|
(data->unborn && data->symrefs && (flag & REF_ISSYMREF)))
|
|
|
|
send_ref(namespaced.buf, oid_is_null ? NULL : &oid, flag, data);
|
|
|
|
strbuf_release(&namespaced);
|
|
|
|
}
|
|
|
|
|
2018-12-18 15:47:50 +03:00
|
|
|
/*
 * Config callback used by ls-refs: feeds each key/value pair to the
 * hidden-refs parser. `data` is not used.
 */
static int ls_refs_config(const char *var, const char *value, void *data)
{
	/*
	 * Only fetches are served over v2 for now, so only the "uploadpack"
	 * flavor of the hide-refs config is honored. "receive" may have to
	 * be added eventually, but it is not yet known how that information
	 * would reach ls-refs.
	 */
	const char *section = "uploadpack";

	return parse_hide_refs_config(var, value, section);
}
|
|
|
|
|
2021-08-05 04:25:38 +03:00
|
|
|
int ls_refs(struct repository *r, struct packet_reader *request)
|
2018-03-15 20:31:20 +03:00
|
|
|
{
|
|
|
|
struct ls_refs_data data;
|
|
|
|
|
|
|
|
memset(&data, 0, sizeof(data));
|
2021-01-20 19:04:25 +03:00
|
|
|
strvec_init(&data.prefixes);
|
2021-08-25 16:49:51 +03:00
|
|
|
strbuf_init(&data.buf, 0);
|
2018-03-15 20:31:20 +03:00
|
|
|
|
ls-refs: report unborn targets of symrefs
When cloning, we choose the default branch based on the remote HEAD.
But if there is no remote HEAD reported (which could happen if the
target of the remote HEAD is unborn), we'll fall back to using our local
init.defaultBranch. Traditionally this hasn't been a big deal, because
most repos used "master" as the default. But these days it is likely to
cause confusion if the server and client implementations choose
different values (e.g., if the remote started with "main", we may choose
"master" locally, create commits there, and then the user is surprised
when they push to "master" and not "main").
To solve this, the remote needs to communicate the target of the HEAD
symref, even if it is unborn, and "git clone" needs to use this
information.
Currently, symrefs that have unborn targets (such as in this case) are
not communicated by the protocol. Teach Git to advertise and support the
"unborn" feature in "ls-refs" (by default, this is advertised, but
server administrators may turn this off through the lsrefs.unborn
config). This feature indicates that "ls-refs" supports the "unborn"
argument; when it is specified, "ls-refs" will send the HEAD symref with
the name of its unborn target.
This change is only for protocol v2. A similar change for protocol v0
would require independent protocol design (there being no analogous
position to signal support for "unborn") and client-side plumbing of the
data required, so the scope of this patch set is limited to protocol v2.
The client side will be updated to use this in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-02-05 23:48:47 +03:00
|
|
|
ensure_config_read();
|
2018-12-18 15:47:50 +03:00
|
|
|
git_config(ls_refs_config, NULL);
|
|
|
|
|
2020-03-27 11:03:38 +03:00
|
|
|
while (packet_reader_read(request) == PACKET_READ_NORMAL) {
|
2018-03-15 20:31:20 +03:00
|
|
|
const char *arg = request->line;
|
|
|
|
const char *out;
|
|
|
|
|
|
|
|
if (!strcmp("peel", arg))
|
|
|
|
data.peel = 1;
|
|
|
|
else if (!strcmp("symrefs", arg))
|
|
|
|
data.symrefs = 1;
|
ls-refs: ignore very long ref-prefix counts
Because each "ref-prefix" capability from the client comes in its own
pkt-line, there's no limit to the number of them that a misbehaving
client may send. We read them all into a strvec, which means the client
can waste arbitrary amounts of our memory by just sending us "ref-prefix
foo" over and over.
One possible solution is to just drop the connection when the limit is
reached. If we set it high enough, then only misbehaving or malicious
clients would hit it. But "high enough" is vague, and it's unfriendly if
we guess wrong and a legitimate client hits this.
But we can do better. Since supporting the ref-prefix capability is
optional anyway, the client has to further cull the response based on
their own patterns. So we can simply ignore the patterns once we cross a
certain threshold. Note that we have to ignore _all_ patterns, not just
the ones past our limit (since otherwise we'd send too little data).
The limit here is fairly arbitrary, and probably much higher than anyone
would need in practice. It might be worth limiting it further, if only
because we check it linearly (so with "m" local refs and "n" patterns,
we do "m * n" string comparisons). But if we care about optimizing this,
an even better solution may be a more advanced data structure anyway.
I didn't bother making the limit configurable, since it's so high and
since Git should behave correctly in either case. It wouldn't be too
hard to do, but it makes both the code and documentation more complex.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-09-15 21:35:31 +03:00
|
|
|
else if (skip_prefix(arg, "ref-prefix ", &out)) {
|
|
|
|
if (data.prefixes.nr < TOO_MANY_PREFIXES)
|
|
|
|
strvec_push(&data.prefixes, out);
|
|
|
|
}
|
ls-refs: report unborn targets of symrefs
When cloning, we choose the default branch based on the remote HEAD.
But if there is no remote HEAD reported (which could happen if the
target of the remote HEAD is unborn), we'll fall back to using our local
init.defaultBranch. Traditionally this hasn't been a big deal, because
most repos used "master" as the default. But these days it is likely to
cause confusion if the server and client implementations choose
different values (e.g., if the remote started with "main", we may choose
"master" locally, create commits there, and then the user is surprised
when they push to "master" and not "main").
To solve this, the remote needs to communicate the target of the HEAD
symref, even if it is unborn, and "git clone" needs to use this
information.
Currently, symrefs that have unborn targets (such as in this case) are
not communicated by the protocol. Teach Git to advertise and support the
"unborn" feature in "ls-refs" (by default, this is advertised, but
server administrators may turn this off through the lsrefs.unborn
config). This feature indicates that "ls-refs" supports the "unborn"
argument; when it is specified, "ls-refs" will send the HEAD symref with
the name of its unborn target.
This change is only for protocol v2. A similar change for protocol v0
would require independent protocol design (there being no analogous
position to signal support for "unborn") and client-side plumbing of the
data required, so the scope of this patch set is limited to protocol v2.
The client side will be updated to use this in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-02-05 23:48:47 +03:00
|
|
|
else if (!strcmp("unborn", arg))
|
|
|
|
data.unborn = allow_unborn;
|
2021-09-15 21:36:38 +03:00
|
|
|
else
|
|
|
|
die(_("unexpected line: '%s'"), arg);
|
2018-03-15 20:31:20 +03:00
|
|
|
}
|
|
|
|
|
2020-03-27 11:03:38 +03:00
|
|
|
if (request->status != PACKET_READ_FLUSH)
|
|
|
|
die(_("expected flush after ls-refs arguments"));
|
|
|
|
|
ls-refs: ignore very long ref-prefix counts
Because each "ref-prefix" capability from the client comes in its own
pkt-line, there's no limit to the number of them that a misbehaving
client may send. We read them all into a strvec, which means the client
can waste arbitrary amounts of our memory by just sending us "ref-prefix
foo" over and over.
One possible solution is to just drop the connection when the limit is
reached. If we set it high enough, then only misbehaving or malicious
clients would hit it. But "high enough" is vague, and it's unfriendly if
we guess wrong and a legitimate client hits this.
But we can do better. Since supporting the ref-prefix capability is
optional anyway, the client has to further cull the response based on
their own patterns. So we can simply ignore the patterns once we cross a
certain threshold. Note that we have to ignore _all_ patterns, not just
the ones past our limit (since otherwise we'd send too little data).
The limit here is fairly arbitrary, and probably much higher than anyone
would need in practice. It might be worth limiting it further, if only
because we check it linearly (so with "m" local refs and "n" patterns,
we do "m * n" string comparisons). But if we care about optimizing this,
an even better solution may be a more advanced data structure anyway.
I didn't bother making the limit configurable, since it's so high and
since Git should behave correctly in either case. It wouldn't be too
hard to do, but it makes both the code and documentation more complex.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-09-15 21:35:31 +03:00
|
|
|
/*
|
|
|
|
* If we saw too many prefixes, we must avoid using them at all; as
|
|
|
|
* soon as we have any prefix, they are meant to form a comprehensive
|
|
|
|
* list.
|
|
|
|
*/
|
|
|
|
if (data.prefixes.nr >= TOO_MANY_PREFIXES)
|
|
|
|
strvec_clear(&data.prefixes);
|
|
|
|
|
ls-refs: report unborn targets of symrefs
When cloning, we choose the default branch based on the remote HEAD.
But if there is no remote HEAD reported (which could happen if the
target of the remote HEAD is unborn), we'll fall back to using our local
init.defaultBranch. Traditionally this hasn't been a big deal, because
most repos used "master" as the default. But these days it is likely to
cause confusion if the server and client implementations choose
different values (e.g., if the remote started with "main", we may choose
"master" locally, create commits there, and then the user is surprised
when they push to "master" and not "main").
To solve this, the remote needs to communicate the target of the HEAD
symref, even if it is unborn, and "git clone" needs to use this
information.
Currently, symrefs that have unborn targets (such as in this case) are
not communicated by the protocol. Teach Git to advertise and support the
"unborn" feature in "ls-refs" (by default, this is advertised, but
server administrators may turn this off through the lsrefs.unborn
config). This feature indicates that "ls-refs" supports the "unborn"
argument; when it is specified, "ls-refs" will send the HEAD symref with
the name of its unborn target.
This change is only for protocol v2. A similar change for protocol v0
would require independent protocol design (there being no analogous
position to signal support for "unborn") and client-side plumbing of the
data required, so the scope of this patch set is limited to protocol v2.
The client side will be updated to use this in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-02-05 23:48:47 +03:00
|
|
|
send_possibly_unborn_head(&data);
|
ls-refs.c: traverse prefixes of disjoint "ref-prefix" sets
ls-refs performs a single revision walk over the whole ref namespace,
and sends the ones that match one of the given ref prefixes down to the
user.
This can be expensive if there are many refs overall, but the portion of
them covered by the given prefixes is small by comparison.
To attempt to reduce the difference between the number of refs
traversed, and the number of refs sent, only traverse references which
are in the longest common prefix of the given prefixes. This is very
reminiscent of the approach taken in b31e2680c4 (ref-filter.c: find
disjoint pattern prefixes, 2019-06-26) which does an analogous thing for
multi-patterned 'git for-each-ref' invocations.
The callback 'send_ref' is resilient to the extra refs this may
traverse: it discards any ref which does not begin with at least one of
the specified prefixes.
Similarly, the code introduced in b31e2680c4 is resilient to stop early
at metacharacters, but we only pass strict prefixes here. At worst we
would return too many results, but the double checking done by send_ref
will throw away anything that doesn't start with something in the prefix
list.
Finally, if no prefixes were provided, then implicitly add the empty
string (which will match all references) since this matches the existing
behavior (see the "no restrictions" comment in "ls-refs.c:ref_match()").
Original-patch-by: Jacob Vosmaer <jacob@gitlab.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-20 19:04:30 +03:00
|
|
|
if (!data.prefixes.nr)
|
|
|
|
strvec_push(&data.prefixes, "");
|
|
|
|
for_each_fullref_in_prefixes(get_git_namespace(), data.prefixes.v,
|
2021-09-24 21:48:48 +03:00
|
|
|
send_ref, &data);
|
2021-09-01 15:54:42 +03:00
|
|
|
packet_fflush(stdout);
|
2020-07-28 23:24:53 +03:00
|
|
|
strvec_clear(&data.prefixes);
|
2021-08-25 16:49:51 +03:00
|
|
|
strbuf_release(&data.buf);
|
2018-03-15 20:31:20 +03:00
|
|
|
return 0;
|
|
|
|
}
|
ls-refs: report unborn targets of symrefs
When cloning, we choose the default branch based on the remote HEAD.
But if there is no remote HEAD reported (which could happen if the
target of the remote HEAD is unborn), we'll fall back to using our local
init.defaultBranch. Traditionally this hasn't been a big deal, because
most repos used "master" as the default. But these days it is likely to
cause confusion if the server and client implementations choose
different values (e.g., if the remote started with "main", we may choose
"master" locally, create commits there, and then the user is surprised
when they push to "master" and not "main").
To solve this, the remote needs to communicate the target of the HEAD
symref, even if it is unborn, and "git clone" needs to use this
information.
Currently, symrefs that have unborn targets (such as in this case) are
not communicated by the protocol. Teach Git to advertise and support the
"unborn" feature in "ls-refs" (by default, this is advertised, but
server administrators may turn this off through the lsrefs.unborn
config). This feature indicates that "ls-refs" supports the "unborn"
argument; when it is specified, "ls-refs" will send the HEAD symref with
the name of its unborn target.
This change is only for protocol v2. A similar change for protocol v0
would require independent protocol design (there being no analogous
position to signal support for "unborn") and client-side plumbing of the
data required, so the scope of this patch set is limited to protocol v2.
The client side will be updated to use this in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-02-05 23:48:47 +03:00
|
|
|
|
|
|
|
int ls_refs_advertise(struct repository *r, struct strbuf *value)
|
|
|
|
{
|
|
|
|
if (value) {
|
|
|
|
ensure_config_read();
|
|
|
|
if (advertise_unborn)
|
|
|
|
strbuf_addstr(value, "unborn");
|
|
|
|
}
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|