From 259942f549eb235e9d7d095c2db8f3dc279f3958 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 26 Sep 2016 07:59:01 -0400 Subject: [PATCH 01/11] get_sha1: detect buggy calls with multiple disambiguators The get_sha1() family of functions takes a flags field, but some of the flags are mutually exclusive. In particular, we can only handle one disambiguating function, and the flags quietly override each other. Let's instead detect these as programming bugs. Technically some of the flags are supersets of the others, so treating COMMITTISH|TREEISH as just COMMITTISH is not wrong, but it's a good sign the caller is confused. And certainly asking for BLOB|TREE does not work. We can do the check easily with some bit-twiddling, and as a bonus, the bit-mask of disambiguators will come in handy in a future patch. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- cache.h | 5 +++++ sha1_name.c | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/cache.h b/cache.h index d0494c8533..7bd78ca282 100644 --- a/cache.h +++ b/cache.h @@ -1203,6 +1203,11 @@ struct object_context { #define GET_SHA1_FOLLOW_SYMLINKS 0100 #define GET_SHA1_ONLY_TO_DIE 04000 +#define GET_SHA1_DISAMBIGUATORS \ + (GET_SHA1_COMMIT | GET_SHA1_COMMITTISH | \ + GET_SHA1_TREE | GET_SHA1_TREEISH | \ + GET_SHA1_BLOB) + extern int get_sha1(const char *str, unsigned char *sha1); extern int get_sha1_commit(const char *str, unsigned char *sha1); extern int get_sha1_committish(const char *str, unsigned char *sha1); diff --git a/sha1_name.c b/sha1_name.c index faf873cf7f..0ff83a9985 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -327,6 +327,10 @@ static int get_short_sha1(const char *name, int len, unsigned char *sha1, prepare_alt_odb(); memset(&ds, 0, sizeof(ds)); + + if (HAS_MULTI_BITS(flags & GET_SHA1_DISAMBIGUATORS)) + die("BUG: multiple get_short_sha1 disambiguator flags"); + if (flags & GET_SHA1_COMMIT) ds.fn = disambiguate_commit_only; else if (flags & GET_SHA1_COMMITTISH) From 7243ffdd78d56738e568abb544eba00e8079f329 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 26 Sep 2016 07:59:15 -0400 Subject: [PATCH 02/11] get_sha1: avoid repeating ourselves via ONLY_TO_DIE When the revision code cannot parse an argument like "HEAD:foo", it will call maybe_die_on_misspelt_object_name(), which re-runs get_sha1() with an extra ONLY_TO_DIE flag. We then spend more effort to generate a better error message. Unfortunately, a side effect is that our second call may repeat the same error messages from the original get_sha1() call. You can see this with: $ git show 0017 error: short SHA1 0017 is ambiguous. error: short SHA1 0017 is ambiguous. fatal: ambiguous argument '0017': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git [...] -- [...]' where the second "error:" line comes from the ONLY_TO_DIE call. To fix this, we can make ONLY_TO_DIE imply QUIETLY. This is a little odd, because the whole point of ONLY_TO_DIE is to output error messages. But what we want to do is tell the rest of the get_sha1() code (particularly get_sha1_1()) that the _regular_ messages should be quiet, but the only-to-die ones should not. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_name.c | 3 +++ t/t1512-rev-parse-disambiguation.sh | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/sha1_name.c b/sha1_name.c index 0ff83a9985..e7e3a38728 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -1386,6 +1386,9 @@ static int get_sha1_with_context_1(const char *name, const char *cp; int only_to_die = flags & GET_SHA1_ONLY_TO_DIE; + if (only_to_die) + flags |= GET_SHA1_QUIETLY; + memset(oc, 0, sizeof(*oc)); oc->mode = S_IFINVALID; ret = get_sha1_1(name, namelen, sha1, flags); diff --git a/t/t1512-rev-parse-disambiguation.sh b/t/t1512-rev-parse-disambiguation.sh index e221167cfb..16f970982b 100755 --- a/t/t1512-rev-parse-disambiguation.sh +++ b/t/t1512-rev-parse-disambiguation.sh @@ -291,4 +291,10 @@ test_expect_success 'ambiguous short sha1 ref' ' grep "refname.*${REF}.*ambiguous" err ' +test_expect_success C_LOCALE_OUTPUT 'ambiguity errors are not repeated' ' + test_must_fail git rev-parse 00000 2>stderr && + grep "is ambiguous" stderr >errors && + test_line_count = 1 errors +' + test_done From 8a10fea49b1f95daeea445072a60139590b216cf Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 26 Sep 2016 07:59:41 -0400 Subject: [PATCH 03/11] get_sha1: propagate flags to child functions The get_sha1() function is actually implementation by many sub-functions, but we do not always pass our flags around to all of those functions. As a result, we may forget that our caller asked us to resolve with GET_SHA1_QUIETLY and output messages. The two triggerable cases are: 1. Resolving treeish:path will resolve the "treeish" portion using GET_SHA1_TREEISH, dropping all other flags. 2. The peel_onion() function did not take flags at all but recurses to get_sha1_1(), which does. The solution for both is to bitwise-OR their new flags with the existing ones (after dropping any mutually exclusive disambiguation flags). This bug can trigger with "git rev-parse --quiet", which asks for quiet resolution. But it can also happen in a more vanilla code path when we do a follow-up ONLY_TO_DIE invocation of get_sha1(), and that's what the tests check. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_name.c | 18 ++++++++++++------ t/t1512-rev-parse-disambiguation.sh | 14 +++++++++++++- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/sha1_name.c b/sha1_name.c index e7e3a38728..9356ac246d 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -681,12 +681,12 @@ struct object *peel_to_type(const char *name, int namelen, } } -static int peel_onion(const char *name, int len, unsigned char *sha1) +static int peel_onion(const char *name, int len, unsigned char *sha1, + unsigned lookup_flags) { unsigned char outer[20]; const char *sp; unsigned int expected_type = 0; - unsigned lookup_flags = 0; struct object *o; /* @@ -726,10 +726,11 @@ static int peel_onion(const char *name, int len, unsigned char *sha1) else return -1; + lookup_flags &= ~GET_SHA1_DISAMBIGUATORS; if (expected_type == OBJ_COMMIT) - lookup_flags = GET_SHA1_COMMITTISH; + lookup_flags |= GET_SHA1_COMMITTISH; else if (expected_type == OBJ_TREE) - lookup_flags = GET_SHA1_TREEISH; + lookup_flags |= GET_SHA1_TREEISH; if (get_sha1_1(name, sp - name - 2, outer, lookup_flags)) return -1; @@ -830,7 +831,7 @@ static int get_sha1_1(const char *name, int len, unsigned char *sha1, unsigned l return get_nth_ancestor(name, len1, sha1, num); } - ret = peel_onion(name, len, sha1); + ret = peel_onion(name, len, sha1, lookup_flags); if (!ret) return 0; @@ -1465,7 +1466,12 @@ static int get_sha1_with_context_1(const char *name, if (*cp == ':') { unsigned char tree_sha1[20]; int len = cp - name; - if (!get_sha1_1(name, len, tree_sha1, GET_SHA1_TREEISH)) { + unsigned sub_flags = flags; + + sub_flags &= ~GET_SHA1_DISAMBIGUATORS; + sub_flags |= GET_SHA1_TREEISH; + + if (!get_sha1_1(name, len, tree_sha1, sub_flags)) { const char *filename = cp+1; char *new_filename = NULL; diff --git a/t/t1512-rev-parse-disambiguation.sh b/t/t1512-rev-parse-disambiguation.sh index 16f970982b..30e0b80f05 100755 --- a/t/t1512-rev-parse-disambiguation.sh +++ b/t/t1512-rev-parse-disambiguation.sh @@ -291,10 +291,22 @@ test_expect_success 'ambiguous short sha1 ref' ' grep "refname.*${REF}.*ambiguous" err ' -test_expect_success C_LOCALE_OUTPUT 'ambiguity errors are not repeated' ' +test_expect_success C_LOCALE_OUTPUT 'ambiguity errors are not repeated (raw)' ' test_must_fail git rev-parse 00000 2>stderr && grep "is ambiguous" stderr >errors && test_line_count = 1 errors ' +test_expect_success C_LOCALE_OUTPUT 'ambiguity errors are not repeated (treeish)' ' + test_must_fail git rev-parse 00000:foo 2>stderr && + grep "is ambiguous" stderr >errors && + test_line_count = 1 errors +' + +test_expect_success C_LOCALE_OUTPUT 'ambiguity errors are not repeated (peel)' ' + test_must_fail git rev-parse 00000^{commit} 2>stderr && + grep "is ambiguous" stderr >errors && + test_line_count = 1 errors +' + test_done From 5d5def2aa56dc9b6d4eccfb333fd08aa713733e7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 26 Sep 2016 07:59:48 -0400 Subject: [PATCH 04/11] get_short_sha1: parse tags when looking for treeish The treeish disambiguation function tries to peel tags, but it does so by calling: deref_tag(lookup_object(sha1), ...); This will only work if we have previously looked at the tag and created a "struct tag" for it. Since parsing revision arguments typically happens before anything else, this is usually not the case, and we would fail to peel the tag (we are lucky that deref_tag() gracefully handles the NULL and does not segfault). Instead, we can use parse_object(). Note that this is the same fix done by 94d75d1 (get_short_sha1(): correctly disambiguate type-limited abbreviation, 2013-07-01), but that commit fixed only the committish disambiguator, and left the bug in the treeish one. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_name.c | 2 +- t/t1512-rev-parse-disambiguation.sh | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sha1_name.c b/sha1_name.c index 9356ac246d..66365049a7 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -269,7 +269,7 @@ static int disambiguate_treeish_only(const unsigned char *sha1, void *cb_data_un return 0; /* We need to do this the hard way... */ - obj = deref_tag(lookup_object(sha1), NULL, 0); + obj = deref_tag(parse_object(sha1), NULL, 0); if (obj && (obj->type == OBJ_TREE || obj->type == OBJ_COMMIT)) return 1; return 0; diff --git a/t/t1512-rev-parse-disambiguation.sh b/t/t1512-rev-parse-disambiguation.sh index 30e0b80f05..dfd356721e 100755 --- a/t/t1512-rev-parse-disambiguation.sh +++ b/t/t1512-rev-parse-disambiguation.sh @@ -264,6 +264,13 @@ test_expect_success 'ambiguous commit-ish' ' test_must_fail git log 000000000... ' +# There are three objects with this prefix: a blob, a tree, and a tag. We know +# the blob will not pass as a treeish, but the tree and tag should (and thus +# cause an error). +test_expect_success 'ambiguous tags peel to treeish' ' + test_must_fail git rev-parse 0000000000f^{tree} +' + test_expect_success 'rev-parse --disambiguate' ' # The test creates 16 objects that share the prefix and two # commits created by commit-tree in earlier tests share a From 0016043bf46f4b85054c61f9000ccc58d0ef4ad7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 26 Sep 2016 08:00:04 -0400 Subject: [PATCH 05/11] get_short_sha1: refactor init of disambiguation code The disambiguation machinery has two callers: get_short_sha1 and for_each_abbrev. Both need to repeat much of the same setup: declaring buffers, sanity-checking lengths, preparing the prefixes, etc. Let's pull that into a single init function so we can avoid repeating ourselves. Pulling the buffers into the "struct disambiguate_state" isn't strictly necessary, but it does make things simpler for the callers, who no longer have to worry about sizing them correctly (i.e., it's an implicit requirement that the caller provide 20- and 40-byte buffers). And while we're touching this code, we can convert any magic-number sizes to the more modern GIT_SHA1_* constants. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_name.c | 79 ++++++++++++++++++++++++----------------------------- 1 file changed, 35 insertions(+), 44 deletions(-) diff --git a/sha1_name.c b/sha1_name.c index 66365049a7..bd10700b30 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -13,9 +13,13 @@ static int get_sha1_oneline(const char *, unsigned char *, struct commit_list *) typedef int (*disambiguate_hint_fn)(const unsigned char *, void *); struct disambiguate_state { + int len; /* length of prefix in hex chars */ + char hex_pfx[GIT_SHA1_HEXSZ]; + unsigned char bin_pfx[GIT_SHA1_RAWSZ]; + disambiguate_hint_fn fn; void *cb_data; - unsigned char candidate[20]; + unsigned char candidate[GIT_SHA1_RAWSZ]; unsigned candidate_exists:1; unsigned candidate_checked:1; unsigned candidate_ok:1; @@ -72,10 +76,10 @@ static void update_candidates(struct disambiguate_state *ds, const unsigned char /* otherwise, current can be discarded and candidate is still good */ } -static void find_short_object_filename(int len, const char *hex_pfx, struct disambiguate_state *ds) +static void find_short_object_filename(struct disambiguate_state *ds) { struct alternate_object_database *alt; - char hex[40]; + char hex[GIT_SHA1_HEXSZ]; static struct alternate_object_database *fakeent; if (!fakeent) { @@ -95,7 +99,7 @@ static void find_short_object_filename(int len, const char *hex_pfx, struct disa } fakeent->next = alt_odb_list; - xsnprintf(hex, sizeof(hex), "%.2s", hex_pfx); + xsnprintf(hex, sizeof(hex), "%.2s", ds->hex_pfx); for (alt = fakeent; alt && !ds->ambiguous; alt = alt->next) { struct dirent *de; DIR *dir; @@ -103,7 +107,7 @@ static void find_short_object_filename(int len, const char *hex_pfx, struct disa * every alt_odb struct has 42 extra bytes after the base * for exactly this purpose */ - xsnprintf(alt->name, 42, "%.2s/", hex_pfx); + xsnprintf(alt->name, 42, "%.2s/", ds->hex_pfx); dir = opendir(alt->base); if (!dir) continue; @@ -113,7 +117,7 @@ static void find_short_object_filename(int len, const char *hex_pfx, struct disa if (strlen(de->d_name) != 38) continue; - if (memcmp(de->d_name, hex_pfx + 2, len - 2)) + if (memcmp(de->d_name, ds->hex_pfx + 2, ds->len - 2)) continue; memcpy(hex + 2, de->d_name, 38); if (!get_sha1_hex(hex, sha1)) @@ -138,9 +142,7 @@ static int match_sha(unsigned len, const unsigned char *a, const unsigned char * return 1; } -static void unique_in_pack(int len, - const unsigned char *bin_pfx, - struct packed_git *p, +static void unique_in_pack(struct packed_git *p, struct disambiguate_state *ds) { uint32_t num, last, i, first = 0; @@ -155,7 +157,7 @@ static void unique_in_pack(int len, int cmp; current = nth_packed_object_sha1(p, mid); - cmp = hashcmp(bin_pfx, current); + cmp = hashcmp(ds->bin_pfx, current); if (!cmp) { first = mid; break; @@ -174,20 +176,19 @@ static void unique_in_pack(int len, */ for (i = first; i < num && !ds->ambiguous; i++) { current = nth_packed_object_sha1(p, i); - if (!match_sha(len, bin_pfx, current)) + if (!match_sha(ds->len, ds->bin_pfx, current)) break; update_candidates(ds, current); } } -static void find_short_packed_object(int len, const unsigned char *bin_pfx, - struct disambiguate_state *ds) +static void find_short_packed_object(struct disambiguate_state *ds) { struct packed_git *p; prepare_packed_git(); for (p = packed_git; p && !ds->ambiguous; p = p->next) - unique_in_pack(len, bin_pfx, p, ds); + unique_in_pack(p, ds); } #define SHORT_NAME_NOT_FOUND (-1) @@ -281,14 +282,17 @@ static int disambiguate_blob_only(const unsigned char *sha1, void *cb_data_unuse return kind == OBJ_BLOB; } -static int prepare_prefixes(const char *name, int len, - unsigned char *bin_pfx, - char *hex_pfx) +static int init_object_disambiguation(const char *name, int len, + struct disambiguate_state *ds) { int i; - hashclr(bin_pfx); - memset(hex_pfx, 'x', 40); + if (len < MINIMUM_ABBREV || len > GIT_SHA1_HEXSZ) + return -1; + + memset(ds, 0, sizeof(*ds)); + memset(ds->hex_pfx, 'x', GIT_SHA1_HEXSZ); + for (i = 0; i < len ;i++) { unsigned char c = name[i]; unsigned char val; @@ -302,11 +306,14 @@ static int prepare_prefixes(const char *name, int len, } else return -1; - hex_pfx[i] = c; + ds->hex_pfx[i] = c; if (!(i & 1)) val <<= 4; - bin_pfx[i >> 1] |= val; + ds->bin_pfx[i >> 1] |= val; } + + ds->len = len; + prepare_alt_odb(); return 0; } @@ -314,19 +321,11 @@ static int get_short_sha1(const char *name, int len, unsigned char *sha1, unsigned flags) { int status; - char hex_pfx[40]; - unsigned char bin_pfx[20]; struct disambiguate_state ds; int quietly = !!(flags & GET_SHA1_QUIETLY); - if (len < MINIMUM_ABBREV || len > 40) + if (init_object_disambiguation(name, len, &ds) < 0) return -1; - if (prepare_prefixes(name, len, bin_pfx, hex_pfx) < 0) - return -1; - - prepare_alt_odb(); - - memset(&ds, 0, sizeof(ds)); if (HAS_MULTI_BITS(flags & GET_SHA1_DISAMBIGUATORS)) die("BUG: multiple get_short_sha1 disambiguator flags"); @@ -342,36 +341,28 @@ static int get_short_sha1(const char *name, int len, unsigned char *sha1, else if (flags & GET_SHA1_BLOB) ds.fn = disambiguate_blob_only; - find_short_object_filename(len, hex_pfx, &ds); - find_short_packed_object(len, bin_pfx, &ds); + find_short_object_filename(&ds); + find_short_packed_object(&ds); status = finish_object_disambiguation(&ds, sha1); if (!quietly && (status == SHORT_NAME_AMBIGUOUS)) - return error("short SHA1 %.*s is ambiguous.", len, hex_pfx); + return error("short SHA1 %.*s is ambiguous.", ds.len, ds.hex_pfx); return status; } int for_each_abbrev(const char *prefix, each_abbrev_fn fn, void *cb_data) { - char hex_pfx[40]; - unsigned char bin_pfx[20]; struct disambiguate_state ds; - int len = strlen(prefix); - if (len < MINIMUM_ABBREV || len > 40) - return -1; - if (prepare_prefixes(prefix, len, bin_pfx, hex_pfx) < 0) + if (init_object_disambiguation(prefix, strlen(prefix), &ds) < 0) return -1; - prepare_alt_odb(); - - memset(&ds, 0, sizeof(ds)); ds.always_call_fn = 1; ds.cb_data = cb_data; ds.fn = fn; - find_short_object_filename(len, hex_pfx, &ds); - find_short_packed_object(len, bin_pfx, &ds); + find_short_object_filename(&ds); + find_short_packed_object(&ds); return ds.ambiguous; } From 59e4e34f6977f83c3a3842178da6358a5d130eb6 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 26 Sep 2016 08:00:07 -0400 Subject: [PATCH 06/11] get_short_sha1: NUL-terminate hex prefix We store the hex prefix in a 40-byte buffer with the prefix itself followed by 40-minus-len "x" characters. These x's serve no purpose, and the lack of NUL termination makes the prefix string annoying to use. Let's just terminate it. Note that this is in contrast to the binary prefix, which _must_ be zero-padded, because we look at the whole thing during a binary search to find the first potential match in each pack index. The loose-object hex search cannot use the same trick because it has to do a linear walk through the unsorted results of readdir() (and even if it could, you'd want zeroes instead of x's). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_name.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sha1_name.c b/sha1_name.c index bd10700b30..bbfa1a00ad 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -14,7 +14,7 @@ typedef int (*disambiguate_hint_fn)(const unsigned char *, void *); struct disambiguate_state { int len; /* length of prefix in hex chars */ - char hex_pfx[GIT_SHA1_HEXSZ]; + char hex_pfx[GIT_SHA1_HEXSZ + 1]; unsigned char bin_pfx[GIT_SHA1_RAWSZ]; disambiguate_hint_fn fn; @@ -291,7 +291,6 @@ static int init_object_disambiguation(const char *name, int len, return -1; memset(ds, 0, sizeof(*ds)); - memset(ds->hex_pfx, 'x', GIT_SHA1_HEXSZ); for (i = 0; i < len ;i++) { unsigned char c = name[i]; @@ -313,6 +312,7 @@ static int init_object_disambiguation(const char *name, int len, } ds->len = len; + ds->hex_pfx[len] = '\0'; prepare_alt_odb(); return 0; } @@ -346,7 +346,7 @@ static int get_short_sha1(const char *name, int len, unsigned char *sha1, status = finish_object_disambiguation(&ds, sha1); if (!quietly && (status == SHORT_NAME_AMBIGUOUS)) - return error("short SHA1 %.*s is ambiguous.", ds.len, ds.hex_pfx); + return error("short SHA1 %s is ambiguous.", ds.hex_pfx); return status; } From 0c99171ad2f79430eb81214d3f1d8ced3d3621e3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 26 Sep 2016 08:00:14 -0400 Subject: [PATCH 07/11] get_short_sha1: mark ambiguity error for translation This is a human-readable message, and there's no reason it should not be translated. While we're at it, let's drop the period from the end, which is not our usual style. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_name.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sha1_name.c b/sha1_name.c index bbfa1a00ad..acea241139 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -346,7 +346,7 @@ static int get_short_sha1(const char *name, int len, unsigned char *sha1, status = finish_object_disambiguation(&ds, sha1); if (!quietly && (status == SHORT_NAME_AMBIGUOUS)) - return error("short SHA1 %s is ambiguous.", ds.hex_pfx); + return error(_("short SHA1 %s is ambiguous"), ds.hex_pfx); return status; } From 16ddcd403bdd74f47f3ae1a7e58a01e36e54a7d7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 26 Sep 2016 08:00:29 -0400 Subject: [PATCH 08/11] sha1_array: let callbacks interrupt iteration The callbacks for iterating a sha1_array must have a void return. This is unlike our usual for_each semantics, where a callback may interrupt iteration and have its value propagated. Let's switch it to the usual form, which will enable its use in more places (e.g., where we are replacing an existing iteration with a different data structure). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/technical/api-sha1-array.txt | 8 ++++++-- builtin/cat-file.c | 3 ++- builtin/receive-pack.c | 3 ++- sha1-array.c | 8 ++++++-- sha1-array.h | 8 ++++---- submodule.c | 3 ++- t/helper/test-sha1-array.c | 3 ++- 7 files changed, 24 insertions(+), 12 deletions(-) diff --git a/Documentation/technical/api-sha1-array.txt b/Documentation/technical/api-sha1-array.txt index 3e75497a37..dcc52943a5 100644 --- a/Documentation/technical/api-sha1-array.txt +++ b/Documentation/technical/api-sha1-array.txt @@ -38,16 +38,20 @@ Functions `sha1_array_for_each_unique`:: Efficiently iterate over each unique element of the list, executing the callback function for each one. If the array is - not sorted, this function has the side effect of sorting it. + not sorted, this function has the side effect of sorting it. If + the callback returns a non-zero value, the iteration ends + immediately and the callback's return is propagated; otherwise, + 0 is returned. Examples -------- ----------------------------------------- -void print_callback(const unsigned char sha1[20], +int print_callback(const unsigned char sha1[20], void *data) { printf("%s\n", sha1_to_hex(sha1)); + return 0; /* always continue */ } void some_func(void) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 94e67ebb7e..cca97a86c0 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -401,11 +401,12 @@ struct object_cb_data { struct expand_data *expand; }; -static void batch_object_cb(const unsigned char sha1[20], void *vdata) +static int batch_object_cb(const unsigned char sha1[20], void *vdata) { struct object_cb_data *data = vdata; hashcpy(data->expand->oid.hash, sha1); batch_object_write(NULL, data->opt, data->expand); + return 0; } static int batch_loose_object(const unsigned char *sha1, diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 896b16f2cc..f7cd180252 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -268,9 +268,10 @@ static int show_ref_cb(const char *path_full, const struct object_id *oid, return 0; } -static void show_one_alternate_sha1(const unsigned char sha1[20], void *unused) +static int show_one_alternate_sha1(const unsigned char sha1[20], void *unused) { show_ref(".have", sha1); + return 0; } static void collect_one_alternate_ref(const struct ref *ref, void *data) diff --git a/sha1-array.c b/sha1-array.c index 6f4a2246c9..af1d7d560d 100644 --- a/sha1-array.c +++ b/sha1-array.c @@ -42,7 +42,7 @@ void sha1_array_clear(struct sha1_array *array) array->sorted = 0; } -void sha1_array_for_each_unique(struct sha1_array *array, +int sha1_array_for_each_unique(struct sha1_array *array, for_each_sha1_fn fn, void *data) { @@ -52,8 +52,12 @@ void sha1_array_for_each_unique(struct sha1_array *array, sha1_array_sort(array); for (i = 0; i < array->nr; i++) { + int ret; if (i > 0 && !hashcmp(array->sha1[i], array->sha1[i-1])) continue; - fn(array->sha1[i], data); + ret = fn(array->sha1[i], data); + if (ret) + return ret; } + return 0; } diff --git a/sha1-array.h b/sha1-array.h index 72bb33bec6..b3230be0dd 100644 --- a/sha1-array.h +++ b/sha1-array.h @@ -14,10 +14,10 @@ void sha1_array_append(struct sha1_array *array, const unsigned char *sha1); int sha1_array_lookup(struct sha1_array *array, const unsigned char *sha1); void sha1_array_clear(struct sha1_array *array); -typedef void (*for_each_sha1_fn)(const unsigned char sha1[20], - void *data); -void sha1_array_for_each_unique(struct sha1_array *array, - for_each_sha1_fn fn, +typedef int (*for_each_sha1_fn)(const unsigned char sha1[20], void *data); +int sha1_array_for_each_unique(struct sha1_array *array, + for_each_sha1_fn fn, + void *data); #endif /* SHA1_ARRAY_H */ diff --git a/submodule.c b/submodule.c index 0ef2ff4321..aba94dd07d 100644 --- a/submodule.c +++ b/submodule.c @@ -728,9 +728,10 @@ void check_for_new_submodule_commits(unsigned char new_sha1[20]) sha1_array_append(&ref_tips_after_fetch, new_sha1); } -static void add_sha1_to_argv(const unsigned char sha1[20], void *data) +static int add_sha1_to_argv(const unsigned char sha1[20], void *data) { argv_array_push(data, sha1_to_hex(sha1)); + return 0; } static void calculate_changed_submodule_paths(void) diff --git a/t/helper/test-sha1-array.c b/t/helper/test-sha1-array.c index 09f7790971..f7a53c4ad6 100644 --- a/t/helper/test-sha1-array.c +++ b/t/helper/test-sha1-array.c @@ -1,9 +1,10 @@ #include "cache.h" #include "sha1-array.h" -static void print_sha1(const unsigned char sha1[20], void *data) +static int print_sha1(const unsigned char sha1[20], void *data) { puts(sha1_to_hex(sha1)); + return 0; } int cmd_main(int argc, const char **argv) From fad6b9e5905f00654f394cac4093a052b7a3cfb6 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 26 Sep 2016 08:00:33 -0400 Subject: [PATCH 09/11] for_each_abbrev: drop duplicate objects If an object appears multiple times in the object database (e.g., in both loose and packed form, or in two separate packs), the disambiguation machinery may see it more than once. The get_short_sha1() function handles this already, but for_each_abbrev() blindly fires the callback for each instance it finds. We can fix this by collecting the output in a sha1 array and de-duplicating it. As a bonus, the sort done for the de-duplication means that our output will be stable, regardless of the order in which the objects are found. Note that the old code normalized the callback's output to 0/1 to store in the 1-bit ds->ambiguous flag (which both halted the iteration and was returned from the for_each_abbrev function). Now that we are using sha1_array, we can return the real value. In practice, it doesn't matter as the sole caller only ever returns 0. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_name.c | 19 +++++++++++++++---- t/t1512-rev-parse-disambiguation.sh | 7 +++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/sha1_name.c b/sha1_name.c index acea241139..f7e388490a 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -7,6 +7,7 @@ #include "refs.h" #include "remote.h" #include "dir.h" +#include "sha1-array.h" static int get_sha1_oneline(const char *, unsigned char *, struct commit_list *); @@ -350,20 +351,30 @@ static int get_short_sha1(const char *name, int len, unsigned char *sha1, return status; } +static int collect_ambiguous(const unsigned char *sha1, void *data) +{ + sha1_array_append(data, sha1); + return 0; +} + int for_each_abbrev(const char *prefix, each_abbrev_fn fn, void *cb_data) { + struct sha1_array collect = SHA1_ARRAY_INIT; struct disambiguate_state ds; + int ret; if (init_object_disambiguation(prefix, strlen(prefix), &ds) < 0) return -1; ds.always_call_fn = 1; - ds.cb_data = cb_data; - ds.fn = fn; - + ds.fn = collect_ambiguous; + ds.cb_data = &collect; find_short_object_filename(&ds); find_short_packed_object(&ds); - return ds.ambiguous; + + ret = sha1_array_for_each_unique(&collect, fn, cb_data); + sha1_array_clear(&collect); + return ret; } int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len) diff --git a/t/t1512-rev-parse-disambiguation.sh b/t/t1512-rev-parse-disambiguation.sh index dfd356721e..1d8f550996 100755 --- a/t/t1512-rev-parse-disambiguation.sh +++ b/t/t1512-rev-parse-disambiguation.sh @@ -280,6 +280,13 @@ test_expect_success 'rev-parse --disambiguate' ' test "$(sed -e "s/^\(.........\).*/\1/" actual | sort -u)" = 000000000 ' +test_expect_success 'rev-parse --disambiguate drops duplicates' ' + git rev-parse --disambiguate=000000000 >expect && + git pack-objects .git/objects/pack/pack actual && + test_cmp expect actual +' + test_expect_success 'ambiguous 40-hex ref' ' TREE=$(git mktree Date: Mon, 26 Sep 2016 08:00:36 -0400 Subject: [PATCH 10/11] get_short_sha1: list ambiguous objects on error When the user gives us an ambiguous short sha1, we print an error and refuse to resolve it. In some cases, the next step is for them to feed us more characters (e.g., if they were retyping or cut-and-pasting from a full sha1). But in other cases, that might be all they have. For example, an old commit message may have used a 7-character hex that was unique at the time, but is now ambiguous. Git doesn't provide any information about the ambiguous objects it found, so it's hard for the user to find out which one they probably meant. This patch teaches get_short_sha1() to list the sha1s of the objects it found, along with a few bits of information that may help the user decide which one they meant. Here's what it looks like on git.git: $ git rev-parse b2e1 error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' hint: b2e1759 blob hint: b2e18954 blob hint: b2e1895c blob fatal: ambiguous argument 'b2e1': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git [...] -- [...]' We show the tagname for tags, and the date and subject for commits. For trees and blobs, in theory we could dig in the history to find the paths at which they were present. But that's very expensive (on the order of 30s for the kernel), and it's not likely to be all that helpful. Most short references are to commits, so the useful information is typically going to be that the object in question _isn't_ a commit. So it's silly to spend a lot of CPU preemptively digging up the path; the user can do it themselves if they really need to. And of course it's somewhat ironic that we abbreviate the sha1s in the disambiguation hint. But full sha1s would cause annoying line wrapping for the commit lines, and presumably the user is going to just re-issue their command immediately with the corrected sha1. We also restrict the list to those that match any disambiguation hint. E.g.: $ git rev-parse b2e1:foo error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' fatal: Invalid object name 'b2e1'. does not bother reporting the blobs, because they cannot work as a treeish. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_name.c | 50 +++++++++++++++++++++++++++-- t/t1512-rev-parse-disambiguation.sh | 24 ++++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/sha1_name.c b/sha1_name.c index f7e388490a..0513f148f1 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -318,6 +318,38 @@ static int init_object_disambiguation(const char *name, int len, return 0; } +static int show_ambiguous_object(const unsigned char *sha1, void *data) +{ + const struct disambiguate_state *ds = data; + struct strbuf desc = STRBUF_INIT; + int type; + + if (ds->fn && !ds->fn(sha1, ds->cb_data)) + return 0; + + type = sha1_object_info(sha1, NULL); + if (type == OBJ_COMMIT) { + struct commit *commit = lookup_commit(sha1); + if (commit) { + struct pretty_print_context pp = {0}; + pp.date_mode.type = DATE_SHORT; + format_commit_message(commit, " %ad - %s", &desc, &pp); + } + } else if (type == OBJ_TAG) { + struct tag *tag = lookup_tag(sha1); + if (!parse_tag(tag) && tag->tag) + strbuf_addf(&desc, " %s", tag->tag); + } + + advise(" %s %s%s", + find_unique_abbrev(sha1, DEFAULT_ABBREV), + typename(type) ? typename(type) : "unknown type", + desc.buf); + + strbuf_release(&desc); + return 0; +} + static int get_short_sha1(const char *name, int len, unsigned char *sha1, unsigned flags) { @@ -346,8 +378,22 @@ static int get_short_sha1(const char *name, int len, unsigned char *sha1, find_short_packed_object(&ds); status = finish_object_disambiguation(&ds, sha1); - if (!quietly && (status == SHORT_NAME_AMBIGUOUS)) - return error(_("short SHA1 %s is ambiguous"), ds.hex_pfx); + if (!quietly && (status == SHORT_NAME_AMBIGUOUS)) { + error(_("short SHA1 %s is ambiguous"), ds.hex_pfx); + + /* + * We may still have ambiguity if we simply saw a series of + * candidates that did not satisfy our hint function. In + * that case, we still want to show them, so disable the hint + * function entirely. + */ + if (!ds.ambiguous) + ds.fn = NULL; + + advise(_("The candidates are:")); + for_each_abbrev(ds.hex_pfx, show_ambiguous_object, &ds); + } + return status; } diff --git a/t/t1512-rev-parse-disambiguation.sh b/t/t1512-rev-parse-disambiguation.sh index 1d8f550996..c5447ef877 100755 --- a/t/t1512-rev-parse-disambiguation.sh +++ b/t/t1512-rev-parse-disambiguation.sh @@ -323,4 +323,28 @@ test_expect_success C_LOCALE_OUTPUT 'ambiguity errors are not repeated (peel)' ' test_line_count = 1 errors ' +test_expect_success C_LOCALE_OUTPUT 'ambiguity hints' ' + test_must_fail git rev-parse 000000000 2>stderr && + grep ^hint: stderr >hints && + # 16 candidates, plus one intro line + test_line_count = 17 hints +' + +test_expect_success C_LOCALE_OUTPUT 'ambiguity hints respect type' ' + test_must_fail git rev-parse 000000000^{commit} 2>stderr && + grep ^hint: stderr >hints && + # 5 commits, 1 tag (which is a commitish), plus intro line + test_line_count = 7 hints +' + +test_expect_success C_LOCALE_OUTPUT 'failed type-selector still shows hint' ' + # these two blobs share the same prefix "ee3d", but neither + # will pass for a commit + echo 851 | git hash-object --stdin -w && + echo 872 | git hash-object --stdin -w && + test_must_fail git rev-parse ee3d^{commit} 2>stderr && + grep ^hint: stderr >hints && + test_line_count = 3 hints +' + test_done From 5b33cb1fd733f581da07ae8afa7e9547eafd248e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 27 Sep 2016 08:38:01 -0400 Subject: [PATCH 11/11] get_short_sha1: make default disambiguation configurable When we find ambiguous short sha1s, we may get a disambiguation rule from our caller's context. But if we don't, we fall back to treating all sha1s the same, even though most projects will tend to refer only to commits by their short sha1s. This patch introduces a configuration option that lets the user pick a different fallback (e.g., only commits). It's possible that we may want to make this the default, but it's a good idea to start as a config option for two reasons: 1. It lets people experiment with this and see if it's a good idea (i.e., the "tend to" above is an assumption; we don't really know if this will break some obscure cases). 2. Even if we do flip the default, it gives people an escape hatch if it causes problems (you can sometimes override it by asking for "1234^{tree}", but not all combinations are possible). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- cache.h | 2 ++ config.c | 3 +++ sha1_name.c | 32 +++++++++++++++++++++++++++++ t/t1512-rev-parse-disambiguation.sh | 14 +++++++++++++ 4 files changed, 51 insertions(+) diff --git a/cache.h b/cache.h index 7bd78ca282..f346c01708 100644 --- a/cache.h +++ b/cache.h @@ -1222,6 +1222,8 @@ extern int get_oid(const char *str, struct object_id *oid); typedef int each_abbrev_fn(const unsigned char *sha1, void *); extern int for_each_abbrev(const char *prefix, each_abbrev_fn, void *); +extern int set_disambiguate_hint_config(const char *var, const char *value); + /* * Try to read a SHA1 in hexadecimal format from the 40 characters * starting at hex. Write the 20-byte result to sha1 in binary form. diff --git a/config.c b/config.c index 1e4b6178f7..83fdecb1bc 100644 --- a/config.c +++ b/config.c @@ -841,6 +841,9 @@ static int git_default_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.disambiguate")) + return set_disambiguate_hint_config(var, value); + if (!strcmp(var, "core.loosecompression")) { int level = git_config_int(var, value); if (level == -1) diff --git a/sha1_name.c b/sha1_name.c index 0513f148f1..3b647fd7cf 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -283,6 +283,36 @@ static int disambiguate_blob_only(const unsigned char *sha1, void *cb_data_unuse return kind == OBJ_BLOB; } +static disambiguate_hint_fn default_disambiguate_hint; + +int set_disambiguate_hint_config(const char *var, const char *value) +{ + static const struct { + const char *name; + disambiguate_hint_fn fn; + } hints[] = { + { "none", NULL }, + { "commit", disambiguate_commit_only }, + { "committish", disambiguate_committish_only }, + { "tree", disambiguate_tree_only }, + { "treeish", disambiguate_treeish_only }, + { "blob", disambiguate_blob_only } + }; + int i; + + if (!value) + return config_error_nonbool(var); + + for (i = 0; i < ARRAY_SIZE(hints); i++) { + if (!strcasecmp(value, hints[i].name)) { + default_disambiguate_hint = hints[i].fn; + return 0; + } + } + + return error("unknown hint type for '%s': %s", var, value); +} + static int init_object_disambiguation(const char *name, int len, struct disambiguate_state *ds) { @@ -373,6 +403,8 @@ static int get_short_sha1(const char *name, int len, unsigned char *sha1, ds.fn = disambiguate_treeish_only; else if (flags & GET_SHA1_BLOB) ds.fn = disambiguate_blob_only; + else + ds.fn = default_disambiguate_hint; find_short_object_filename(&ds); find_short_packed_object(&ds); diff --git a/t/t1512-rev-parse-disambiguation.sh b/t/t1512-rev-parse-disambiguation.sh index c5447ef877..7c659eb585 100755 --- a/t/t1512-rev-parse-disambiguation.sh +++ b/t/t1512-rev-parse-disambiguation.sh @@ -347,4 +347,18 @@ test_expect_success C_LOCALE_OUTPUT 'failed type-selector still shows hint' ' test_line_count = 3 hints ' +test_expect_success 'core.disambiguate config can prefer types' ' + # ambiguous between tree and tag + sha1=0000000000f && + test_must_fail git rev-parse $sha1 && + git rev-parse $sha1^{commit} && + git -c core.disambiguate=committish rev-parse $sha1 +' + +test_expect_success 'core.disambiguate does not override context' ' + # treeish ambiguous between tag and tree + test_must_fail \ + git -c core.disambiguate=committish rev-parse $sha1^{tree} +' + test_done