From 0d2db00e24ee2df4459151c5ba6de9306e30e727 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 22 Aug 2018 20:44:38 -0400 Subject: [PATCH 1/8] trailer: use size_t for string offsets Many of the string-parsing functions inside trailer.c return integer offsets into the string (e.g., to point to the end of the trailer block). Several of these use an "int" to return or store the offsets. On a system where "size_t" is much larger than "int" (e.g., most 64-bit ones), it's easy to feed a gigantic commit message that results in a negative offset. This can result in us reading memory before the string (if the int is used as an index) or far after (if it's implicitly cast to a size_t by passing to a strbuf function). Let's fix this by using size_t for all string offsets. Note that several of the functions need ssize_t, since they use "-1" as a sentinel value. The interactions here can be pretty subtle. E.g., end_of_title in find_trailer_start() does not itself need to be signed, but it is compared to the result of last_line(), which is. That promotes the latter to unsigned, and the ">=" does not behave as you might expect. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- trailer.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/trailer.c b/trailer.c index 4e309460d1..88b35b8e89 100644 --- a/trailer.c +++ b/trailer.c @@ -585,7 +585,7 @@ static const char *token_from_item(struct arg_item *item, char *tok) return item->conf.name; } -static int token_matches_item(const char *tok, struct arg_item *item, int tok_len) +static int token_matches_item(const char *tok, struct arg_item *item, size_t tok_len) { if (!strncasecmp(tok, item->conf.name, tok_len)) return 1; @@ -603,7 +603,7 @@ static int token_matches_item(const char *tok, struct arg_item *item, int tok_le * distinguished from the non-well-formed-line case (in which this function * returns -1) because some callers of this function need such a distinction. */ -static int find_separator(const char *line, const char *separators) +static ssize_t find_separator(const char *line, const char *separators) { int whitespace_found = 0; const char *c; @@ -630,10 +630,10 @@ static int find_separator(const char *line, const char *separators) */ static void parse_trailer(struct strbuf *tok, struct strbuf *val, const struct conf_info **conf, const char *trailer, - int separator_pos) + ssize_t separator_pos) { struct arg_item *item; - int tok_len; + size_t tok_len; struct list_head *pos; if (separator_pos != -1) { @@ -721,7 +721,7 @@ static void process_command_line_args(struct list_head *arg_head, list_for_each(pos, new_trailer_head) { struct new_trailer_item *tr = list_entry(pos, struct new_trailer_item, list); - int separator_pos = find_separator(tr->text, cl_separators); + ssize_t separator_pos = find_separator(tr->text, cl_separators); if (separator_pos == 0) { struct strbuf sb = STRBUF_INIT; @@ -763,9 +763,9 @@ static const char *next_line(const char *str) /* * Return the position of the start of the last line. If len is 0, return -1. */ -static int last_line(const char *buf, size_t len) +static ssize_t last_line(const char *buf, size_t len) { - int i; + ssize_t i; if (len == 0) return -1; if (len == 1) @@ -788,7 +788,7 @@ static int last_line(const char *buf, size_t len) * Return the position of the start of the patch or the length of str if there * is no patch in the message. */ -static int find_patch_start(const char *str) +static size_t find_patch_start(const char *str) { const char *s; @@ -804,10 +804,11 @@ static int find_patch_start(const char *str) * Return the position of the first trailer line or len if there are no * trailers. */ -static int find_trailer_start(const char *buf, size_t len) +static size_t find_trailer_start(const char *buf, size_t len) { const char *s; - int end_of_title, l, only_spaces = 1; + ssize_t end_of_title, l; + int only_spaces = 1; int recognized_prefix = 0, trailer_lines = 0, non_trailer_lines = 0; /* * Number of possible continuation lines encountered. This will be @@ -838,7 +839,7 @@ static int find_trailer_start(const char *buf, size_t len) l = last_line(buf, l)) { const char *bol = buf + l; const char **p; - int separator_pos; + ssize_t separator_pos; if (bol[0] == comment_line_char) { non_trailer_lines += possible_continuation_lines; @@ -899,14 +900,14 @@ continue_outer_loop: } /* Return the position of the end of the trailers. */ -static int find_trailer_end(const char *buf, size_t len) +static size_t find_trailer_end(const char *buf, size_t len) { return len - ignore_non_trailer(buf, len); } static int ends_with_blank_line(const char *buf, size_t len) { - int ll = last_line(buf, len); + ssize_t ll = last_line(buf, len); if (ll < 0) return 0; return is_blank_line(buf + ll); @@ -939,10 +940,10 @@ static void unfold_value(struct strbuf *val) strbuf_release(&out); } -static int process_input_file(FILE *outfile, - const char *str, - struct list_head *head, - const struct process_trailer_options *opts) +static size_t process_input_file(FILE *outfile, + const char *str, + struct list_head *head, + const struct process_trailer_options *opts) { struct trailer_info info; struct strbuf tok = STRBUF_INIT; @@ -1032,7 +1033,7 @@ void process_trailers(const char *file, { LIST_HEAD(head); struct strbuf sb = STRBUF_INIT; - int trailer_end; + size_t trailer_end; FILE *outfile = stdout; ensure_configured(); @@ -1132,7 +1133,7 @@ static void format_trailer_info(struct strbuf *out, for (i = 0; i < info->trailer_nr; i++) { char *trailer = info->trailers[i]; - int separator_pos = find_separator(trailer, separators); + ssize_t separator_pos = find_separator(trailer, separators); if (separator_pos >= 1) { struct strbuf tok = STRBUF_INIT; From a3b636e21574e6cff1494e0a84644e06201ddb8d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 22 Aug 2018 20:45:44 -0400 Subject: [PATCH 2/8] trailer: use size_t for iterating trailer list We store the length of the trailers list in a size_t. So on a 64-bit system with a 32-bit int, in the unlikely case that we manage to actually allocate a list with 2^31 entries, we'd loop forever trying to iterate over it (our "int" would wrap to negative before exceeding info->trailer_nr). This probably doesn't matter in practice. Each entry is at least a pointer plus a non-empty string, so even without malloc overhead or the memory to hold the original string we're parsing from, you'd need to allocate tens of gigabytes. But it's easy enough to do it right. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sequencer.c | 2 +- trailer.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sequencer.c b/sequencer.c index 4034c0461b..b0613c06bb 100644 --- a/sequencer.c +++ b/sequencer.c @@ -225,7 +225,7 @@ static int has_conforming_footer(struct strbuf *sb, struct strbuf *sob, int ignore_footer) { struct trailer_info info; - int i; + size_t i; int found_sob = 0, found_sob_last = 0; trailer_info_get(&info, sb->buf); diff --git a/trailer.c b/trailer.c index 88b35b8e89..40eef8880e 100644 --- a/trailer.c +++ b/trailer.c @@ -948,7 +948,7 @@ static size_t process_input_file(FILE *outfile, struct trailer_info info; struct strbuf tok = STRBUF_INIT; struct strbuf val = STRBUF_INIT; - int i; + size_t i; trailer_info_get(&info, str); @@ -1112,7 +1112,7 @@ void trailer_info_get(struct trailer_info *info, const char *str) void trailer_info_release(struct trailer_info *info) { - int i; + size_t i; for (i = 0; i < info->trailer_nr; i++) free(info->trailers[i]); free(info->trailers); @@ -1122,7 +1122,7 @@ static void format_trailer_info(struct strbuf *out, const struct trailer_info *info, const struct process_trailer_options *opts) { - int i; + size_t i; /* If we want the whole block untouched, we can take the fast path. */ if (!opts->only_trailers && !opts->unfold) { From 00a21f5cbda35eb5d355b453849b625eeca7eac4 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 22 Aug 2018 20:46:23 -0400 Subject: [PATCH 3/8] trailer: pass process_trailer_opts to trailer_info_get() Most of the trailer code has an "opts" struct which is filled in by the caller. We don't pass it down to trailer_info_get(), which does the initial parsing, because there hasn't yet been a need to do so. Let's start passing it down in preparation for adding new options. Note that there's a single caller which doesn't otherwise have such an options struct. Since it's just one caller (that we'd have to modify anyway), let's not bother with any special treatment like accepting a NULL options struct, and just have it allocate one with the defaults. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sequencer.c | 3 ++- trailer.c | 7 ++++--- trailer.h | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/sequencer.c b/sequencer.c index b0613c06bb..849208eb40 100644 --- a/sequencer.c +++ b/sequencer.c @@ -224,11 +224,12 @@ static const char *get_todo_path(const struct replay_opts *opts) static int has_conforming_footer(struct strbuf *sb, struct strbuf *sob, int ignore_footer) { + struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT; struct trailer_info info; size_t i; int found_sob = 0, found_sob_last = 0; - trailer_info_get(&info, sb->buf); + trailer_info_get(&info, sb->buf, &opts); if (info.trailer_start == info.trailer_end) return 0; diff --git a/trailer.c b/trailer.c index 40eef8880e..e769c5b72c 100644 --- a/trailer.c +++ b/trailer.c @@ -950,7 +950,7 @@ static size_t process_input_file(FILE *outfile, struct strbuf val = STRBUF_INIT; size_t i; - trailer_info_get(&info, str); + trailer_info_get(&info, str, opts); /* Print lines before the trailers as is */ if (!opts->only_trailers) @@ -1067,7 +1067,8 @@ void process_trailers(const char *file, strbuf_release(&sb); } -void trailer_info_get(struct trailer_info *info, const char *str) +void trailer_info_get(struct trailer_info *info, const char *str, + const struct process_trailer_options *opts) { int patch_start, trailer_end, trailer_start; struct strbuf **trailer_lines, **ptr; @@ -1159,7 +1160,7 @@ void format_trailers_from_commit(struct strbuf *out, const char *msg, { struct trailer_info info; - trailer_info_get(&info, msg); + trailer_info_get(&info, msg, opts); format_trailer_info(out, &info, opts); trailer_info_release(&info); } diff --git a/trailer.h b/trailer.h index 6d7f8c2a52..82a62b33bb 100644 --- a/trailer.h +++ b/trailer.h @@ -77,7 +77,8 @@ void process_trailers(const char *file, const struct process_trailer_options *opts, struct list_head *new_trailer_head); -void trailer_info_get(struct trailer_info *info, const char *str); +void trailer_info_get(struct trailer_info *info, const char *str, + const struct process_trailer_options *opts); void trailer_info_release(struct trailer_info *info); From c188668e387509565237a865a1c63e890f6bbcd7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 22 Aug 2018 20:48:21 -0400 Subject: [PATCH 4/8] interpret-trailers: tighten check for "---" patch boundary The interpret-trailers command accepts not only raw commit messages, but it also can manipulate trailers in format-patch output. That means it must find the "---" boundary separating the commit message from the patch. However, it does so by looking for any line starting with "---", regardless of whether there is further content. This is overly lax compared to the parsing done in mailinfo.c's patchbreak(), and may cause false positives (e.g., t/perf output tables uses dashes; if you cut and paste them into your commit message, it fools the parser). We could try to reuse patchbreak() here, but it actually has several heuristics that are not of interest to us (e.g., matching "diff -" without a three-dash separator or even a CVS "Index:" line). We're not interested in taking in whatever random cruft people may send, but rather handling git-formatted patches. Note that the existing documentation was written in a loose way, so technically we are changing the behavior from what it said. But this should implement the original intent in a more accurate way. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/git-interpret-trailers.txt | 5 +++-- t/t7513-interpret-trailers.sh | 22 ++++++++++++++++++++++ trailer.c | 4 +++- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/Documentation/git-interpret-trailers.txt b/Documentation/git-interpret-trailers.txt index 9111c47a1b..152479ebba 100644 --- a/Documentation/git-interpret-trailers.txt +++ b/Documentation/git-interpret-trailers.txt @@ -56,8 +56,9 @@ least one Git-generated or user-configured trailer and consists of at least 25% trailers. The group must be preceded by one or more empty (or whitespace-only) lines. The group must either be at the end of the message or be the last -non-whitespace lines before a line that starts with '---'. Such three -minus signs start the patch part of the message. +non-whitespace lines before a line that starts with '---' (followed by a +space or the end of the line). Such three minus signs start the patch +part of the message. When reading trailers, there can be whitespaces after the token, the separator and the value. There can also be whitespaces diff --git a/t/t7513-interpret-trailers.sh b/t/t7513-interpret-trailers.sh index 164719d1c9..e13b40b43f 100755 --- a/t/t7513-interpret-trailers.sh +++ b/t/t7513-interpret-trailers.sh @@ -1417,4 +1417,26 @@ test_expect_success 'unfold' ' test_cmp expected actual ' +test_expect_success 'handling of --- lines in input' ' + echo "real-trailer: just right" >expected && + + git interpret-trailers --parse >actual <<-\EOF && + subject + + body + + not-a-trailer: too soon + ------ this is just a line in the commit message with a bunch of + ------ dashes; it does not have any syntactic meaning. + + real-trailer: just right + --- + below the dashed line may be a patch, etc. + + not-a-trailer: too late + EOF + + test_cmp expected actual +' + test_done diff --git a/trailer.c b/trailer.c index e769c5b72c..8392c6c030 100644 --- a/trailer.c +++ b/trailer.c @@ -793,7 +793,9 @@ static size_t find_patch_start(const char *str) const char *s; for (s = str; *s; s = next_line(s)) { - if (starts_with(s, "---")) + const char *v; + + if (skip_prefix(s, "---", &v) && isspace(*v)) return s - str; } From 1688c9a4894df517241026c7a3848bdc84607986 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 22 Aug 2018 20:49:56 -0400 Subject: [PATCH 5/8] interpret-trailers: allow suppressing "---" divider Even with the newly-tightened "---" parser, it's still possible for a commit message to trigger a false positive if it contains something like "--- foo". If the caller knows that it has only a single commit message, it can now tell us with the "--no-divider" option, eliminating any false positives. If we were designing this from scratch, I'd probably make this the default. But we've advertised the "---" behavior in the documentation since interpret-trailers has existed. Since it's meant to be scripted, breaking that would be a bad idea. Note that the logic is in the underlying trailer.c code, which is used elsewhere. The default there will keep the current behavior, but many callers will benefit from setting this new option. That's left for future patches. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/git-interpret-trailers.txt | 7 ++++++- builtin/interpret-trailers.c | 1 + t/t7513-interpret-trailers.sh | 20 ++++++++++++++++++++ trailer.c | 6 +++++- trailer.h | 1 + 5 files changed, 33 insertions(+), 2 deletions(-) diff --git a/Documentation/git-interpret-trailers.txt b/Documentation/git-interpret-trailers.txt index 152479ebba..ee13124681 100644 --- a/Documentation/git-interpret-trailers.txt +++ b/Documentation/git-interpret-trailers.txt @@ -58,7 +58,7 @@ The group must be preceded by one or more empty (or whitespace-only) lines. The group must either be at the end of the message or be the last non-whitespace lines before a line that starts with '---' (followed by a space or the end of the line). Such three minus signs start the patch -part of the message. +part of the message. See also `--no-divider` below. When reading trailers, there can be whitespaces after the token, the separator and the value. There can also be whitespaces @@ -123,6 +123,11 @@ OPTIONS A convenience alias for `--only-trailers --only-input --unfold`. +--no-divider:: + Do not treat `---` as the end of the commit message. Use this + when you know your input contains just the commit message itself + (and not an email or the output of `git format-patch`). + CONFIGURATION VARIABLES ----------------------- diff --git a/builtin/interpret-trailers.c b/builtin/interpret-trailers.c index b742539d4d..4b87e0dd2e 100644 --- a/builtin/interpret-trailers.c +++ b/builtin/interpret-trailers.c @@ -104,6 +104,7 @@ int cmd_interpret_trailers(int argc, const char **argv, const char *prefix) OPT_BOOL(0, "unfold", &opts.unfold, N_("join whitespace-continued values")), { OPTION_CALLBACK, 0, "parse", &opts, NULL, N_("set parsing options"), PARSE_OPT_NOARG | PARSE_OPT_NONEG, parse_opt_parse }, + OPT_BOOL(0, "no-divider", &opts.no_divider, N_("do not treat --- specially")), OPT_CALLBACK(0, "trailer", &trailers, N_("trailer"), N_("trailer(s) to add"), option_parse_trailer), OPT_END() diff --git a/t/t7513-interpret-trailers.sh b/t/t7513-interpret-trailers.sh index e13b40b43f..c441861331 100755 --- a/t/t7513-interpret-trailers.sh +++ b/t/t7513-interpret-trailers.sh @@ -1439,4 +1439,24 @@ test_expect_success 'handling of --- lines in input' ' test_cmp expected actual ' +test_expect_success 'suppress --- handling' ' + echo "real-trailer: just right" >expected && + + git interpret-trailers --parse --no-divider >actual <<-\EOF && + subject + + This commit message has a "---" in it, but because we tell + interpret-trailers not to respect that, it has no effect. + + not-a-trailer: too soon + --- + + This is still the commit message body. + + real-trailer: just right + EOF + + test_cmp expected actual +' + test_done diff --git a/trailer.c b/trailer.c index 8392c6c030..0796f326b3 100644 --- a/trailer.c +++ b/trailer.c @@ -1080,7 +1080,11 @@ void trailer_info_get(struct trailer_info *info, const char *str, ensure_configured(); - patch_start = find_patch_start(str); + if (opts->no_divider) + patch_start = strlen(str); + else + patch_start = find_patch_start(str); + trailer_end = find_trailer_end(str, patch_start); trailer_start = find_trailer_start(str, trailer_end); diff --git a/trailer.h b/trailer.h index 82a62b33bb..f47b16e2c4 100644 --- a/trailer.h +++ b/trailer.h @@ -69,6 +69,7 @@ struct process_trailer_options { int only_trailers; int only_input; int unfold; + int no_divider; }; #define PROCESS_TRAILER_OPTIONS_INIT {0} From e5fba5d55886eaf48aeeb158dd4d30c2fc0294c9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 22 Aug 2018 20:50:17 -0400 Subject: [PATCH 6/8] pretty, ref-filter: format %(trailers) with no_divider option In both of these cases we know that we are feeding the trailer-parsing code a pure commit message. We should tell it so, which avoids false positives for a commit message that contains a "---" line. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pretty.c | 3 +++ ref-filter.c | 2 ++ t/t4205-log-pretty-formats.sh | 23 +++++++++++++++++++++++ t/t6300-for-each-ref.sh | 23 +++++++++++++++++++++++ 4 files changed, 51 insertions(+) diff --git a/pretty.c b/pretty.c index 703fa6ff7b..c0af210e9d 100644 --- a/pretty.c +++ b/pretty.c @@ -1304,6 +1304,9 @@ static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */ if (skip_prefix(placeholder, "(trailers", &arg)) { struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT; + + opts.no_divider = 1; + if (*arg == ':') { arg++; for (;;) { diff --git a/ref-filter.c b/ref-filter.c index 01c1a82075..92557d4c69 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -223,6 +223,8 @@ static int trailers_atom_parser(const struct ref_format *format, struct used_ato struct string_list params = STRING_LIST_INIT_DUP; int i; + atom->u.contents.trailer_opts.no_divider = 1; + if (arg) { string_list_split(¶ms, arg, ',', -1); for (i = 0; i < params.nr; i++) { diff --git a/t/t4205-log-pretty-formats.sh b/t/t4205-log-pretty-formats.sh index 2052cadb11..978a8a66ff 100755 --- a/t/t4205-log-pretty-formats.sh +++ b/t/t4205-log-pretty-formats.sh @@ -598,4 +598,27 @@ test_expect_success ':only and :unfold work together' ' test_cmp expect actual ' +test_expect_success 'trailer parsing not fooled by --- line' ' + git commit --allow-empty -F - <<-\EOF && + this is the subject + + This is the body. The message has a "---" line which would confuse a + message+patch parser. But here we know we have only a commit message, + so we get it right. + + trailer: wrong + --- + This is more body. + + trailer: right + EOF + + { + echo "trailer: right" && + echo + } >expect && + git log --no-walk --format="%(trailers)" >actual && + test_cmp expect actual +' + test_done diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh index 48379aa0ee..3d26ba0c2a 100755 --- a/t/t6300-for-each-ref.sh +++ b/t/t6300-for-each-ref.sh @@ -715,6 +715,29 @@ test_expect_success 'basic atom: head contents:trailers' ' test_cmp expect actual.clean ' +test_expect_success 'trailer parsing not fooled by --- line' ' + git commit --allow-empty -F - <<-\EOF && + this is the subject + + This is the body. The message has a "---" line which would confuse a + message+patch parser. But here we know we have only a commit message, + so we get it right. + + trailer: wrong + --- + This is more body. + + trailer: right + EOF + + { + echo "trailer: right" && + echo + } >expect && + git for-each-ref --format="%(trailers)" refs/heads/master >actual && + test_cmp expect actual +' + test_expect_success 'Add symbolic ref for the following tests' ' git symbolic-ref refs/heads/sym refs/heads/master ' From ffce7f590fabee6f2314ffd891f1fd3629222839 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 22 Aug 2018 20:50:37 -0400 Subject: [PATCH 7/8] sequencer: ignore "---" divider when parsing trailers When the sequencer code appends a signoff or cherry-pick origin, it uses the default trailer-parsing options, which treat "---" as the end of the commit message. As a result, it may be fooled by a commit message that contains that string and fail to find the existing trailer block. Even more confusing, the actual append code does not know about "---", and always appends to the end of the string. This can lead to bizarre results. E.g., appending a signoff to a commit message like this: subject body --- these dashes confuse the parser! Signed-off-by: A results in output with a final block like: Signed-off-by: A Signed-off-by: A The parser thinks the final line of the message is "body", and ignores everything else, claiming there are no trailers. So we output an extra newline separator (wrong) and add a duplicate signoff (also wrong). Since we know we are feeding a pure commit message, we can simply tell the parser to ignore the "---" divider. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sequencer.c | 2 ++ t/t7501-commit.sh | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/sequencer.c b/sequencer.c index 849208eb40..51ef7245b1 100644 --- a/sequencer.c +++ b/sequencer.c @@ -229,6 +229,8 @@ static int has_conforming_footer(struct strbuf *sb, struct strbuf *sob, size_t i; int found_sob = 0, found_sob_last = 0; + opts.no_divider = 1; + trailer_info_get(&info, sb->buf, &opts); if (info.trailer_start == info.trailer_end) diff --git a/t/t7501-commit.sh b/t/t7501-commit.sh index 9dbbd01fc0..025d65b517 100755 --- a/t/t7501-commit.sh +++ b/t/t7501-commit.sh @@ -517,6 +517,22 @@ Myfooter: x" && test_cmp expected actual ' +test_expect_success 'signoff not confused by ---' ' + cat >expected <<-EOF && + subject + + body + --- + these dashes confuse the parser! + + Signed-off-by: $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> + EOF + # should be a noop, since we already signed + git commit --allow-empty --signoff -F expected && + git log -1 --pretty=format:%B >actual && + test_cmp expected actual +' + test_expect_success 'multiple -m' ' >negative && From 66e83d9b41f7438cb167b9bb54093ebbf0532437 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 22 Aug 2018 20:50:51 -0400 Subject: [PATCH 8/8] append_signoff: use size_t for string offsets The append_signoff() function takes an "int" to specify the number of bytes to ignore. Most callers just pass 0, and the remainder use ignore_non_trailer() to skip over cruft. That function also returns an int, and uses them internally. On systems where size_t is larger than an int (i.e., most 64-bit systems), dealing with a ridiculously large commit message could end up overflowing an int, producing surprising results (e.g., returning a negative offset, which would cause us to look outside the original string). Let's consistently use size_t for these offsets through this whole stack. As a bonus, this makes the meaning of "ignore_footer" as an offset (and not a boolean) more clear. But while we're here, let's also document the interface. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit.c | 6 +++--- commit.h | 2 +- sequencer.c | 4 ++-- sequencer.h | 9 ++++++++- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/commit.c b/commit.c index 0030e79940..873ef0d5b1 100644 --- a/commit.c +++ b/commit.c @@ -1687,10 +1687,10 @@ const char *find_commit_header(const char *msg, const char *key, size_t *out_len * Returns the number of bytes from the tail to ignore, to be fed as * the second parameter to append_signoff(). */ -int ignore_non_trailer(const char *buf, size_t len) +size_t ignore_non_trailer(const char *buf, size_t len) { - int boc = 0; - int bol = 0; + size_t boc = 0; + size_t bol = 0; int in_old_conflicts_block = 0; size_t cutoff = wt_status_locate_end(buf, len); diff --git a/commit.h b/commit.h index c3af512f8b..1f7b97cebd 100644 --- a/commit.h +++ b/commit.h @@ -301,7 +301,7 @@ extern const char *find_commit_header(const char *msg, const char *key, size_t *out_len); /* Find the end of the log message, the right place for a new trailer. */ -extern int ignore_non_trailer(const char *buf, size_t len); +extern size_t ignore_non_trailer(const char *buf, size_t len); typedef int (*each_mergetag_fn)(struct commit *commit, struct commit_extra_header *extra, void *cb_data); diff --git a/sequencer.c b/sequencer.c index 51ef7245b1..f24d6c3597 100644 --- a/sequencer.c +++ b/sequencer.c @@ -222,7 +222,7 @@ static const char *get_todo_path(const struct replay_opts *opts) * Returns 3 when sob exists within conforming footer as last entry */ static int has_conforming_footer(struct strbuf *sb, struct strbuf *sob, - int ignore_footer) + size_t ignore_footer) { struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT; struct trailer_info info; @@ -3660,7 +3660,7 @@ int sequencer_pick_revisions(struct replay_opts *opts) return res; } -void append_signoff(struct strbuf *msgbuf, int ignore_footer, unsigned flag) +void append_signoff(struct strbuf *msgbuf, size_t ignore_footer, unsigned flag) { unsigned no_dup_sob = flag & APPEND_SIGNOFF_DEDUP; struct strbuf sob = STRBUF_INIT; diff --git a/sequencer.h b/sequencer.h index c5787c6b56..06ff5ff065 100644 --- a/sequencer.h +++ b/sequencer.h @@ -85,7 +85,14 @@ int rearrange_squash(void); extern const char sign_off_header[]; -void append_signoff(struct strbuf *msgbuf, int ignore_footer, unsigned flag); +/* + * Append a signoff to the commit message in "msgbuf". The ignore_footer + * parameter specifies the number of bytes at the end of msgbuf that should + * not be considered at all. I.e., they are not checked for existing trailers, + * and the new signoff will be spliced into the buffer before those bytes. + */ +void append_signoff(struct strbuf *msgbuf, size_t ignore_footer, unsigned flag); + void append_conflicts_hint(struct strbuf *msgbuf); int message_is_empty(const struct strbuf *sb, enum commit_msg_cleanup_mode cleanup_mode);