From 2a5ce7cf0da8c4d97dd58f5cbc6092b394648356 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 15:33:07 -0700 Subject: [PATCH 01/34] mailinfo: remove a no-op call convert_to_utf8(it, "") The called function checks if the second parameter is either a NULL or an empty string at the very beginning and returns without doing anything. Remove the useless call. Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 999a5250fb..5a4ed75a2d 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -612,11 +612,6 @@ static void decode_header(struct strbuf *it) { if (decode_header_bq(it)) return; - /* otherwise "it" is a straight copy of the input. - * This can be binary guck but there is no charset specified. - */ - if (metainfo_charset) - convert_to_utf8(it, ""); } static void decode_transfer_encoding(struct strbuf *line) From 3a8fcdaf844910578d91ff225bcb1587ddb4d062 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 15:33:44 -0700 Subject: [PATCH 02/34] mailinfo: fold decode_header_bq() into decode_header() In olden days we might have wanted to behave differently in decode_header() if the header line was encoded with RFC2047, but we apparently do not do so, hence this helper function can go, together with its return value. 
Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 5a4ed75a2d..addc0e00a6 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -525,19 +525,17 @@ static void convert_to_utf8(struct strbuf *line, const char *charset) strbuf_attach(line, out, strlen(out), strlen(out)); } -static int decode_header_bq(struct strbuf *it) +static void decode_header(struct strbuf *it) { char *in, *ep, *cp; struct strbuf outbuf = STRBUF_INIT, *dec; struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT; - int rfc2047 = 0; in = it->buf; while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) { int encoding; strbuf_reset(&charset_q); strbuf_reset(&piecebuf); - rfc2047 = 1; if (in != ep) { /* @@ -567,22 +565,22 @@ static int decode_header_bq(struct strbuf *it) ep += 2; if (ep - it->buf >= it->len || !(cp = strchr(ep, '?'))) - goto decode_header_bq_out; + goto release_return; if (cp + 3 - it->buf > it->len) - goto decode_header_bq_out; + goto release_return; strbuf_add(&charset_q, ep, cp - ep); encoding = cp[1]; if (!encoding || cp[2] != '?') - goto decode_header_bq_out; + goto release_return; ep = strstr(cp + 3, "?="); if (!ep) - goto decode_header_bq_out; + goto release_return; strbuf_add(&piecebuf, cp + 3, ep - cp - 3); switch (tolower(encoding)) { default: - goto decode_header_bq_out; + goto release_return; case 'b': dec = decode_b_segment(&piecebuf); break; @@ -601,17 +599,10 @@ static int decode_header_bq(struct strbuf *it) strbuf_addstr(&outbuf, in); strbuf_reset(it); strbuf_addbuf(it, &outbuf); -decode_header_bq_out: +release_return: strbuf_release(&outbuf); strbuf_release(&charset_q); strbuf_release(&piecebuf); - return rfc2047; -} - -static void decode_header(struct strbuf *it) -{ - if (decode_header_bq(it)) - return; } static void decode_transfer_encoding(struct strbuf *line) From b6af8ed13a73a705b79e79ecd7320f3e90e98315 
Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 15:34:19 -0700 Subject: [PATCH 03/34] mailinfo: fix an off-by-one error in the boundary stack We pre-increment the pointer that we will use to store something at, so the pointer is already beyond the end of the array if it points at content[MAX_BOUNDARIES]. Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index addc0e00a6..1566c19b62 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -185,7 +185,7 @@ static void handle_content_type(struct strbuf *line) if (slurp_attr(line->buf, "boundary=", boundary)) { strbuf_insert(boundary, 0, "--", 2); - if (++content_top > &content[MAX_BOUNDARIES]) { + if (++content_top >= &content[MAX_BOUNDARIES]) { fprintf(stderr, "Too many boundaries to handle\n"); exit(1); } From e38ee06e99635cca0997d6a04f0c22357670090d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 15:35:10 -0700 Subject: [PATCH 04/34] mailinfo: explicitly close file handle to the patch output This does not make a difference within the context of "git mailinfo" that runs once and exits, as flushing and closing would happen upon process termination. It however will matter when we eventually make it callable as an API function. Besides, cleaning after yourself once you are done is a good hygiene. 
Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 1566c19b62..73be47c497 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -999,6 +999,8 @@ static int mailinfo(FILE *in, FILE *out, const char *msg, const char *patch) check_header(&line, p_hdr_data, 1); handle_body(); + fclose(patchfile); + handle_info(); return 0; From 12d19e80b049dbedd51e9e6a70260383b9db4dd0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 20 Oct 2015 14:32:32 -0700 Subject: [PATCH 05/34] mailinfo: plug strbuf leak during continuation line handling Whether this loop is left via EOF/break or upon finding a non-continuation line, the storage used for the continuation line handling is left behind. Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 73be47c497..a183cd49fd 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -409,6 +409,8 @@ static int is_rfc2822_header(const struct strbuf *line) static int read_one_header_line(struct strbuf *line, FILE *in) { + struct strbuf continuation = STRBUF_INIT; + /* Get the first part of the line. 
*/ if (strbuf_getline(line, in, '\n')) return 0; @@ -430,7 +432,6 @@ static int read_one_header_line(struct strbuf *line, FILE *in) */ for (;;) { int peek; - struct strbuf continuation = STRBUF_INIT; peek = fgetc(in); ungetc(peek, in); if (peek != ' ' && peek != '\t') @@ -441,6 +442,7 @@ static int read_one_header_line(struct strbuf *line, FILE *in) strbuf_rtrim(&continuation); strbuf_addbuf(line, &continuation); } + strbuf_release(&continuation); return 1; } From 39afcd38196fa3594924a73896e5fa2bca21dd63 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 13 Oct 2015 11:03:20 -0700 Subject: [PATCH 06/34] mailinfo: move handle_boundary() lower This function wants to call find_boundary() and is called only from one place without any recursing, so it becomes easier to read if it appears after the called function. Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 114 ++++++++++++++++++++++----------------------- 1 file changed, 56 insertions(+), 58 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index a183cd49fd..ed9c568a20 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -628,64 +628,6 @@ static void decode_transfer_encoding(struct strbuf *line) free(ret); } -static void handle_filter(struct strbuf *line); - -static int find_boundary(void) -{ - while (!strbuf_getline(&line, fin, '\n')) { - if (*content_top && is_multipart_boundary(&line)) - return 1; - } - return 0; -} - -static int handle_boundary(void) -{ - struct strbuf newline = STRBUF_INIT; - - strbuf_addch(&newline, '\n'); -again: - if (line.len >= (*content_top)->len + 2 && - !memcmp(line.buf + (*content_top)->len, "--", 2)) { - /* we hit an end boundary */ - /* pop the current boundary off the stack */ - strbuf_release(*content_top); - free(*content_top); - *content_top = NULL; - - /* technically won't happen as is_multipart_boundary() - will fail first. But just in case.. 
- */ - if (--content_top < content) { - fprintf(stderr, "Detected mismatched boundaries, " - "can't recover\n"); - exit(1); - } - handle_filter(&newline); - strbuf_release(&newline); - - /* skip to the next boundary */ - if (!find_boundary()) - return 0; - goto again; - } - - /* set some defaults */ - transfer_encoding = TE_DONTCARE; - strbuf_reset(&charset); - - /* slurp in this section's info */ - while (read_one_header_line(&line, fin)) - check_header(&line, p_hdr_data, 0); - - strbuf_release(&newline); - /* replenish line */ - if (strbuf_getline(&line, fin, '\n')) - return 0; - strbuf_addch(&line, '\n'); - return 1; -} - static inline int patchbreak(const struct strbuf *line) { size_t i; @@ -853,6 +795,62 @@ static void handle_filter(struct strbuf *line) } } +static int find_boundary(void) +{ + while (!strbuf_getline(&line, fin, '\n')) { + if (*content_top && is_multipart_boundary(&line)) + return 1; + } + return 0; +} + +static int handle_boundary(void) +{ + struct strbuf newline = STRBUF_INIT; + + strbuf_addch(&newline, '\n'); +again: + if (line.len >= (*content_top)->len + 2 && + !memcmp(line.buf + (*content_top)->len, "--", 2)) { + /* we hit an end boundary */ + /* pop the current boundary off the stack */ + strbuf_release(*content_top); + free(*content_top); + *content_top = NULL; + + /* technically won't happen as is_multipart_boundary() + will fail first. But just in case.. 
+ */ + if (--content_top < content) { + fprintf(stderr, "Detected mismatched boundaries, " + "can't recover\n"); + exit(1); + } + handle_filter(&newline); + strbuf_release(&newline); + + /* skip to the next boundary */ + if (!find_boundary()) + return 0; + goto again; + } + + /* set some defaults */ + transfer_encoding = TE_DONTCARE; + strbuf_reset(&charset); + + /* slurp in this section's info */ + while (read_one_header_line(&line, fin)) + check_header(&line, p_hdr_data, 0); + + strbuf_release(&newline); + /* replenish line */ + if (strbuf_getline(&line, fin, '\n')) + return 0; + strbuf_addch(&line, '\n'); + return 1; +} + static void handle_body(void) { struct strbuf prev = STRBUF_INIT; From 9cc243f7a9b06fefb29cd8e4d91ecf97298c0cb5 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 17:44:07 -0700 Subject: [PATCH 07/34] mailinfo: move read_one_header_line() closer to its callers Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 136 ++++++++++++++++++++++----------------------- 1 file changed, 68 insertions(+), 68 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index ed9c568a20..04927f7ae5 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -379,74 +379,6 @@ check_header_out: return ret; } -static int is_rfc2822_header(const struct strbuf *line) -{ - /* - * The section that defines the loosest possible - * field name is "3.6.8 Optional fields". - * - * optional-field = field-name ":" unstructured CRLF - * field-name = 1*ftext - * ftext = %d33-57 / %59-126 - */ - int ch; - char *cp = line->buf; - - /* Count mbox From headers as headers */ - if (starts_with(cp, "From ") || starts_with(cp, ">From ")) - return 1; - - while ((ch = *cp++)) { - if (ch == ':') - return 1; - if ((33 <= ch && ch <= 57) || - (59 <= ch && ch <= 126)) - continue; - break; - } - return 0; -} - -static int read_one_header_line(struct strbuf *line, FILE *in) -{ - struct strbuf continuation = STRBUF_INIT; - - /* Get the first part of the line. 
*/ - if (strbuf_getline(line, in, '\n')) - return 0; - - /* - * Is it an empty line or not a valid rfc2822 header? - * If so, stop here, and return false ("not a header") - */ - strbuf_rtrim(line); - if (!line->len || !is_rfc2822_header(line)) { - /* Re-add the newline */ - strbuf_addch(line, '\n'); - return 0; - } - - /* - * Now we need to eat all the continuation lines.. - * Yuck, 2822 header "folding" - */ - for (;;) { - int peek; - - peek = fgetc(in); ungetc(peek, in); - if (peek != ' ' && peek != '\t') - break; - if (strbuf_getline(&continuation, in, '\n')) - break; - continuation.buf[0] = ' '; - strbuf_rtrim(&continuation); - strbuf_addbuf(line, &continuation); - } - strbuf_release(&continuation); - - return 1; -} - static struct strbuf *decode_q_segment(const struct strbuf *q_seg, int rfc2047) { const char *in = q_seg->buf; @@ -795,6 +727,74 @@ static void handle_filter(struct strbuf *line) } } +static int is_rfc2822_header(const struct strbuf *line) +{ + /* + * The section that defines the loosest possible + * field name is "3.6.8 Optional fields". + * + * optional-field = field-name ":" unstructured CRLF + * field-name = 1*ftext + * ftext = %d33-57 / %59-126 + */ + int ch; + char *cp = line->buf; + + /* Count mbox From headers as headers */ + if (starts_with(cp, "From ") || starts_with(cp, ">From ")) + return 1; + + while ((ch = *cp++)) { + if (ch == ':') + return 1; + if ((33 <= ch && ch <= 57) || + (59 <= ch && ch <= 126)) + continue; + break; + } + return 0; +} + +static int read_one_header_line(struct strbuf *line, FILE *in) +{ + struct strbuf continuation = STRBUF_INIT; + + /* Get the first part of the line. */ + if (strbuf_getline(line, in, '\n')) + return 0; + + /* + * Is it an empty line or not a valid rfc2822 header? 
+ * If so, stop here, and return false ("not a header") + */ + strbuf_rtrim(line); + if (!line->len || !is_rfc2822_header(line)) { + /* Re-add the newline */ + strbuf_addch(line, '\n'); + return 0; + } + + /* + * Now we need to eat all the continuation lines.. + * Yuck, 2822 header "folding" + */ + for (;;) { + int peek; + + peek = fgetc(in); ungetc(peek, in); + if (peek != ' ' && peek != '\t') + break; + if (strbuf_getline(&continuation, in, '\n')) + break; + continuation.buf[0] = ' '; + strbuf_rtrim(&continuation); + strbuf_addbuf(line, &continuation); + } + strbuf_release(&continuation); + + return 1; +} + static int find_boundary(void) { while (!strbuf_getline(&line, fin, '\n')) { From 4f0f9d46c72f049c80c7cec0f321429ac2e49d11 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 17:44:21 -0700 Subject: [PATCH 08/34] mailinfo: move check_header() after the helpers it uses This way, we can lose a forward decl for decode_header(). Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 135 ++++++++++++++++++++++----------------------- 1 file changed, 67 insertions(+), 68 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 04927f7ae5..dd9aad2723 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -284,7 +284,6 @@ static void cleanup_space(struct strbuf *sb) } } -static void decode_header(struct strbuf *line); static const char *header[MAX_HDR_PARSED] = { "From","Subject","Date", }; @@ -312,73 +311,6 @@ static int is_format_patch_separator(const char *line, int len) return !memcmp(SAMPLE + (cp - line), cp, strlen(SAMPLE) - (cp - line)); } -static int check_header(const struct strbuf *line, - struct strbuf *hdr_data[], int overwrite) -{ - int i, ret = 0, len; - struct strbuf sb = STRBUF_INIT; - /* search for the interesting parts */ - for (i = 0; header[i]; i++) { - int len = strlen(header[i]); - if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i])) { - /* Unwrap inline B and Q encoding, and optionally - * normalize 
the meta information to utf8. - */ - strbuf_add(&sb, line->buf + len + 2, line->len - len - 2); - decode_header(&sb); - handle_header(&hdr_data[i], &sb); - ret = 1; - goto check_header_out; - } - } - - /* Content stuff */ - if (cmp_header(line, "Content-Type")) { - len = strlen("Content-Type: "); - strbuf_add(&sb, line->buf + len, line->len - len); - decode_header(&sb); - strbuf_insert(&sb, 0, "Content-Type: ", len); - handle_content_type(&sb); - ret = 1; - goto check_header_out; - } - if (cmp_header(line, "Content-Transfer-Encoding")) { - len = strlen("Content-Transfer-Encoding: "); - strbuf_add(&sb, line->buf + len, line->len - len); - decode_header(&sb); - handle_content_transfer_encoding(&sb); - ret = 1; - goto check_header_out; - } - if (cmp_header(line, "Message-Id")) { - len = strlen("Message-Id: "); - strbuf_add(&sb, line->buf + len, line->len - len); - decode_header(&sb); - handle_message_id(&sb); - ret = 1; - goto check_header_out; - } - - /* for inbody stuff */ - if (starts_with(line->buf, ">From") && isspace(line->buf[5])) { - ret = is_format_patch_separator(line->buf + 1, line->len - 1); - goto check_header_out; - } - if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) { - for (i = 0; header[i]; i++) { - if (!strcmp("Subject", header[i])) { - handle_header(&hdr_data[i], line); - ret = 1; - goto check_header_out; - } - } - } - -check_header_out: - strbuf_release(&sb); - return ret; -} - static struct strbuf *decode_q_segment(const struct strbuf *q_seg, int rfc2047) { const char *in = q_seg->buf; @@ -539,6 +471,73 @@ release_return: strbuf_release(&piecebuf); } +static int check_header(const struct strbuf *line, + struct strbuf *hdr_data[], int overwrite) +{ + int i, ret = 0, len; + struct strbuf sb = STRBUF_INIT; + /* search for the interesting parts */ + for (i = 0; header[i]; i++) { + int len = strlen(header[i]); + if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i])) { + /* Unwrap inline B and Q encoding, and optionally + * 
normalize the meta information to utf8. + */ + strbuf_add(&sb, line->buf + len + 2, line->len - len - 2); + decode_header(&sb); + handle_header(&hdr_data[i], &sb); + ret = 1; + goto check_header_out; + } + } + + /* Content stuff */ + if (cmp_header(line, "Content-Type")) { + len = strlen("Content-Type: "); + strbuf_add(&sb, line->buf + len, line->len - len); + decode_header(&sb); + strbuf_insert(&sb, 0, "Content-Type: ", len); + handle_content_type(&sb); + ret = 1; + goto check_header_out; + } + if (cmp_header(line, "Content-Transfer-Encoding")) { + len = strlen("Content-Transfer-Encoding: "); + strbuf_add(&sb, line->buf + len, line->len - len); + decode_header(&sb); + handle_content_transfer_encoding(&sb); + ret = 1; + goto check_header_out; + } + if (cmp_header(line, "Message-Id")) { + len = strlen("Message-Id: "); + strbuf_add(&sb, line->buf + len, line->len - len); + decode_header(&sb); + handle_message_id(&sb); + ret = 1; + goto check_header_out; + } + + /* for inbody stuff */ + if (starts_with(line->buf, ">From") && isspace(line->buf[5])) { + ret = is_format_patch_separator(line->buf + 1, line->len - 1); + goto check_header_out; + } + if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) { + for (i = 0; header[i]; i++) { + if (!strcmp("Subject", header[i])) { + handle_header(&hdr_data[i], line); + ret = 1; + goto check_header_out; + } + } + } + +check_header_out: + strbuf_release(&sb); + return ret; +} + static void decode_transfer_encoding(struct strbuf *line) { struct strbuf *ret; From 30f50c3426def6062b214d3f9cb0dd09776e4415 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 17:44:26 -0700 Subject: [PATCH 09/34] mailinfo: move cleanup_space() before its users Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index dd9aad2723..23dcd95211 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c 
@@ -31,8 +31,17 @@ static int use_inbody_headers = 1; #define MAX_HDR_PARSED 10 #define MAX_BOUNDARIES 5 -static void cleanup_space(struct strbuf *sb); - +static void cleanup_space(struct strbuf *sb) +{ + size_t pos, cnt; + for (pos = 0; pos < sb->len; pos++) { + if (isspace(sb->buf[pos])) { + sb->buf[pos] = ' '; + for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++); + strbuf_remove(sb, pos + 1, cnt); + } + } +} static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email) { @@ -272,18 +281,6 @@ static void cleanup_subject(struct strbuf *subject) strbuf_trim(subject); } -static void cleanup_space(struct strbuf *sb) -{ - size_t pos, cnt; - for (pos = 0; pos < sb->len; pos++) { - if (isspace(sb->buf[pos])) { - sb->buf[pos] = ' '; - for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++); - strbuf_remove(sb, pos + 1, cnt); - } - } -} - static const char *header[MAX_HDR_PARSED] = { "From","Subject","Date", }; From c1b40bd7b616792de5f307ecdfd8ac45573bc4f9 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 17:44:46 -0700 Subject: [PATCH 10/34] mailinfo: move definition of MAX_HDR_PARSED closer to its use Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 23dcd95211..de7ccd8a28 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -28,7 +28,6 @@ static int use_scissors; static int add_message_id; static int use_inbody_headers = 1; -#define MAX_HDR_PARSED 10 #define MAX_BOUNDARIES 5 static void cleanup_space(struct strbuf *sb) @@ -281,6 +280,7 @@ static void cleanup_subject(struct strbuf *subject) strbuf_trim(subject); } +#define MAX_HDR_PARSED 10 static const char *header[MAX_HDR_PARSED] = { "From","Subject","Date", }; From 269e239c48ac8f70248beb4539535af0ed930682 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 13 Oct 2015 11:13:32 -0700 Subject: [PATCH 11/34] mailinfo: get rid of function-local 
static states Two helper functions use "static int" in their scope to keep track of the state while repeatedly getting called once for each input line. Move these state variables to their ultimate caller and pass down pointers to them along the callchain, as a small step in preparation for making this entire callchain more reentrant. Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index de7ccd8a28..bc1d874120 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -643,27 +643,25 @@ static int is_scissors_line(const struct strbuf *line) gap * 2 < perforation); } -static int handle_commit_msg(struct strbuf *line) +static int handle_commit_msg(struct strbuf *line, int *still_looking) { - static int still_looking = 1; - if (!cmitmsg) return 0; - if (still_looking) { + if (*still_looking) { if (!line->len || (line->len == 1 && line->buf[0] == '\n')) return 0; } - if (use_inbody_headers && still_looking) { - still_looking = check_header(line, s_hdr_data, 0); - if (still_looking) + if (use_inbody_headers && *still_looking) { + *still_looking = check_header(line, s_hdr_data, 0); + if (*still_looking) return 0; } else /* Only trim the first (blank) line of the commit message * when ignoring in-body headers. */ - still_looking = 0; + *still_looking = 0; /* normalize the log message to UTF-8. 
*/ if (metainfo_charset) @@ -675,7 +673,7 @@ static int handle_commit_msg(struct strbuf *line) die_errno("Could not rewind output message file"); if (ftruncate(fileno(cmitmsg), 0)) die_errno("Could not truncate output message file at scissors"); - still_looking = 1; + *still_looking = 1; /* * We may have already read "secondary headers"; purge @@ -707,16 +705,13 @@ static void handle_patch(const struct strbuf *line) patch_lines++; } -static void handle_filter(struct strbuf *line) +static void handle_filter(struct strbuf *line, int *filter_stage, int *header_stage) { - static int filter = 0; - - /* filter tells us which part we left off on */ - switch (filter) { + switch (*filter_stage) { case 0: - if (!handle_commit_msg(line)) + if (!handle_commit_msg(line, header_stage)) break; - filter++; + (*filter_stage)++; case 1: handle_patch(line); break; @@ -800,7 +795,7 @@ static int find_boundary(void) return 0; } -static int handle_boundary(void) +static int handle_boundary(int *filter_stage, int *header_stage) { struct strbuf newline = STRBUF_INIT; @@ -822,7 +817,7 @@ again: "can't recover\n"); exit(1); } - handle_filter(&newline); + handle_filter(&newline, filter_stage, header_stage); strbuf_release(&newline); /* skip to the next boundary */ @@ -850,6 +845,8 @@ again: static void handle_body(void) { struct strbuf prev = STRBUF_INIT; + int filter_stage = 0; + int header_stage = 1; /* Skip up to the first boundary */ if (*content_top) { @@ -862,10 +859,10 @@ static void handle_body(void) if (*content_top && is_multipart_boundary(&line)) { /* flush any leftover */ if (prev.len) { - handle_filter(&prev); + handle_filter(&prev, &filter_stage, &header_stage); strbuf_reset(&prev); } - if (!handle_boundary()) + if (!handle_boundary(&filter_stage, &header_stage)) goto handle_body_out; } @@ -895,7 +892,7 @@ static void handle_body(void) strbuf_addbuf(&prev, sb); break; } - handle_filter(sb); + handle_filter(sb, &filter_stage, &header_stage); } /* * The partial chunk is saved in 
"prev" and will be @@ -905,7 +902,7 @@ static void handle_body(void) break; } default: - handle_filter(&line); + handle_filter(&line, &filter_stage, &header_stage); } } while (!strbuf_getwholeline(&line, fin, '\n')); From fde00d50f6b084680085b9924b198e7f3138fc9e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 18 Oct 2015 15:36:16 -0700 Subject: [PATCH 12/34] mailinfo: do not let handle_body() touch global "line" directly This function has a single caller, and called with the global "line" holding the first line of the e-mail body after the caller finished processing the e-mail headers. The function then goes into a loop to process each line of the input, starting from what was given by its caller, and fills the same global "line" variable from the input as it needs to process more lines. Let the caller explicitly pass a pointer to this global "line" variable as an argument, and have the function itself use that strbuf throughout, instead of referring to the global "line" itself. There are helper functions that this function calls that still touch the global directly; they will be updated as the series progresses. 
Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index bc1d874120..6b4facabd4 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -842,7 +842,7 @@ again: return 1; } -static void handle_body(void) +static void handle_body(struct strbuf *line) { struct strbuf prev = STRBUF_INIT; int filter_stage = 0; @@ -856,7 +856,7 @@ static void handle_body(void) do { /* process any boundary lines */ - if (*content_top && is_multipart_boundary(&line)) { + if (*content_top && is_multipart_boundary(line)) { /* flush any leftover */ if (prev.len) { handle_filter(&prev, &filter_stage, &header_stage); @@ -867,7 +867,7 @@ static void handle_body(void) } /* Unwrap transfer encoding */ - decode_transfer_encoding(&line); + decode_transfer_encoding(line); switch (transfer_encoding) { case TE_BASE64: @@ -876,7 +876,7 @@ static void handle_body(void) struct strbuf **lines, **it, *sb; /* Prepend any previous partial lines */ - strbuf_insert(&line, 0, prev.buf, prev.len); + strbuf_insert(line, 0, prev.buf, prev.len); strbuf_reset(&prev); /* @@ -884,7 +884,7 @@ static void handle_body(void) * multiple new lines. 
Pass only one chunk * at a time to handle_filter() */ - lines = strbuf_split(&line, '\n'); + lines = strbuf_split(line, '\n'); for (it = lines; (sb = *it); it++) { if (*(it + 1) == NULL) /* The last line */ if (sb->buf[sb->len - 1] != '\n') { @@ -902,10 +902,10 @@ static void handle_body(void) break; } default: - handle_filter(&line, &filter_stage, &header_stage); + handle_filter(line, &filter_stage, &header_stage); } - } while (!strbuf_getwholeline(&line, fin, '\n')); + } while (!strbuf_getwholeline(line, fin, '\n')); handle_body_out: strbuf_release(&prev); @@ -991,7 +991,7 @@ static int mailinfo(FILE *in, FILE *out, const char *msg, const char *patch) while (read_one_header_line(&line, fin)) check_header(&line, p_hdr_data, 1); - handle_body(); + handle_body(&line); fclose(patchfile); handle_info(); From 69e24defd629eb6641e653b73459f57ab750c58b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 18 Oct 2015 15:40:06 -0700 Subject: [PATCH 13/34] mailinfo: do not let handle_boundary() touch global "line" directly This function has a single caller, and called with the global "line" holding the multi-part boundary line the caller saw while processing the e-mail body. The function then goes into a loop to process each line of the input, and fills the same global "line" variable from the input as it needs to read more lines to process the multi-part headers. Let the caller explicitly pass a pointer to this global "line" variable as an argument, and have the function itself use that strbuf throughout, instead of referring to the global "line" itself. There still is a helper function that this function calls that still touches the global directly; it will be updated as the series progresses. 
Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 6b4facabd4..9b3f349a11 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -795,14 +795,14 @@ static int find_boundary(void) return 0; } -static int handle_boundary(int *filter_stage, int *header_stage) +static int handle_boundary(struct strbuf *line, int *filter_stage, int *header_stage) { struct strbuf newline = STRBUF_INIT; strbuf_addch(&newline, '\n'); again: - if (line.len >= (*content_top)->len + 2 && - !memcmp(line.buf + (*content_top)->len, "--", 2)) { + if (line->len >= (*content_top)->len + 2 && + !memcmp(line->buf + (*content_top)->len, "--", 2)) { /* we hit an end boundary */ /* pop the current boundary off the stack */ strbuf_release(*content_top); @@ -831,14 +831,14 @@ again: strbuf_reset(&charset); /* slurp in this section's info */ - while (read_one_header_line(&line, fin)) - check_header(&line, p_hdr_data, 0); + while (read_one_header_line(line, fin)) + check_header(line, p_hdr_data, 0); strbuf_release(&newline); /* replenish line */ - if (strbuf_getline(&line, fin, '\n')) + if (strbuf_getline(line, fin, '\n')) return 0; - strbuf_addch(&line, '\n'); + strbuf_addch(line, '\n'); return 1; } @@ -862,7 +862,7 @@ static void handle_body(struct strbuf *line) handle_filter(&prev, &filter_stage, &header_stage); strbuf_reset(&prev); } - if (!handle_boundary(&filter_stage, &header_stage)) + if (!handle_boundary(line, &filter_stage, &header_stage)) goto handle_body_out; } From fbbcafd0607d6fccb2fde39c49619bcd7a7b910b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 18 Oct 2015 15:40:06 -0700 Subject: [PATCH 14/34] mailinfo: do not let find_boundary() touch global "line" directly With the previous two commits, we established that the local variable "line" in handle_body() and handle_boundary() functions always refer to the global "line" that is used as the common 
and shared "current line from the input". They are the only callers of the last function that refers to the global line directly, i.e. find_boundary(). Pass "line" as a parameter to this leaf function to complete the clean-up. Now the only function that directly refers to the global "line" is the caller of handle_body() at the very beginning of this whole callchain. Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 9b3f349a11..e7edd743ce 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -786,10 +786,10 @@ static int read_one_header_line(struct strbuf *line, FILE *in) return 1; } -static int find_boundary(void) +static int find_boundary(struct strbuf *line) { - while (!strbuf_getline(&line, fin, '\n')) { - if (*content_top && is_multipart_boundary(&line)) + while (!strbuf_getline(line, fin, '\n')) { + if (*content_top && is_multipart_boundary(line)) return 1; } return 0; @@ -821,7 +821,7 @@ again: strbuf_release(&newline); /* skip to the next boundary */ - if (!find_boundary()) + if (!find_boundary(line)) return 0; goto again; } @@ -850,7 +850,7 @@ static void handle_body(struct strbuf *line) /* Skip up to the first boundary */ if (*content_top) { - if (!find_boundary()) + if (!find_boundary(line)) goto handle_body_out; } From 6e21b5089f981e0e239bee722fc0d0edec703b8a Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 18 Oct 2015 15:47:02 -0700 Subject: [PATCH 15/34] mailinfo: move global "line" into mailinfo() function With the previous steps, it becomes clear that the mailinfo() function is the only one that wants the "line" to be directly touchable. Move it to the function scope of this function. 
Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index e7edd743ce..a0416610c9 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -12,7 +12,6 @@ static FILE *cmitmsg, *patchfile, *fin, *fout; static int keep_subject; static int keep_non_patch_brackets_in_subject; static const char *metainfo_charset; -static struct strbuf line = STRBUF_INIT; static struct strbuf name = STRBUF_INIT; static struct strbuf email = STRBUF_INIT; static char *message_id; @@ -964,6 +963,8 @@ static void handle_info(void) static int mailinfo(FILE *in, FILE *out, const char *msg, const char *patch) { int peek; + struct strbuf line = STRBUF_INIT; + fin = in; fout = out; @@ -995,7 +996,7 @@ static int mailinfo(FILE *in, FILE *out, const char *msg, const char *patch) fclose(patchfile); handle_info(); - + strbuf_release(&line); return 0; } From c69f2395ba6f4bc96542d6d52987989992331226 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 18 Oct 2015 22:22:10 -0700 Subject: [PATCH 16/34] mailinfo: introduce "struct mailinfo" to hold globals In this first step, move only 'email' and 'name' fields in there and remove the corresponding globals. In subsequent patches, more globals will be moved to this and the structure will be passed around as a new parameter to more functions. 
Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 71 ++++++++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index a0416610c9..a83f09885a 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -12,8 +12,11 @@ static FILE *cmitmsg, *patchfile, *fin, *fout; static int keep_subject; static int keep_non_patch_brackets_in_subject; static const char *metainfo_charset; -static struct strbuf name = STRBUF_INIT; -static struct strbuf email = STRBUF_INIT; + +struct mailinfo { + struct strbuf name; + struct strbuf email; +}; static char *message_id; static enum { @@ -53,7 +56,7 @@ static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf strbuf_addbuf(out, src); } -static void parse_bogus_from(const struct strbuf *line) +static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line) { /* John Doe */ @@ -61,7 +64,7 @@ static void parse_bogus_from(const struct strbuf *line) /* This is fallback, so do not bother if we already have an * e-mail address. 
*/ - if (email.len) + if (mi->email.len) return; bra = strchr(line->buf, '<'); @@ -71,16 +74,16 @@ static void parse_bogus_from(const struct strbuf *line) if (!ket) return; - strbuf_reset(&email); - strbuf_add(&email, bra + 1, ket - bra - 1); + strbuf_reset(&mi->email); + strbuf_add(&mi->email, bra + 1, ket - bra - 1); - strbuf_reset(&name); - strbuf_add(&name, line->buf, bra - line->buf); - strbuf_trim(&name); - get_sane_name(&name, &name, &email); + strbuf_reset(&mi->name); + strbuf_add(&mi->name, line->buf, bra - line->buf); + strbuf_trim(&mi->name); + get_sane_name(&mi->name, &mi->name, &mi->email); } -static void handle_from(const struct strbuf *from) +static void handle_from(struct mailinfo *mi, const struct strbuf *from) { char *at; size_t el; @@ -91,14 +94,14 @@ static void handle_from(const struct strbuf *from) at = strchr(f.buf, '@'); if (!at) { - parse_bogus_from(from); + parse_bogus_from(mi, from); return; } /* * If we already have one email, don't take any confusing lines */ - if (email.len && strchr(at + 1, '@')) { + if (mi->email.len && strchr(at + 1, '@')) { strbuf_release(&f); return; } @@ -117,8 +120,8 @@ static void handle_from(const struct strbuf *from) at--; } el = strcspn(at, " \n\t\r\v\f>"); - strbuf_reset(&email); - strbuf_add(&email, at, el); + strbuf_reset(&mi->email); + strbuf_add(&mi->email, at, el); strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0)); /* The remainder is name. 
It could be @@ -140,7 +143,7 @@ static void handle_from(const struct strbuf *from) strbuf_setlen(&f, f.len - 1); } - get_sane_name(&name, &f, &email); + get_sane_name(&mi->name, &f, &mi->email); strbuf_release(&f); } @@ -927,7 +930,7 @@ static void output_header_lines(FILE *fout, const char *hdr, const struct strbuf } } -static void handle_info(void) +static void handle_info(struct mailinfo *mi) { struct strbuf *hdr; int i; @@ -949,9 +952,9 @@ static void handle_info(void) output_header_lines(fout, "Subject", hdr); } else if (!strcmp(header[i], "From")) { cleanup_space(hdr); - handle_from(hdr); - fprintf(fout, "Author: %s\n", name.buf); - fprintf(fout, "Email: %s\n", email.buf); + handle_from(mi, hdr); + fprintf(fout, "Author: %s\n", mi->name.buf); + fprintf(fout, "Email: %s\n", mi->email.buf); } else { cleanup_space(hdr); fprintf(fout, "%s: %s\n", header[i], hdr->buf); @@ -960,7 +963,8 @@ static void handle_info(void) fprintf(fout, "\n"); } -static int mailinfo(FILE *in, FILE *out, const char *msg, const char *patch) +static int mailinfo(struct mailinfo *mi, + FILE *in, FILE *out, const char *msg, const char *patch) { int peek; struct strbuf line = STRBUF_INIT; @@ -995,7 +999,7 @@ static int mailinfo(FILE *in, FILE *out, const char *msg, const char *patch) handle_body(&line); fclose(patchfile); - handle_info(); + handle_info(mi); strbuf_release(&line); return 0; } @@ -1012,17 +1016,33 @@ static int git_mailinfo_config(const char *var, const char *value, void *unused) return 0; } +static void setup_mailinfo(struct mailinfo *mi) +{ + memset(mi, 0, sizeof(*mi)); + strbuf_init(&mi->name, 0); + strbuf_init(&mi->email, 0); + git_config(git_mailinfo_config, &mi); +} + +static void clear_mailinfo(struct mailinfo *mi) +{ + strbuf_release(&mi->name); + strbuf_release(&mi->email); +} + static const char mailinfo_usage[] = "git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding= | -n] [--scissors | --no-scissors] < mail >info"; int cmd_mailinfo(int argc, const char 
**argv, const char *prefix) { const char *def_charset; + struct mailinfo mi; + int status; /* NEEDSWORK: might want to do the optional .git/ directory * discovery */ - git_config(git_mailinfo_config, NULL); + setup_mailinfo(&mi); def_charset = get_commit_output_encoding(); metainfo_charset = def_charset; @@ -1054,5 +1074,8 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) if (argc != 3) usage(mailinfo_usage); - return !!mailinfo(stdin, stdout, argv[1], argv[2]); + status = !!mailinfo(&mi, stdin, stdout, argv[1], argv[2]); + clear_mailinfo(&mi); + + return status; } From 849106d5116e62d36070249f01adefc02cff157e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 15:39:37 -0700 Subject: [PATCH 17/34] mailinfo: move keep_subject & keep_non_patch_bracket to struct mailinfo These two are the only easy ones that do not require passing the structure around to deep corners of the callchain. Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index a83f09885a..a219cbcc59 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -9,13 +9,13 @@ static FILE *cmitmsg, *patchfile, *fin, *fout; -static int keep_subject; -static int keep_non_patch_brackets_in_subject; static const char *metainfo_charset; struct mailinfo { struct strbuf name; struct strbuf email; + int keep_subject; + int keep_non_patch_brackets_in_subject; }; static char *message_id; @@ -232,7 +232,7 @@ static int is_multipart_boundary(const struct strbuf *line) !memcmp(line->buf, (*content_top)->buf, (*content_top)->len)); } -static void cleanup_subject(struct strbuf *subject) +static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject) { size_t at = 0; @@ -260,7 +260,7 @@ static void cleanup_subject(struct strbuf *subject) if (!pos) break; remove = pos - subject->buf + at + 1; - if (!keep_non_patch_brackets_in_subject || + if 
(!mi->keep_non_patch_brackets_in_subject || (7 <= remove && memmem(subject->buf + at, remove, "PATCH", 5))) strbuf_remove(subject, at, remove); @@ -945,8 +945,8 @@ static void handle_info(struct mailinfo *mi) continue; if (!strcmp(header[i], "Subject")) { - if (!keep_subject) { - cleanup_subject(hdr); + if (!mi->keep_subject) { + cleanup_subject(mi, hdr); cleanup_space(hdr); } output_header_lines(fout, "Subject", hdr); @@ -1049,9 +1049,9 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) while (1 < argc && argv[1][0] == '-') { if (!strcmp(argv[1], "-k")) - keep_subject = 1; + mi.keep_subject = 1; else if (!strcmp(argv[1], "-b")) - keep_non_patch_brackets_in_subject = 1; + mi.keep_non_patch_brackets_in_subject = 1; else if (!strcmp(argv[1], "-m") || !strcmp(argv[1], "--message-id")) add_message_id = 1; else if (!strcmp(argv[1], "-u")) From 173aef7c2ee9aa0021dad7c70d30f9099be4a134 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 15:40:04 -0700 Subject: [PATCH 18/34] mailinfo: move global "FILE *fin, *fout" to struct mailinfo This requires us to pass "struct mailinfo" to more functions throughout the codepath that read input lines. Incidentally, later steps are helped by this patch passing the struct to more callchains. 
Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 54 ++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index a219cbcc59..e2979e0c76 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -7,11 +7,14 @@ #include "utf8.h" #include "strbuf.h" -static FILE *cmitmsg, *patchfile, *fin, *fout; +static FILE *cmitmsg, *patchfile; static const char *metainfo_charset; struct mailinfo { + FILE *input; + FILE *output; + struct strbuf name; struct strbuf email; int keep_subject; @@ -788,16 +791,17 @@ static int read_one_header_line(struct strbuf *line, FILE *in) return 1; } -static int find_boundary(struct strbuf *line) +static int find_boundary(struct mailinfo *mi, struct strbuf *line) { - while (!strbuf_getline(line, fin, '\n')) { + while (!strbuf_getline(line, mi->input, '\n')) { if (*content_top && is_multipart_boundary(line)) return 1; } return 0; } -static int handle_boundary(struct strbuf *line, int *filter_stage, int *header_stage) +static int handle_boundary(struct mailinfo *mi, struct strbuf *line, + int *filter_stage, int *header_stage) { struct strbuf newline = STRBUF_INIT; @@ -823,7 +827,7 @@ again: strbuf_release(&newline); /* skip to the next boundary */ - if (!find_boundary(line)) + if (!find_boundary(mi, line)) return 0; goto again; } @@ -833,18 +837,18 @@ again: strbuf_reset(&charset); /* slurp in this section's info */ - while (read_one_header_line(line, fin)) + while (read_one_header_line(line, mi->input)) check_header(line, p_hdr_data, 0); strbuf_release(&newline); /* replenish line */ - if (strbuf_getline(line, fin, '\n')) + if (strbuf_getline(line, mi->input, '\n')) return 0; strbuf_addch(line, '\n'); return 1; } -static void handle_body(struct strbuf *line) +static void handle_body(struct mailinfo *mi, struct strbuf *line) { struct strbuf prev = STRBUF_INIT; int filter_stage = 0; @@ -852,7 +856,7 @@ static void handle_body(struct strbuf *line) /* 
Skip up to the first boundary */ if (*content_top) { - if (!find_boundary(line)) + if (!find_boundary(mi, line)) goto handle_body_out; } @@ -864,7 +868,7 @@ static void handle_body(struct strbuf *line) handle_filter(&prev, &filter_stage, &header_stage); strbuf_reset(&prev); } - if (!handle_boundary(line, &filter_stage, &header_stage)) + if (!handle_boundary(mi, line, &filter_stage, &header_stage)) goto handle_body_out; } @@ -907,7 +911,7 @@ static void handle_body(struct strbuf *line) handle_filter(line, &filter_stage, &header_stage); } - } while (!strbuf_getwholeline(line, fin, '\n')); + } while (!strbuf_getwholeline(line, mi->input, '\n')); handle_body_out: strbuf_release(&prev); @@ -949,29 +953,25 @@ static void handle_info(struct mailinfo *mi) cleanup_subject(mi, hdr); cleanup_space(hdr); } - output_header_lines(fout, "Subject", hdr); + output_header_lines(mi->output, "Subject", hdr); } else if (!strcmp(header[i], "From")) { cleanup_space(hdr); handle_from(mi, hdr); - fprintf(fout, "Author: %s\n", mi->name.buf); - fprintf(fout, "Email: %s\n", mi->email.buf); + fprintf(mi->output, "Author: %s\n", mi->name.buf); + fprintf(mi->output, "Email: %s\n", mi->email.buf); } else { cleanup_space(hdr); - fprintf(fout, "%s: %s\n", header[i], hdr->buf); + fprintf(mi->output, "%s: %s\n", header[i], hdr->buf); } } - fprintf(fout, "\n"); + fprintf(mi->output, "\n"); } -static int mailinfo(struct mailinfo *mi, - FILE *in, FILE *out, const char *msg, const char *patch) +static int mailinfo(struct mailinfo *mi, const char *msg, const char *patch) { int peek; struct strbuf line = STRBUF_INIT; - fin = in; - fout = out; - cmitmsg = fopen(msg, "w"); if (!cmitmsg) { perror(msg); @@ -988,15 +988,15 @@ static int mailinfo(struct mailinfo *mi, s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*s_hdr_data)); do { - peek = fgetc(in); + peek = fgetc(mi->input); } while (isspace(peek)); - ungetc(peek, in); + ungetc(peek, mi->input); /* process the email header */ - while 
(read_one_header_line(&line, fin)) + while (read_one_header_line(&line, mi->input)) check_header(&line, p_hdr_data, 1); - handle_body(&line); + handle_body(mi, &line); fclose(patchfile); handle_info(mi); @@ -1074,7 +1074,9 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) if (argc != 3) usage(mailinfo_usage); - status = !!mailinfo(&mi, stdin, stdout, argv[1], argv[2]); + mi.input = stdin; + mi.output = stdout; + status = !!mailinfo(&mi, argv[1], argv[2]); clear_mailinfo(&mi); return status; From 13c6df2642343ec3d49e17082f38f191f175c742 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 16:13:34 -0700 Subject: [PATCH 19/34] mailinfo: move filter/header stage to struct mailinfo Earlier we got rid of two function-scope static variables that kept track of the states of helper functions by making them extra arguments that are passed throughout the callchain. Now we have a convenient place to store and pass them around in the form of "struct mailinfo", change them into two fields in the struct. Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index e2979e0c76..517d6361ea 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -19,6 +19,9 @@ struct mailinfo { struct strbuf email; int keep_subject; int keep_non_patch_brackets_in_subject; + + int filter_stage; /* still reading log or are we copying patch? */ + int header_stage; /* still checking in-body headers? 
*/ }; static char *message_id; @@ -648,25 +651,25 @@ static int is_scissors_line(const struct strbuf *line) gap * 2 < perforation); } -static int handle_commit_msg(struct strbuf *line, int *still_looking) +static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) { if (!cmitmsg) return 0; - if (*still_looking) { + if (mi->header_stage) { if (!line->len || (line->len == 1 && line->buf[0] == '\n')) return 0; } - if (use_inbody_headers && *still_looking) { - *still_looking = check_header(line, s_hdr_data, 0); - if (*still_looking) + if (use_inbody_headers && mi->header_stage) { + mi->header_stage = check_header(line, s_hdr_data, 0); + if (mi->header_stage) return 0; } else /* Only trim the first (blank) line of the commit message * when ignoring in-body headers. */ - *still_looking = 0; + mi->header_stage = 0; /* normalize the log message to UTF-8. */ if (metainfo_charset) @@ -678,7 +681,7 @@ static int handle_commit_msg(struct strbuf *line, int *still_looking) die_errno("Could not rewind output message file"); if (ftruncate(fileno(cmitmsg), 0)) die_errno("Could not truncate output message file at scissors"); - *still_looking = 1; + mi->header_stage = 1; /* * We may have already read "secondary headers"; purge @@ -710,13 +713,13 @@ static void handle_patch(const struct strbuf *line) patch_lines++; } -static void handle_filter(struct strbuf *line, int *filter_stage, int *header_stage) +static void handle_filter(struct mailinfo *mi, struct strbuf *line) { - switch (*filter_stage) { + switch (mi->filter_stage) { case 0: - if (!handle_commit_msg(line, header_stage)) + if (!handle_commit_msg(mi, line)) break; - (*filter_stage)++; + mi->filter_stage++; case 1: handle_patch(line); break; @@ -800,8 +803,7 @@ static int find_boundary(struct mailinfo *mi, struct strbuf *line) return 0; } -static int handle_boundary(struct mailinfo *mi, struct strbuf *line, - int *filter_stage, int *header_stage) +static int handle_boundary(struct mailinfo *mi, struct strbuf *line) 
{ struct strbuf newline = STRBUF_INIT; @@ -823,7 +825,7 @@ again: "can't recover\n"); exit(1); } - handle_filter(&newline, filter_stage, header_stage); + handle_filter(mi, &newline); strbuf_release(&newline); /* skip to the next boundary */ @@ -851,8 +853,6 @@ again: static void handle_body(struct mailinfo *mi, struct strbuf *line) { struct strbuf prev = STRBUF_INIT; - int filter_stage = 0; - int header_stage = 1; /* Skip up to the first boundary */ if (*content_top) { @@ -865,10 +865,10 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line) if (*content_top && is_multipart_boundary(line)) { /* flush any leftover */ if (prev.len) { - handle_filter(&prev, &filter_stage, &header_stage); + handle_filter(mi, &prev); strbuf_reset(&prev); } - if (!handle_boundary(mi, line, &filter_stage, &header_stage)) + if (!handle_boundary(mi, line)) goto handle_body_out; } @@ -898,7 +898,7 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line) strbuf_addbuf(&prev, sb); break; } - handle_filter(sb, &filter_stage, &header_stage); + handle_filter(mi, sb); } /* * The partial chunk is saved in "prev" and will be @@ -908,7 +908,7 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line) break; } default: - handle_filter(line, &filter_stage, &header_stage); + handle_filter(mi, line); } } while (!strbuf_getwholeline(line, mi->input, '\n')); @@ -1021,6 +1021,7 @@ static void setup_mailinfo(struct mailinfo *mi) memset(mi, 0, sizeof(*mi)); strbuf_init(&mi->name, 0); strbuf_init(&mi->email, 0); + mi->header_stage = 1; git_config(git_mailinfo_config, &mi); } From 43550efa714c0285f0173ff0cebf231956659d62 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 16:13:51 -0700 Subject: [PATCH 20/34] mailinfo: move patch_lines to struct mailinfo This one is trivial thanks to previous steps that started passing the structure throughout the input codepaths. 
Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 517d6361ea..a104c5cbeb 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -20,6 +20,7 @@ struct mailinfo { int keep_subject; int keep_non_patch_brackets_in_subject; + int patch_lines; int filter_stage; /* still reading log or are we copying patch? */ int header_stage; /* still checking in-body headers? */ }; @@ -30,7 +31,6 @@ static enum { } transfer_encoding; static struct strbuf charset = STRBUF_INIT; -static int patch_lines; static struct strbuf **p_hdr_data, **s_hdr_data; static int use_scissors; static int add_message_id; @@ -707,10 +707,10 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) return 0; } -static void handle_patch(const struct strbuf *line) +static void handle_patch(struct mailinfo *mi, const struct strbuf *line) { fwrite(line->buf, 1, line->len, patchfile); - patch_lines++; + mi->patch_lines++; } static void handle_filter(struct mailinfo *mi, struct strbuf *line) @@ -721,7 +721,7 @@ static void handle_filter(struct mailinfo *mi, struct strbuf *line) break; mi->filter_stage++; case 1: - handle_patch(line); + handle_patch(mi, line); break; } } @@ -941,7 +941,7 @@ static void handle_info(struct mailinfo *mi) for (i = 0; header[i]; i++) { /* only print inbody headers if we output a patch file */ - if (patch_lines && s_hdr_data[i]) + if (mi->patch_lines && s_hdr_data[i]) hdr = s_hdr_data[i]; else if (p_hdr_data[i]) hdr = p_hdr_data[i]; From 6200b751bb83d3ea982774e479ad6674c9e84b0a Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 18 Oct 2015 22:27:56 -0700 Subject: [PATCH 21/34] mailinfo: move add_message_id and message_id to struct mailinfo This requires us to pass the structure into check_header() codepath. 
Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index a104c5cbeb..0287e93249 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -19,12 +19,13 @@ struct mailinfo { struct strbuf email; int keep_subject; int keep_non_patch_brackets_in_subject; + int add_message_id; + char *message_id; int patch_lines; int filter_stage; /* still reading log or are we copying patch? */ int header_stage; /* still checking in-body headers? */ }; -static char *message_id; static enum { TE_DONTCARE, TE_QP, TE_BASE64 @@ -33,7 +34,6 @@ static enum { static struct strbuf charset = STRBUF_INIT; static struct strbuf **p_hdr_data, **s_hdr_data; static int use_scissors; -static int add_message_id; static int use_inbody_headers = 1; #define MAX_BOUNDARIES 5 @@ -216,10 +216,10 @@ static void handle_content_type(struct strbuf *line) } } -static void handle_message_id(const struct strbuf *line) +static void handle_message_id(struct mailinfo *mi, const struct strbuf *line) { - if (add_message_id) - message_id = strdup(line->buf); + if (mi->add_message_id) + mi->message_id = strdup(line->buf); } static void handle_content_transfer_encoding(const struct strbuf *line) @@ -476,11 +476,13 @@ release_return: strbuf_release(&piecebuf); } -static int check_header(const struct strbuf *line, - struct strbuf *hdr_data[], int overwrite) +static int check_header(struct mailinfo *mi, + const struct strbuf *line, + struct strbuf *hdr_data[], int overwrite) { int i, ret = 0, len; struct strbuf sb = STRBUF_INIT; + /* search for the interesting parts */ for (i = 0; header[i]; i++) { int len = strlen(header[i]); @@ -518,7 +520,7 @@ static int check_header(const struct strbuf *line, len = strlen("Message-Id: "); strbuf_add(&sb, line->buf + len, line->len - len); decode_header(&sb); - handle_message_id(&sb); + handle_message_id(mi, &sb); ret = 1; goto check_header_out; } 
@@ -662,7 +664,7 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) } if (use_inbody_headers && mi->header_stage) { - mi->header_stage = check_header(line, s_hdr_data, 0); + mi->header_stage = check_header(mi, line, s_hdr_data, 0); if (mi->header_stage) return 0; } else @@ -696,8 +698,8 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) } if (patchbreak(line)) { - if (message_id) - fprintf(cmitmsg, "Message-Id: %s\n", message_id); + if (mi->message_id) + fprintf(cmitmsg, "Message-Id: %s\n", mi->message_id); fclose(cmitmsg); cmitmsg = NULL; return 1; @@ -840,7 +842,7 @@ again: /* slurp in this section's info */ while (read_one_header_line(line, mi->input)) - check_header(line, p_hdr_data, 0); + check_header(mi, line, p_hdr_data, 0); strbuf_release(&newline); /* replenish line */ @@ -994,7 +996,7 @@ static int mailinfo(struct mailinfo *mi, const char *msg, const char *patch) /* process the email header */ while (read_one_header_line(&line, mi->input)) - check_header(&line, p_hdr_data, 1); + check_header(mi, &line, p_hdr_data, 1); handle_body(mi, &line); fclose(patchfile); @@ -1029,6 +1031,7 @@ static void clear_mailinfo(struct mailinfo *mi) { strbuf_release(&mi->name); strbuf_release(&mi->email); + free(mi->message_id); } static const char mailinfo_usage[] = @@ -1054,7 +1057,7 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) else if (!strcmp(argv[1], "-b")) mi.keep_non_patch_brackets_in_subject = 1; else if (!strcmp(argv[1], "-m") || !strcmp(argv[1], "--message-id")) - add_message_id = 1; + mi.add_message_id = 1; else if (!strcmp(argv[1], "-u")) metainfo_charset = def_charset; else if (!strcmp(argv[1], "-n")) From ad57ef9da9ba95c69507359abad6bd3f57837a12 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 16:14:57 -0700 Subject: [PATCH 22/34] mailinfo: move use_scissors and use_inbody_headers to struct mailinfo Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 23 
+++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 0287e93249..7531b3df3b 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -20,6 +20,8 @@ struct mailinfo { int keep_subject; int keep_non_patch_brackets_in_subject; int add_message_id; + int use_scissors; + int use_inbody_headers; char *message_id; int patch_lines; @@ -33,8 +35,6 @@ static enum { static struct strbuf charset = STRBUF_INIT; static struct strbuf **p_hdr_data, **s_hdr_data; -static int use_scissors; -static int use_inbody_headers = 1; #define MAX_BOUNDARIES 5 @@ -663,7 +663,7 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) return 0; } - if (use_inbody_headers && mi->header_stage) { + if (mi->use_inbody_headers && mi->header_stage) { mi->header_stage = check_header(mi, line, s_hdr_data, 0); if (mi->header_stage) return 0; @@ -677,7 +677,7 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) if (metainfo_charset) convert_to_utf8(line, charset.buf); - if (use_scissors && is_scissors_line(line)) { + if (mi->use_scissors && is_scissors_line(line)) { int i; if (fseek(cmitmsg, 0L, SEEK_SET)) die_errno("Could not rewind output message file"); @@ -1006,12 +1006,14 @@ static int mailinfo(struct mailinfo *mi, const char *msg, const char *patch) return 0; } -static int git_mailinfo_config(const char *var, const char *value, void *unused) +static int git_mailinfo_config(const char *var, const char *value, void *mi_) { + struct mailinfo *mi = mi_; + if (!starts_with(var, "mailinfo.")) - return git_default_config(var, value, unused); + return git_default_config(var, value, NULL); if (!strcmp(var, "mailinfo.scissors")) { - use_scissors = git_config_bool(var, value); + mi->use_scissors = git_config_bool(var, value); return 0; } /* perhaps others here */ @@ -1024,6 +1026,7 @@ static void setup_mailinfo(struct mailinfo *mi) strbuf_init(&mi->name, 0); strbuf_init(&mi->email, 0); 
mi->header_stage = 1; + mi->use_inbody_headers = 1; git_config(git_mailinfo_config, &mi); } @@ -1065,11 +1068,11 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) else if (starts_with(argv[1], "--encoding=")) metainfo_charset = argv[1] + 11; else if (!strcmp(argv[1], "--scissors")) - use_scissors = 1; + mi.use_scissors = 1; else if (!strcmp(argv[1], "--no-scissors")) - use_scissors = 0; + mi.use_scissors = 0; else if (!strcmp(argv[1], "--no-inbody-headers")) - use_inbody_headers = 0; + mi.use_inbody_headers = 0; else usage(mailinfo_usage); argc--; argv++; From 28be2d083cfce464dc898dfb78f2b957b7116277 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 16:15:40 -0700 Subject: [PATCH 23/34] mailinfo: move metainfo_charset to struct mailinfo This requires us to pass the struct down to decode_header() and convert_to_utf8() callchain. Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 7531b3df3b..0b49bf6a1a 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -9,8 +9,6 @@ static FILE *cmitmsg, *patchfile; -static const char *metainfo_charset; - struct mailinfo { FILE *input; FILE *output; @@ -22,6 +20,7 @@ struct mailinfo { int add_message_id; int use_scissors; int use_inbody_headers; + const char *metainfo_charset; char *message_id; int patch_lines; @@ -380,23 +379,24 @@ static struct strbuf *decode_b_segment(const struct strbuf *b_seg) return out; } -static void convert_to_utf8(struct strbuf *line, const char *charset) +static void convert_to_utf8(struct mailinfo *mi, + struct strbuf *line, const char *charset) { char *out; if (!charset || !*charset) return; - if (same_encoding(metainfo_charset, charset)) + if (same_encoding(mi->metainfo_charset, charset)) return; - out = reencode_string(line->buf, metainfo_charset, charset); + out = reencode_string(line->buf, 
mi->metainfo_charset, charset); if (!out) die("cannot convert from %s to %s", - charset, metainfo_charset); + charset, mi->metainfo_charset); strbuf_attach(line, out, strlen(out), strlen(out)); } -static void decode_header(struct strbuf *it) +static void decode_header(struct mailinfo *mi, struct strbuf *it) { char *in, *ep, *cp; struct strbuf outbuf = STRBUF_INIT, *dec; @@ -459,8 +459,8 @@ static void decode_header(struct strbuf *it) dec = decode_q_segment(&piecebuf, 1); break; } - if (metainfo_charset) - convert_to_utf8(dec, charset_q.buf); + if (mi->metainfo_charset) + convert_to_utf8(mi, dec, charset_q.buf); strbuf_addbuf(&outbuf, dec); strbuf_release(dec); @@ -491,7 +491,7 @@ static int check_header(struct mailinfo *mi, * normalize the meta information to utf8. */ strbuf_add(&sb, line->buf + len + 2, line->len - len - 2); - decode_header(&sb); + decode_header(mi, &sb); handle_header(&hdr_data[i], &sb); ret = 1; goto check_header_out; @@ -502,7 +502,7 @@ static int check_header(struct mailinfo *mi, if (cmp_header(line, "Content-Type")) { len = strlen("Content-Type: "); strbuf_add(&sb, line->buf + len, line->len - len); - decode_header(&sb); + decode_header(mi, &sb); strbuf_insert(&sb, 0, "Content-Type: ", len); handle_content_type(&sb); ret = 1; @@ -511,7 +511,7 @@ static int check_header(struct mailinfo *mi, if (cmp_header(line, "Content-Transfer-Encoding")) { len = strlen("Content-Transfer-Encoding: "); strbuf_add(&sb, line->buf + len, line->len - len); - decode_header(&sb); + decode_header(mi, &sb); handle_content_transfer_encoding(&sb); ret = 1; goto check_header_out; @@ -519,7 +519,7 @@ static int check_header(struct mailinfo *mi, if (cmp_header(line, "Message-Id")) { len = strlen("Message-Id: "); strbuf_add(&sb, line->buf + len, line->len - len); - decode_header(&sb); + decode_header(mi, &sb); handle_message_id(mi, &sb); ret = 1; goto check_header_out; @@ -674,8 +674,8 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) 
mi->header_stage = 0; /* normalize the log message to UTF-8. */ - if (metainfo_charset) - convert_to_utf8(line, charset.buf); + if (mi->metainfo_charset) + convert_to_utf8(mi, line, charset.buf); if (mi->use_scissors && is_scissors_line(line)) { int i; @@ -1052,7 +1052,7 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) setup_mailinfo(&mi); def_charset = get_commit_output_encoding(); - metainfo_charset = def_charset; + mi.metainfo_charset = def_charset; while (1 < argc && argv[1][0] == '-') { if (!strcmp(argv[1], "-k")) @@ -1062,11 +1062,11 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) else if (!strcmp(argv[1], "-m") || !strcmp(argv[1], "--message-id")) mi.add_message_id = 1; else if (!strcmp(argv[1], "-u")) - metainfo_charset = def_charset; + mi.metainfo_charset = def_charset; else if (!strcmp(argv[1], "-n")) - metainfo_charset = NULL; + mi.metainfo_charset = NULL; else if (starts_with(argv[1], "--encoding=")) - metainfo_charset = argv[1] + 11; + mi.metainfo_charset = argv[1] + 11; else if (!strcmp(argv[1], "--scissors")) mi.use_scissors = 1; else if (!strcmp(argv[1], "--no-scissors")) From 28c6bfe94c83b810e5b23a2dc478c961c0534986 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 18 Oct 2015 15:58:35 -0700 Subject: [PATCH 24/34] mailinfo: move check for metainfo_charset to convert_to_utf8() All callers of this function refrain from calling it when mi->metainfo_charset is NULL; move the check to the callee, as it already has a few conditions at its beginning to turn it into a no-op. 
Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 0b49bf6a1a..1048a46e40 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -384,7 +384,7 @@ static void convert_to_utf8(struct mailinfo *mi, { char *out; - if (!charset || !*charset) + if (!mi->metainfo_charset || !charset || !*charset) return; if (same_encoding(mi->metainfo_charset, charset)) @@ -459,8 +459,7 @@ static void decode_header(struct mailinfo *mi, struct strbuf *it) dec = decode_q_segment(&piecebuf, 1); break; } - if (mi->metainfo_charset) - convert_to_utf8(mi, dec, charset_q.buf); + convert_to_utf8(mi, dec, charset_q.buf); strbuf_addbuf(&outbuf, dec); strbuf_release(dec); @@ -674,8 +673,7 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) mi->header_stage = 0; /* normalize the log message to UTF-8. */ - if (mi->metainfo_charset) - convert_to_utf8(mi, line, charset.buf); + convert_to_utf8(mi, line, charset.buf); if (mi->use_scissors && is_scissors_line(line)) { int i; From ab50e38b5d15dc4490cc45c3d7ad6c232cb4c330 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 16:16:05 -0700 Subject: [PATCH 25/34] mailinfo: move transfer_encoding to struct mailinfo Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 1048a46e40..f74973ea0d 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -23,14 +23,14 @@ struct mailinfo { const char *metainfo_charset; char *message_id; + enum { + TE_DONTCARE, TE_QP, TE_BASE64 + } transfer_encoding; int patch_lines; int filter_stage; /* still reading log or are we copying patch? */ int header_stage; /* still checking in-body headers? 
*/ }; -static enum { - TE_DONTCARE, TE_QP, TE_BASE64 -} transfer_encoding; static struct strbuf charset = STRBUF_INIT; static struct strbuf **p_hdr_data, **s_hdr_data; @@ -221,14 +221,15 @@ static void handle_message_id(struct mailinfo *mi, const struct strbuf *line) mi->message_id = strdup(line->buf); } -static void handle_content_transfer_encoding(const struct strbuf *line) +static void handle_content_transfer_encoding(struct mailinfo *mi, + const struct strbuf *line) { if (strcasestr(line->buf, "base64")) - transfer_encoding = TE_BASE64; + mi->transfer_encoding = TE_BASE64; else if (strcasestr(line->buf, "quoted-printable")) - transfer_encoding = TE_QP; + mi->transfer_encoding = TE_QP; else - transfer_encoding = TE_DONTCARE; + mi->transfer_encoding = TE_DONTCARE; } static int is_multipart_boundary(const struct strbuf *line) @@ -511,7 +512,7 @@ static int check_header(struct mailinfo *mi, len = strlen("Content-Transfer-Encoding: "); strbuf_add(&sb, line->buf + len, line->len - len); decode_header(mi, &sb); - handle_content_transfer_encoding(&sb); + handle_content_transfer_encoding(mi, &sb); ret = 1; goto check_header_out; } @@ -544,11 +545,11 @@ check_header_out: return ret; } -static void decode_transfer_encoding(struct strbuf *line) +static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line) { struct strbuf *ret; - switch (transfer_encoding) { + switch (mi->transfer_encoding) { case TE_QP: ret = decode_q_segment(line, 0); break; @@ -835,7 +836,7 @@ again: } /* set some defaults */ - transfer_encoding = TE_DONTCARE; + mi->transfer_encoding = TE_DONTCARE; strbuf_reset(&charset); /* slurp in this section's info */ @@ -873,9 +874,9 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line) } /* Unwrap transfer encoding */ - decode_transfer_encoding(line); + decode_transfer_encoding(mi, line); - switch (transfer_encoding) { + switch (mi->transfer_encoding) { case TE_BASE64: case TE_QP: { From f1e037b9af77fd9a3f4c5213af66179d0f722a72 Mon 
Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 18 Oct 2015 22:30:08 -0700 Subject: [PATCH 26/34] mailinfo: move charset to struct mailinfo Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index f74973ea0d..1fd29f6820 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -22,6 +22,7 @@ struct mailinfo { int use_inbody_headers; const char *metainfo_charset; + struct strbuf charset; char *message_id; enum { TE_DONTCARE, TE_QP, TE_BASE64 @@ -31,8 +32,6 @@ struct mailinfo { int header_stage; /* still checking in-body headers? */ }; - -static struct strbuf charset = STRBUF_INIT; static struct strbuf **p_hdr_data, **s_hdr_data; #define MAX_BOUNDARIES 5 @@ -193,7 +192,7 @@ static struct strbuf *content[MAX_BOUNDARIES]; static struct strbuf **content_top = content; -static void handle_content_type(struct strbuf *line) +static void handle_content_type(struct mailinfo *mi, struct strbuf *line) { struct strbuf *boundary = xmalloc(sizeof(struct strbuf)); strbuf_init(boundary, line->len); @@ -207,7 +206,7 @@ static void handle_content_type(struct strbuf *line) *content_top = boundary; boundary = NULL; } - slurp_attr(line->buf, "charset=", &charset); + slurp_attr(line->buf, "charset=", &mi->charset); if (boundary) { strbuf_release(boundary); @@ -504,7 +503,7 @@ static int check_header(struct mailinfo *mi, strbuf_add(&sb, line->buf + len, line->len - len); decode_header(mi, &sb); strbuf_insert(&sb, 0, "Content-Type: ", len); - handle_content_type(&sb); + handle_content_type(mi, &sb); ret = 1; goto check_header_out; } @@ -674,7 +673,7 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) mi->header_stage = 0; /* normalize the log message to UTF-8. 
*/ - convert_to_utf8(mi, line, charset.buf); + convert_to_utf8(mi, line, mi->charset.buf); if (mi->use_scissors && is_scissors_line(line)) { int i; @@ -837,7 +836,7 @@ again: /* set some defaults */ mi->transfer_encoding = TE_DONTCARE; - strbuf_reset(&charset); + strbuf_reset(&mi->charset); /* slurp in this section's info */ while (read_one_header_line(line, mi->input)) @@ -1024,6 +1023,7 @@ static void setup_mailinfo(struct mailinfo *mi) memset(mi, 0, sizeof(*mi)); strbuf_init(&mi->name, 0); strbuf_init(&mi->email, 0); + strbuf_init(&mi->charset, 0); mi->header_stage = 1; mi->use_inbody_headers = 1; git_config(git_mailinfo_config, &mi); @@ -1033,6 +1033,7 @@ static void clear_mailinfo(struct mailinfo *mi) { strbuf_release(&mi->name); strbuf_release(&mi->email); + strbuf_release(&mi->charset); free(mi->message_id); } From 8f63588a6e23685b8deae681c9f126158c612e97 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 16:16:47 -0700 Subject: [PATCH 27/34] mailinfo: move cmitmsg and patchfile to struct mailinfo Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 1fd29f6820..f57100f2ce 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -7,11 +7,11 @@ #include "utf8.h" #include "strbuf.h" -static FILE *cmitmsg, *patchfile; - struct mailinfo { FILE *input; FILE *output; + FILE *cmitmsg; + FILE *patchfile; struct strbuf name; struct strbuf email; @@ -654,7 +654,7 @@ static int is_scissors_line(const struct strbuf *line) static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) { - if (!cmitmsg) + if (!mi->cmitmsg) return 0; if (mi->header_stage) { @@ -677,9 +677,9 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) if (mi->use_scissors && is_scissors_line(line)) { int i; - if (fseek(cmitmsg, 0L, SEEK_SET)) + if (fseek(mi->cmitmsg, 0L, SEEK_SET)) die_errno("Could not 
rewind output message file"); - if (ftruncate(fileno(cmitmsg), 0)) + if (ftruncate(fileno(mi->cmitmsg), 0)) die_errno("Could not truncate output message file at scissors"); mi->header_stage = 1; @@ -697,19 +697,19 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) if (patchbreak(line)) { if (mi->message_id) - fprintf(cmitmsg, "Message-Id: %s\n", mi->message_id); - fclose(cmitmsg); - cmitmsg = NULL; + fprintf(mi->cmitmsg, "Message-Id: %s\n", mi->message_id); + fclose(mi->cmitmsg); + mi->cmitmsg = NULL; return 1; } - fputs(line->buf, cmitmsg); + fputs(line->buf, mi->cmitmsg); return 0; } static void handle_patch(struct mailinfo *mi, const struct strbuf *line) { - fwrite(line->buf, 1, line->len, patchfile); + fwrite(line->buf, 1, line->len, mi->patchfile); mi->patch_lines++; } @@ -972,15 +972,15 @@ static int mailinfo(struct mailinfo *mi, const char *msg, const char *patch) int peek; struct strbuf line = STRBUF_INIT; - cmitmsg = fopen(msg, "w"); - if (!cmitmsg) { + mi->cmitmsg = fopen(msg, "w"); + if (!mi->cmitmsg) { perror(msg); return -1; } - patchfile = fopen(patch, "w"); - if (!patchfile) { + mi->patchfile = fopen(patch, "w"); + if (!mi->patchfile) { perror(patch); - fclose(cmitmsg); + fclose(mi->cmitmsg); return -1; } @@ -997,7 +997,7 @@ static int mailinfo(struct mailinfo *mi, const char *msg, const char *patch) check_header(mi, &line, p_hdr_data, 1); handle_body(mi, &line); - fclose(patchfile); + fclose(mi->patchfile); handle_info(mi); strbuf_release(&line); From d895bf0f57aa162142c011c92c1bde3822323f33 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 17:43:24 -0700 Subject: [PATCH 28/34] mailinfo: move [ps]_hdr_data to struct mailinfo Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index f57100f2ce..315d542188 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ 
-30,10 +30,10 @@ struct mailinfo { int patch_lines; int filter_stage; /* still reading log or are we copying patch? */ int header_stage; /* still checking in-body headers? */ + struct strbuf **p_hdr_data; + struct strbuf **s_hdr_data; }; -static struct strbuf **p_hdr_data, **s_hdr_data; - #define MAX_BOUNDARIES 5 static void cleanup_space(struct strbuf *sb) @@ -663,7 +663,7 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) } if (mi->use_inbody_headers && mi->header_stage) { - mi->header_stage = check_header(mi, line, s_hdr_data, 0); + mi->header_stage = check_header(mi, line, mi->s_hdr_data, 0); if (mi->header_stage) return 0; } else @@ -688,9 +688,9 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) * them to give ourselves a clean restart. */ for (i = 0; header[i]; i++) { - if (s_hdr_data[i]) - strbuf_release(s_hdr_data[i]); - s_hdr_data[i] = NULL; + if (mi->s_hdr_data[i]) + strbuf_release(mi->s_hdr_data[i]); + mi->s_hdr_data[i] = NULL; } return 0; } @@ -840,7 +840,7 @@ again: /* slurp in this section's info */ while (read_one_header_line(line, mi->input)) - check_header(mi, line, p_hdr_data, 0); + check_header(mi, line, mi->p_hdr_data, 0); strbuf_release(&newline); /* replenish line */ @@ -941,10 +941,10 @@ static void handle_info(struct mailinfo *mi) for (i = 0; header[i]; i++) { /* only print inbody headers if we output a patch file */ - if (mi->patch_lines && s_hdr_data[i]) - hdr = s_hdr_data[i]; - else if (p_hdr_data[i]) - hdr = p_hdr_data[i]; + if (mi->patch_lines && mi->s_hdr_data[i]) + hdr = mi->s_hdr_data[i]; + else if (mi->p_hdr_data[i]) + hdr = mi->p_hdr_data[i]; else continue; @@ -984,8 +984,8 @@ static int mailinfo(struct mailinfo *mi, const char *msg, const char *patch) return -1; } - p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*p_hdr_data)); - s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*s_hdr_data)); + mi->p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->p_hdr_data))); + mi->s_hdr_data = 
xcalloc(MAX_HDR_PARSED, sizeof(*(mi->s_hdr_data))); do { peek = fgetc(mi->input); @@ -994,7 +994,7 @@ static int mailinfo(struct mailinfo *mi, const char *msg, const char *patch) /* process the email header */ while (read_one_header_line(&line, mi->input)) - check_header(mi, &line, p_hdr_data, 1); + check_header(mi, &line, mi->p_hdr_data, 1); handle_body(mi, &line); fclose(mi->patchfile); @@ -1031,10 +1031,19 @@ static void setup_mailinfo(struct mailinfo *mi) static void clear_mailinfo(struct mailinfo *mi) { + int i; + strbuf_release(&mi->name); strbuf_release(&mi->email); strbuf_release(&mi->charset); free(mi->message_id); + + for (i = 0; mi->p_hdr_data[i]; i++) + strbuf_release(mi->p_hdr_data[i]); + free(mi->p_hdr_data); + for (i = 0; mi->s_hdr_data[i]; i++) + strbuf_release(mi->s_hdr_data[i]); + free(mi->s_hdr_data); } static const char mailinfo_usage[] = From 8e919277e07c5056021e3ec746693e5cd0496a5b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 17:43:54 -0700 Subject: [PATCH 29/34] mailinfo: move content/content_top to struct mailinfo Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 315d542188..93043255ad 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -7,6 +7,8 @@ #include "utf8.h" #include "strbuf.h" +#define MAX_BOUNDARIES 5 + struct mailinfo { FILE *input; FILE *output; @@ -22,6 +24,8 @@ struct mailinfo { int use_inbody_headers; const char *metainfo_charset; + struct strbuf *content[MAX_BOUNDARIES]; + struct strbuf **content_top; struct strbuf charset; char *message_id; enum { @@ -34,8 +38,6 @@ struct mailinfo { struct strbuf **s_hdr_data; }; -#define MAX_BOUNDARIES 5 - static void cleanup_space(struct strbuf *sb) { size_t pos, cnt; @@ -188,10 +190,6 @@ static int slurp_attr(const char *line, const char *name, struct strbuf *attr) return 1; } -static struct 
strbuf *content[MAX_BOUNDARIES]; - -static struct strbuf **content_top = content; - static void handle_content_type(struct mailinfo *mi, struct strbuf *line) { struct strbuf *boundary = xmalloc(sizeof(struct strbuf)); @@ -199,11 +197,11 @@ static void handle_content_type(struct mailinfo *mi, struct strbuf *line) if (slurp_attr(line->buf, "boundary=", boundary)) { strbuf_insert(boundary, 0, "--", 2); - if (++content_top >= &content[MAX_BOUNDARIES]) { + if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) { fprintf(stderr, "Too many boundaries to handle\n"); exit(1); } - *content_top = boundary; + *(mi->content_top) = boundary; boundary = NULL; } slurp_attr(line->buf, "charset=", &mi->charset); @@ -231,10 +229,12 @@ static void handle_content_transfer_encoding(struct mailinfo *mi, mi->transfer_encoding = TE_DONTCARE; } -static int is_multipart_boundary(const struct strbuf *line) +static int is_multipart_boundary(struct mailinfo *mi, const struct strbuf *line) { - return (((*content_top)->len <= line->len) && - !memcmp(line->buf, (*content_top)->buf, (*content_top)->len)); + struct strbuf *content_top = *(mi->content_top); + + return ((content_top->len <= line->len) && + !memcmp(line->buf, content_top->buf, content_top->len)); } static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject) @@ -797,7 +797,7 @@ static int read_one_header_line(struct strbuf *line, FILE *in) static int find_boundary(struct mailinfo *mi, struct strbuf *line) { while (!strbuf_getline(line, mi->input, '\n')) { - if (*content_top && is_multipart_boundary(line)) + if (*(mi->content_top) && is_multipart_boundary(mi, line)) return 1; } return 0; @@ -809,18 +809,18 @@ static int handle_boundary(struct mailinfo *mi, struct strbuf *line) strbuf_addch(&newline, '\n'); again: - if (line->len >= (*content_top)->len + 2 && - !memcmp(line->buf + (*content_top)->len, "--", 2)) { + if (line->len >= (*(mi->content_top))->len + 2 && + !memcmp(line->buf + (*(mi->content_top))->len, "--", 2)) { 
/* we hit an end boundary */ /* pop the current boundary off the stack */ - strbuf_release(*content_top); - free(*content_top); - *content_top = NULL; + strbuf_release(*(mi->content_top)); + free(*(mi->content_top)); + *(mi->content_top) = NULL; /* technically won't happen as is_multipart_boundary() will fail first. But just in case.. */ - if (--content_top < content) { + if (--mi->content_top < mi->content) { fprintf(stderr, "Detected mismatched boundaries, " "can't recover\n"); exit(1); @@ -855,14 +855,14 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line) struct strbuf prev = STRBUF_INIT; /* Skip up to the first boundary */ - if (*content_top) { + if (*(mi->content_top)) { if (!find_boundary(mi, line)) goto handle_body_out; } do { /* process any boundary lines */ - if (*content_top && is_multipart_boundary(line)) { + if (*(mi->content_top) && is_multipart_boundary(mi, line)) { /* flush any leftover */ if (prev.len) { handle_filter(mi, &prev); @@ -1026,6 +1026,7 @@ static void setup_mailinfo(struct mailinfo *mi) strbuf_init(&mi->charset, 0); mi->header_stage = 1; mi->use_inbody_headers = 1; + mi->content_top = mi->content; git_config(git_mailinfo_config, &mi); } @@ -1044,6 +1045,11 @@ static void clear_mailinfo(struct mailinfo *mi) for (i = 0; mi->s_hdr_data[i]; i++) strbuf_release(mi->s_hdr_data[i]); free(mi->s_hdr_data); + + while (mi->content < mi->content_top) { + free(*(mi->content_top)); + mi->content_top--; + } } static const char mailinfo_usage[] = From 4933910ab7a74ebcf13342726d7f055ee35a8f7c Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 16:16:33 -0700 Subject: [PATCH 30/34] mailinfo: handle_commit_msg() shouldn't be called after finding patchbreak There is a strange "if (!mi->cmitmsg) return 0" at the very beginning of handle_commit_msg(), but the condition should never trigger, because: * The only place cmitmsg is set to NULL is after this function sees a patch break, closes the FILE * to write the commit log 
message and returns 1. This function returns non-zero only from that codepath. * The caller of this function, upon seeing a non-zero return, increments filter_stage, starts treating the input as patch text and will never call handle_commit_msg() again. Replace it with an assert(!mi->filter_stage) to ensure the above observation remains true. Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 93043255ad..4a8c704c8a 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -654,8 +654,7 @@ static int is_scissors_line(const struct strbuf *line) static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) { - if (!mi->cmitmsg) - return 0; + assert(!mi->filter_stage); if (mi->header_stage) { if (!line->len || (line->len == 1 && line->buf[0] == '\n')) From 05e625e5bf5231af984f17b82be9c48063b37242 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 17:43:27 -0700 Subject: [PATCH 31/34] mailinfo: keep the parsed log message in a strbuf When mailinfo() is eventually libified, the calling "git am" will still have to write out the log message in the "msg" file for hooks and other users of the information, but it does not have to reopen and reread what it wrote earlier if the function kept it in a strbuf. This also removes the need for seeking and truncating the output file when we see a scissors mark in the input, which in turn allows us to lose two callsites of die_errno().
Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 4a8c704c8a..11c8ee0ece 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -12,7 +12,6 @@ struct mailinfo { FILE *input; FILE *output; - FILE *cmitmsg; FILE *patchfile; struct strbuf name; @@ -36,6 +35,8 @@ struct mailinfo { int header_stage; /* still checking in-body headers? */ struct strbuf **p_hdr_data; struct strbuf **s_hdr_data; + + struct strbuf log_message; }; static void cleanup_space(struct strbuf *sb) @@ -676,10 +677,8 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) if (mi->use_scissors && is_scissors_line(line)) { int i; - if (fseek(mi->cmitmsg, 0L, SEEK_SET)) - die_errno("Could not rewind output message file"); - if (ftruncate(fileno(mi->cmitmsg), 0)) - die_errno("Could not truncate output message file at scissors"); + + strbuf_setlen(&mi->log_message, 0); mi->header_stage = 1; /* @@ -696,13 +695,12 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) if (patchbreak(line)) { if (mi->message_id) - fprintf(mi->cmitmsg, "Message-Id: %s\n", mi->message_id); - fclose(mi->cmitmsg); - mi->cmitmsg = NULL; + strbuf_addf(&mi->log_message, + "Message-Id: %s\n", mi->message_id); return 1; } - fputs(line->buf, mi->cmitmsg); + strbuf_addbuf(&mi->log_message, line); return 0; } @@ -968,18 +966,19 @@ static void handle_info(struct mailinfo *mi) static int mailinfo(struct mailinfo *mi, const char *msg, const char *patch) { + FILE *cmitmsg; int peek; struct strbuf line = STRBUF_INIT; - mi->cmitmsg = fopen(msg, "w"); - if (!mi->cmitmsg) { + cmitmsg = fopen(msg, "w"); + if (!cmitmsg) { perror(msg); return -1; } mi->patchfile = fopen(patch, "w"); if (!mi->patchfile) { perror(patch); - fclose(mi->cmitmsg); + fclose(cmitmsg); return -1; } @@ -996,6 +995,8 @@ static int mailinfo(struct mailinfo *mi, const char *msg, const char 
*patch) check_header(mi, &line, mi->p_hdr_data, 1); handle_body(mi, &line); + fwrite(mi->log_message.buf, 1, mi->log_message.len, cmitmsg); + fclose(cmitmsg); fclose(mi->patchfile); handle_info(mi); @@ -1023,6 +1024,7 @@ static void setup_mailinfo(struct mailinfo *mi) strbuf_init(&mi->name, 0); strbuf_init(&mi->email, 0); strbuf_init(&mi->charset, 0); + strbuf_init(&mi->log_message, 0); mi->header_stage = 1; mi->use_inbody_headers = 1; mi->content_top = mi->content; @@ -1049,6 +1051,8 @@ static void clear_mailinfo(struct mailinfo *mi) free(*(mi->content_top)); mi->content_top--; } + + strbuf_release(&mi->log_message); } static const char mailinfo_usage[] = From c6905e45f078530cda57690b0db6a7378dc1f794 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 17:44:55 -0700 Subject: [PATCH 32/34] mailinfo: libify Move the bulk of the code from builtin/mailinfo.c to mailinfo.c so that new callers can start calling mailinfo() directly. Note that a few calls to exit() and die() need to be cleaned up for the API to be truly useful, which will come in later steps. 
Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/mailinfo.c | 1049 +------------------------------------------- mailinfo.c | 1020 ++++++++++++++++++++++++++++++++++++++++++ mailinfo.h | 40 ++ 4 files changed, 1062 insertions(+), 1048 deletions(-) create mode 100644 mailinfo.c create mode 100644 mailinfo.h diff --git a/Makefile b/Makefile index 8d5df7ea1e..7dd3bff12f 100644 --- a/Makefile +++ b/Makefile @@ -726,6 +726,7 @@ LIB_OBJS += list-objects.o LIB_OBJS += ll-merge.o LIB_OBJS += lockfile.o LIB_OBJS += log-tree.o +LIB_OBJS += mailinfo.o LIB_OBJS += mailmap.o LIB_OBJS += match-trees.o LIB_OBJS += merge.o diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 11c8ee0ece..f6df274111 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -6,1054 +6,7 @@ #include "builtin.h" #include "utf8.h" #include "strbuf.h" - -#define MAX_BOUNDARIES 5 - -struct mailinfo { - FILE *input; - FILE *output; - FILE *patchfile; - - struct strbuf name; - struct strbuf email; - int keep_subject; - int keep_non_patch_brackets_in_subject; - int add_message_id; - int use_scissors; - int use_inbody_headers; - const char *metainfo_charset; - - struct strbuf *content[MAX_BOUNDARIES]; - struct strbuf **content_top; - struct strbuf charset; - char *message_id; - enum { - TE_DONTCARE, TE_QP, TE_BASE64 - } transfer_encoding; - int patch_lines; - int filter_stage; /* still reading log or are we copying patch? */ - int header_stage; /* still checking in-body headers? 
*/ - struct strbuf **p_hdr_data; - struct strbuf **s_hdr_data; - - struct strbuf log_message; -}; - -static void cleanup_space(struct strbuf *sb) -{ - size_t pos, cnt; - for (pos = 0; pos < sb->len; pos++) { - if (isspace(sb->buf[pos])) { - sb->buf[pos] = ' '; - for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++); - strbuf_remove(sb, pos + 1, cnt); - } - } -} - -static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email) -{ - struct strbuf *src = name; - if (name->len < 3 || 60 < name->len || strchr(name->buf, '@') || - strchr(name->buf, '<') || strchr(name->buf, '>')) - src = email; - else if (name == out) - return; - strbuf_reset(out); - strbuf_addbuf(out, src); -} - -static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line) -{ - /* John Doe */ - - char *bra, *ket; - /* This is fallback, so do not bother if we already have an - * e-mail address. - */ - if (mi->email.len) - return; - - bra = strchr(line->buf, '<'); - if (!bra) - return; - ket = strchr(bra, '>'); - if (!ket) - return; - - strbuf_reset(&mi->email); - strbuf_add(&mi->email, bra + 1, ket - bra - 1); - - strbuf_reset(&mi->name); - strbuf_add(&mi->name, line->buf, bra - line->buf); - strbuf_trim(&mi->name); - get_sane_name(&mi->name, &mi->name, &mi->email); -} - -static void handle_from(struct mailinfo *mi, const struct strbuf *from) -{ - char *at; - size_t el; - struct strbuf f; - - strbuf_init(&f, from->len); - strbuf_addbuf(&f, from); - - at = strchr(f.buf, '@'); - if (!at) { - parse_bogus_from(mi, from); - return; - } - - /* - * If we already have one email, don't take any confusing lines - */ - if (mi->email.len && strchr(at + 1, '@')) { - strbuf_release(&f); - return; - } - - /* Pick up the string around '@', possibly delimited with <> - * pair; that is the email part. 
- */ - while (at > f.buf) { - char c = at[-1]; - if (isspace(c)) - break; - if (c == '<') { - at[-1] = ' '; - break; - } - at--; - } - el = strcspn(at, " \n\t\r\v\f>"); - strbuf_reset(&mi->email); - strbuf_add(&mi->email, at, el); - strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0)); - - /* The remainder is name. It could be - * - * - "John Doe " (a), or - * - "john.doe@xz (John Doe)" (b), or - * - "John (zzz) Doe (Comment)" (c) - * - * but we have removed the email part, so - * - * - remove extra spaces which could stay after email (case 'c'), and - * - trim from both ends, possibly removing the () pair at the end - * (cases 'a' and 'b'). - */ - cleanup_space(&f); - strbuf_trim(&f); - if (f.buf[0] == '(' && f.len && f.buf[f.len - 1] == ')') { - strbuf_remove(&f, 0, 1); - strbuf_setlen(&f, f.len - 1); - } - - get_sane_name(&mi->name, &f, &mi->email); - strbuf_release(&f); -} - -static void handle_header(struct strbuf **out, const struct strbuf *line) -{ - if (!*out) { - *out = xmalloc(sizeof(struct strbuf)); - strbuf_init(*out, line->len); - } else - strbuf_reset(*out); - - strbuf_addbuf(*out, line); -} - -/* NOTE NOTE NOTE. We do not claim we do full MIME. We just attempt - * to have enough heuristics to grok MIME encoded patches often found - * on our mailing lists. For example, we do not even treat header lines - * case insensitively. 
- */ - -static int slurp_attr(const char *line, const char *name, struct strbuf *attr) -{ - const char *ends, *ap = strcasestr(line, name); - size_t sz; - - strbuf_setlen(attr, 0); - if (!ap) - return 0; - ap += strlen(name); - if (*ap == '"') { - ap++; - ends = "\""; - } - else - ends = "; \t"; - sz = strcspn(ap, ends); - strbuf_add(attr, ap, sz); - return 1; -} - -static void handle_content_type(struct mailinfo *mi, struct strbuf *line) -{ - struct strbuf *boundary = xmalloc(sizeof(struct strbuf)); - strbuf_init(boundary, line->len); - - if (slurp_attr(line->buf, "boundary=", boundary)) { - strbuf_insert(boundary, 0, "--", 2); - if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) { - fprintf(stderr, "Too many boundaries to handle\n"); - exit(1); - } - *(mi->content_top) = boundary; - boundary = NULL; - } - slurp_attr(line->buf, "charset=", &mi->charset); - - if (boundary) { - strbuf_release(boundary); - free(boundary); - } -} - -static void handle_message_id(struct mailinfo *mi, const struct strbuf *line) -{ - if (mi->add_message_id) - mi->message_id = strdup(line->buf); -} - -static void handle_content_transfer_encoding(struct mailinfo *mi, - const struct strbuf *line) -{ - if (strcasestr(line->buf, "base64")) - mi->transfer_encoding = TE_BASE64; - else if (strcasestr(line->buf, "quoted-printable")) - mi->transfer_encoding = TE_QP; - else - mi->transfer_encoding = TE_DONTCARE; -} - -static int is_multipart_boundary(struct mailinfo *mi, const struct strbuf *line) -{ - struct strbuf *content_top = *(mi->content_top); - - return ((content_top->len <= line->len) && - !memcmp(line->buf, content_top->buf, content_top->len)); -} - -static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject) -{ - size_t at = 0; - - while (at < subject->len) { - char *pos; - size_t remove; - - switch (subject->buf[at]) { - case 'r': case 'R': - if (subject->len <= at + 3) - break; - if ((subject->buf[at + 1] == 'e' || - subject->buf[at + 1] == 'E') && - subject->buf[at + 
2] == ':') { - strbuf_remove(subject, at, 3); - continue; - } - at++; - break; - case ' ': case '\t': case ':': - strbuf_remove(subject, at, 1); - continue; - case '[': - pos = strchr(subject->buf + at, ']'); - if (!pos) - break; - remove = pos - subject->buf + at + 1; - if (!mi->keep_non_patch_brackets_in_subject || - (7 <= remove && - memmem(subject->buf + at, remove, "PATCH", 5))) - strbuf_remove(subject, at, remove); - else { - at += remove; - /* - * If the input had a space after the ], keep - * it. We don't bother with finding the end of - * the space, since we later normalize it - * anyway. - */ - if (isspace(subject->buf[at])) - at += 1; - } - continue; - } - break; - } - strbuf_trim(subject); -} - -#define MAX_HDR_PARSED 10 -static const char *header[MAX_HDR_PARSED] = { - "From","Subject","Date", -}; - -static inline int cmp_header(const struct strbuf *line, const char *hdr) -{ - int len = strlen(hdr); - return !strncasecmp(line->buf, hdr, len) && line->len > len && - line->buf[len] == ':' && isspace(line->buf[len + 1]); -} - -static int is_format_patch_separator(const char *line, int len) -{ - static const char SAMPLE[] = - "From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n"; - const char *cp; - - if (len != strlen(SAMPLE)) - return 0; - if (!skip_prefix(line, "From ", &cp)) - return 0; - if (strspn(cp, "0123456789abcdef") != 40) - return 0; - cp += 40; - return !memcmp(SAMPLE + (cp - line), cp, strlen(SAMPLE) - (cp - line)); -} - -static struct strbuf *decode_q_segment(const struct strbuf *q_seg, int rfc2047) -{ - const char *in = q_seg->buf; - int c; - struct strbuf *out = xmalloc(sizeof(struct strbuf)); - strbuf_init(out, q_seg->len); - - while ((c = *in++) != 0) { - if (c == '=') { - int d = *in++; - if (d == '\n' || !d) - break; /* drop trailing newline */ - strbuf_addch(out, (hexval(d) << 4) | hexval(*in++)); - continue; - } - if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */ - c = 0x20; - strbuf_addch(out, c); - } - return out; 
-} - -static struct strbuf *decode_b_segment(const struct strbuf *b_seg) -{ - /* Decode in..ep, possibly in-place to ot */ - int c, pos = 0, acc = 0; - const char *in = b_seg->buf; - struct strbuf *out = xmalloc(sizeof(struct strbuf)); - strbuf_init(out, b_seg->len); - - while ((c = *in++) != 0) { - if (c == '+') - c = 62; - else if (c == '/') - c = 63; - else if ('A' <= c && c <= 'Z') - c -= 'A'; - else if ('a' <= c && c <= 'z') - c -= 'a' - 26; - else if ('0' <= c && c <= '9') - c -= '0' - 52; - else - continue; /* garbage */ - switch (pos++) { - case 0: - acc = (c << 2); - break; - case 1: - strbuf_addch(out, (acc | (c >> 4))); - acc = (c & 15) << 4; - break; - case 2: - strbuf_addch(out, (acc | (c >> 2))); - acc = (c & 3) << 6; - break; - case 3: - strbuf_addch(out, (acc | c)); - acc = pos = 0; - break; - } - } - return out; -} - -static void convert_to_utf8(struct mailinfo *mi, - struct strbuf *line, const char *charset) -{ - char *out; - - if (!mi->metainfo_charset || !charset || !*charset) - return; - - if (same_encoding(mi->metainfo_charset, charset)) - return; - out = reencode_string(line->buf, mi->metainfo_charset, charset); - if (!out) - die("cannot convert from %s to %s", - charset, mi->metainfo_charset); - strbuf_attach(line, out, strlen(out), strlen(out)); -} - -static void decode_header(struct mailinfo *mi, struct strbuf *it) -{ - char *in, *ep, *cp; - struct strbuf outbuf = STRBUF_INIT, *dec; - struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT; - - in = it->buf; - while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) { - int encoding; - strbuf_reset(&charset_q); - strbuf_reset(&piecebuf); - - if (in != ep) { - /* - * We are about to process an encoded-word - * that begins at ep, but there is something - * before the encoded word. 
- */ - char *scan; - for (scan = in; scan < ep; scan++) - if (!isspace(*scan)) - break; - - if (scan != ep || in == it->buf) { - /* - * We should not lose that "something", - * unless we have just processed an - * encoded-word, and there is only LWS - * before the one we are about to process. - */ - strbuf_add(&outbuf, in, ep - in); - } - } - /* E.g. - * ep : "=?iso-2022-jp?B?GyR...?= foo" - * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz" - */ - ep += 2; - - if (ep - it->buf >= it->len || !(cp = strchr(ep, '?'))) - goto release_return; - - if (cp + 3 - it->buf > it->len) - goto release_return; - strbuf_add(&charset_q, ep, cp - ep); - - encoding = cp[1]; - if (!encoding || cp[2] != '?') - goto release_return; - ep = strstr(cp + 3, "?="); - if (!ep) - goto release_return; - strbuf_add(&piecebuf, cp + 3, ep - cp - 3); - switch (tolower(encoding)) { - default: - goto release_return; - case 'b': - dec = decode_b_segment(&piecebuf); - break; - case 'q': - dec = decode_q_segment(&piecebuf, 1); - break; - } - convert_to_utf8(mi, dec, charset_q.buf); - - strbuf_addbuf(&outbuf, dec); - strbuf_release(dec); - free(dec); - in = ep + 2; - } - strbuf_addstr(&outbuf, in); - strbuf_reset(it); - strbuf_addbuf(it, &outbuf); -release_return: - strbuf_release(&outbuf); - strbuf_release(&charset_q); - strbuf_release(&piecebuf); -} - -static int check_header(struct mailinfo *mi, - const struct strbuf *line, - struct strbuf *hdr_data[], int overwrite) -{ - int i, ret = 0, len; - struct strbuf sb = STRBUF_INIT; - - /* search for the interesting parts */ - for (i = 0; header[i]; i++) { - int len = strlen(header[i]); - if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i])) { - /* Unwrap inline B and Q encoding, and optionally - * normalize the meta information to utf8. 
- */ - strbuf_add(&sb, line->buf + len + 2, line->len - len - 2); - decode_header(mi, &sb); - handle_header(&hdr_data[i], &sb); - ret = 1; - goto check_header_out; - } - } - - /* Content stuff */ - if (cmp_header(line, "Content-Type")) { - len = strlen("Content-Type: "); - strbuf_add(&sb, line->buf + len, line->len - len); - decode_header(mi, &sb); - strbuf_insert(&sb, 0, "Content-Type: ", len); - handle_content_type(mi, &sb); - ret = 1; - goto check_header_out; - } - if (cmp_header(line, "Content-Transfer-Encoding")) { - len = strlen("Content-Transfer-Encoding: "); - strbuf_add(&sb, line->buf + len, line->len - len); - decode_header(mi, &sb); - handle_content_transfer_encoding(mi, &sb); - ret = 1; - goto check_header_out; - } - if (cmp_header(line, "Message-Id")) { - len = strlen("Message-Id: "); - strbuf_add(&sb, line->buf + len, line->len - len); - decode_header(mi, &sb); - handle_message_id(mi, &sb); - ret = 1; - goto check_header_out; - } - - /* for inbody stuff */ - if (starts_with(line->buf, ">From") && isspace(line->buf[5])) { - ret = is_format_patch_separator(line->buf + 1, line->len - 1); - goto check_header_out; - } - if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) { - for (i = 0; header[i]; i++) { - if (!strcmp("Subject", header[i])) { - handle_header(&hdr_data[i], line); - ret = 1; - goto check_header_out; - } - } - } - -check_header_out: - strbuf_release(&sb); - return ret; -} - -static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line) -{ - struct strbuf *ret; - - switch (mi->transfer_encoding) { - case TE_QP: - ret = decode_q_segment(line, 0); - break; - case TE_BASE64: - ret = decode_b_segment(line); - break; - case TE_DONTCARE: - default: - return; - } - strbuf_reset(line); - strbuf_addbuf(line, ret); - strbuf_release(ret); - free(ret); -} - -static inline int patchbreak(const struct strbuf *line) -{ - size_t i; - - /* Beginning of a "diff -" header? 
*/ - if (starts_with(line->buf, "diff -")) - return 1; - - /* CVS "Index: " line? */ - if (starts_with(line->buf, "Index: ")) - return 1; - - /* - * "--- " starts patches without headers - * "---*" is a manual separator - */ - if (line->len < 4) - return 0; - - if (starts_with(line->buf, "---")) { - /* space followed by a filename? */ - if (line->buf[3] == ' ' && !isspace(line->buf[4])) - return 1; - /* Just whitespace? */ - for (i = 3; i < line->len; i++) { - unsigned char c = line->buf[i]; - if (c == '\n') - return 1; - if (!isspace(c)) - break; - } - return 0; - } - return 0; -} - -static int is_scissors_line(const struct strbuf *line) -{ - size_t i, len = line->len; - int scissors = 0, gap = 0; - int first_nonblank = -1; - int last_nonblank = 0, visible, perforation = 0, in_perforation = 0; - const char *buf = line->buf; - - for (i = 0; i < len; i++) { - if (isspace(buf[i])) { - if (in_perforation) { - perforation++; - gap++; - } - continue; - } - last_nonblank = i; - if (first_nonblank < 0) - first_nonblank = i; - if (buf[i] == '-') { - in_perforation = 1; - perforation++; - continue; - } - if (i + 1 < len && - (!memcmp(buf + i, ">8", 2) || !memcmp(buf + i, "8<", 2) || - !memcmp(buf + i, ">%", 2) || !memcmp(buf + i, "%<", 2))) { - in_perforation = 1; - perforation += 2; - scissors += 2; - i++; - continue; - } - in_perforation = 0; - } - - /* - * The mark must be at least 8 bytes long (e.g. "-- >8 --"). - * Even though there can be arbitrary cruft on the same line - * (e.g. "cut here"), in order to avoid misidentification, the - * perforation must occupy more than a third of the visible - * width of the line, and dashes and scissors must occupy more - * than half of the perforation. 
- */ - - visible = last_nonblank - first_nonblank + 1; - return (scissors && 8 <= visible && - visible < perforation * 3 && - gap * 2 < perforation); -} - -static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) -{ - assert(!mi->filter_stage); - - if (mi->header_stage) { - if (!line->len || (line->len == 1 && line->buf[0] == '\n')) - return 0; - } - - if (mi->use_inbody_headers && mi->header_stage) { - mi->header_stage = check_header(mi, line, mi->s_hdr_data, 0); - if (mi->header_stage) - return 0; - } else - /* Only trim the first (blank) line of the commit message - * when ignoring in-body headers. - */ - mi->header_stage = 0; - - /* normalize the log message to UTF-8. */ - convert_to_utf8(mi, line, mi->charset.buf); - - if (mi->use_scissors && is_scissors_line(line)) { - int i; - - strbuf_setlen(&mi->log_message, 0); - mi->header_stage = 1; - - /* - * We may have already read "secondary headers"; purge - * them to give ourselves a clean restart. - */ - for (i = 0; header[i]; i++) { - if (mi->s_hdr_data[i]) - strbuf_release(mi->s_hdr_data[i]); - mi->s_hdr_data[i] = NULL; - } - return 0; - } - - if (patchbreak(line)) { - if (mi->message_id) - strbuf_addf(&mi->log_message, - "Message-Id: %s\n", mi->message_id); - return 1; - } - - strbuf_addbuf(&mi->log_message, line); - return 0; -} - -static void handle_patch(struct mailinfo *mi, const struct strbuf *line) -{ - fwrite(line->buf, 1, line->len, mi->patchfile); - mi->patch_lines++; -} - -static void handle_filter(struct mailinfo *mi, struct strbuf *line) -{ - switch (mi->filter_stage) { - case 0: - if (!handle_commit_msg(mi, line)) - break; - mi->filter_stage++; - case 1: - handle_patch(mi, line); - break; - } -} - -static int is_rfc2822_header(const struct strbuf *line) -{ - /* - * The section that defines the loosest possible - * field name is "3.6.8 Optional fields". 
- * - * optional-field = field-name ":" unstructured CRLF - * field-name = 1*ftext - * ftext = %d33-57 / %59-126 - */ - int ch; - char *cp = line->buf; - - /* Count mbox From headers as headers */ - if (starts_with(cp, "From ") || starts_with(cp, ">From ")) - return 1; - - while ((ch = *cp++)) { - if (ch == ':') - return 1; - if ((33 <= ch && ch <= 57) || - (59 <= ch && ch <= 126)) - continue; - break; - } - return 0; -} - -static int read_one_header_line(struct strbuf *line, FILE *in) -{ - struct strbuf continuation = STRBUF_INIT; - - /* Get the first part of the line. */ - if (strbuf_getline(line, in, '\n')) - return 0; - - /* - * Is it an empty line or not a valid rfc2822 header? - * If so, stop here, and return false ("not a header") - */ - strbuf_rtrim(line); - if (!line->len || !is_rfc2822_header(line)) { - /* Re-add the newline */ - strbuf_addch(line, '\n'); - return 0; - } - - /* - * Now we need to eat all the continuation lines.. - * Yuck, 2822 header "folding" - */ - for (;;) { - int peek; - - peek = fgetc(in); ungetc(peek, in); - if (peek != ' ' && peek != '\t') - break; - if (strbuf_getline(&continuation, in, '\n')) - break; - continuation.buf[0] = ' '; - strbuf_rtrim(&continuation); - strbuf_addbuf(line, &continuation); - } - strbuf_release(&continuation); - - return 1; -} - -static int find_boundary(struct mailinfo *mi, struct strbuf *line) -{ - while (!strbuf_getline(line, mi->input, '\n')) { - if (*(mi->content_top) && is_multipart_boundary(mi, line)) - return 1; - } - return 0; -} - -static int handle_boundary(struct mailinfo *mi, struct strbuf *line) -{ - struct strbuf newline = STRBUF_INIT; - - strbuf_addch(&newline, '\n'); -again: - if (line->len >= (*(mi->content_top))->len + 2 && - !memcmp(line->buf + (*(mi->content_top))->len, "--", 2)) { - /* we hit an end boundary */ - /* pop the current boundary off the stack */ - strbuf_release(*(mi->content_top)); - free(*(mi->content_top)); - *(mi->content_top) = NULL; - - /* technically won't happen as 
is_multipart_boundary() - will fail first. But just in case.. - */ - if (--mi->content_top < mi->content) { - fprintf(stderr, "Detected mismatched boundaries, " - "can't recover\n"); - exit(1); - } - handle_filter(mi, &newline); - strbuf_release(&newline); - - /* skip to the next boundary */ - if (!find_boundary(mi, line)) - return 0; - goto again; - } - - /* set some defaults */ - mi->transfer_encoding = TE_DONTCARE; - strbuf_reset(&mi->charset); - - /* slurp in this section's info */ - while (read_one_header_line(line, mi->input)) - check_header(mi, line, mi->p_hdr_data, 0); - - strbuf_release(&newline); - /* replenish line */ - if (strbuf_getline(line, mi->input, '\n')) - return 0; - strbuf_addch(line, '\n'); - return 1; -} - -static void handle_body(struct mailinfo *mi, struct strbuf *line) -{ - struct strbuf prev = STRBUF_INIT; - - /* Skip up to the first boundary */ - if (*(mi->content_top)) { - if (!find_boundary(mi, line)) - goto handle_body_out; - } - - do { - /* process any boundary lines */ - if (*(mi->content_top) && is_multipart_boundary(mi, line)) { - /* flush any leftover */ - if (prev.len) { - handle_filter(mi, &prev); - strbuf_reset(&prev); - } - if (!handle_boundary(mi, line)) - goto handle_body_out; - } - - /* Unwrap transfer encoding */ - decode_transfer_encoding(mi, line); - - switch (mi->transfer_encoding) { - case TE_BASE64: - case TE_QP: - { - struct strbuf **lines, **it, *sb; - - /* Prepend any previous partial lines */ - strbuf_insert(line, 0, prev.buf, prev.len); - strbuf_reset(&prev); - - /* - * This is a decoded line that may contain - * multiple new lines. Pass only one chunk - * at a time to handle_filter() - */ - lines = strbuf_split(line, '\n'); - for (it = lines; (sb = *it); it++) { - if (*(it + 1) == NULL) /* The last line */ - if (sb->buf[sb->len - 1] != '\n') { - /* Partial line, save it for later. 
*/ - strbuf_addbuf(&prev, sb); - break; - } - handle_filter(mi, sb); - } - /* - * The partial chunk is saved in "prev" and will be - * appended by the next iteration of read_line_with_nul(). - */ - strbuf_list_free(lines); - break; - } - default: - handle_filter(mi, line); - } - - } while (!strbuf_getwholeline(line, mi->input, '\n')); - -handle_body_out: - strbuf_release(&prev); -} - -static void output_header_lines(FILE *fout, const char *hdr, const struct strbuf *data) -{ - const char *sp = data->buf; - while (1) { - char *ep = strchr(sp, '\n'); - int len; - if (!ep) - len = strlen(sp); - else - len = ep - sp; - fprintf(fout, "%s: %.*s\n", hdr, len, sp); - if (!ep) - break; - sp = ep + 1; - } -} - -static void handle_info(struct mailinfo *mi) -{ - struct strbuf *hdr; - int i; - - for (i = 0; header[i]; i++) { - /* only print inbody headers if we output a patch file */ - if (mi->patch_lines && mi->s_hdr_data[i]) - hdr = mi->s_hdr_data[i]; - else if (mi->p_hdr_data[i]) - hdr = mi->p_hdr_data[i]; - else - continue; - - if (!strcmp(header[i], "Subject")) { - if (!mi->keep_subject) { - cleanup_subject(mi, hdr); - cleanup_space(hdr); - } - output_header_lines(mi->output, "Subject", hdr); - } else if (!strcmp(header[i], "From")) { - cleanup_space(hdr); - handle_from(mi, hdr); - fprintf(mi->output, "Author: %s\n", mi->name.buf); - fprintf(mi->output, "Email: %s\n", mi->email.buf); - } else { - cleanup_space(hdr); - fprintf(mi->output, "%s: %s\n", header[i], hdr->buf); - } - } - fprintf(mi->output, "\n"); -} - -static int mailinfo(struct mailinfo *mi, const char *msg, const char *patch) -{ - FILE *cmitmsg; - int peek; - struct strbuf line = STRBUF_INIT; - - cmitmsg = fopen(msg, "w"); - if (!cmitmsg) { - perror(msg); - return -1; - } - mi->patchfile = fopen(patch, "w"); - if (!mi->patchfile) { - perror(patch); - fclose(cmitmsg); - return -1; - } - - mi->p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->p_hdr_data))); - mi->s_hdr_data = xcalloc(MAX_HDR_PARSED, 
sizeof(*(mi->s_hdr_data))); - - do { - peek = fgetc(mi->input); - } while (isspace(peek)); - ungetc(peek, mi->input); - - /* process the email header */ - while (read_one_header_line(&line, mi->input)) - check_header(mi, &line, mi->p_hdr_data, 1); - - handle_body(mi, &line); - fwrite(mi->log_message.buf, 1, mi->log_message.len, cmitmsg); - fclose(cmitmsg); - fclose(mi->patchfile); - - handle_info(mi); - strbuf_release(&line); - return 0; -} - -static int git_mailinfo_config(const char *var, const char *value, void *mi_) -{ - struct mailinfo *mi = mi_; - - if (!starts_with(var, "mailinfo.")) - return git_default_config(var, value, NULL); - if (!strcmp(var, "mailinfo.scissors")) { - mi->use_scissors = git_config_bool(var, value); - return 0; - } - /* perhaps others here */ - return 0; -} - -static void setup_mailinfo(struct mailinfo *mi) -{ - memset(mi, 0, sizeof(*mi)); - strbuf_init(&mi->name, 0); - strbuf_init(&mi->email, 0); - strbuf_init(&mi->charset, 0); - strbuf_init(&mi->log_message, 0); - mi->header_stage = 1; - mi->use_inbody_headers = 1; - mi->content_top = mi->content; - git_config(git_mailinfo_config, &mi); -} - -static void clear_mailinfo(struct mailinfo *mi) -{ - int i; - - strbuf_release(&mi->name); - strbuf_release(&mi->email); - strbuf_release(&mi->charset); - free(mi->message_id); - - for (i = 0; mi->p_hdr_data[i]; i++) - strbuf_release(mi->p_hdr_data[i]); - free(mi->p_hdr_data); - for (i = 0; mi->s_hdr_data[i]; i++) - strbuf_release(mi->s_hdr_data[i]); - free(mi->s_hdr_data); - - while (mi->content < mi->content_top) { - free(*(mi->content_top)); - mi->content_top--; - } - - strbuf_release(&mi->log_message); -} +#include "mailinfo.h" static const char mailinfo_usage[] = "git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding= | -n] [--scissors | --no-scissors] < mail >info"; diff --git a/mailinfo.c b/mailinfo.c new file mode 100644 index 0000000000..90adc7583b --- /dev/null +++ b/mailinfo.c @@ -0,0 +1,1020 @@ +#include "cache.h" +#include 
"utf8.h" +#include "strbuf.h" +#include "mailinfo.h" + +static void cleanup_space(struct strbuf *sb) +{ + size_t pos, cnt; + for (pos = 0; pos < sb->len; pos++) { + if (isspace(sb->buf[pos])) { + sb->buf[pos] = ' '; + for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++); + strbuf_remove(sb, pos + 1, cnt); + } + } +} + +static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email) +{ + struct strbuf *src = name; + if (name->len < 3 || 60 < name->len || strchr(name->buf, '@') || + strchr(name->buf, '<') || strchr(name->buf, '>')) + src = email; + else if (name == out) + return; + strbuf_reset(out); + strbuf_addbuf(out, src); +} + +static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line) +{ + /* John Doe */ + + char *bra, *ket; + /* This is fallback, so do not bother if we already have an + * e-mail address. + */ + if (mi->email.len) + return; + + bra = strchr(line->buf, '<'); + if (!bra) + return; + ket = strchr(bra, '>'); + if (!ket) + return; + + strbuf_reset(&mi->email); + strbuf_add(&mi->email, bra + 1, ket - bra - 1); + + strbuf_reset(&mi->name); + strbuf_add(&mi->name, line->buf, bra - line->buf); + strbuf_trim(&mi->name); + get_sane_name(&mi->name, &mi->name, &mi->email); +} + +static void handle_from(struct mailinfo *mi, const struct strbuf *from) +{ + char *at; + size_t el; + struct strbuf f; + + strbuf_init(&f, from->len); + strbuf_addbuf(&f, from); + + at = strchr(f.buf, '@'); + if (!at) { + parse_bogus_from(mi, from); + return; + } + + /* + * If we already have one email, don't take any confusing lines + */ + if (mi->email.len && strchr(at + 1, '@')) { + strbuf_release(&f); + return; + } + + /* Pick up the string around '@', possibly delimited with <> + * pair; that is the email part. 
+ */ + while (at > f.buf) { + char c = at[-1]; + if (isspace(c)) + break; + if (c == '<') { + at[-1] = ' '; + break; + } + at--; + } + el = strcspn(at, " \n\t\r\v\f>"); + strbuf_reset(&mi->email); + strbuf_add(&mi->email, at, el); + strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0)); + + /* The remainder is name. It could be + * + * - "John Doe " (a), or + * - "john.doe@xz (John Doe)" (b), or + * - "John (zzz) Doe (Comment)" (c) + * + * but we have removed the email part, so + * + * - remove extra spaces which could stay after email (case 'c'), and + * - trim from both ends, possibly removing the () pair at the end + * (cases 'a' and 'b'). + */ + cleanup_space(&f); + strbuf_trim(&f); + if (f.buf[0] == '(' && f.len && f.buf[f.len - 1] == ')') { + strbuf_remove(&f, 0, 1); + strbuf_setlen(&f, f.len - 1); + } + + get_sane_name(&mi->name, &f, &mi->email); + strbuf_release(&f); +} + +static void handle_header(struct strbuf **out, const struct strbuf *line) +{ + if (!*out) { + *out = xmalloc(sizeof(struct strbuf)); + strbuf_init(*out, line->len); + } else + strbuf_reset(*out); + + strbuf_addbuf(*out, line); +} + +/* NOTE NOTE NOTE. We do not claim we do full MIME. We just attempt + * to have enough heuristics to grok MIME encoded patches often found + * on our mailing lists. For example, we do not even treat header lines + * case insensitively. 
+ */ + +static int slurp_attr(const char *line, const char *name, struct strbuf *attr) +{ + const char *ends, *ap = strcasestr(line, name); + size_t sz; + + strbuf_setlen(attr, 0); + if (!ap) + return 0; + ap += strlen(name); + if (*ap == '"') { + ap++; + ends = "\""; + } + else + ends = "; \t"; + sz = strcspn(ap, ends); + strbuf_add(attr, ap, sz); + return 1; +} + +static void handle_content_type(struct mailinfo *mi, struct strbuf *line) +{ + struct strbuf *boundary = xmalloc(sizeof(struct strbuf)); + strbuf_init(boundary, line->len); + + if (slurp_attr(line->buf, "boundary=", boundary)) { + strbuf_insert(boundary, 0, "--", 2); + if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) { + fprintf(stderr, "Too many boundaries to handle\n"); + exit(1); + } + *(mi->content_top) = boundary; + boundary = NULL; + } + slurp_attr(line->buf, "charset=", &mi->charset); + + if (boundary) { + strbuf_release(boundary); + free(boundary); + } +} + +static void handle_message_id(struct mailinfo *mi, const struct strbuf *line) +{ + if (mi->add_message_id) + mi->message_id = strdup(line->buf); +} + +static void handle_content_transfer_encoding(struct mailinfo *mi, + const struct strbuf *line) +{ + if (strcasestr(line->buf, "base64")) + mi->transfer_encoding = TE_BASE64; + else if (strcasestr(line->buf, "quoted-printable")) + mi->transfer_encoding = TE_QP; + else + mi->transfer_encoding = TE_DONTCARE; +} + +static int is_multipart_boundary(struct mailinfo *mi, const struct strbuf *line) +{ + struct strbuf *content_top = *(mi->content_top); + + return ((content_top->len <= line->len) && + !memcmp(line->buf, content_top->buf, content_top->len)); +} + +static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject) +{ + size_t at = 0; + + while (at < subject->len) { + char *pos; + size_t remove; + + switch (subject->buf[at]) { + case 'r': case 'R': + if (subject->len <= at + 3) + break; + if ((subject->buf[at + 1] == 'e' || + subject->buf[at + 1] == 'E') && + subject->buf[at + 
2] == ':') { + strbuf_remove(subject, at, 3); + continue; + } + at++; + break; + case ' ': case '\t': case ':': + strbuf_remove(subject, at, 1); + continue; + case '[': + pos = strchr(subject->buf + at, ']'); + if (!pos) + break; + remove = pos - subject->buf + at + 1; + if (!mi->keep_non_patch_brackets_in_subject || + (7 <= remove && + memmem(subject->buf + at, remove, "PATCH", 5))) + strbuf_remove(subject, at, remove); + else { + at += remove; + /* + * If the input had a space after the ], keep + * it. We don't bother with finding the end of + * the space, since we later normalize it + * anyway. + */ + if (isspace(subject->buf[at])) + at += 1; + } + continue; + } + break; + } + strbuf_trim(subject); +} + +#define MAX_HDR_PARSED 10 +static const char *header[MAX_HDR_PARSED] = { + "From","Subject","Date", +}; + +static inline int cmp_header(const struct strbuf *line, const char *hdr) +{ + int len = strlen(hdr); + return !strncasecmp(line->buf, hdr, len) && line->len > len && + line->buf[len] == ':' && isspace(line->buf[len + 1]); +} + +static int is_format_patch_separator(const char *line, int len) +{ + static const char SAMPLE[] = + "From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n"; + const char *cp; + + if (len != strlen(SAMPLE)) + return 0; + if (!skip_prefix(line, "From ", &cp)) + return 0; + if (strspn(cp, "0123456789abcdef") != 40) + return 0; + cp += 40; + return !memcmp(SAMPLE + (cp - line), cp, strlen(SAMPLE) - (cp - line)); +} + +static struct strbuf *decode_q_segment(const struct strbuf *q_seg, int rfc2047) +{ + const char *in = q_seg->buf; + int c; + struct strbuf *out = xmalloc(sizeof(struct strbuf)); + strbuf_init(out, q_seg->len); + + while ((c = *in++) != 0) { + if (c == '=') { + int d = *in++; + if (d == '\n' || !d) + break; /* drop trailing newline */ + strbuf_addch(out, (hexval(d) << 4) | hexval(*in++)); + continue; + } + if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */ + c = 0x20; + strbuf_addch(out, c); + } + return out; 
+} + +static struct strbuf *decode_b_segment(const struct strbuf *b_seg) +{ + /* Decode in..ep, possibly in-place to ot */ + int c, pos = 0, acc = 0; + const char *in = b_seg->buf; + struct strbuf *out = xmalloc(sizeof(struct strbuf)); + strbuf_init(out, b_seg->len); + + while ((c = *in++) != 0) { + if (c == '+') + c = 62; + else if (c == '/') + c = 63; + else if ('A' <= c && c <= 'Z') + c -= 'A'; + else if ('a' <= c && c <= 'z') + c -= 'a' - 26; + else if ('0' <= c && c <= '9') + c -= '0' - 52; + else + continue; /* garbage */ + switch (pos++) { + case 0: + acc = (c << 2); + break; + case 1: + strbuf_addch(out, (acc | (c >> 4))); + acc = (c & 15) << 4; + break; + case 2: + strbuf_addch(out, (acc | (c >> 2))); + acc = (c & 3) << 6; + break; + case 3: + strbuf_addch(out, (acc | c)); + acc = pos = 0; + break; + } + } + return out; +} + +static void convert_to_utf8(struct mailinfo *mi, + struct strbuf *line, const char *charset) +{ + char *out; + + if (!mi->metainfo_charset || !charset || !*charset) + return; + + if (same_encoding(mi->metainfo_charset, charset)) + return; + out = reencode_string(line->buf, mi->metainfo_charset, charset); + if (!out) + die("cannot convert from %s to %s", + charset, mi->metainfo_charset); + strbuf_attach(line, out, strlen(out), strlen(out)); +} + +static void decode_header(struct mailinfo *mi, struct strbuf *it) +{ + char *in, *ep, *cp; + struct strbuf outbuf = STRBUF_INIT, *dec; + struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT; + + in = it->buf; + while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) { + int encoding; + strbuf_reset(&charset_q); + strbuf_reset(&piecebuf); + + if (in != ep) { + /* + * We are about to process an encoded-word + * that begins at ep, but there is something + * before the encoded word. 
+ */ + char *scan; + for (scan = in; scan < ep; scan++) + if (!isspace(*scan)) + break; + + if (scan != ep || in == it->buf) { + /* + * We should not lose that "something", + * unless we have just processed an + * encoded-word, and there is only LWS + * before the one we are about to process. + */ + strbuf_add(&outbuf, in, ep - in); + } + } + /* E.g. + * ep : "=?iso-2022-jp?B?GyR...?= foo" + * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz" + */ + ep += 2; + + if (ep - it->buf >= it->len || !(cp = strchr(ep, '?'))) + goto release_return; + + if (cp + 3 - it->buf > it->len) + goto release_return; + strbuf_add(&charset_q, ep, cp - ep); + + encoding = cp[1]; + if (!encoding || cp[2] != '?') + goto release_return; + ep = strstr(cp + 3, "?="); + if (!ep) + goto release_return; + strbuf_add(&piecebuf, cp + 3, ep - cp - 3); + switch (tolower(encoding)) { + default: + goto release_return; + case 'b': + dec = decode_b_segment(&piecebuf); + break; + case 'q': + dec = decode_q_segment(&piecebuf, 1); + break; + } + convert_to_utf8(mi, dec, charset_q.buf); + + strbuf_addbuf(&outbuf, dec); + strbuf_release(dec); + free(dec); + in = ep + 2; + } + strbuf_addstr(&outbuf, in); + strbuf_reset(it); + strbuf_addbuf(it, &outbuf); +release_return: + strbuf_release(&outbuf); + strbuf_release(&charset_q); + strbuf_release(&piecebuf); +} + +static int check_header(struct mailinfo *mi, + const struct strbuf *line, + struct strbuf *hdr_data[], int overwrite) +{ + int i, ret = 0, len; + struct strbuf sb = STRBUF_INIT; + + /* search for the interesting parts */ + for (i = 0; header[i]; i++) { + int len = strlen(header[i]); + if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i])) { + /* Unwrap inline B and Q encoding, and optionally + * normalize the meta information to utf8. 
+ */ + strbuf_add(&sb, line->buf + len + 2, line->len - len - 2); + decode_header(mi, &sb); + handle_header(&hdr_data[i], &sb); + ret = 1; + goto check_header_out; + } + } + + /* Content stuff */ + if (cmp_header(line, "Content-Type")) { + len = strlen("Content-Type: "); + strbuf_add(&sb, line->buf + len, line->len - len); + decode_header(mi, &sb); + strbuf_insert(&sb, 0, "Content-Type: ", len); + handle_content_type(mi, &sb); + ret = 1; + goto check_header_out; + } + if (cmp_header(line, "Content-Transfer-Encoding")) { + len = strlen("Content-Transfer-Encoding: "); + strbuf_add(&sb, line->buf + len, line->len - len); + decode_header(mi, &sb); + handle_content_transfer_encoding(mi, &sb); + ret = 1; + goto check_header_out; + } + if (cmp_header(line, "Message-Id")) { + len = strlen("Message-Id: "); + strbuf_add(&sb, line->buf + len, line->len - len); + decode_header(mi, &sb); + handle_message_id(mi, &sb); + ret = 1; + goto check_header_out; + } + + /* for inbody stuff */ + if (starts_with(line->buf, ">From") && isspace(line->buf[5])) { + ret = is_format_patch_separator(line->buf + 1, line->len - 1); + goto check_header_out; + } + if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) { + for (i = 0; header[i]; i++) { + if (!strcmp("Subject", header[i])) { + handle_header(&hdr_data[i], line); + ret = 1; + goto check_header_out; + } + } + } + +check_header_out: + strbuf_release(&sb); + return ret; +} + +static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line) +{ + struct strbuf *ret; + + switch (mi->transfer_encoding) { + case TE_QP: + ret = decode_q_segment(line, 0); + break; + case TE_BASE64: + ret = decode_b_segment(line); + break; + case TE_DONTCARE: + default: + return; + } + strbuf_reset(line); + strbuf_addbuf(line, ret); + strbuf_release(ret); + free(ret); +} + +static inline int patchbreak(const struct strbuf *line) +{ + size_t i; + + /* Beginning of a "diff -" header? 
*/ + if (starts_with(line->buf, "diff -")) + return 1; + + /* CVS "Index: " line? */ + if (starts_with(line->buf, "Index: ")) + return 1; + + /* + * "--- " starts patches without headers + * "---*" is a manual separator + */ + if (line->len < 4) + return 0; + + if (starts_with(line->buf, "---")) { + /* space followed by a filename? */ + if (line->buf[3] == ' ' && !isspace(line->buf[4])) + return 1; + /* Just whitespace? */ + for (i = 3; i < line->len; i++) { + unsigned char c = line->buf[i]; + if (c == '\n') + return 1; + if (!isspace(c)) + break; + } + return 0; + } + return 0; +} + +static int is_scissors_line(const struct strbuf *line) +{ + size_t i, len = line->len; + int scissors = 0, gap = 0; + int first_nonblank = -1; + int last_nonblank = 0, visible, perforation = 0, in_perforation = 0; + const char *buf = line->buf; + + for (i = 0; i < len; i++) { + if (isspace(buf[i])) { + if (in_perforation) { + perforation++; + gap++; + } + continue; + } + last_nonblank = i; + if (first_nonblank < 0) + first_nonblank = i; + if (buf[i] == '-') { + in_perforation = 1; + perforation++; + continue; + } + if (i + 1 < len && + (!memcmp(buf + i, ">8", 2) || !memcmp(buf + i, "8<", 2) || + !memcmp(buf + i, ">%", 2) || !memcmp(buf + i, "%<", 2))) { + in_perforation = 1; + perforation += 2; + scissors += 2; + i++; + continue; + } + in_perforation = 0; + } + + /* + * The mark must be at least 8 bytes long (e.g. "-- >8 --"). + * Even though there can be arbitrary cruft on the same line + * (e.g. "cut here"), in order to avoid misidentification, the + * perforation must occupy more than a third of the visible + * width of the line, and dashes and scissors must occupy more + * than half of the perforation. 
+ */ + + visible = last_nonblank - first_nonblank + 1; + return (scissors && 8 <= visible && + visible < perforation * 3 && + gap * 2 < perforation); +} + +static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) +{ + assert(!mi->filter_stage); + + if (mi->header_stage) { + if (!line->len || (line->len == 1 && line->buf[0] == '\n')) + return 0; + } + + if (mi->use_inbody_headers && mi->header_stage) { + mi->header_stage = check_header(mi, line, mi->s_hdr_data, 0); + if (mi->header_stage) + return 0; + } else + /* Only trim the first (blank) line of the commit message + * when ignoring in-body headers. + */ + mi->header_stage = 0; + + /* normalize the log message to UTF-8. */ + convert_to_utf8(mi, line, mi->charset.buf); + + if (mi->use_scissors && is_scissors_line(line)) { + int i; + + strbuf_setlen(&mi->log_message, 0); + mi->header_stage = 1; + + /* + * We may have already read "secondary headers"; purge + * them to give ourselves a clean restart. + */ + for (i = 0; header[i]; i++) { + if (mi->s_hdr_data[i]) + strbuf_release(mi->s_hdr_data[i]); + mi->s_hdr_data[i] = NULL; + } + return 0; + } + + if (patchbreak(line)) { + if (mi->message_id) + strbuf_addf(&mi->log_message, + "Message-Id: %s\n", mi->message_id); + return 1; + } + + strbuf_addbuf(&mi->log_message, line); + return 0; +} + +static void handle_patch(struct mailinfo *mi, const struct strbuf *line) +{ + fwrite(line->buf, 1, line->len, mi->patchfile); + mi->patch_lines++; +} + +static void handle_filter(struct mailinfo *mi, struct strbuf *line) +{ + switch (mi->filter_stage) { + case 0: + if (!handle_commit_msg(mi, line)) + break; + mi->filter_stage++; + case 1: + handle_patch(mi, line); + break; + } +} + +static int is_rfc2822_header(const struct strbuf *line) +{ + /* + * The section that defines the loosest possible + * field name is "3.6.8 Optional fields". 
+ * + * optional-field = field-name ":" unstructured CRLF + * field-name = 1*ftext + * ftext = %d33-57 / %59-126 + */ + int ch; + char *cp = line->buf; + + /* Count mbox From headers as headers */ + if (starts_with(cp, "From ") || starts_with(cp, ">From ")) + return 1; + + while ((ch = *cp++)) { + if (ch == ':') + return 1; + if ((33 <= ch && ch <= 57) || + (59 <= ch && ch <= 126)) + continue; + break; + } + return 0; +} + +static int read_one_header_line(struct strbuf *line, FILE *in) +{ + struct strbuf continuation = STRBUF_INIT; + + /* Get the first part of the line. */ + if (strbuf_getline(line, in, '\n')) + return 0; + + /* + * Is it an empty line or not a valid rfc2822 header? + * If so, stop here, and return false ("not a header") + */ + strbuf_rtrim(line); + if (!line->len || !is_rfc2822_header(line)) { + /* Re-add the newline */ + strbuf_addch(line, '\n'); + return 0; + } + + /* + * Now we need to eat all the continuation lines.. + * Yuck, 2822 header "folding" + */ + for (;;) { + int peek; + + peek = fgetc(in); ungetc(peek, in); + if (peek != ' ' && peek != '\t') + break; + if (strbuf_getline(&continuation, in, '\n')) + break; + continuation.buf[0] = ' '; + strbuf_rtrim(&continuation); + strbuf_addbuf(line, &continuation); + } + strbuf_release(&continuation); + + return 1; +} + +static int find_boundary(struct mailinfo *mi, struct strbuf *line) +{ + while (!strbuf_getline(line, mi->input, '\n')) { + if (*(mi->content_top) && is_multipart_boundary(mi, line)) + return 1; + } + return 0; +} + +static int handle_boundary(struct mailinfo *mi, struct strbuf *line) +{ + struct strbuf newline = STRBUF_INIT; + + strbuf_addch(&newline, '\n'); +again: + if (line->len >= (*(mi->content_top))->len + 2 && + !memcmp(line->buf + (*(mi->content_top))->len, "--", 2)) { + /* we hit an end boundary */ + /* pop the current boundary off the stack */ + strbuf_release(*(mi->content_top)); + free(*(mi->content_top)); + *(mi->content_top) = NULL; + + /* technically won't happen as 
is_multipart_boundary() + will fail first. But just in case.. + */ + if (--mi->content_top < mi->content) { + fprintf(stderr, "Detected mismatched boundaries, " + "can't recover\n"); + exit(1); + } + handle_filter(mi, &newline); + strbuf_release(&newline); + + /* skip to the next boundary */ + if (!find_boundary(mi, line)) + return 0; + goto again; + } + + /* set some defaults */ + mi->transfer_encoding = TE_DONTCARE; + strbuf_reset(&mi->charset); + + /* slurp in this section's info */ + while (read_one_header_line(line, mi->input)) + check_header(mi, line, mi->p_hdr_data, 0); + + strbuf_release(&newline); + /* replenish line */ + if (strbuf_getline(line, mi->input, '\n')) + return 0; + strbuf_addch(line, '\n'); + return 1; +} + +static void handle_body(struct mailinfo *mi, struct strbuf *line) +{ + struct strbuf prev = STRBUF_INIT; + + /* Skip up to the first boundary */ + if (*(mi->content_top)) { + if (!find_boundary(mi, line)) + goto handle_body_out; + } + + do { + /* process any boundary lines */ + if (*(mi->content_top) && is_multipart_boundary(mi, line)) { + /* flush any leftover */ + if (prev.len) { + handle_filter(mi, &prev); + strbuf_reset(&prev); + } + if (!handle_boundary(mi, line)) + goto handle_body_out; + } + + /* Unwrap transfer encoding */ + decode_transfer_encoding(mi, line); + + switch (mi->transfer_encoding) { + case TE_BASE64: + case TE_QP: + { + struct strbuf **lines, **it, *sb; + + /* Prepend any previous partial lines */ + strbuf_insert(line, 0, prev.buf, prev.len); + strbuf_reset(&prev); + + /* + * This is a decoded line that may contain + * multiple new lines. Pass only one chunk + * at a time to handle_filter() + */ + lines = strbuf_split(line, '\n'); + for (it = lines; (sb = *it); it++) { + if (*(it + 1) == NULL) /* The last line */ + if (sb->buf[sb->len - 1] != '\n') { + /* Partial line, save it for later. 
*/ + strbuf_addbuf(&prev, sb); + break; + } + handle_filter(mi, sb); + } + /* + * The partial chunk is saved in "prev" and will be + * appended by the next iteration of read_line_with_nul(). + */ + strbuf_list_free(lines); + break; + } + default: + handle_filter(mi, line); + } + + } while (!strbuf_getwholeline(line, mi->input, '\n')); + +handle_body_out: + strbuf_release(&prev); +} + +static void output_header_lines(FILE *fout, const char *hdr, const struct strbuf *data) +{ + const char *sp = data->buf; + while (1) { + char *ep = strchr(sp, '\n'); + int len; + if (!ep) + len = strlen(sp); + else + len = ep - sp; + fprintf(fout, "%s: %.*s\n", hdr, len, sp); + if (!ep) + break; + sp = ep + 1; + } +} + +static void handle_info(struct mailinfo *mi) +{ + struct strbuf *hdr; + int i; + + for (i = 0; header[i]; i++) { + /* only print inbody headers if we output a patch file */ + if (mi->patch_lines && mi->s_hdr_data[i]) + hdr = mi->s_hdr_data[i]; + else if (mi->p_hdr_data[i]) + hdr = mi->p_hdr_data[i]; + else + continue; + + if (!strcmp(header[i], "Subject")) { + if (!mi->keep_subject) { + cleanup_subject(mi, hdr); + cleanup_space(hdr); + } + output_header_lines(mi->output, "Subject", hdr); + } else if (!strcmp(header[i], "From")) { + cleanup_space(hdr); + handle_from(mi, hdr); + fprintf(mi->output, "Author: %s\n", mi->name.buf); + fprintf(mi->output, "Email: %s\n", mi->email.buf); + } else { + cleanup_space(hdr); + fprintf(mi->output, "%s: %s\n", header[i], hdr->buf); + } + } + fprintf(mi->output, "\n"); +} + +int mailinfo(struct mailinfo *mi, const char *msg, const char *patch) +{ + FILE *cmitmsg; + int peek; + struct strbuf line = STRBUF_INIT; + + cmitmsg = fopen(msg, "w"); + if (!cmitmsg) { + perror(msg); + return -1; + } + mi->patchfile = fopen(patch, "w"); + if (!mi->patchfile) { + perror(patch); + fclose(cmitmsg); + return -1; + } + + mi->p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->p_hdr_data))); + mi->s_hdr_data = xcalloc(MAX_HDR_PARSED, 
sizeof(*(mi->s_hdr_data))); + + do { + peek = fgetc(mi->input); + } while (isspace(peek)); + ungetc(peek, mi->input); + + /* process the email header */ + while (read_one_header_line(&line, mi->input)) + check_header(mi, &line, mi->p_hdr_data, 1); + + handle_body(mi, &line); + fwrite(mi->log_message.buf, 1, mi->log_message.len, cmitmsg); + fclose(cmitmsg); + fclose(mi->patchfile); + + handle_info(mi); + strbuf_release(&line); + return 0; +} + +static int git_mailinfo_config(const char *var, const char *value, void *mi_) +{ + struct mailinfo *mi = mi_; + + if (!starts_with(var, "mailinfo.")) + return git_default_config(var, value, NULL); + if (!strcmp(var, "mailinfo.scissors")) { + mi->use_scissors = git_config_bool(var, value); + return 0; + } + /* perhaps others here */ + return 0; +} + +void setup_mailinfo(struct mailinfo *mi) +{ + memset(mi, 0, sizeof(*mi)); + strbuf_init(&mi->name, 0); + strbuf_init(&mi->email, 0); + strbuf_init(&mi->charset, 0); + strbuf_init(&mi->log_message, 0); + mi->header_stage = 1; + mi->use_inbody_headers = 1; + mi->content_top = mi->content; + git_config(git_mailinfo_config, &mi); +} + +void clear_mailinfo(struct mailinfo *mi) +{ + int i; + + strbuf_release(&mi->name); + strbuf_release(&mi->email); + strbuf_release(&mi->charset); + free(mi->message_id); + + for (i = 0; mi->p_hdr_data[i]; i++) + strbuf_release(mi->p_hdr_data[i]); + free(mi->p_hdr_data); + for (i = 0; mi->s_hdr_data[i]; i++) + strbuf_release(mi->s_hdr_data[i]); + free(mi->s_hdr_data); + + while (mi->content < mi->content_top) { + free(*(mi->content_top)); + mi->content_top--; + } + + strbuf_release(&mi->log_message); +} diff --git a/mailinfo.h b/mailinfo.h new file mode 100644 index 0000000000..1e97b737f3 --- /dev/null +++ b/mailinfo.h @@ -0,0 +1,40 @@ +#ifndef MAILINFO_H +#define MAILINFO_H + +#define MAX_BOUNDARIES 5 + +struct mailinfo { + FILE *input; + FILE *output; + FILE *patchfile; + + struct strbuf name; + struct strbuf email; + int keep_subject; + int 
keep_non_patch_brackets_in_subject; + int add_message_id; + int use_scissors; + int use_inbody_headers; + const char *metainfo_charset; + + struct strbuf *content[MAX_BOUNDARIES]; + struct strbuf **content_top; + struct strbuf charset; + char *message_id; + enum { + TE_DONTCARE, TE_QP, TE_BASE64 + } transfer_encoding; + int patch_lines; + int filter_stage; /* still reading log or are we copying patch? */ + int header_stage; /* still checking in-body headers? */ + struct strbuf **p_hdr_data; + struct strbuf **s_hdr_data; + + struct strbuf log_message; +}; + +extern void setup_mailinfo(struct mailinfo *); +extern int mailinfo(struct mailinfo *, const char *msg, const char *patch); +extern void clear_mailinfo(struct mailinfo *); + +#endif /* MAILINFO_H */ From 669b963af2778c489b0742d2f4d56aeda7dcfad8 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 17:45:16 -0700 Subject: [PATCH 33/34] mailinfo: handle charset conversion errors in the caller Instead of dying in convert_to_utf8(), just report an error and let the callers handle it. Between the two callers: - decode_header() silently punts when it cannot parse a broken RFC2047 encoded text (e.g. when it sees anything other than B or Q after it sees "=?") by jumping to release_return, returning the string it successfully parsed out so far, to the caller. A piece of string that convert_to_utf8() cannot handle can be treated the same way. - handle_commit_msg() doesn't cope with a malformed line well, so die there for now. We'll lift this even higher in later changes in this series. 
Signed-off-by: Junio C Hamano --- mailinfo.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/mailinfo.c b/mailinfo.c index 90adc7583b..97e5cb8d70 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -344,21 +344,22 @@ static struct strbuf *decode_b_segment(const struct strbuf *b_seg) return out; } -static void convert_to_utf8(struct mailinfo *mi, - struct strbuf *line, const char *charset) +static int convert_to_utf8(struct mailinfo *mi, + struct strbuf *line, const char *charset) { char *out; if (!mi->metainfo_charset || !charset || !*charset) - return; + return 0; if (same_encoding(mi->metainfo_charset, charset)) - return; + return 0; out = reencode_string(line->buf, mi->metainfo_charset, charset); if (!out) - die("cannot convert from %s to %s", - charset, mi->metainfo_charset); + return error("cannot convert from %s to %s", + charset, mi->metainfo_charset); strbuf_attach(line, out, strlen(out), strlen(out)); + return 0; } static void decode_header(struct mailinfo *mi, struct strbuf *it) @@ -424,7 +425,8 @@ static void decode_header(struct mailinfo *mi, struct strbuf *it) dec = decode_q_segment(&piecebuf, 1); break; } - convert_to_utf8(mi, dec, charset_q.buf); + if (convert_to_utf8(mi, dec, charset_q.buf)) + goto release_return; strbuf_addbuf(&outbuf, dec); strbuf_release(dec); @@ -637,7 +639,8 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) mi->header_stage = 0; /* normalize the log message to UTF-8. 
*/ - convert_to_utf8(mi, line, mi->charset.buf); + if (convert_to_utf8(mi, line, mi->charset.buf)) + exit(128); if (mi->use_scissors && is_scissors_line(line)) { int i; From 6ac617a321383b2e3a0f0537e3224ec6229e6500 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 14 Oct 2015 17:45:29 -0700 Subject: [PATCH 34/34] mailinfo: remove calls to exit() and die() deep in the callchain The top-level mailinfo() would instead punt when the code in the deeper part of the callchain detects an unrecoverable error in the input. Signed-off-by: Junio C Hamano --- mailinfo.c | 30 ++++++++++++++++++++++-------- mailinfo.h | 1 + 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/mailinfo.c b/mailinfo.c index 97e5cb8d70..e157ca6eb5 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -163,8 +163,10 @@ static void handle_content_type(struct mailinfo *mi, struct strbuf *line) if (slurp_attr(line->buf, "boundary=", boundary)) { strbuf_insert(boundary, 0, "--", 2); if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) { - fprintf(stderr, "Too many boundaries to handle\n"); - exit(1); + error("Too many boundaries to handle"); + mi->input_error = -1; + mi->content_top = &mi->content[MAX_BOUNDARIES] - 1; + return; } *(mi->content_top) = boundary; boundary = NULL; @@ -355,9 +357,11 @@ static int convert_to_utf8(struct mailinfo *mi, if (same_encoding(mi->metainfo_charset, charset)) return 0; out = reencode_string(line->buf, mi->metainfo_charset, charset); - if (!out) + if (!out) { + mi->input_error = -1; return error("cannot convert from %s to %s", charset, mi->metainfo_charset); + } strbuf_attach(line, out, strlen(out), strlen(out)); return 0; } @@ -367,6 +371,7 @@ static void decode_header(struct mailinfo *mi, struct strbuf *it) char *in, *ep, *cp; struct strbuf outbuf = STRBUF_INIT, *dec; struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT; + int found_error = 1; /* pessimism */ in = it->buf; while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) { @@ -436,10 
+441,14 @@ static void decode_header(struct mailinfo *mi, struct strbuf *it) strbuf_addstr(&outbuf, in); strbuf_reset(it); strbuf_addbuf(it, &outbuf); + found_error = 0; release_return: strbuf_release(&outbuf); strbuf_release(&charset_q); strbuf_release(&piecebuf); + + if (found_error) + mi->input_error = -1; } static int check_header(struct mailinfo *mi, @@ -640,7 +649,7 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) /* normalize the log message to UTF-8. */ if (convert_to_utf8(mi, line, mi->charset.buf)) - exit(128); + return 0; /* mi->input_error already set */ if (mi->use_scissors && is_scissors_line(line)) { int i; @@ -785,12 +794,15 @@ again: will fail first. But just in case.. */ if (--mi->content_top < mi->content) { - fprintf(stderr, "Detected mismatched boundaries, " - "can't recover\n"); - exit(1); + error("Detected mismatched boundaries, can't recover"); + mi->input_error = -1; + mi->content_top = mi->content; + return 0; } handle_filter(mi, &newline); strbuf_release(&newline); + if (mi->input_error) + return 0; /* skip to the next boundary */ if (!find_boundary(mi, line)) @@ -875,6 +887,8 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line) handle_filter(mi, line); } + if (mi->input_error) + break; } while (!strbuf_getwholeline(line, mi->input, '\n')); handle_body_out: @@ -968,7 +982,7 @@ int mailinfo(struct mailinfo *mi, const char *msg, const char *patch) handle_info(mi); strbuf_release(&line); - return 0; + return mi->input_error; } static int git_mailinfo_config(const char *var, const char *value, void *mi_) diff --git a/mailinfo.h b/mailinfo.h index 1e97b737f3..93776a7e05 100644 --- a/mailinfo.h +++ b/mailinfo.h @@ -31,6 +31,7 @@ struct mailinfo { struct strbuf **s_hdr_data; struct strbuf log_message; + int input_error; }; extern void setup_mailinfo(struct mailinfo *);