From bb96a2c9005f925b4e80ece0a7cd6230f7f4b43d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Fri, 19 Feb 2010 23:15:01 +0100 Subject: [PATCH 1/4] utf8.c: remove print_wrapped_text() strbuf_add_wrapped_text() is called only from print_wrapped_text() without a strbuf (in which case it writes its results to stdout). At its only callsite, supply a strbuf, call strbuf_add_wrapped_text() directly and remove the wrapper function. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- builtin-shortlog.c | 17 ++++++++++++++--- utf8.c | 5 ----- utf8.h | 1 - 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/builtin-shortlog.c b/builtin-shortlog.c index 8aa63c7857..d96858f9ad 100644 --- a/builtin-shortlog.c +++ b/builtin-shortlog.c @@ -301,9 +301,19 @@ parse_done: return 0; } +static void add_wrapped_shortlog_msg(struct strbuf *sb, const char *s, + const struct shortlog *log) +{ + int col = strbuf_add_wrapped_text(sb, s, log->in1, log->in2, log->wrap); + if (col != log->wrap) + strbuf_addch(sb, '\n'); +} + void shortlog_output(struct shortlog *log) { int i, j; + struct strbuf sb = STRBUF_INIT; + if (log->sort_by_number) qsort(log->list.items, log->list.nr, sizeof(struct string_list_item), compare_by_number); @@ -318,9 +328,9 @@ void shortlog_output(struct shortlog *log) const char *msg = onelines->items[j].string; if (log->wrap_lines) { - int col = print_wrapped_text(msg, log->in1, log->in2, log->wrap); - if (col != log->wrap) - putchar('\n'); + strbuf_reset(&sb); + add_wrapped_shortlog_msg(&sb, msg, log); + fwrite(sb.buf, sb.len, 1, stdout); } else printf(" %s\n", msg); @@ -334,6 +344,7 @@ void shortlog_output(struct shortlog *log) log->list.items[i].util = NULL; } + strbuf_release(&sb); log->list.strdup_strings = 1; string_list_clear(&log->list, 1); clear_mailmap(&log->mailmap); diff --git a/utf8.c b/utf8.c index 7ddff23fa7..5c8a2697f3 100644 --- a/utf8.c +++ b/utf8.c @@ -405,11 +405,6 @@ new_line: } } -int print_wrapped_text(const char *text, int indent, int indent2, int width) -{ - return strbuf_add_wrapped_text(NULL, text, indent, indent2, width); -} - int is_encoding_utf8(const char *name) { if (!name) diff --git a/utf8.h b/utf8.h index ae30ae4c6e..b09687d500 100644 --- a/utf8.h +++ b/utf8.h @@ -9,7 +9,6 @@ int utf8_strwidth(const char *string); int is_utf8(const char *text); int is_encoding_utf8(const char *name); -int print_wrapped_text(const char *text, int indent, int indent2, int len); int strbuf_add_wrapped_text(struct strbuf *buf, const char *text, int indent, int indent2, int width); From 3c0ff44a1ee92bd0f811b95d747a08763983566b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Fri, 19 Feb 2010 23:15:55 +0100 Subject: [PATCH 2/4] utf8.c: remove print_spaces() The previous patch made sure that strbuf_add_wrapped_text() (and thus strbuf_add_indented_text(), too) always get a strbuf. Make use of this fact by adding strbuf_addchars(), a small helper that adds a char the specified number of times to a strbuf, and use it to replace print_spaces(). Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- utf8.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/utf8.c b/utf8.c index 5c8a2697f3..a4e36ff33c 100644 --- a/utf8.c +++ b/utf8.c @@ -288,14 +288,11 @@ static inline void strbuf_write(struct strbuf *sb, const char *buf, int len) fwrite(buf, len, 1, stdout); } -static void print_spaces(struct strbuf *buf, int count) +static void strbuf_addchars(struct strbuf *sb, int c, size_t n) { - static const char s[] = " "; - while (count >= sizeof(s)) { - strbuf_write(buf, s, sizeof(s) - 1); - count -= sizeof(s) - 1; - } - strbuf_write(buf, s, count); + strbuf_grow(sb, n); + memset(sb->buf + sb->len, c, n); + strbuf_setlen(sb, sb->len + n); } static void strbuf_add_indented_text(struct strbuf *buf, const char *text, @@ -307,7 +304,7 @@ static void strbuf_add_indented_text(struct strbuf *buf, const char *text, const char *eol = strchrnul(text, '\n'); if (*eol == '\n') eol++; - print_spaces(buf, indent); + strbuf_addchars(buf, ' ', indent); strbuf_write(buf, text, eol - text); text = eol; indent = indent2; @@ -366,7 +363,7 @@ int strbuf_add_wrapped_text(struct strbuf *buf, if (space) start = space; else - print_spaces(buf, indent); + strbuf_addchars(buf, ' ', indent); strbuf_write(buf, start, text - start); if (!c) return w; From 68ad5e1e9c10e8a640703aadbdf8b8366014373b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Fri, 19 Feb 2010 23:16:45 +0100 Subject: [PATCH 3/4] utf8.c: remove strbuf_write() The patch before the previous one made sure that all callers of strbuf_add_wrapped_text() supply a strbuf. Replace all calls of strbuf_write() with regular strbuf functions and remove it. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- utf8.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/utf8.c b/utf8.c index a4e36ff33c..9f64f59d66 100644 --- a/utf8.c +++ b/utf8.c @@ -280,14 +280,6 @@ int is_utf8(const char *text) return 1; } -static inline void strbuf_write(struct strbuf *sb, const char *buf, int len) -{ - if (sb) - strbuf_insert(sb, sb->len, buf, len); - else - fwrite(buf, len, 1, stdout); -} - static void strbuf_addchars(struct strbuf *sb, int c, size_t n) { strbuf_grow(sb, n); @@ -305,7 +297,7 @@ static void strbuf_add_indented_text(struct strbuf *buf, const char *text, if (*eol == '\n') eol++; strbuf_addchars(buf, ' ', indent); - strbuf_write(buf, text, eol - text); + strbuf_add(buf, text, eol - text); text = eol; indent = indent2; } @@ -364,7 +356,7 @@ int strbuf_add_wrapped_text(struct strbuf *buf, start = space; else strbuf_addchars(buf, ' ', indent); - strbuf_write(buf, start, text - start); + strbuf_add(buf, start, text - start); if (!c) return w; space = text; @@ -373,20 +365,20 @@ int strbuf_add_wrapped_text(struct strbuf *buf, else if (c == '\n') { space++; if (*space == '\n') { - strbuf_write(buf, "\n", 1); + strbuf_addch(buf, '\n'); goto new_line; } else if (!isalnum(*space)) goto new_line; else - strbuf_write(buf, " ", 1); + strbuf_addch(buf, ' '); } w++; text++; } else { new_line: - strbuf_write(buf, "\n", 1); + strbuf_addch(buf, '\n'); text = bol = space + isspace(*space); space = NULL; w = indent = indent2; From 462749b728f72079a67202d4d0d1ef19ef993f61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Fri, 19 Feb 2010 23:20:44 +0100 Subject: [PATCH 4/4] utf8.c: speculatively assume utf-8 in strbuf_add_wrapped_text() is_utf8() works by calling utf8_width() for each character at the supplied location. In strbuf_add_wrapped_text(), we do that anyway while wrapping the lines. So instead of checking the encoding beforehand, optimistically assume that it's utf-8 and wrap along until an invalid character is hit, and when that happens start over. This pays off if the text consists only of valid utf-8 characters. The following command was run against the Linux kernel repo with git 1.7.0: $ time git log --format='%b' v2.6.32 >/dev/null real 0m2.679s user 0m2.580s sys 0m0.100s $ time git log --format='%w(60,4,8)%b' >/dev/null real 0m4.342s user 0m4.230s sys 0m0.110s And with this patch series: $ time git log --format='%w(60,4,8)%b' >/dev/null real 0m3.741s user 0m3.630s sys 0m0.110s So the cost of wrapping is reduced to 70% in this case. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- utf8.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/utf8.c b/utf8.c index 9f64f59d66..6db9cd9a07 100644 --- a/utf8.c +++ b/utf8.c @@ -324,16 +324,21 @@ static size_t display_mode_esc_sequence_len(const char *s) * consumed (and no extra indent is necessary for the first line). */ int strbuf_add_wrapped_text(struct strbuf *buf, - const char *text, int indent, int indent2, int width) + const char *text, int indent1, int indent2, int width) { - int w = indent, assume_utf8 = is_utf8(text); - const char *bol = text, *space = NULL; + int indent, w, assume_utf8 = 1; + const char *bol, *space, *start = text; + size_t orig_len = buf->len; if (width <= 0) { - strbuf_add_indented_text(buf, text, indent, indent2); + strbuf_add_indented_text(buf, text, indent1, indent2); return 1; } +retry: + bol = text; + w = indent = indent1; + space = NULL; if (indent < 0) { w = -indent; space = text; @@ -385,9 +390,15 @@ new_line: } continue; } - if (assume_utf8) + if (assume_utf8) { w += utf8_width(&text, NULL); - else { + if (!text) { + assume_utf8 = 0; + text = start; + strbuf_setlen(buf, orig_len); + goto retry; + } + } else { w++; text++; }