From d551a488169aeb2ac09dba781f2ffbecf3425996 Mon Sep 17 00:00:00 2001 From: Marius Storm-Olsen Date: Sun, 8 Feb 2009 15:34:27 +0100 Subject: [PATCH 1/5] Add mailmap.file as configurational option for mailmap location This allows us to augment the repo mailmap file, and to use mailmap files elsewhere than the repository root. Meaning that the entries in mailmap.file will override the entries in "./.mailmap", should they match. Signed-off-by: Marius Storm-Olsen Signed-off-by: Junio C Hamano --- Documentation/config.txt | 8 +++ Documentation/git-shortlog.txt | 3 +- builtin-blame.c | 2 +- builtin-shortlog.c | 3 +- cache.h | 1 + config.c | 12 ++++ mailmap.c | 12 +++- mailmap.h | 2 +- pretty.c | 2 +- t/t4203-mailmap.sh | 109 +++++++++++++++++++++++++++++++++ 10 files changed, 147 insertions(+), 7 deletions(-) create mode 100755 t/t4203-mailmap.sh diff --git a/Documentation/config.txt b/Documentation/config.txt index e2b8775dd3..6dd57bd52c 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1012,6 +1012,14 @@ log.showroot:: Tools like linkgit:git-log[1] or linkgit:git-whatchanged[1], which normally hide the root commit will now show it. True by default. +mailmap.file:: + The location of an augmenting mailmap file. The default + mailmap, located in the root of the repository, is loaded + first, then the mailmap file pointed to by this variable. + The location of the mailmap file may be in a repository + subdirectory, or somewhere outside of the repository itself. + See linkgit:git-shortlog[1] and linkgit:git-blame[1]. + man.viewer:: Specify the programs that may be used to display help in the 'man' format. See linkgit:git-help[1]. 
diff --git a/Documentation/git-shortlog.txt b/Documentation/git-shortlog.txt index 498bd28929..66b6045849 100644 --- a/Documentation/git-shortlog.txt +++ b/Documentation/git-shortlog.txt @@ -48,7 +48,8 @@ OPTIONS FILES ----- -If a file `.mailmap` exists at the toplevel of the repository, +If a file `.mailmap` exists at the toplevel of the repository, or at the +location pointed to by the log.mailmap configuration option, it is used to map an author email address to a canonical real name. This can be used to coalesce together commits by the same person where their name was spelled differently (whether with the same email address or diff --git a/builtin-blame.c b/builtin-blame.c index aae14ef8bb..19edcf33b1 100644 --- a/builtin-blame.c +++ b/builtin-blame.c @@ -2394,7 +2394,7 @@ parse_done: die("reading graft file %s failed: %s", revs_file, strerror(errno)); - read_mailmap(&mailmap, ".mailmap", NULL); + read_mailmap(&mailmap, NULL); if (!incremental) setup_pager(); diff --git a/builtin-shortlog.c b/builtin-shortlog.c index 5f9f3f09b1..314b6bc5c7 100644 --- a/builtin-shortlog.c +++ b/builtin-shortlog.c @@ -219,7 +219,7 @@ void shortlog_init(struct shortlog *log) { memset(log, 0, sizeof(*log)); - read_mailmap(&log->mailmap, ".mailmap", &log->common_repo_prefix); + read_mailmap(&log->mailmap, &log->common_repo_prefix); log->list.strdup_strings = 1; log->wrap = DEFAULT_WRAPLEN; @@ -248,6 +248,7 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix) struct parse_opt_ctx_t ctx; prefix = setup_git_directory_gently(&nongit); + git_config(git_default_config, NULL); shortlog_init(&log); init_revisions(&rev, prefix); parse_options_start(&ctx, argc, argv, PARSE_OPT_KEEP_DASHDASH | diff --git a/cache.h b/cache.h index 45e713e928..e88bcca96d 100644 --- a/cache.h +++ b/cache.h @@ -867,6 +867,7 @@ extern int user_ident_explicitly_given; extern const char *git_commit_encoding; extern const char *git_log_output_encoding; +extern const char *git_mailmap_file; /* IO helper 
functions */ extern void maybe_flush_or_die(FILE *, const char *); diff --git a/config.c b/config.c index 790405a213..0c8c76f13b 100644 --- a/config.c +++ b/config.c @@ -565,6 +565,15 @@ static int git_default_branch_config(const char *var, const char *value) return 0; } +static int git_default_mailmap_config(const char *var, const char *value) +{ + if (!strcmp(var, "mailmap.file")) + return git_config_string(&git_mailmap_file, var, value); + + /* Add other config variables here and to Documentation/config.txt. */ + return 0; +} + int git_default_config(const char *var, const char *value, void *dummy) { if (!prefixcmp(var, "core.")) @@ -579,6 +588,9 @@ int git_default_config(const char *var, const char *value, void *dummy) if (!prefixcmp(var, "branch.")) return git_default_branch_config(var, value); + if (!prefixcmp(var, "mailmap.")) + return git_default_mailmap_config(var, value); + if (!strcmp(var, "pager.color") || !strcmp(var, "color.pager")) { pager_use_color = git_config_bool(var,value); return 0; diff --git a/mailmap.c b/mailmap.c index 88fc6f3946..d006dad673 100644 --- a/mailmap.c +++ b/mailmap.c @@ -2,10 +2,11 @@ #include "string-list.h" #include "mailmap.h" -int read_mailmap(struct string_list *map, const char *filename, char **repo_abbrev) +const char *git_mailmap_file; +static int read_single_mailmap(struct string_list *map, const char *filename, char **repo_abbrev) { char buffer[1024]; - FILE *f = fopen(filename, "r"); + FILE *f = (filename == NULL ? 
NULL : fopen(filename, "r")); if (f == NULL) return 1; @@ -60,6 +61,13 @@ int read_mailmap(struct string_list *map, const char *filename, char **repo_abbr return 0; } +int read_mailmap(struct string_list *map, char **repo_abbrev) +{ + /* each failure returns 1, so >1 means both calls failed */ + return read_single_mailmap(map, ".mailmap", repo_abbrev) + + read_single_mailmap(map, git_mailmap_file, repo_abbrev) > 1; +} + int map_email(struct string_list *map, const char *email, char *name, int maxlen) { char *p; diff --git a/mailmap.h b/mailmap.h index 6e48f83ced..ba2ee7670c 100644 --- a/mailmap.h +++ b/mailmap.h @@ -1,7 +1,7 @@ #ifndef MAILMAP_H #define MAILMAP_H -int read_mailmap(struct string_list *map, const char *filename, char **repo_abbrev); +int read_mailmap(struct string_list *map, char **repo_abbrev); int map_email(struct string_list *mailmap, const char *email, char *name, int maxlen); #endif diff --git a/pretty.c b/pretty.c index cc460b5697..9e03d6ae2d 100644 --- a/pretty.c +++ b/pretty.c @@ -312,7 +312,7 @@ static int mailmap_name(struct strbuf *sb, const char *email) if (!mail_map) { mail_map = xcalloc(1, sizeof(*mail_map)); - read_mailmap(mail_map, ".mailmap", NULL); + read_mailmap(mail_map, NULL); } if (!mail_map->nr) diff --git a/t/t4203-mailmap.sh b/t/t4203-mailmap.sh new file mode 100755 index 0000000000..fc50ac22e3 --- /dev/null +++ b/t/t4203-mailmap.sh @@ -0,0 +1,109 @@ +#!/bin/sh + +test_description='.mailmap configurations' + +. 
./test-lib.sh + +test_expect_success setup ' + echo one >one && + git add one && + test_tick && + git commit -m initial && + echo two >>one && + git add one && + git commit --author "nick1 " -m second +' + +cat >expect <<\EOF +A U Thor (1): + initial + +nick1 (1): + second + +EOF + +test_expect_success 'No mailmap' ' + git shortlog HEAD >actual && + test_cmp expect actual +' + +cat >expect <<\EOF +Repo Guy (1): + initial + +nick1 (1): + second + +EOF + +test_expect_success 'default .mailmap' ' + echo "Repo Guy " > .mailmap && + git shortlog HEAD >actual && + test_cmp expect actual +' + +# Using a mailmap file in a subdirectory of the repo here, but +# could just as well have been a file outside of the repository +cat >expect <<\EOF +Internal Guy (1): + second + +Repo Guy (1): + initial + +EOF +test_expect_success 'mailmap.file set' ' + mkdir internal_mailmap && + echo "Internal Guy " > internal_mailmap/.mailmap && + git config mailmap.file internal_mailmap/.mailmap && + git shortlog HEAD >actual && + test_cmp expect actual +' + +cat >expect <<\EOF +External Guy (1): + initial + +Internal Guy (1): + second + +EOF +test_expect_success 'mailmap.file override' ' + echo "External Guy " >> internal_mailmap/.mailmap && + git config mailmap.file internal_mailmap/.mailmap && + git shortlog HEAD >actual && + test_cmp expect actual +' + +cat >expect <<\EOF +Repo Guy (1): + initial + +nick1 (1): + second + +EOF + +test_expect_success 'mailmap.file non-existant' ' + rm internal_mailmap/.mailmap && + rmdir internal_mailmap && + git shortlog HEAD >actual && + test_cmp expect actual +' + +cat >expect <<\EOF +A U Thor (1): + initial + +nick1 (1): + second + +EOF +test_expect_success 'No mailmap files, but configured' ' + rm .mailmap && + git shortlog HEAD >actual && + test_cmp expect actual +' + +test_done From cfa1ee6b340172a415049704cd848593392b9064 Mon Sep 17 00:00:00 2001 From: Marius Storm-Olsen Date: Sun, 8 Feb 2009 15:34:28 +0100 Subject: [PATCH 2/5] Add find_insert_index, 
insert_at_index and clear_func functions to string_list string_list_find_insert_index() and string_list_insert_at_index() enable you to see if an item is in the string_list, and to insert at the appropriate index in the list, if not there. This is useful if you need to manipulate an existing item, if present, and insert a new item if not. Future mailmap code will use this construct to enable complex (old_name, old_email) -> (new_name, new_email) lookups. The string_list_clear_func() allows calling a custom cleanup function on each item in a string_list, which is useful if the util member points to a complex structure. Signed-off-by: Marius Storm-Olsen Signed-off-by: Junio C Hamano --- string-list.c | 43 +++++++++++++++++++++++++++++++++++++++---- string-list.h | 9 +++++++++ 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/string-list.c b/string-list.c index ddd83c8c76..15e14cf47a 100644 --- a/string-list.c +++ b/string-list.c @@ -26,10 +26,10 @@ static int get_entry_index(const struct string_list *list, const char *string, } /* returns -1-index if already exists */ -static int add_entry(struct string_list *list, const char *string) +static int add_entry(int insert_at, struct string_list *list, const char *string) { - int exact_match; - int index = get_entry_index(list, string, &exact_match); + int exact_match = 0; + int index = insert_at != -1 ? 
insert_at : get_entry_index(list, string, &exact_match); if (exact_match) return -1 - index; @@ -53,7 +53,13 @@ static int add_entry(struct string_list *list, const char *string) struct string_list_item *string_list_insert(const char *string, struct string_list *list) { - int index = add_entry(list, string); + return string_list_insert_at_index(-1, string, list); +} + +struct string_list_item *string_list_insert_at_index(int insert_at, + const char *string, struct string_list *list) +{ + int index = add_entry(insert_at, list, string); if (index < 0) index = -1 - index; @@ -68,6 +74,16 @@ int string_list_has_string(const struct string_list *list, const char *string) return exact_match; } +int string_list_find_insert_index(const struct string_list *list, const char *string, + int negative_existing_index) +{ + int exact_match; + int index = get_entry_index(list, string, &exact_match); + if (exact_match) + index = -1 - (negative_existing_index ? index : 0); + return index; +} + struct string_list_item *string_list_lookup(const char *string, struct string_list *list) { int exact_match, i = get_entry_index(list, string, &exact_match); @@ -94,6 +110,25 @@ void string_list_clear(struct string_list *list, int free_util) list->nr = list->alloc = 0; } +void string_list_clear_func(struct string_list *list, string_list_clear_func_t clearfunc) +{ + if (list->items) { + int i; + if (clearfunc) { + for (i = 0; i < list->nr; i++) + clearfunc(list->items[i].util, list->items[i].string); + } + if (list->strdup_strings) { + for (i = 0; i < list->nr; i++) + free(list->items[i].string); + } + free(list->items); + } + list->items = NULL; + list->nr = list->alloc = 0; +} + + void print_string_list(const char *text, const struct string_list *p) { int i; diff --git a/string-list.h b/string-list.h index 4d6a7051fe..d32ba05202 100644 --- a/string-list.h +++ b/string-list.h @@ -15,9 +15,18 @@ struct string_list void print_string_list(const char *text, const struct string_list *p); void 
string_list_clear(struct string_list *list, int free_util); +/* Use this function to call a custom clear function on each util pointer */ +/* The string associated with the util pointer is passed as the second argument */ +typedef void (*string_list_clear_func_t)(void *p, const char *str); +void string_list_clear_func(struct string_list *list, string_list_clear_func_t clearfunc); + /* Use these functions only on sorted lists: */ int string_list_has_string(const struct string_list *list, const char *string); +int string_list_find_insert_index(const struct string_list *list, const char *string, + int negative_existing_index); struct string_list_item *string_list_insert(const char *string, struct string_list *list); +struct string_list_item *string_list_insert_at_index(int insert_at, + const char *string, struct string_list *list); struct string_list_item *string_list_lookup(const char *string, struct string_list *list); /* Use these functions only on unsorted lists: */ From 0925ce4d49ef8352617e8f56231bd36802a2f138 Mon Sep 17 00:00:00 2001 From: Marius Storm-Olsen Date: Sun, 8 Feb 2009 15:34:29 +0100 Subject: [PATCH 3/5] Add map_user() and clear_mailmap() to mailmap map_user() allows looking up and replacing both the email and the name of a user, based on a new style mailmap file. The possible mailmap definitions are now: proper_name <commit_email> # Old style <proper_email> <commit_email> # New style proper_name <proper_email> <commit_email> # New style proper_name <proper_email> commit_name <commit_email> # New style map_email() operates the same as before, with the exception that it will also try to match on a name passed in through the name return buffer. clear_mailmap() is needed to now clear the more complex mailmap structure. 
Signed-off-by: Marius Storm-Olsen Signed-off-by: Junio C Hamano --- Documentation/git-shortlog.txt | 81 ++++++++++---- mailmap.c | 196 ++++++++++++++++++++++++++++----- mailmap.h | 4 + 3 files changed, 233 insertions(+), 48 deletions(-) diff --git a/Documentation/git-shortlog.txt b/Documentation/git-shortlog.txt index 66b6045849..a0eaab5250 100644 --- a/Documentation/git-shortlog.txt +++ b/Documentation/git-shortlog.txt @@ -48,24 +48,38 @@ OPTIONS FILES ----- -If a file `.mailmap` exists at the toplevel of the repository, or at the -location pointed to by the log.mailmap configuration option, -it is used to map an author email address to a canonical real name. This -can be used to coalesce together commits by the same person where their -name was spelled differently (whether with the same email address or -not). +If the file `.mailmap` exists at the toplevel of the repository, or at +the location pointed to by the mailmap.file configuration option, it +is used to map author and committer names and email addresses to +canonical real names and email addresses. +This mapping can be used to coalesce together commits by the same +person where their name and/or email address was spelled differently. -Each line in the file consists, in this order, of the canonical real name -of an author, whitespace, and an email address (enclosed by '<' and '>') -to map to the name. Use hash '#' for comments, either on their own line, -or after the email address. +In the simple form, each line in the file consists of the canonical +real name of an author, whitespace, and an email address used in the +commit (enclosed by '<' and '>') to map to the name. Thus, looks like +this +-- + Proper Name +-- -A canonical name may appear in more than one line, associated with -different email addresses, but it doesn't make sense for a given address -to appear more than once (if that happens, a later line overrides the -earlier ones). 
+The more complex forms are +-- + +-- +which allows mailmap to replace only the email part of a commit, and +-- + Proper Name +-- +which allows mailmap to replace both the name and the email of a +commit matching the specified commit email address, and +-- + Proper Name Commit Name +-- +which allows mailmap to replace both the name and the email of a +commit matching both the specified commit name and email address. -So, for example, if your history contains commits by two authors, Jane +Example 1: Your history contains commits by two authors, Jane and Joe, whose names appear in the repository under several forms: ------------ @@ -76,16 +90,43 @@ Jane Doe Jane D. ------------ -Then, supposing Joe wants his middle name initial used, and Jane prefers -her family name fully spelled out, a proper `.mailmap` file would look like: +Now suppose that Joe wants his middle name initial used, and Jane +prefers her family name fully spelled out. A proper `.mailmap` file +would look like: ------------ -# Note how we don't need an entry for , because the -# real name of that author is correct already, and coalesced directly. -Jane Doe +Jane Doe Joe R. Developer ------------ +Note how we don't need an entry for , because the +real name of that author is correct already, and coalesced directly. + +Example 2: Your repository contains commits from the following +authors: + +------------ +nick1 +nick2 +nick2 +santa +claus +CTO +------------ + +Then, you might want a `.mailmap` file looking like: +------------ + +Some Dude nick1 +Other Author nick2 +Other Author +Santa Claus +------------ + +Use hash '#' for comments that are either on their own line, or after +the email address. + + Author ------ Written by Jeff Garzik diff --git a/mailmap.c b/mailmap.c index d006dad673..f12bb45a3f 100644 --- a/mailmap.c +++ b/mailmap.c @@ -2,7 +2,122 @@ #include "string-list.h" #include "mailmap.h" +#define DEBUG_MAILMAP 0 +#if DEBUG_MAILMAP +#define debug_mm(...) 
fprintf(stderr, __VA_ARGS__) +#else +static inline void debug_mm(const char *format, ...) {} +#endif + const char *git_mailmap_file; + +struct mailmap_info { + char *name; + char *email; +}; + +struct mailmap_entry { + /* name and email for the simple mail-only case */ + char *name; + char *email; + + /* name and email for the complex mail and name matching case */ + struct string_list namemap; +}; + +static void free_mailmap_info(void *p, const char *s) +{ + struct mailmap_info *mi = (struct mailmap_info *)p; + debug_mm("mailmap: -- complex: '%s' -> '%s' <%s>\n", s, mi->name, mi->email); + free(mi->name); + free(mi->email); +} + +static void free_mailmap_entry(void *p, const char *s) +{ + struct mailmap_entry *me = (struct mailmap_entry *)p; + debug_mm("mailmap: removing entries for <%s>, with %d sub-entries\n", s, me->namemap.nr); + debug_mm("mailmap: - simple: '%s' <%s>\n", me->name, me->email); + free(me->name); + free(me->email); + + me->namemap.strdup_strings = 1; + string_list_clear_func(&me->namemap, free_mailmap_info); +} + +static void add_mapping(struct string_list *map, + char *new_name, char *new_email, char *old_name, char *old_email) +{ + struct mailmap_entry *me; + int index; + if (old_email == NULL) { + old_email = new_email; + new_email = NULL; + } + + if ((index = string_list_find_insert_index(map, old_email, 1)) < 0) { + /* mailmap entry exists, invert index value */ + index = -1 - index; + } else { + /* create mailmap entry */ + struct string_list_item *item = string_list_insert_at_index(index, old_email, map); + item->util = xmalloc(sizeof(struct mailmap_entry)); + memset(item->util, 0, sizeof(struct mailmap_entry)); + ((struct mailmap_entry *)item->util)->namemap.strdup_strings = 1; + } + me = (struct mailmap_entry *)map->items[index].util; + + if (old_name == NULL) { + debug_mm("mailmap: adding (simple) entry for %s at index %d\n", old_email, index); + /* Replace current name and new email for simple entry */ + free(me->name); + 
free(me->email); + if (new_name) + me->name = xstrdup(new_name); + if (new_email) + me->email = xstrdup(new_email); + } else { + struct mailmap_info *mi = xmalloc(sizeof(struct mailmap_info)); + debug_mm("mailmap: adding (complex) entry for %s at index %d\n", old_email, index); + if (new_name) + mi->name = xstrdup(new_name); + if (new_email) + mi->email = xstrdup(new_email); + string_list_insert(old_name, &me->namemap)->util = mi; + } + + debug_mm("mailmap: '%s' <%s> -> '%s' <%s>\n", + old_name, old_email, new_name, new_email); +} + +static char *parse_name_and_email(char *buffer, char **name, char **email) +{ + char *left, *right, *nstart, *nend; + *name = *email = 0; + + if ((left = strchr(buffer, '<')) == NULL) + return NULL; + if ((right = strchr(left+1, '>')) == NULL) + return NULL; + if (left+1 == right) + return NULL; + + /* remove whitespace from beginning and end of name */ + nstart = buffer; + while (isspace(*nstart) && nstart < left) + ++nstart; + nend = left-1; + while (isspace(*nend) && nend > nstart) + --nend; + + *name = (nstart < nend ? nstart : NULL); + *email = left+1; + *(nend+1) = '\0'; + *right++ = '\0'; + + return (*right == '\0' ? 
NULL : right); +} + static int read_single_mailmap(struct string_list *map, const char *filename, char **repo_abbrev) { char buffer[1024]; @@ -11,9 +126,7 @@ static int read_single_mailmap(struct string_list *map, const char *filename, ch if (f == NULL) return 1; while (fgets(buffer, sizeof(buffer), f) != NULL) { - char *end_of_name, *left_bracket, *right_bracket; - char *name, *email; - int i; + char *name1 = 0, *email1 = 0, *name2 = 0, *email2 = 0; if (buffer[0] == '#') { static const char abbrev[] = "# repo-abbrev:"; int abblen = sizeof(abbrev) - 1; @@ -37,25 +150,11 @@ static int read_single_mailmap(struct string_list *map, const char *filename, ch } continue; } - if ((left_bracket = strchr(buffer, '<')) == NULL) - continue; - if ((right_bracket = strchr(left_bracket + 1, '>')) == NULL) - continue; - if (right_bracket == left_bracket + 1) - continue; - for (end_of_name = left_bracket; - end_of_name != buffer && isspace(end_of_name[-1]); - end_of_name--) - ; /* keep on looking */ - if (end_of_name == buffer) - continue; - name = xmalloc(end_of_name - buffer + 1); - strlcpy(name, buffer, end_of_name - buffer + 1); - email = xmalloc(right_bracket - left_bracket); - for (i = 0; i < right_bracket - left_bracket - 1; i++) - email[i] = tolower(left_bracket[i + 1]); - email[right_bracket - left_bracket - 1] = '\0'; - string_list_insert(email, map)->util = name; + if ((name2 = parse_name_and_email(buffer, &name1, &email1)) != NULL) + parse_name_and_email(name2, &name2, &email2); + + if (email1) + add_mapping(map, name1, email1, name2, email2); } fclose(f); return 0; @@ -63,22 +162,37 @@ static int read_single_mailmap(struct string_list *map, const char *filename, ch int read_mailmap(struct string_list *map, char **repo_abbrev) { + map->strdup_strings = 1; /* each failure returns 1, so >1 means both calls failed */ return read_single_mailmap(map, ".mailmap", repo_abbrev) + read_single_mailmap(map, git_mailmap_file, repo_abbrev) > 1; } -int map_email(struct string_list 
*map, const char *email, char *name, int maxlen) +void clear_mailmap(struct string_list *map) +{ + debug_mm("mailmap: clearing %d entries...\n", map->nr); + map->strdup_strings = 1; + string_list_clear_func(map, free_mailmap_entry); + debug_mm("mailmap: cleared\n"); +} + +int map_user(struct string_list *map, + char *email, int maxlen_email, char *name, int maxlen_name) { char *p; struct string_list_item *item; + struct mailmap_entry *me; char buf[1024], *mailbuf; int i; - /* autocomplete common developers */ + /* figure out space requirement for email */ p = strchr(email, '>'); - if (!p) - return 0; + if (!p) { + /* email passed in might not be wrapped in <>, but end with a \0 */ + p = memchr(email, '\0', maxlen_email); + if (p == 0) + return 0; + } if (p - email + 1 < sizeof(buf)) mailbuf = buf; else @@ -88,13 +202,39 @@ int map_email(struct string_list *map, const char *email, char *name, int maxlen for (i = 0; i < p - email; i++) mailbuf[i] = tolower(email[i]); mailbuf[i] = 0; + + debug_mm("map_user: map '%s' <%s>\n", name, mailbuf); item = string_list_lookup(mailbuf, map); + if (item != NULL) { + me = (struct mailmap_entry *)item->util; + if (me->namemap.nr) { + /* The item has multiple items, so we'll look up on name too */ + /* If the name is not found, we choose the simple entry */ + struct string_list_item *subitem = string_list_lookup(name, &me->namemap); + if (subitem) + item = subitem; + } + } if (mailbuf != buf) free(mailbuf); if (item != NULL) { - const char *realname = (const char *)item->util; - strlcpy(name, realname, maxlen); + struct mailmap_info *mi = (struct mailmap_info *)item->util; + if (mi->name == NULL && (mi->email == NULL || maxlen_email == 0)) { + debug_mm("map_user: -- (no simple mapping)\n"); + return 0; + } + if (maxlen_email && mi->email) + strlcpy(email, mi->email, maxlen_email); + if (maxlen_name && mi->name) + strlcpy(name, mi->name, maxlen_name); + debug_mm("map_user: to '%s' <%s>\n", name, mi->email ? 
mi->email : ""); return 1; } + debug_mm("map_user: --\n"); return 0; } + +int map_email(struct string_list *map, const char *email, char *name, int maxlen) +{ + return map_user(map, (char *)email, 0, name, maxlen); +} diff --git a/mailmap.h b/mailmap.h index ba2ee7670c..4b2ca3a7de 100644 --- a/mailmap.h +++ b/mailmap.h @@ -2,6 +2,10 @@ #define MAILMAP_H int read_mailmap(struct string_list *map, char **repo_abbrev); +void clear_mailmap(struct string_list *map); + int map_email(struct string_list *mailmap, const char *email, char *name, int maxlen); +int map_user(struct string_list *mailmap, + char *email, int maxlen_email, char *name, int maxlen_name); #endif From d20d654fe8923a502527547b17fe284d15d6aec9 Mon Sep 17 00:00:00 2001 From: Marius Storm-Olsen Date: Sun, 8 Feb 2009 15:34:30 +0100 Subject: [PATCH 4/5] Change current mailmap usage to do matching on both name and email of author/committer. Signed-off-by: Marius Storm-Olsen Signed-off-by: Junio C Hamano --- Documentation/pretty-formats.txt | 2 + builtin-blame.c | 50 +++++++++------ builtin-shortlog.c | 22 +++++-- pretty.c | 57 +++++++++-------- t/t4203-mailmap.sh | 106 +++++++++++++++++++++++++++++++ 5 files changed, 186 insertions(+), 51 deletions(-) diff --git a/Documentation/pretty-formats.txt b/Documentation/pretty-formats.txt index 3d87d3edd5..28808b72e7 100644 --- a/Documentation/pretty-formats.txt +++ b/Documentation/pretty-formats.txt @@ -103,6 +103,7 @@ The placeholders are: - '%an': author name - '%aN': author name (respecting .mailmap) - '%ae': author email +- '%aE': author email (respecting .mailmap) - '%ad': author date (format respects --date= option) - '%aD': author date, RFC2822 style - '%ar': author date, relative @@ -111,6 +112,7 @@ The placeholders are: - '%cn': committer name - '%cN': committer name (respecting .mailmap) - '%ce': committer email +- '%cE': committer email (respecting .mailmap) - '%cd': committer date - '%cD': committer date, RFC2822 style - '%cr': committer date, relative 
diff --git a/builtin-blame.c b/builtin-blame.c index 19edcf33b1..f3be9fa9a5 100644 --- a/builtin-blame.c +++ b/builtin-blame.c @@ -1263,11 +1263,12 @@ struct commit_info * Parse author/committer line in the commit object buffer */ static void get_ac_line(const char *inbuf, const char *what, - int bufsz, char *person, const char **mail, + int person_len, char *person, + int mail_len, char *mail, unsigned long *time, const char **tz) { int len, tzlen, maillen; - char *tmp, *endp, *timepos; + char *tmp, *endp, *timepos, *mailpos; tmp = strstr(inbuf, what); if (!tmp) @@ -1278,10 +1279,11 @@ static void get_ac_line(const char *inbuf, const char *what, len = strlen(tmp); else len = endp - tmp; - if (bufsz <= len) { + if (person_len <= len) { error_out: /* Ugh */ - *mail = *tz = "(unknown)"; + *tz = "(unknown)"; + strcpy(mail, *tz); *time = 0; return; } @@ -1304,9 +1306,10 @@ static void get_ac_line(const char *inbuf, const char *what, *tmp = 0; while (*tmp != ' ') tmp--; - *mail = tmp + 1; + mailpos = tmp + 1; *tmp = 0; maillen = timepos - tmp; + memcpy(mail, mailpos, maillen); if (!mailmap.nr) return; @@ -1315,20 +1318,23 @@ static void get_ac_line(const char *inbuf, const char *what, * mailmap expansion may make the name longer. * make room by pushing stuff down. 
*/ - tmp = person + bufsz - (tzlen + 1); + tmp = person + person_len - (tzlen + 1); memmove(tmp, *tz, tzlen); tmp[tzlen] = 0; *tz = tmp; - tmp = tmp - (maillen + 1); - memmove(tmp, *mail, maillen); - tmp[maillen] = 0; - *mail = tmp; - /* - * Now, convert e-mail using mailmap + * Now, convert both name and e-mail using mailmap */ - map_email(&mailmap, tmp + 1, person, tmp-person-1); + if(map_user(&mailmap, mail+1, mail_len-1, person, tmp-person-1)) { + /* Add a trailing '>' to email, since map_user returns plain emails + Note: It already has '<', since we replace from mail+1 */ + mailpos = memchr(mail, '\0', mail_len); + if (mailpos && mailpos-mail < mail_len - 1) { + *mailpos = '>'; + *(mailpos+1) = '\0'; + } + } } static void get_commit_info(struct commit *commit, @@ -1337,8 +1343,10 @@ static void get_commit_info(struct commit *commit, { int len; char *tmp, *endp, *reencoded, *message; - static char author_buf[1024]; - static char committer_buf[1024]; + static char author_name[1024]; + static char author_mail[1024]; + static char committer_name[1024]; + static char committer_mail[1024]; static char summary_buf[1024]; /* @@ -1356,9 +1364,11 @@ static void get_commit_info(struct commit *commit, } reencoded = reencode_commit_message(commit, NULL); message = reencoded ? 
reencoded : commit->buffer; - ret->author = author_buf; + ret->author = author_name; + ret->author_mail = author_mail; get_ac_line(message, "\nauthor ", - sizeof(author_buf), author_buf, &ret->author_mail, + sizeof(author_name), author_name, + sizeof(author_mail), author_mail, &ret->author_time, &ret->author_tz); if (!detailed) { @@ -1366,9 +1376,11 @@ static void get_commit_info(struct commit *commit, return; } - ret->committer = committer_buf; + ret->committer = committer_name; + ret->committer_mail = committer_mail; get_ac_line(message, "\ncommitter ", - sizeof(committer_buf), committer_buf, &ret->committer_mail, + sizeof(committer_name), committer_name, + sizeof(committer_mail), committer_mail, &ret->committer_time, &ret->committer_tz); ret->summary = summary_buf; diff --git a/builtin-shortlog.c b/builtin-shortlog.c index 314b6bc5c7..badd912038 100644 --- a/builtin-shortlog.c +++ b/builtin-shortlog.c @@ -40,6 +40,7 @@ static void insert_one_record(struct shortlog *log, char *buffer, *p; struct string_list_item *item; char namebuf[1024]; + char emailbuf[1024]; size_t len; const char *eol; const char *boemail, *eoemail; @@ -51,7 +52,19 @@ static void insert_one_record(struct shortlog *log, eoemail = strchr(boemail, '>'); if (!eoemail) return; - if (!map_email(&log->mailmap, boemail+1, namebuf, sizeof(namebuf))) { + + /* copy author name to namebuf, to support matching on both name and email */ + memcpy(namebuf, author, boemail - author); + len = boemail - author; + while(len > 0 && isspace(namebuf[len-1])) + len--; + namebuf[len] = 0; + + /* copy email name to emailbuf, to allow email replacement as well */ + memcpy(emailbuf, boemail+1, eoemail - boemail); + emailbuf[eoemail - boemail - 1] = 0; + + if (!map_user(&log->mailmap, emailbuf, sizeof(emailbuf), namebuf, sizeof(namebuf))) { while (author < boemail && isspace(*author)) author++; for (len = 0; @@ -67,8 +80,8 @@ static void insert_one_record(struct shortlog *log, if (log->email) { size_t room = 
sizeof(namebuf) - len - 1; - int maillen = eoemail - boemail + 1; - snprintf(namebuf + len, room, " %.*s", maillen, boemail); + int maillen = strlen(emailbuf); + snprintf(namebuf + len, room, " <%.*s>", maillen, emailbuf); } item = string_list_insert(namebuf, &log->list); @@ -321,6 +334,5 @@ void shortlog_output(struct shortlog *log) log->list.strdup_strings = 1; string_list_clear(&log->list, 1); - log->mailmap.strdup_strings = 1; - string_list_clear(&log->mailmap, 1); + clear_mailmap(&log->mailmap); } diff --git a/pretty.c b/pretty.c index 9e03d6ae2d..29f81c3f44 100644 --- a/pretty.c +++ b/pretty.c @@ -305,23 +305,14 @@ static char *logmsg_reencode(const struct commit *commit, return out; } -static int mailmap_name(struct strbuf *sb, const char *email) +static int mailmap_name(char *email, int email_len, char *name, int name_len) { static struct string_list *mail_map; - char buffer[1024]; - if (!mail_map) { mail_map = xcalloc(1, sizeof(*mail_map)); read_mailmap(mail_map, NULL); } - - if (!mail_map->nr) - return -1; - - if (!map_email(mail_map, email, buffer, sizeof(buffer))) - return -1; - strbuf_addstr(sb, buffer); - return 0; + return mail_map->nr && map_user(mail_map, email, email_len, name, name_len); } static size_t format_person_part(struct strbuf *sb, char part, @@ -332,6 +323,9 @@ static size_t format_person_part(struct strbuf *sb, char part, int start, end, tz = 0; unsigned long date = 0; char *ep; + const char *name_start, *name_end, *mail_start, *mail_end, *msg_end = msg+len; + char person_name[1024]; + char person_mail[1024]; /* advance 'end' to point to email start delimiter */ for (end = 0; end < len && msg[end] != '<'; end++) @@ -345,25 +339,34 @@ static size_t format_person_part(struct strbuf *sb, char part, if (end >= len - 2) goto skip; + /* Seek for both name and email part */ + name_start = msg; + name_end = msg+end; + while (name_end > name_start && isspace(*(name_end-1))) + name_end--; + mail_start = msg+end+1; + mail_end = mail_start; + 
while (mail_end < msg_end && *mail_end != '>') + mail_end++; + if (mail_end == msg_end) + goto skip; + end = mail_end-msg; + + if (part == 'N' || part == 'E') { /* mailmap lookup */ + strlcpy(person_name, name_start, name_end-name_start+1); + strlcpy(person_mail, mail_start, mail_end-mail_start+1); + mailmap_name(person_mail, sizeof(person_mail), person_name, sizeof(person_name)); + name_start = person_name; + name_end = name_start + strlen(person_name); + mail_start = person_mail; + mail_end = mail_start + strlen(person_mail); + } if (part == 'n' || part == 'N') { /* name */ - while (end > 0 && isspace(msg[end - 1])) - end--; - if (part != 'N' || !msg[end] || !msg[end + 1] || - mailmap_name(sb, msg + end + 2) < 0) - strbuf_add(sb, msg, end); + strbuf_add(sb, name_start, name_end-name_start); return placeholder_len; } - start = ++end; /* save email start position */ - - /* advance 'end' to point to email end delimiter */ - for ( ; end < len && msg[end] != '>'; end++) - ; /* do nothing */ - - if (end >= len) - goto skip; - - if (part == 'e') { /* email */ - strbuf_add(sb, msg + start, end - start); + if (part == 'e' || part == 'E') { /* email */ + strbuf_add(sb, mail_start, mail_end-mail_start); return placeholder_len; } diff --git a/t/t4203-mailmap.sh b/t/t4203-mailmap.sh index fc50ac22e3..9a7d1b4466 100755 --- a/t/t4203-mailmap.sh +++ b/t/t4203-mailmap.sh @@ -106,4 +106,110 @@ test_expect_success 'No mailmap files, but configured' ' test_cmp expect actual ' +# Extended mailmap configurations should give us the following output for shortlog +cat >expect <<\EOF +A U Thor (1): + initial + +CTO (1): + seventh + +Other Author (2): + third + fourth + +Santa Claus (2): + fifth + sixth + +Some Dude (1): + second + +EOF + +test_expect_success 'Shortlog output (complex mapping)' ' + echo three >>one && + git add one && + test_tick && + git commit --author "nick2 " -m third && + + echo four >>one && + git add one && + test_tick && + git commit --author "nick2 " -m fourth && 
+ + echo five >>one && + git add one && + test_tick && + git commit --author "santa " -m fifth && + + echo six >>one && + git add one && + test_tick && + git commit --author "claus " -m sixth && + + echo seven >>one && + git add one && + test_tick && + git commit --author "CTO " -m seventh && + + mkdir internal_mailmap && + echo "Committed " > internal_mailmap/.mailmap && + echo " " >> internal_mailmap/.mailmap && + echo "Some Dude nick1 " >> internal_mailmap/.mailmap && + echo "Other Author nick2 " >> internal_mailmap/.mailmap && + echo "Other Author " >> internal_mailmap/.mailmap && + echo "Santa Claus " >> internal_mailmap/.mailmap && + echo "Santa Claus " >> internal_mailmap/.mailmap && + + git shortlog -e HEAD >actual && + test_cmp expect actual + +' + +# git log with --pretty format which uses the name and email mailmap placemarkers +cat >expect <<\EOF +Author CTO maps to CTO +Committer C O Mitter maps to Committed + +Author claus maps to Santa Claus +Committer C O Mitter maps to Committed + +Author santa maps to Santa Claus +Committer C O Mitter maps to Committed + +Author nick2 maps to Other Author +Committer C O Mitter maps to Committed + +Author nick2 maps to Other Author +Committer C O Mitter maps to Committed + +Author nick1 maps to Some Dude +Committer C O Mitter maps to Committed + +Author A U Thor maps to A U Thor +Committer C O Mitter maps to Committed +EOF + +test_expect_success 'Log output (complex mapping)' ' + git log --pretty=format:"Author %an <%ae> maps to %aN <%aE>%nCommitter %cn <%ce> maps to %cN <%cE>%n" >actual && + test_cmp expect actual +' + +# git blame +cat >expect <<\EOF +^3a2fdcb (A U Thor 2005-04-07 15:13:13 -0700 1) one +7de6f99b (Some Dude 2005-04-07 15:13:13 -0700 2) two +5815879d (Other Author 2005-04-07 15:14:13 -0700 3) three +ff859d96 (Other Author 2005-04-07 15:15:13 -0700 4) four +5ab6d4fa (Santa Claus 2005-04-07 15:16:13 -0700 5) five +38a42d8b (Santa Claus 2005-04-07 15:17:13 -0700 6) six +8ddc0386 (CTO 2005-04-07 
15:18:13 -0700 7) seven +EOF + +test_expect_success 'Blame output (complex mapping)' ' + git blame one >actual && + test_cmp expect actual +' + test_done From 7d48e9e6f77d336376c1a554eeff0590f77e1ee1 Mon Sep 17 00:00:00 2001 From: Marius Storm-Olsen Date: Sun, 8 Feb 2009 15:34:31 +0100 Subject: [PATCH 5/5] Move mailmap documentation into separate file Include it directly from git-shortlog.txt, and refer to it from pretty-format.txt. Signed-off-by: Marius Storm-Olsen Signed-off-by: Junio C Hamano --- Documentation/git-blame.txt | 6 +++ Documentation/git-shortlog.txt | 83 +++----------------------------- Documentation/mailmap.txt | 75 +++++++++++++++++++++++++++++ Documentation/pretty-formats.txt | 8 +-- 4 files changed, 91 insertions(+), 81 deletions(-) create mode 100644 Documentation/mailmap.txt diff --git a/Documentation/git-blame.txt b/Documentation/git-blame.txt index fba374d652..6999cf2a65 100644 --- a/Documentation/git-blame.txt +++ b/Documentation/git-blame.txt @@ -184,6 +184,12 @@ there is ever added information (like the commit encoding or extended commit commentary), a blame viewer won't ever care. +MAPPING AUTHORS +--------------- + +include::mailmap.txt[] + + SEE ALSO -------- linkgit:git-annotate[1] diff --git a/Documentation/git-shortlog.txt b/Documentation/git-shortlog.txt index a0eaab5250..42463a955d 100644 --- a/Documentation/git-shortlog.txt +++ b/Documentation/git-shortlog.txt @@ -45,86 +45,15 @@ OPTIONS and subsequent lines are indented by `indent2` spaces. `width`, `indent1`, and `indent2` default to 76, 6 and 9 respectively. -FILES ------ -If the file `.mailmap` exists at the toplevel of the repository, or at -the location pointed to by the mailmap.file configuration option, it -is used to map author and committer names and email addresses to -canonical real names and email addresses. -This mapping can be used to coalesce together commits by the same -person where their name and/or email address was spelled differently. 
+MAPPING AUTHORS +--------------- -In the simple form, each line in the file consists of the canonical -real name of an author, whitespace, and an email address used in the -commit (enclosed by '<' and '>') to map to the name. Thus, looks like -this --- - Proper Name --- +The `.mailmap` feature is used to coalesce together commits by the same +person in the shortlog, where their name and/or email address was +spelled differently. -The more complex forms are --- - --- -which allows mailmap to replace only the email part of a commit, and --- - Proper Name --- -which allows mailmap to replace both the name and the email of a -commit matching the specified commit email address, and --- - Proper Name Commit Name --- -which allows mailmap to replace both the name and the email of a -commit matching both the specified commit name and email address. - -Example 1: Your history contains commits by two authors, Jane -and Joe, whose names appear in the repository under several forms: - ------------- -Joe Developer -Joe R. Developer -Jane Doe -Jane Doe -Jane D. ------------- - -Now suppose that Joe wants his middle name initial used, and Jane -prefers her family name fully spelled out. A proper `.mailmap` file -would look like: - ------------- -Jane Doe -Joe R. Developer ------------- - -Note how we don't need an entry for , because the -real name of that author is correct already, and coalesced directly. - -Example 2: Your repository contains commits from the following -authors: - ------------- -nick1 -nick2 -nick2 -santa -claus -CTO ------------- - -Then, you might want a `.mailmap` file looking like: ------------- - -Some Dude nick1 -Other Author nick2 -Other Author -Santa Claus ------------- - -Use hash '#' for comments that are either on their own line, or after -the email address. 
+include::mailmap.txt[] Author diff --git a/Documentation/mailmap.txt b/Documentation/mailmap.txt new file mode 100644 index 0000000000..e25b154838 --- /dev/null +++ b/Documentation/mailmap.txt @@ -0,0 +1,75 @@ +If the file `.mailmap` exists at the toplevel of the repository, or at +the location pointed to by the mailmap.file configuration option, it +is used to map author and committer names and email addresses to +canonical real names and email addresses. + +In the simple form, each line in the file consists of the canonical +real name of an author, whitespace, and an email address used in the +commit (enclosed by '<' and '>') to map to the name. Thus, it looks like +this +-- + Proper Name <commit@email.xx> +-- + +The more complex forms are +-- + <proper@email.xx> <commit@email.xx> +-- +which allows mailmap to replace only the email part of a commit, and +-- + Proper Name <proper@email.xx> <commit@email.xx> +-- +which allows mailmap to replace both the name and the email of a +commit matching the specified commit email address, and +-- + Proper Name <proper@email.xx> Commit Name <commit@email.xx> +-- +which allows mailmap to replace both the name and the email of a +commit matching both the specified commit name and email address. + +Example 1: Your history contains commits by two authors, Jane +and Joe, whose names appear in the repository under several forms: + +------------ +Joe Developer <joe@example.com> +Joe R. Developer <joe@example.com> +Jane Doe <jane@example.com> +Jane Doe <jane@laptop.(none)> +Jane D. <jane@desktop.(none)> +------------ + +Now suppose that Joe wants his middle name initial used, and Jane +prefers her family name fully spelled out. A proper `.mailmap` file +would look like: + +------------ +Jane Doe <jane@desktop.(none)> +Joe R. Developer <joe@example.com> +------------ + +Note how we don't need an entry for <jane@laptop.(none)>, because the +real name of that author is correct already. 
+ +Example 2: Your repository contains commits from the following +authors: + +------------ +nick1 <bugs@company.xx> +nick2 <bugs@company.xx> +nick2 <nick2@company.xx> +santa <me@company.xx> +claus <me@company.xx> +CTO <cto@coompany.xx> +------------ + +Then, you might want a `.mailmap` file looking like: +------------ +<cto@company.xx> <cto@coompany.xx> +Some Dude <some@dude.xx> nick1 <bugs@company.xx> +Other Author <other@author.xx> nick2 <bugs@company.xx> +Other Author <other@author.xx> <nick2@company.xx> +Santa Claus <santa.claus@northpole.xx> <me@company.xx> +------------ + +Use hash '#' for comments that are either on their own line, or after +the email address. \ No newline at end of file diff --git a/Documentation/pretty-formats.txt b/Documentation/pretty-formats.txt index 28808b72e7..159390c35a 100644 --- a/Documentation/pretty-formats.txt +++ b/Documentation/pretty-formats.txt @@ -101,18 +101,18 @@ The placeholders are: - '%P': parent hashes - '%p': abbreviated parent hashes - '%an': author name -- '%aN': author name (respecting .mailmap) +- '%aN': author name (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) - '%ae': author email -- '%aE': author email (respecting .mailmap) +- '%aE': author email (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) - '%ad': author date (format respects --date= option) - '%aD': author date, RFC2822 style - '%ar': author date, relative - '%at': author date, UNIX timestamp - '%ai': author date, ISO 8601 format - '%cn': committer name -- '%cN': committer name (respecting .mailmap) +- '%cN': committer name (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) - '%ce': committer email -- '%cE': committer email (respecting .mailmap) +- '%cE': committer email (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) - '%cd': committer date - '%cD': committer date, RFC2822 style - '%cr': committer date, relative