From e69db0b323ff35ffb15e96b0ab492c0f6268f43c Mon Sep 17 00:00:00 2001
From: Thomas Gummerer <t.gummerer@gmail.com>
Date: Sat, 14 Jul 2018 22:44:33 +0100
Subject: [PATCH 01/11] rerere: unify error messages when read_cache fails

We have multiple different variants of the error message we show to
the user if 'read_cache' fails.  The "Could not read index" variant we
are using in 'rerere.c' is currently not used anywhere in translated
form.

As a subsequent commit will mark all output that comes from 'rerere.c'
for translation, make the life of the translators a little bit easier
by using a string that is used elsewhere, and marked for translation
there, and thus most likely already translated.

"index file corrupt" seems to be the most common error message we show
when 'read_cache' fails, so use that here as well.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 rerere.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rerere.c b/rerere.c
index e0862e2778..473d32a5cd 100644
--- a/rerere.c
+++ b/rerere.c
@@ -568,7 +568,7 @@ static int find_conflict(struct string_list *conflict)
 {
 	int i;
 	if (read_cache() < 0)
-		return error("Could not read index");
+		return error("index file corrupt");
 
 	for (i = 0; i < active_nr;) {
 		int conflict_type;
@@ -601,7 +601,7 @@ int rerere_remaining(struct string_list *merge_rr)
 	if (setup_rerere(merge_rr, RERERE_READONLY))
 		return 0;
 	if (read_cache() < 0)
-		return error("Could not read index");
+		return error("index file corrupt");
 
 	for (i = 0; i < active_nr;) {
 		int conflict_type;
@@ -1103,7 +1103,7 @@ int rerere_forget(struct pathspec *pathspec)
 	struct string_list merge_rr = STRING_LIST_INIT_DUP;
 
 	if (read_cache() < 0)
-		return error("Could not read index");
+		return error("index file corrupt");
 
 	fd = setup_rerere(&merge_rr, RERERE_NOAUTOUPDATE);
 	if (fd < 0)

From c5d1d13239ecd0d3ec93796a196f5ff0513a7c03 Mon Sep 17 00:00:00 2001
From: Thomas Gummerer <t.gummerer@gmail.com>
Date: Sat, 14 Jul 2018 22:44:34 +0100
Subject: [PATCH 02/11] rerere: lowercase error messages

Documentation/CodingGuidelines mentions that error messages should be
lowercase.  Prior to marking them for translation follow that pattern
in rerere as well, so translators won't have to translate messages
that don't conform to our guidelines.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 rerere.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/rerere.c b/rerere.c
index 473d32a5cd..c5d9ea171f 100644
--- a/rerere.c
+++ b/rerere.c
@@ -484,12 +484,12 @@ static int handle_file(const char *path, unsigned char *sha1, const char *output
 	io.input = fopen(path, "r");
 	io.io.wrerror = 0;
 	if (!io.input)
-		return error_errno("Could not open %s", path);
+		return error_errno("could not open %s", path);
 
 	if (output) {
 		io.io.output = fopen(output, "w");
 		if (!io.io.output) {
-			error_errno("Could not write %s", output);
+			error_errno("could not write %s", output);
 			fclose(io.input);
 			return -1;
 		}
@@ -499,15 +499,15 @@ static int handle_file(const char *path, unsigned char *sha1, const char *output
 
 	fclose(io.input);
 	if (io.io.wrerror)
-		error("There were errors while writing %s (%s)",
+		error("there were errors while writing %s (%s)",
 		      path, strerror(io.io.wrerror));
 	if (io.io.output && fclose(io.io.output))
-		io.io.wrerror = error_errno("Failed to flush %s", path);
+		io.io.wrerror = error_errno("failed to flush %s", path);
 
 	if (hunk_no < 0) {
 		if (output)
 			unlink_or_warn(output);
-		return error("Could not parse conflict hunks in %s", path);
+		return error("could not parse conflict hunks in %s", path);
 	}
 	if (io.io.wrerror)
 		return -1;
@@ -690,11 +690,11 @@ static int merge(const struct rerere_id *id, const char *path)
 	/* Update "path" with the resolution */
 	f = fopen(path, "w");
 	if (!f)
-		return error_errno("Could not open %s", path);
+		return error_errno("could not open %s", path);
 	if (fwrite(result.ptr, result.size, 1, f) != 1)
-		error_errno("Could not write %s", path);
+		error_errno("could not write %s", path);
 	if (fclose(f))
-		return error_errno("Writing %s failed", path);
+		return error_errno("writing %s failed", path);
 
 out:
 	free(cur.ptr);
@@ -720,7 +720,7 @@ static void update_paths(struct string_list *update)
 
 	if (write_locked_index(&the_index, &index_lock,
 			       COMMIT_LOCK | SKIP_IF_UNCHANGED))
-		die("Unable to write new index file");
+		die("unable to write new index file");
 }
 
 static void remove_variant(struct rerere_id *id)
@@ -878,7 +878,7 @@ static int is_rerere_enabled(void)
 		return rr_cache_exists;
 
 	if (!rr_cache_exists && mkdir_in_gitdir(git_path_rr_cache()))
-		die("Could not create directory %s", git_path_rr_cache());
+		die("could not create directory %s", git_path_rr_cache());
 	return 1;
 }
 
@@ -1031,7 +1031,7 @@ static int rerere_forget_one_path(const char *path, struct string_list *rr)
 	 */
 	ret = handle_cache(path, sha1, NULL);
 	if (ret < 1)
-		return error("Could not parse conflict hunks in '%s'", path);
+		return error("could not parse conflict hunks in '%s'", path);
 
 	/* Nuke the recorded resolution for the conflict */
 	id = new_rerere_id(sha1);
@@ -1049,7 +1049,7 @@ static int rerere_forget_one_path(const char *path, struct string_list *rr)
 		handle_cache(path, sha1, rerere_path(id, "thisimage"));
 		if (read_mmfile(&cur, rerere_path(id, "thisimage"))) {
 			free(cur.ptr);
-			error("Failed to update conflicted state in '%s'", path);
+			error("failed to update conflicted state in '%s'", path);
 			goto fail_exit;
 		}
 		cleanly_resolved = !try_merge(id, path, &cur, &result);

From 28fc9abd3faaadfb882f7cd4586a9e27b067a284 Mon Sep 17 00:00:00 2001
From: Thomas Gummerer <t.gummerer@gmail.com>
Date: Sat, 14 Jul 2018 22:44:35 +0100
Subject: [PATCH 03/11] rerere: wrap paths in output in sq

It looks like most paths in the output in the git codebase are wrapped
in single quotes.  Standardize on that in rerere as well.

Apart from being more consistent, this also makes some of the strings
match strings that are already translated in other parts of the
codebase, thus reducing the work for translators, when the strings are
marked for translation in a subsequent commit.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/rerere.c |  2 +-
 rerere.c         | 26 +++++++++++++-------------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/builtin/rerere.c b/builtin/rerere.c
index 0bc40298c2..e0c67c98e9 100644
--- a/builtin/rerere.c
+++ b/builtin/rerere.c
@@ -107,7 +107,7 @@ int cmd_rerere(int argc, const char **argv, const char *prefix)
 			const char *path = merge_rr.items[i].string;
 			const struct rerere_id *id = merge_rr.items[i].util;
 			if (diff_two(rerere_path(id, "preimage"), path, path, path))
-				die("unable to generate diff for %s", rerere_path(id, NULL));
+				die("unable to generate diff for '%s'", rerere_path(id, NULL));
 		}
 	} else
 		usage_with_options(rerere_usage, options);
diff --git a/rerere.c b/rerere.c
index c5d9ea171f..cde1f6e696 100644
--- a/rerere.c
+++ b/rerere.c
@@ -484,12 +484,12 @@ static int handle_file(const char *path, unsigned char *sha1, const char *output
 	io.input = fopen(path, "r");
 	io.io.wrerror = 0;
 	if (!io.input)
-		return error_errno("could not open %s", path);
+		return error_errno("could not open '%s'", path);
 
 	if (output) {
 		io.io.output = fopen(output, "w");
 		if (!io.io.output) {
-			error_errno("could not write %s", output);
+			error_errno("could not write '%s'", output);
 			fclose(io.input);
 			return -1;
 		}
@@ -499,15 +499,15 @@ static int handle_file(const char *path, unsigned char *sha1, const char *output
 
 	fclose(io.input);
 	if (io.io.wrerror)
-		error("there were errors while writing %s (%s)",
+		error("there were errors while writing '%s' (%s)",
 		      path, strerror(io.io.wrerror));
 	if (io.io.output && fclose(io.io.output))
-		io.io.wrerror = error_errno("failed to flush %s", path);
+		io.io.wrerror = error_errno("failed to flush '%s'", path);
 
 	if (hunk_no < 0) {
 		if (output)
 			unlink_or_warn(output);
-		return error("could not parse conflict hunks in %s", path);
+		return error("could not parse conflict hunks in '%s'", path);
 	}
 	if (io.io.wrerror)
 		return -1;
@@ -684,17 +684,17 @@ static int merge(const struct rerere_id *id, const char *path)
 	 * Mark that "postimage" was used to help gc.
 	 */
 	if (utime(rerere_path(id, "postimage"), NULL) < 0)
-		warning_errno("failed utime() on %s",
+		warning_errno("failed utime() on '%s'",
 			      rerere_path(id, "postimage"));
 
 	/* Update "path" with the resolution */
 	f = fopen(path, "w");
 	if (!f)
-		return error_errno("could not open %s", path);
+		return error_errno("could not open '%s'", path);
 	if (fwrite(result.ptr, result.size, 1, f) != 1)
-		error_errno("could not write %s", path);
+		error_errno("could not write '%s'", path);
 	if (fclose(f))
-		return error_errno("writing %s failed", path);
+		return error_errno("writing '%s' failed", path);
 
 out:
 	free(cur.ptr);
@@ -878,7 +878,7 @@ static int is_rerere_enabled(void)
 		return rr_cache_exists;
 
 	if (!rr_cache_exists && mkdir_in_gitdir(git_path_rr_cache()))
-		die("could not create directory %s", git_path_rr_cache());
+		die("could not create directory '%s'", git_path_rr_cache());
 	return 1;
 }
 
@@ -1067,9 +1067,9 @@ static int rerere_forget_one_path(const char *path, struct string_list *rr)
 	filename = rerere_path(id, "postimage");
 	if (unlink(filename)) {
 		if (errno == ENOENT)
-			error("no remembered resolution for %s", path);
+			error("no remembered resolution for '%s'", path);
 		else
-			error_errno("cannot unlink %s", filename);
+			error_errno("cannot unlink '%s'", filename);
 		goto fail_exit;
 	}
 
@@ -1088,7 +1088,7 @@ static int rerere_forget_one_path(const char *path, struct string_list *rr)
 	item = string_list_insert(rr, path);
 	free_rerere_id(item);
 	item->util = id;
-	fprintf(stderr, "Forgot resolution for %s\n", path);
+	fprintf(stderr, "Forgot resolution for '%s'\n", path);
 	return 0;
 
 fail_exit:

From 2373b6505966244ab3f5b630680169692d28f6f6 Mon Sep 17 00:00:00 2001
From: Thomas Gummerer <t.gummerer@gmail.com>
Date: Sun, 5 Aug 2018 18:20:30 +0100
Subject: [PATCH 04/11] rerere: mark strings for translation

'git rerere' is considered a porcelain command and as such its output
should be translated.  Its functionality is also only enabled through
a config setting, so scripts really shouldn't rely on the output
either way.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/rerere.c |  4 +--
 rerere.c         | 68 ++++++++++++++++++++++++------------------------
 2 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/builtin/rerere.c b/builtin/rerere.c
index e0c67c98e9..5ed941b91f 100644
--- a/builtin/rerere.c
+++ b/builtin/rerere.c
@@ -75,7 +75,7 @@ int cmd_rerere(int argc, const char **argv, const char *prefix)
 	if (!strcmp(argv[0], "forget")) {
 		struct pathspec pathspec;
 		if (argc < 2)
-			warning("'git rerere forget' without paths is deprecated");
+			warning(_("'git rerere forget' without paths is deprecated"));
 		parse_pathspec(&pathspec, 0, PATHSPEC_PREFER_CWD,
 			       prefix, argv + 1);
 		return rerere_forget(&pathspec);
@@ -107,7 +107,7 @@ int cmd_rerere(int argc, const char **argv, const char *prefix)
 			const char *path = merge_rr.items[i].string;
 			const struct rerere_id *id = merge_rr.items[i].util;
 			if (diff_two(rerere_path(id, "preimage"), path, path, path))
-				die("unable to generate diff for '%s'", rerere_path(id, NULL));
+				die(_("unable to generate diff for '%s'"), rerere_path(id, NULL));
 		}
 	} else
 		usage_with_options(rerere_usage, options);
diff --git a/rerere.c b/rerere.c
index cde1f6e696..be98c0afcb 100644
--- a/rerere.c
+++ b/rerere.c
@@ -212,7 +212,7 @@ static void read_rr(struct string_list *rr)
 
 		/* There has to be the hash, tab, path and then NUL */
 		if (buf.len < 42 || get_sha1_hex(buf.buf, sha1))
-			die("corrupt MERGE_RR");
+			die(_("corrupt MERGE_RR"));
 
 		if (buf.buf[40] != '.') {
 			variant = 0;
@@ -221,10 +221,10 @@ static void read_rr(struct string_list *rr)
 			errno = 0;
 			variant = strtol(buf.buf + 41, &path, 10);
 			if (errno)
-				die("corrupt MERGE_RR");
+				die(_("corrupt MERGE_RR"));
 		}
 		if (*(path++) != '\t')
-			die("corrupt MERGE_RR");
+			die(_("corrupt MERGE_RR"));
 		buf.buf[40] = '\0';
 		id = new_rerere_id_hex(buf.buf);
 		id->variant = variant;
@@ -259,12 +259,12 @@ static int write_rr(struct string_list *rr, int out_fd)
 				    rr->items[i].string, 0);
 
 		if (write_in_full(out_fd, buf.buf, buf.len) < 0)
-			die("unable to write rerere record");
+			die(_("unable to write rerere record"));
 
 		strbuf_release(&buf);
 	}
 	if (commit_lock_file(&write_lock) != 0)
-		die("unable to write rerere record");
+		die(_("unable to write rerere record"));
 	return 0;
 }
 
@@ -484,12 +484,12 @@ static int handle_file(const char *path, unsigned char *sha1, const char *output
 	io.input = fopen(path, "r");
 	io.io.wrerror = 0;
 	if (!io.input)
-		return error_errno("could not open '%s'", path);
+		return error_errno(_("could not open '%s'"), path);
 
 	if (output) {
 		io.io.output = fopen(output, "w");
 		if (!io.io.output) {
-			error_errno("could not write '%s'", output);
+			error_errno(_("could not write '%s'"), output);
 			fclose(io.input);
 			return -1;
 		}
@@ -499,15 +499,15 @@ static int handle_file(const char *path, unsigned char *sha1, const char *output
 
 	fclose(io.input);
 	if (io.io.wrerror)
-		error("there were errors while writing '%s' (%s)",
+		error(_("there were errors while writing '%s' (%s)"),
 		      path, strerror(io.io.wrerror));
 	if (io.io.output && fclose(io.io.output))
-		io.io.wrerror = error_errno("failed to flush '%s'", path);
+		io.io.wrerror = error_errno(_("failed to flush '%s'"), path);
 
 	if (hunk_no < 0) {
 		if (output)
 			unlink_or_warn(output);
-		return error("could not parse conflict hunks in '%s'", path);
+		return error(_("could not parse conflict hunks in '%s'"), path);
 	}
 	if (io.io.wrerror)
 		return -1;
@@ -568,7 +568,7 @@ static int find_conflict(struct string_list *conflict)
 {
 	int i;
 	if (read_cache() < 0)
-		return error("index file corrupt");
+		return error(_("index file corrupt"));
 
 	for (i = 0; i < active_nr;) {
 		int conflict_type;
@@ -601,7 +601,7 @@ int rerere_remaining(struct string_list *merge_rr)
 	if (setup_rerere(merge_rr, RERERE_READONLY))
 		return 0;
 	if (read_cache() < 0)
-		return error("index file corrupt");
+		return error(_("index file corrupt"));
 
 	for (i = 0; i < active_nr;) {
 		int conflict_type;
@@ -684,17 +684,17 @@ static int merge(const struct rerere_id *id, const char *path)
 	 * Mark that "postimage" was used to help gc.
 	 */
 	if (utime(rerere_path(id, "postimage"), NULL) < 0)
-		warning_errno("failed utime() on '%s'",
+		warning_errno(_("failed utime() on '%s'"),
 			      rerere_path(id, "postimage"));
 
 	/* Update "path" with the resolution */
 	f = fopen(path, "w");
 	if (!f)
-		return error_errno("could not open '%s'", path);
+		return error_errno(_("could not open '%s'"), path);
 	if (fwrite(result.ptr, result.size, 1, f) != 1)
-		error_errno("could not write '%s'", path);
+		error_errno(_("could not write '%s'"), path);
 	if (fclose(f))
-		return error_errno("writing '%s' failed", path);
+		return error_errno(_("writing '%s' failed"), path);
 
 out:
 	free(cur.ptr);
@@ -714,13 +714,13 @@ static void update_paths(struct string_list *update)
 		struct string_list_item *item = &update->items[i];
 		if (add_file_to_cache(item->string, 0))
 			exit(128);
-		fprintf(stderr, "Staged '%s' using previous resolution.\n",
+		fprintf_ln(stderr, _("Staged '%s' using previous resolution."),
 			item->string);
 	}
 
 	if (write_locked_index(&the_index, &index_lock,
 			       COMMIT_LOCK | SKIP_IF_UNCHANGED))
-		die("unable to write new index file");
+		die(_("unable to write new index file"));
 }
 
 static void remove_variant(struct rerere_id *id)
@@ -752,7 +752,7 @@ static void do_rerere_one_path(struct string_list_item *rr_item,
 		if (!handle_file(path, NULL, NULL)) {
 			copy_file(rerere_path(id, "postimage"), path, 0666);
 			id->collection->status[variant] |= RR_HAS_POSTIMAGE;
-			fprintf(stderr, "Recorded resolution for '%s'.\n", path);
+			fprintf_ln(stderr, _("Recorded resolution for '%s'."), path);
 			free_rerere_id(rr_item);
 			rr_item->util = NULL;
 			return;
@@ -786,9 +786,9 @@ static void do_rerere_one_path(struct string_list_item *rr_item,
 		if (rerere_autoupdate)
 			string_list_insert(update, path);
 		else
-			fprintf(stderr,
-				"Resolved '%s' using previous resolution.\n",
-				path);
+			fprintf_ln(stderr,
+				   _("Resolved '%s' using previous resolution."),
+				   path);
 		free_rerere_id(rr_item);
 		rr_item->util = NULL;
 		return;
@@ -802,11 +802,11 @@ static void do_rerere_one_path(struct string_list_item *rr_item,
 	if (id->collection->status[variant] & RR_HAS_POSTIMAGE) {
 		const char *path = rerere_path(id, "postimage");
 		if (unlink(path))
-			die_errno("cannot unlink stray '%s'", path);
+			die_errno(_("cannot unlink stray '%s'"), path);
 		id->collection->status[variant] &= ~RR_HAS_POSTIMAGE;
 	}
 	id->collection->status[variant] |= RR_HAS_PREIMAGE;
-	fprintf(stderr, "Recorded preimage for '%s'\n", path);
+	fprintf_ln(stderr, _("Recorded preimage for '%s'"), path);
 }
 
 static int do_plain_rerere(struct string_list *rr, int fd)
@@ -878,7 +878,7 @@ static int is_rerere_enabled(void)
 		return rr_cache_exists;
 
 	if (!rr_cache_exists && mkdir_in_gitdir(git_path_rr_cache()))
-		die("could not create directory '%s'", git_path_rr_cache());
+		die(_("could not create directory '%s'"), git_path_rr_cache());
 	return 1;
 }
 
@@ -1031,7 +1031,7 @@ static int rerere_forget_one_path(const char *path, struct string_list *rr)
 	 */
 	ret = handle_cache(path, sha1, NULL);
 	if (ret < 1)
-		return error("could not parse conflict hunks in '%s'", path);
+		return error(_("could not parse conflict hunks in '%s'"), path);
 
 	/* Nuke the recorded resolution for the conflict */
 	id = new_rerere_id(sha1);
@@ -1049,7 +1049,7 @@ static int rerere_forget_one_path(const char *path, struct string_list *rr)
 		handle_cache(path, sha1, rerere_path(id, "thisimage"));
 		if (read_mmfile(&cur, rerere_path(id, "thisimage"))) {
 			free(cur.ptr);
-			error("failed to update conflicted state in '%s'", path);
+			error(_("failed to update conflicted state in '%s'"), path);
 			goto fail_exit;
 		}
 		cleanly_resolved = !try_merge(id, path, &cur, &result);
@@ -1060,16 +1060,16 @@ static int rerere_forget_one_path(const char *path, struct string_list *rr)
 	}
 
 	if (id->collection->status_nr <= id->variant) {
-		error("no remembered resolution for '%s'", path);
+		error(_("no remembered resolution for '%s'"), path);
 		goto fail_exit;
 	}
 
 	filename = rerere_path(id, "postimage");
 	if (unlink(filename)) {
 		if (errno == ENOENT)
-			error("no remembered resolution for '%s'", path);
+			error(_("no remembered resolution for '%s'"), path);
 		else
-			error_errno("cannot unlink '%s'", filename);
+			error_errno(_("cannot unlink '%s'"), filename);
 		goto fail_exit;
 	}
 
@@ -1079,7 +1079,7 @@ static int rerere_forget_one_path(const char *path, struct string_list *rr)
 	 * the postimage.
 	 */
 	handle_cache(path, sha1, rerere_path(id, "preimage"));
-	fprintf(stderr, "Updated preimage for '%s'\n", path);
+	fprintf_ln(stderr, _("Updated preimage for '%s'"), path);
 
 	/*
 	 * And remember that we can record resolution for this
@@ -1088,7 +1088,7 @@ static int rerere_forget_one_path(const char *path, struct string_list *rr)
 	item = string_list_insert(rr, path);
 	free_rerere_id(item);
 	item->util = id;
-	fprintf(stderr, "Forgot resolution for '%s'\n", path);
+	fprintf(stderr, _("Forgot resolution for '%s'\n"), path);
 	return 0;
 
 fail_exit:
@@ -1103,7 +1103,7 @@ int rerere_forget(struct pathspec *pathspec)
 	struct string_list merge_rr = STRING_LIST_INIT_DUP;
 
 	if (read_cache() < 0)
-		return error("index file corrupt");
+		return error(_("index file corrupt"));
 
 	fd = setup_rerere(&merge_rr, RERERE_NOAUTOUPDATE);
 	if (fd < 0)
@@ -1191,7 +1191,7 @@ void rerere_gc(struct string_list *rr)
 	git_config(git_default_config, NULL);
 	dir = opendir(git_path("rr-cache"));
 	if (!dir)
-		die_errno("unable to open rr-cache directory");
+		die_errno(_("unable to open rr-cache directory"));
 	/* Collect stale conflict IDs ... */
 	while ((e = readdir(dir))) {
 		struct rerere_dir *rr_dir;

From fb90dca34c498ac1f922d579c202b4723e87455a Mon Sep 17 00:00:00 2001
From: Thomas Gummerer <t.gummerer@gmail.com>
Date: Sun, 5 Aug 2018 18:20:31 +0100
Subject: [PATCH 05/11] rerere: add documentation for conflict normalization

Add some documentation for the logic behind the conflict normalization
in rerere.

Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/technical/rerere.txt | 140 +++++++++++++++++++++++++++++
 rerere.c                           |   4 -
 2 files changed, 140 insertions(+), 4 deletions(-)
 create mode 100644 Documentation/technical/rerere.txt

diff --git a/Documentation/technical/rerere.txt b/Documentation/technical/rerere.txt
new file mode 100644
index 0000000000..3d10dbfa67
--- /dev/null
+++ b/Documentation/technical/rerere.txt
@@ -0,0 +1,140 @@
+Rerere
+======
+
+This document describes the rerere logic.
+
+Conflict normalization
+----------------------
+
+To ensure recorded conflict resolutions can be looked up in the rerere
+database, even when branches are merged in a different order,
+different branches are merged that result in the same conflict, or
+when different conflict style settings are used, rerere normalizes the
+conflicts before writing them to the rerere database.
+
+Different conflict styles and branch names are normalized by stripping
+the labels from the conflict markers, and removing the common ancestor
+version from the `diff3` conflict style. Branches that are merged
+in different order are normalized by sorting the conflict hunks.  More
+on each of those steps in the following sections.
+
+Once these two normalization operations are applied, a conflict ID is
+calculated based on the normalized conflict, which is later used by
+rerere to look up the conflict in the rerere database.
+
+Removing the common ancestor version
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Say we have three branches AB, AC and AC2.  The common ancestor of
+these branches has a file with a line containing the string "A" (for
+brevity this is called "line A" in the rest of the document).  In
+branch AB this line is changed to "B", in AC, this line is changed to
+"C", and branch AC2 is forked off of AC, after the line was changed to
+"C".
+
+Forking a branch ABAC off of branch AB and then merging AC into it, we
+get a conflict like the following:
+
+    <<<<<<< HEAD
+    B
+    =======
+    C
+    >>>>>>> AC
+
+Doing the analogous with AC2 (forking a branch ABAC2 off of branch AB
+and then merging branch AC2 into it), using the diff3 conflict style,
+we get a conflict like the following:
+
+    <<<<<<< HEAD
+    B
+    ||||||| merged common ancestors
+    A
+    =======
+    C
+    >>>>>>> AC2
+
+By resolving this conflict, to leave line D, the user declares:
+
+    After examining what branches AB and AC did, I believe that making
+    line A into line D is the best thing to do that is compatible with
+    what AB and AC wanted to do.
+
+As branch AC2 refers to the same commit as AC, the above implies that
+this is also compatible what AB and AC2 wanted to do.
+
+By extension, this means that rerere should recognize that the above
+conflicts are the same.  To do this, the labels on the conflict
+markers are stripped, and the common ancestor version is removed.  The above
+examples would both result in the following normalized conflict:
+
+    <<<<<<<
+    B
+    =======
+    C
+    >>>>>>>
+
+Sorting hunks
+~~~~~~~~~~~~~
+
+As before, lets imagine that a common ancestor had a file with line A
+its early part, and line X in its late part.  And then four branches
+are forked that do these things:
+
+    - AB: changes A to B
+    - AC: changes A to C
+    - XY: changes X to Y
+    - XZ: changes X to Z
+
+Now, forking a branch ABAC off of branch AB and then merging AC into
+it, and forking a branch ACAB off of branch AC and then merging AB
+into it, would yield the conflict in a different order.  The former
+would say "A became B or C, what now?" while the latter would say "A
+became C or B, what now?"
+
+As a reminder, the act of merging AC into ABAC and resolving the
+conflict to leave line D means that the user declares:
+
+    After examining what branches AB and AC did, I believe that
+    making line A into line D is the best thing to do that is
+    compatible with what AB and AC wanted to do.
+
+So the conflict we would see when merging AB into ACAB should be
+resolved the same way---it is the resolution that is in line with that
+declaration.
+
+Imagine that similarly previously a branch XYXZ was forked from XY,
+and XZ was merged into it, and resolved "X became Y or Z" into "X
+became W".
+
+Now, if a branch ABXY was forked from AB and then merged XY, then ABXY
+would have line B in its early part and line Y in its later part.
+Such a merge would be quite clean.  We can construct 4 combinations
+using these four branches ((AB, AC) x (XY, XZ)).
+
+Merging ABXY and ACXZ would make "an early A became B or C, a late X
+became Y or Z" conflict, while merging ACXY and ABXZ would make "an
+early A became C or B, a late X became Y or Z".  We can see there are
+4 combinations of ("B or C", "C or B") x ("X or Y", "Y or X").
+
+By sorting, the conflict is given its canonical name, namely, "an
+early part became B or C, a late part becames X or Y", and whenever
+any of these four patterns appear, and we can get to the same conflict
+and resolution that we saw earlier.
+
+Without the sorting, we'd have to somehow find a previous resolution
+from combinatorial explosion.
+
+Conflict ID calculation
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Once the conflict normalization is done, the conflict ID is calculated
+as the sha1 hash of the conflict hunks appended to each other,
+separated by <NUL> characters.  The conflict markers are stripped out
+before the sha1 is calculated.  So in the example above, where we
+merge branch AC which changes line A to line C, into branch AB, which
+changes line A to line C, the conflict ID would be
+SHA1('B<NUL>C<NUL>').
+
+If there are multiple conflicts in one file, the sha1 is calculated
+the same way with all hunks appended to each other, in the order in
+which they appear in the file, separated by a <NUL> character.
diff --git a/rerere.c b/rerere.c
index be98c0afcb..da1ab54027 100644
--- a/rerere.c
+++ b/rerere.c
@@ -394,10 +394,6 @@ static int is_cmarker(char *buf, int marker_char, int marker_size)
  * and NUL concatenated together.
  *
  * Return the number of conflict hunks found.
- *
- * NEEDSWORK: the logic and theory of operation behind this conflict
- * normalization may deserve to be documented somewhere, perhaps in
- * Documentation/technical/rerere.txt.
  */
 static int handle_path(unsigned char *sha1, struct rerere_io *io, int marker_size)
 {

From 93406a282f404fd9f736e96ae27cc6e2e5eb3cf1 Mon Sep 17 00:00:00 2001
From: Thomas Gummerer <t.gummerer@gmail.com>
Date: Sun, 5 Aug 2018 18:20:32 +0100
Subject: [PATCH 06/11] rerere: fix crash with files rerere can't handle

Currently when a user does a conflict resolution and ends it (in any
way that calls 'git rerere' again) with a file 'rerere' can't handle,
subsequent rerere operations that are interested in that path, such as
'rerere clear' or 'rerere forget <path>' will fail, or even worse in
the case of 'rerere clear' segfault.

Such states include nested conflicts, or a conflict marker that
doesn't have any match.

This is because 'git rerere' calculates a conflict file and writes it
to the MERGE_RR file.  When the user then changes the file in any way
rerere can't handle, and then calls 'git rerere' on it again to record
the conflict resolution, the handle_file function fails, and removes
the 'preimage' file in the rr-cache in the process, while leaving the
ID in the MERGE_RR file.

Now when 'rerere clear' is run, it reads the ID from the MERGE_RR
file, however the 'fit_variant' function for the ID is never called as
the 'preimage' file does not exist anymore.  This means
'collection->status' in 'has_rerere_resolution' is NULL, and the
command will crash.

To fix this, remove the rerere ID from the MERGE_RR file in the case
when we can't handle it, just after the 'preimage' file was removed
and remove the corresponding variant from .git/rr-cache/.  Removing it
unconditionally is fine here, because if the user would have resolved
the conflict and ran rerere, the entry would no longer be in the
MERGE_RR file, so we wouldn't have this problem in the first place,
while if the conflict was not resolved.

Currently there is nothing left in this folder, as the 'preimage'
was already deleted by the 'handle_file' function, so 'remove_variant'
is a no-op.  Still call the function, to make sure we clean everything
up, in case we add some other files corresponding to a variant in the
future.

Note that other variants that have the same conflict ID will not be
touched.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 rerere.c          | 12 +++++++-----
 t/t4200-rerere.sh | 21 +++++++++++++++++++++
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/rerere.c b/rerere.c
index da1ab54027..895ad80c0c 100644
--- a/rerere.c
+++ b/rerere.c
@@ -823,10 +823,7 @@ static int do_plain_rerere(struct string_list *rr, int fd)
 		struct rerere_id *id;
 		unsigned char sha1[20];
 		const char *path = conflict.items[i].string;
-		int ret;
-
-		if (string_list_has_string(rr, path))
-			continue;
+		int ret, has_string;
 
 		/*
 		 * Ask handle_file() to scan and assign a
@@ -834,7 +831,12 @@ static int do_plain_rerere(struct string_list *rr, int fd)
 		 * yet.
 		 */
 		ret = handle_file(path, sha1, NULL);
-		if (ret < 1)
+		has_string = string_list_has_string(rr, path);
+		if (ret < 0 && has_string) {
+			remove_variant(string_list_lookup(rr, path)->util);
+			string_list_remove(rr, path, 1);
+		}
+		if (ret < 1 || has_string)
 			continue;
 
 		id = new_rerere_id(sha1);
diff --git a/t/t4200-rerere.sh b/t/t4200-rerere.sh
index 8417e5a4b1..23f9c0ca45 100755
--- a/t/t4200-rerere.sh
+++ b/t/t4200-rerere.sh
@@ -580,4 +580,25 @@ test_expect_success 'multiple identical conflicts' '
 	count_pre_post 0 0
 '
 
+test_expect_success 'rerere with unexpected conflict markers does not crash' '
+	git reset --hard &&
+
+	git checkout -b branch-1 master &&
+	echo "bar" >test &&
+	git add test &&
+	git commit -q -m two &&
+
+	git reset --hard &&
+	git checkout -b branch-2 master &&
+	echo "foo" >test &&
+	git add test &&
+	git commit -q -a -m one &&
+
+	test_must_fail git merge branch-1 &&
+	echo "<<<<<<< a" >test &&
+	git rerere &&
+
+	git rerere clear
+'
+
 test_done

From 221444f5d11a8fac84514447c2902430e237759c Mon Sep 17 00:00:00 2001
From: Thomas Gummerer <t.gummerer@gmail.com>
Date: Sun, 5 Aug 2018 18:20:33 +0100
Subject: [PATCH 07/11] rerere: only return whether a path has conflicts or not

We currently return the exact number of conflict hunks a certain path
has from the 'handle_paths' function.  However all of its callers only
care whether there are conflicts or not or if there is an error.
Return only that information, and document that only that information
is returned.  This will simplify the code in the subsequent steps.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 rerere.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/rerere.c b/rerere.c
index 895ad80c0c..bf803043e2 100644
--- a/rerere.c
+++ b/rerere.c
@@ -393,12 +393,13 @@ static int is_cmarker(char *buf, int marker_char, int marker_size)
  * one side of the conflict, NUL, the other side of the conflict,
  * and NUL concatenated together.
  *
- * Return the number of conflict hunks found.
+ * Return 1 if conflict hunks are found, 0 if there are no conflict
+ * hunks and -1 if an error occured.
  */
 static int handle_path(unsigned char *sha1, struct rerere_io *io, int marker_size)
 {
 	git_SHA_CTX ctx;
-	int hunk_no = 0;
+	int has_conflicts = 0;
 	enum {
 		RR_CONTEXT = 0, RR_SIDE_1, RR_SIDE_2, RR_ORIGINAL
 	} hunk = RR_CONTEXT;
@@ -426,7 +427,7 @@ static int handle_path(unsigned char *sha1, struct rerere_io *io, int marker_siz
 				goto bad;
 			if (strbuf_cmp(&one, &two) > 0)
 				strbuf_swap(&one, &two);
-			hunk_no++;
+			has_conflicts = 1;
 			hunk = RR_CONTEXT;
 			rerere_io_putconflict('<', marker_size, io);
 			rerere_io_putmem(one.buf, one.len, io);
@@ -462,7 +463,7 @@ static int handle_path(unsigned char *sha1, struct rerere_io *io, int marker_siz
 		git_SHA1_Final(sha1, &ctx);
 	if (hunk != RR_CONTEXT)
 		return -1;
-	return hunk_no;
+	return has_conflicts;
 }
 
 /*
@@ -471,7 +472,7 @@ static int handle_path(unsigned char *sha1, struct rerere_io *io, int marker_siz
  */
 static int handle_file(const char *path, unsigned char *sha1, const char *output)
 {
-	int hunk_no = 0;
+	int has_conflicts = 0;
 	struct rerere_io_file io;
 	int marker_size = ll_merge_marker_size(path);
 
@@ -491,7 +492,7 @@ static int handle_file(const char *path, unsigned char *sha1, const char *output
 		}
 	}
 
-	hunk_no = handle_path(sha1, (struct rerere_io *)&io, marker_size);
+	has_conflicts = handle_path(sha1, (struct rerere_io *)&io, marker_size);
 
 	fclose(io.input);
 	if (io.io.wrerror)
@@ -500,14 +501,14 @@ static int handle_file(const char *path, unsigned char *sha1, const char *output
 	if (io.io.output && fclose(io.io.output))
 		io.io.wrerror = error_errno(_("failed to flush '%s'"), path);
 
-	if (hunk_no < 0) {
+	if (has_conflicts < 0) {
 		if (output)
 			unlink_or_warn(output);
 		return error(_("could not parse conflict hunks in '%s'"), path);
 	}
 	if (io.io.wrerror)
 		return -1;
-	return hunk_no;
+	return has_conflicts;
 }
 
 /*
@@ -954,7 +955,7 @@ static int handle_cache(const char *path, unsigned char *sha1, const char *outpu
 	mmfile_t mmfile[3] = {{NULL}};
 	mmbuffer_t result = {NULL, 0};
 	const struct cache_entry *ce;
-	int pos, len, i, hunk_no;
+	int pos, len, i, has_conflicts;
 	struct rerere_io_mem io;
 	int marker_size = ll_merge_marker_size(path);
 
@@ -1008,11 +1009,11 @@ static int handle_cache(const char *path, unsigned char *sha1, const char *outpu
 	 * Grab the conflict ID and optionally write the original
 	 * contents with conflict markers out.
 	 */
-	hunk_no = handle_path(sha1, (struct rerere_io *)&io, marker_size);
+	has_conflicts = handle_path(sha1, (struct rerere_io *)&io, marker_size);
 	strbuf_release(&io.input);
 	if (io.io.output)
 		fclose(io.io.output);
-	return hunk_no;
+	return has_conflicts;
 }
 
 static int rerere_forget_one_path(const char *path, struct string_list *rr)

From c0f16f8e14432b9b6a1b21885080eab6388d887e Mon Sep 17 00:00:00 2001
From: Thomas Gummerer <t.gummerer@gmail.com>
Date: Sun, 5 Aug 2018 18:20:34 +0100
Subject: [PATCH 08/11] rerere: factor out handle_conflict function

Factor out the handle_conflict function, which handles a single
conflict in a path.  This is in preparation for a subsequent commit,
where this function will be re-used.

Note that this does change the behaviour of 'git rerere' slightly.
Where previously we'd consider all files where an unmatched conflict
marker is found as invalid, we now only consider files invalid when
the "ours" conflict marker ("<<<<<<< <text>") is unmatched, not when
other conflict markers (e.g. "=======") is unmatched.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 rerere.c | 111 +++++++++++++++++++++++++++++--------------------------
 1 file changed, 59 insertions(+), 52 deletions(-)

diff --git a/rerere.c b/rerere.c
index bf803043e2..2d62251943 100644
--- a/rerere.c
+++ b/rerere.c
@@ -384,6 +384,58 @@ static int is_cmarker(char *buf, int marker_char, int marker_size)
 	return isspace(*buf);
 }
 
+static int handle_conflict(struct rerere_io *io, int marker_size, git_SHA_CTX *ctx)
+{
+	enum {
+		RR_SIDE_1 = 0, RR_SIDE_2, RR_ORIGINAL
+	} hunk = RR_SIDE_1;
+	struct strbuf one = STRBUF_INIT, two = STRBUF_INIT;
+	struct strbuf buf = STRBUF_INIT;
+	int has_conflicts = -1;
+
+	while (!io->getline(&buf, io)) {
+		if (is_cmarker(buf.buf, '<', marker_size)) {
+			break;
+		} else if (is_cmarker(buf.buf, '|', marker_size)) {
+			if (hunk != RR_SIDE_1)
+				break;
+			hunk = RR_ORIGINAL;
+		} else if (is_cmarker(buf.buf, '=', marker_size)) {
+			if (hunk != RR_SIDE_1 && hunk != RR_ORIGINAL)
+				break;
+			hunk = RR_SIDE_2;
+		} else if (is_cmarker(buf.buf, '>', marker_size)) {
+			if (hunk != RR_SIDE_2)
+				break;
+			if (strbuf_cmp(&one, &two) > 0)
+				strbuf_swap(&one, &two);
+			has_conflicts = 1;
+			rerere_io_putconflict('<', marker_size, io);
+			rerere_io_putmem(one.buf, one.len, io);
+			rerere_io_putconflict('=', marker_size, io);
+			rerere_io_putmem(two.buf, two.len, io);
+			rerere_io_putconflict('>', marker_size, io);
+			if (ctx) {
+				git_SHA1_Update(ctx, one.buf ? one.buf : "",
+					    one.len + 1);
+				git_SHA1_Update(ctx, two.buf ? two.buf : "",
+					    two.len + 1);
+			}
+			break;
+		} else if (hunk == RR_SIDE_1)
+			strbuf_addbuf(&one, &buf);
+		else if (hunk == RR_ORIGINAL)
+			; /* discard */
+		else if (hunk == RR_SIDE_2)
+			strbuf_addbuf(&two, &buf);
+	}
+	strbuf_release(&one);
+	strbuf_release(&two);
+	strbuf_release(&buf);
+
+	return has_conflicts;
+}
+
 /*
  * Read contents a file with conflicts, normalize the conflicts
  * by (1) discarding the common ancestor version in diff3-style,
@@ -399,70 +451,25 @@ static int is_cmarker(char *buf, int marker_char, int marker_size)
 static int handle_path(unsigned char *sha1, struct rerere_io *io, int marker_size)
 {
 	git_SHA_CTX ctx;
-	int has_conflicts = 0;
-	enum {
-		RR_CONTEXT = 0, RR_SIDE_1, RR_SIDE_2, RR_ORIGINAL
-	} hunk = RR_CONTEXT;
-	struct strbuf one = STRBUF_INIT, two = STRBUF_INIT;
 	struct strbuf buf = STRBUF_INIT;
-
+	int has_conflicts = 0;
 	if (sha1)
 		git_SHA1_Init(&ctx);
 
 	while (!io->getline(&buf, io)) {
 		if (is_cmarker(buf.buf, '<', marker_size)) {
-			if (hunk != RR_CONTEXT)
-				goto bad;
-			hunk = RR_SIDE_1;
-		} else if (is_cmarker(buf.buf, '|', marker_size)) {
-			if (hunk != RR_SIDE_1)
-				goto bad;
-			hunk = RR_ORIGINAL;
-		} else if (is_cmarker(buf.buf, '=', marker_size)) {
-			if (hunk != RR_SIDE_1 && hunk != RR_ORIGINAL)
-				goto bad;
-			hunk = RR_SIDE_2;
-		} else if (is_cmarker(buf.buf, '>', marker_size)) {
-			if (hunk != RR_SIDE_2)
-				goto bad;
-			if (strbuf_cmp(&one, &two) > 0)
-				strbuf_swap(&one, &two);
-			has_conflicts = 1;
-			hunk = RR_CONTEXT;
-			rerere_io_putconflict('<', marker_size, io);
-			rerere_io_putmem(one.buf, one.len, io);
-			rerere_io_putconflict('=', marker_size, io);
-			rerere_io_putmem(two.buf, two.len, io);
-			rerere_io_putconflict('>', marker_size, io);
-			if (sha1) {
-				git_SHA1_Update(&ctx, one.buf ? one.buf : "",
-					    one.len + 1);
-				git_SHA1_Update(&ctx, two.buf ? two.buf : "",
-					    two.len + 1);
-			}
-			strbuf_reset(&one);
-			strbuf_reset(&two);
-		} else if (hunk == RR_SIDE_1)
-			strbuf_addbuf(&one, &buf);
-		else if (hunk == RR_ORIGINAL)
-			; /* discard */
-		else if (hunk == RR_SIDE_2)
-			strbuf_addbuf(&two, &buf);
-		else
+			has_conflicts = handle_conflict(io, marker_size,
+							sha1 ? &ctx : NULL);
+			if (has_conflicts < 0)
+				break;
+		} else
 			rerere_io_putstr(buf.buf, io);
-		continue;
-	bad:
-		hunk = 99; /* force error exit */
-		break;
 	}
-	strbuf_release(&one);
-	strbuf_release(&two);
 	strbuf_release(&buf);
 
 	if (sha1)
 		git_SHA1_Final(sha1, &ctx);
-	if (hunk != RR_CONTEXT)
-		return -1;
+
 	return has_conflicts;
 }
 

From 5ebbdad3447ccaaf5af18a5f4f8f5065bd9fff3d Mon Sep 17 00:00:00 2001
From: Thomas Gummerer <t.gummerer@gmail.com>
Date: Sun, 5 Aug 2018 18:20:35 +0100
Subject: [PATCH 09/11] rerere: return strbuf from handle path

Currently we write the conflict to disk directly in the handle_path
function.  To make it re-usable for nested conflicts, instead of
writing the conflict out directly, store it in a strbuf and let the
caller write it out.

This does mean some slight increase in memory usage, however that
increase is limited to the size of the largest conflict we've
currently processed.  We already keep one copy of the conflict in
memory, and it shouldn't be too large, so the increase in memory usage
seems acceptable.

As a bonus this lets us get replace the rerere_io_putconflict function
with a trivial two line function.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 rerere.c | 58 ++++++++++++++++++--------------------------------------
 1 file changed, 18 insertions(+), 40 deletions(-)

diff --git a/rerere.c b/rerere.c
index 2d62251943..a35b88916c 100644
--- a/rerere.c
+++ b/rerere.c
@@ -302,38 +302,6 @@ static void rerere_io_putstr(const char *str, struct rerere_io *io)
 		ferr_puts(str, io->output, &io->wrerror);
 }
 
-/*
- * Write a conflict marker to io->output (if defined).
- */
-static void rerere_io_putconflict(int ch, int size, struct rerere_io *io)
-{
-	char buf[64];
-
-	while (size) {
-		if (size <= sizeof(buf) - 2) {
-			memset(buf, ch, size);
-			buf[size] = '\n';
-			buf[size + 1] = '\0';
-			size = 0;
-		} else {
-			int sz = sizeof(buf) - 1;
-
-			/*
-			 * Make sure we will not write everything out
-			 * in this round by leaving at least 1 byte
-			 * for the next round, giving the next round
-			 * a chance to add the terminating LF.  Yuck.
-			 */
-			if (size <= sz)
-				sz -= (sz - size) + 1;
-			memset(buf, ch, sz);
-			buf[sz] = '\0';
-			size -= sz;
-		}
-		rerere_io_putstr(buf, io);
-	}
-}
-
 static void rerere_io_putmem(const char *mem, size_t sz, struct rerere_io *io)
 {
 	if (io->output)
@@ -384,7 +352,14 @@ static int is_cmarker(char *buf, int marker_char, int marker_size)
 	return isspace(*buf);
 }
 
-static int handle_conflict(struct rerere_io *io, int marker_size, git_SHA_CTX *ctx)
+static void rerere_strbuf_putconflict(struct strbuf *buf, int ch, size_t size)
+{
+	strbuf_addchars(buf, ch, size);
+	strbuf_addch(buf, '\n');
+}
+
+static int handle_conflict(struct strbuf *out, struct rerere_io *io,
+			   int marker_size, git_SHA_CTX *ctx)
 {
 	enum {
 		RR_SIDE_1 = 0, RR_SIDE_2, RR_ORIGINAL
@@ -410,11 +385,11 @@ static int handle_conflict(struct rerere_io *io, int marker_size, git_SHA_CTX *c
 			if (strbuf_cmp(&one, &two) > 0)
 				strbuf_swap(&one, &two);
 			has_conflicts = 1;
-			rerere_io_putconflict('<', marker_size, io);
-			rerere_io_putmem(one.buf, one.len, io);
-			rerere_io_putconflict('=', marker_size, io);
-			rerere_io_putmem(two.buf, two.len, io);
-			rerere_io_putconflict('>', marker_size, io);
+			rerere_strbuf_putconflict(out, '<', marker_size);
+			strbuf_addbuf(out, &one);
+			rerere_strbuf_putconflict(out, '=', marker_size);
+			strbuf_addbuf(out, &two);
+			rerere_strbuf_putconflict(out, '>', marker_size);
 			if (ctx) {
 				git_SHA1_Update(ctx, one.buf ? one.buf : "",
 					    one.len + 1);
@@ -451,21 +426,24 @@ static int handle_conflict(struct rerere_io *io, int marker_size, git_SHA_CTX *c
 static int handle_path(unsigned char *sha1, struct rerere_io *io, int marker_size)
 {
 	git_SHA_CTX ctx;
-	struct strbuf buf = STRBUF_INIT;
+	struct strbuf buf = STRBUF_INIT, out = STRBUF_INIT;
 	int has_conflicts = 0;
 	if (sha1)
 		git_SHA1_Init(&ctx);
 
 	while (!io->getline(&buf, io)) {
 		if (is_cmarker(buf.buf, '<', marker_size)) {
-			has_conflicts = handle_conflict(io, marker_size,
+			has_conflicts = handle_conflict(&out, io, marker_size,
 							sha1 ? &ctx : NULL);
 			if (has_conflicts < 0)
 				break;
+			rerere_io_putmem(out.buf, out.len, io);
+			strbuf_reset(&out);
 		} else
 			rerere_io_putstr(buf.buf, io);
 	}
 	strbuf_release(&buf);
+	strbuf_release(&out);
 
 	if (sha1)
 		git_SHA1_Final(sha1, &ctx);

From 4af32207bc106412a456791037fc4a402101fbb4 Mon Sep 17 00:00:00 2001
From: Thomas Gummerer <t.gummerer@gmail.com>
Date: Sun, 5 Aug 2018 18:20:36 +0100
Subject: [PATCH 10/11] rerere: teach rerere to handle nested conflicts

Currently rerere can't handle nested conflicts and will error out when
it encounters such conflicts.  Do that by recursively calling the
'handle_conflict' function to normalize the conflict.

Note that a conflict like this would only be produced if a user
commits a file with conflict markers, and gets a conflict including
that in a susbsequent operation.

The conflict ID calculation here deserves some explanation:

As we are using the same handle_conflict function, the nested conflict
is normalized the same way as for non-nested conflicts, which means
the ancestor in the diff3 case is stripped out, and the parts of the
conflict are ordered alphabetically.

The conflict ID is however is only calculated in the top level
handle_conflict call, so it will include the markers that 'rerere'
adds to the output.  e.g. say there's the following conflict:

    <<<<<<< HEAD
    1
    =======
    <<<<<<< HEAD
    3
    =======
    2
    >>>>>>> branch-2
    >>>>>>> branch-3~

it would be recorde as follows in the preimage:

    <<<<<<<
    1
    =======
    <<<<<<<
    2
    =======
    3
    >>>>>>>
    >>>>>>>

and the conflict ID would be calculated as

    sha1(1<NUL><<<<<<<
    2
    =======
    3
    >>>>>>><NUL>)

Stripping out vs. leaving the conflict markers in place in the inner
conflict should have no practical impact, but it simplifies the
implementation.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/technical/rerere.txt | 42 ++++++++++++++++++++++++++++++
 rerere.c                           | 10 +++++--
 t/t4200-rerere.sh                  | 37 ++++++++++++++++++++++++++
 3 files changed, 87 insertions(+), 2 deletions(-)

diff --git a/Documentation/technical/rerere.txt b/Documentation/technical/rerere.txt
index 3d10dbfa67..e65ba9b0c6 100644
--- a/Documentation/technical/rerere.txt
+++ b/Documentation/technical/rerere.txt
@@ -138,3 +138,45 @@ SHA1('B<NUL>C<NUL>').
 If there are multiple conflicts in one file, the sha1 is calculated
 the same way with all hunks appended to each other, in the order in
 which they appear in the file, separated by a <NUL> character.
+
+Nested conflicts
+~~~~~~~~~~~~~~~~
+
+Nested conflicts are handled very similarly to "simple" conflicts.
+Similar to simple conflicts, the conflict is first normalized by
+stripping the labels from conflict markers, stripping the common ancestor
+version, and the sorting the conflict hunks, both for the outer and the
+inner conflict.  This is done recursively, so any number of nested
+conflicts can be handled.
+
+The only difference is in how the conflict ID is calculated.  For the
+inner conflict, the conflict markers themselves are not stripped out
+before calculating the sha1.
+
+Say we have the following conflict for example:
+
+    <<<<<<< HEAD
+    1
+    =======
+    <<<<<<< HEAD
+    3
+    =======
+    2
+    >>>>>>> branch-2
+    >>>>>>> branch-3~
+
+After stripping out the labels of the conflict markers, and sorting
+the hunks, the conflict would look as follows:
+
+    <<<<<<<
+    1
+    =======
+    <<<<<<<
+    2
+    =======
+    3
+    >>>>>>>
+    >>>>>>>
+
+and finally the conflict ID would be calculated as:
+`sha1('1<NUL><<<<<<<\n3\n=======\n2\n>>>>>>><NUL>')`
diff --git a/rerere.c b/rerere.c
index a35b88916c..f78bef80b1 100644
--- a/rerere.c
+++ b/rerere.c
@@ -365,12 +365,18 @@ static int handle_conflict(struct strbuf *out, struct rerere_io *io,
 		RR_SIDE_1 = 0, RR_SIDE_2, RR_ORIGINAL
 	} hunk = RR_SIDE_1;
 	struct strbuf one = STRBUF_INIT, two = STRBUF_INIT;
-	struct strbuf buf = STRBUF_INIT;
+	struct strbuf buf = STRBUF_INIT, conflict = STRBUF_INIT;
 	int has_conflicts = -1;
 
 	while (!io->getline(&buf, io)) {
 		if (is_cmarker(buf.buf, '<', marker_size)) {
-			break;
+			if (handle_conflict(&conflict, io, marker_size, NULL) < 0)
+				break;
+			if (hunk == RR_SIDE_1)
+				strbuf_addbuf(&one, &conflict);
+			else
+				strbuf_addbuf(&two, &conflict);
+			strbuf_release(&conflict);
 		} else if (is_cmarker(buf.buf, '|', marker_size)) {
 			if (hunk != RR_SIDE_1)
 				break;
diff --git a/t/t4200-rerere.sh b/t/t4200-rerere.sh
index 23f9c0ca45..afaf085e42 100755
--- a/t/t4200-rerere.sh
+++ b/t/t4200-rerere.sh
@@ -601,4 +601,41 @@ test_expect_success 'rerere with unexpected conflict markers does not crash' '
 	git rerere clear
 '
 
+test_expect_success 'rerere with inner conflict markers' '
+	git reset --hard &&
+
+	git checkout -b A master &&
+	echo "bar" >test &&
+	git add test &&
+	git commit -q -m two &&
+	echo "baz" >test &&
+	git add test &&
+	git commit -q -m three &&
+
+	git reset --hard &&
+	git checkout -b B master &&
+	echo "foo" >test &&
+	git add test &&
+	git commit -q -a -m one &&
+
+	test_must_fail git merge A~ &&
+	git add test &&
+	git commit -q -m "will solve conflicts later" &&
+	test_must_fail git merge A &&
+
+	echo "resolved" >test &&
+	git add test &&
+	git commit -q -m "solved conflict" &&
+
+	echo "resolved" >expect &&
+
+	git reset --hard HEAD~~ &&
+	test_must_fail git merge A~ &&
+	git add test &&
+	git commit -q -m "will solve conflicts later" &&
+	test_must_fail git merge A &&
+	cat test >actual &&
+	test_cmp expect actual
+'
+
 test_done

From bd7dfa543e0263858071b8648a55ef7ccae1085b Mon Sep 17 00:00:00 2001
From: Thomas Gummerer <t.gummerer@gmail.com>
Date: Sun, 5 Aug 2018 18:20:37 +0100
Subject: [PATCH 11/11] rerere: recalculate conflict ID when unresolved
 conflict is committed

Currently when a user doesn't resolve a conflict, commits the results,
and does an operation which creates another conflict, rerere will use
the ID of the previously unresolved conflict for the new conflict.
This is because the conflict is kept in the MERGE_RR file, which
'rerere' reads every time it is invoked.

After the new conflict is solved, rerere will record the resolution
with the ID of the old conflict.  So in order to replay the conflict,
both merges would have to be re-done, instead of just the last one, in
order for rerere to be able to automatically resolve the conflict.

Instead of that, assign a new conflict ID if there are still conflicts
in a file and the file had conflicts at a previous step.  This ID
matches the conflict we actually resolved at the corresponding step.

Note that there are no backwards compatibility worries here, as rerere
would have failed to even normalize the conflict before this patch
series.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 rerere.c          | 7 +++----
 t/t4200-rerere.sh | 7 +++++++
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/rerere.c b/rerere.c
index f78bef80b1..dd81d09e19 100644
--- a/rerere.c
+++ b/rerere.c
@@ -815,7 +815,7 @@ static int do_plain_rerere(struct string_list *rr, int fd)
 		struct rerere_id *id;
 		unsigned char sha1[20];
 		const char *path = conflict.items[i].string;
-		int ret, has_string;
+		int ret;
 
 		/*
 		 * Ask handle_file() to scan and assign a
@@ -823,12 +823,11 @@ static int do_plain_rerere(struct string_list *rr, int fd)
 		 * yet.
 		 */
 		ret = handle_file(path, sha1, NULL);
-		has_string = string_list_has_string(rr, path);
-		if (ret < 0 && has_string) {
+		if (ret != 0 && string_list_has_string(rr, path)) {
 			remove_variant(string_list_lookup(rr, path)->util);
 			string_list_remove(rr, path, 1);
 		}
-		if (ret < 1 || has_string)
+		if (ret < 1)
 			continue;
 
 		id = new_rerere_id(sha1);
diff --git a/t/t4200-rerere.sh b/t/t4200-rerere.sh
index afaf085e42..819f6dd672 100755
--- a/t/t4200-rerere.sh
+++ b/t/t4200-rerere.sh
@@ -635,6 +635,13 @@ test_expect_success 'rerere with inner conflict markers' '
 	git commit -q -m "will solve conflicts later" &&
 	test_must_fail git merge A &&
 	cat test >actual &&
+	test_cmp expect actual &&
+
+	git add test &&
+	git commit -m "rerere solved conflict" &&
+	git reset --hard HEAD~ &&
+	test_must_fail git merge A &&
+	cat test >actual &&
 	test_cmp expect actual
 '