From c5aa90682f82fe0dabdc0280be93568d52efddfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 25 Sep 2011 21:39:08 +0800 Subject: [PATCH 1/2] Revert removal of multi-match discard heuristic in 27af01 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 27af01d (xdiff/xprepare: improve O(n*m) performance in xdl_cleanup_records(), 2011-08-17) was supposed to be a performance boost only. However, it unexpectedly changed the behaviour of diff. Revert a part of 27af01d that removes logic that marks lines as "multi-match" (i.e. dis[i] == 2). This was preventing the multi-match discard heuristic (performed in xdl_cleanup_records() and xdl_clean_mmatch()) from executing. Reported-by: Alexander Pepper Signed-off-by: René Scharfe Signed-off-by: Tay Ray Chuan Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 05a8f01f38..4c447ca6d2 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -398,7 +398,7 @@ static int xdl_clean_mmatch(char const *dis, long i, long s, long e) { * might be potentially discarded if they happear in a run of discardable. */ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { - long i, nm, nreff; + long i, nm, nreff, mlim; xrecord_t **recs; xdlclass_t *rcrec; char *dis, *dis1, *dis2; @@ -411,16 +411,20 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd dis1 = dis; dis2 = dis1 + xdf1->nrec + 1; + if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT) + mlim = XDL_MAX_EQLIMIT; for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { rcrec = cf->rcrecs[(*recs)->ha]; nm = rcrec ? rcrec->len2 : 0; - dis1[i] = (nm == 0) ? 0: 1; + dis1[i] = (nm == 0) ? 0: (nm >= mlim) ? 
2: 1; } + if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT) + mlim = XDL_MAX_EQLIMIT; for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { rcrec = cf->rcrecs[(*recs)->ha]; nm = rcrec ? rcrec->len1 : 0; - dis2[i] = (nm == 0) ? 0: 1; + dis2[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; } for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; From 81b568c839376a10cd5fe29db0b8a72f537fa36a Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 1 Oct 2011 21:56:28 -0700 Subject: [PATCH 2/2] diff: resurrect XDF_NEED_MINIMAL with --minimal Earlier, 582aa00 (git diff too slow for a file, 2010-05-02) unconditionally dropped XDF_NEED_MINIMAL option from the internal xdiff invocation to help performance on pathological cases, while hinting that a follow-up patch could reintroduce it with "--minimal" option from the command line. Make it so. Signed-off-by: Junio C Hamano --- Documentation/diff-options.txt | 4 ++++ diff.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index b620b3afec..5c53bdba94 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -45,6 +45,10 @@ ifndef::git-format-patch[] Synonym for `-p --raw`. endif::git-format-patch[] +--minimal:: + Spend extra time to make sure the smallest possible + diff is produced. + --patience:: Generate a diff using the "patience diff" algorithm. diff --git a/diff.c b/diff.c index 93ef9a265c..c261cc049d 100644 --- a/diff.c +++ b/diff.c @@ -3385,6 +3385,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) } /* xdiff options */ + else if (!strcmp(arg, "--minimal")) + DIFF_XDL_SET(options, NEED_MINIMAL); + else if (!strcmp(arg, "--no-minimal")) + DIFF_XDL_CLR(options, NEED_MINIMAL); else if (!strcmp(arg, "-w") || !strcmp(arg, "--ignore-all-space")) DIFF_XDL_SET(options, IGNORE_WHITESPACE); else if (!strcmp(arg, "-b") || !strcmp(arg, "--ignore-space-change"))