зеркало из https://github.com/microsoft/git.git
Merge branch 'jk/diff-highlight'
* jk/diff-highlight: diff-highlight: document some non-optimal cases diff-highlight: match multi-line hunks diff-highlight: refactor to prepare for multi-line hunks diff-highlight: don't highlight whole lines diff-highlight: make perl strict and warnings fatal
This commit is contained in:
Коммит
d30146ac5f
|
@ -14,13 +14,15 @@ Instead, this script post-processes the line-oriented diff, finds pairs
|
|||
of lines, and highlights the differing segments. It's currently very
|
||||
simple and stupid about doing these tasks. In particular:
|
||||
|
||||
1. It will only highlight a pair of lines if they are the only two
|
||||
lines in a hunk. It could instead try to match up "before" and
|
||||
"after" lines for a given hunk into pairs of similar lines.
|
||||
However, this may end up visually distracting, as the paired
|
||||
lines would have other highlighted lines in between them. And in
|
||||
practice, the lines which most need attention called to their
|
||||
small, hard-to-see changes are touching only a single line.
|
||||
1. It will only highlight hunks in which the number of removed and
|
||||
added lines is the same, and it will pair lines within the hunk by
|
||||
position (so the first removed line is compared to the first added
|
||||
line, and so forth). This is simple and tends to work well in
|
||||
practice. More complex changes don't highlight well, so we tend to
|
||||
exclude them due to the "same number of removed and added lines"
|
||||
restriction. Or even if we do try to highlight them, they end up
|
||||
not highlighting because of our "don't highlight if the whole line
|
||||
would be highlighted" rule.
|
||||
|
||||
2. It will find the common prefix and suffix of two lines, and
|
||||
consider everything in the middle to be "different". It could
|
||||
|
@ -55,3 +57,96 @@ following in your git configuration:
|
|||
show = diff-highlight | less
|
||||
diff = diff-highlight | less
|
||||
---------------------------------------------
|
||||
|
||||
Bugs
|
||||
----
|
||||
|
||||
Because diff-highlight relies on heuristics to guess which parts of
|
||||
changes are important, there are some cases where the highlighting is
|
||||
more distracting than useful. Fortunately, these cases are rare in
|
||||
practice, and when they do occur, the worst case is simply a little
|
||||
extra highlighting. This section documents some cases known to be
|
||||
sub-optimal, in case somebody feels like working on improving the
|
||||
heuristics.
|
||||
|
||||
1. Two changes on the same line get highlighted in a blob. For example,
|
||||
highlighting:
|
||||
|
||||
----------------------------------------------
|
||||
-foo(buf, size);
|
||||
+foo(obj->buf, obj->size);
|
||||
----------------------------------------------
|
||||
|
||||
yields (where the inside of "+{}" would be highlighted):
|
||||
|
||||
----------------------------------------------
|
||||
-foo(buf, size);
|
||||
+foo(+{obj->buf, obj->}size);
|
||||
----------------------------------------------
|
||||
|
||||
whereas a more semantically meaningful output would be:
|
||||
|
||||
----------------------------------------------
|
||||
-foo(buf, size);
|
||||
+foo(+{obj->}buf, +{obj->}size);
|
||||
----------------------------------------------
|
||||
|
||||
Note that doing this right would probably involve a set of
|
||||
content-specific boundary patterns, similar to word-diff. Otherwise
|
||||
you get junk like:
|
||||
|
||||
-----------------------------------------------------
|
||||
-this line has some -{i}nt-{ere}sti-{ng} text on it
|
||||
+this line has some +{fa}nt+{a}sti+{c} text on it
|
||||
-----------------------------------------------------
|
||||
|
||||
which is less readable than the current output.
|
||||
|
||||
2. The multi-line matching assumes that lines in the pre- and post-image
|
||||
match by position. This is often the case, but can be fooled when a
|
||||
line is removed from the top and a new one added at the bottom (or
|
||||
vice versa). Unless the lines in the middle are also changed, diffs
|
||||
will show this as two hunks, and it will not get highlighted at all
|
||||
(which is good). But if the lines in the middle are changed, the
|
||||
highlighting can be misleading. Here's a pathological case:
|
||||
|
||||
-----------------------------------------------------
|
||||
-one
|
||||
-two
|
||||
-three
|
||||
-four
|
||||
+two 2
|
||||
+three 3
|
||||
+four 4
|
||||
+five 5
|
||||
-----------------------------------------------------
|
||||
|
||||
which gets highlighted as:
|
||||
|
||||
-----------------------------------------------------
|
||||
-one
|
||||
-t-{wo}
|
||||
-three
|
||||
-f-{our}
|
||||
+two 2
|
||||
+t+{hree 3}
|
||||
+four 4
|
||||
+f+{ive 5}
|
||||
-----------------------------------------------------
|
||||
|
||||
because it matches "two" to "three 3", and so forth. It would be
|
||||
nicer as:
|
||||
|
||||
-----------------------------------------------------
|
||||
-one
|
||||
-two
|
||||
-three
|
||||
-four
|
||||
+two +{2}
|
||||
+three +{3}
|
||||
+four +{4}
|
||||
+five 5
|
||||
-----------------------------------------------------
|
||||
|
||||
which would probably involve pre-matching the lines into pairs
|
||||
according to some heuristic.
|
||||
|
|
|
@ -1,28 +1,37 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
use warnings FATAL => 'all';
|
||||
use strict;
|
||||
|
||||
# Highlight by reversing foreground and background. You could do
|
||||
# other things like bold or underline if you prefer.
|
||||
my $HIGHLIGHT = "\x1b[7m";
|
||||
my $UNHIGHLIGHT = "\x1b[27m";
|
||||
my $COLOR = qr/\x1b\[[0-9;]*m/;
|
||||
my $BORING = qr/$COLOR|\s/;
|
||||
|
||||
my @window;
|
||||
my @removed;
|
||||
my @added;
|
||||
my $in_hunk;
|
||||
|
||||
while (<>) {
|
||||
# We highlight only single-line changes, so we need
|
||||
# a 4-line window to make a decision on whether
|
||||
# to highlight.
|
||||
push @window, $_;
|
||||
next if @window < 4;
|
||||
if ($window[0] =~ /^$COLOR*(\@| )/ &&
|
||||
$window[1] =~ /^$COLOR*-/ &&
|
||||
$window[2] =~ /^$COLOR*\+/ &&
|
||||
$window[3] !~ /^$COLOR*\+/) {
|
||||
print shift @window;
|
||||
show_pair(shift @window, shift @window);
|
||||
if (!$in_hunk) {
|
||||
print;
|
||||
$in_hunk = /^$COLOR*\@/;
|
||||
}
|
||||
elsif (/^$COLOR*-/) {
|
||||
push @removed, $_;
|
||||
}
|
||||
elsif (/^$COLOR*\+/) {
|
||||
push @added, $_;
|
||||
}
|
||||
else {
|
||||
print shift @window;
|
||||
show_hunk(\@removed, \@added);
|
||||
@removed = ();
|
||||
@added = ();
|
||||
|
||||
print;
|
||||
$in_hunk = /^$COLOR*[\@ ]/;
|
||||
}
|
||||
|
||||
# Most of the time there is enough output to keep things streaming,
|
||||
|
@ -38,23 +47,40 @@ while (<>) {
|
|||
}
|
||||
}
|
||||
|
||||
# Special case a single-line hunk at the end of file.
|
||||
if (@window == 3 &&
|
||||
$window[0] =~ /^$COLOR*(\@| )/ &&
|
||||
$window[1] =~ /^$COLOR*-/ &&
|
||||
$window[2] =~ /^$COLOR*\+/) {
|
||||
print shift @window;
|
||||
show_pair(shift @window, shift @window);
|
||||
}
|
||||
|
||||
# And then flush any remaining lines.
|
||||
while (@window) {
|
||||
print shift @window;
|
||||
}
|
||||
# Flush any queued hunk (this can happen when there is no trailing context in
|
||||
# the final diff of the input).
|
||||
show_hunk(\@removed, \@added);
|
||||
|
||||
exit 0;
|
||||
|
||||
sub show_pair {
|
||||
sub show_hunk {
|
||||
my ($a, $b) = @_;
|
||||
|
||||
# If one side is empty, then there is nothing to compare or highlight.
|
||||
if (!@$a || !@$b) {
|
||||
print @$a, @$b;
|
||||
return;
|
||||
}
|
||||
|
||||
# If we have mismatched numbers of lines on each side, we could try to
|
||||
# be clever and match up similar lines. But for now we are simple and
|
||||
# stupid, and only handle multi-line hunks that remove and add the same
|
||||
# number of lines.
|
||||
if (@$a != @$b) {
|
||||
print @$a, @$b;
|
||||
return;
|
||||
}
|
||||
|
||||
my @queue;
|
||||
for (my $i = 0; $i < @$a; $i++) {
|
||||
my ($rm, $add) = highlight_pair($a->[$i], $b->[$i]);
|
||||
print $rm;
|
||||
push @queue, $add;
|
||||
}
|
||||
print @queue;
|
||||
}
|
||||
|
||||
sub highlight_pair {
|
||||
my @a = split_line(shift);
|
||||
my @b = split_line(shift);
|
||||
|
||||
|
@ -101,8 +127,14 @@ sub show_pair {
|
|||
}
|
||||
}
|
||||
|
||||
print highlight(\@a, $pa, $sa);
|
||||
print highlight(\@b, $pb, $sb);
|
||||
if (is_pair_interesting(\@a, $pa, $sa, \@b, $pb, $sb)) {
|
||||
return highlight_line(\@a, $pa, $sa),
|
||||
highlight_line(\@b, $pb, $sb);
|
||||
}
|
||||
else {
|
||||
return join('', @a),
|
||||
join('', @b);
|
||||
}
|
||||
}
|
||||
|
||||
sub split_line {
|
||||
|
@ -111,7 +143,7 @@ sub split_line {
|
|||
split /($COLOR*)/;
|
||||
}
|
||||
|
||||
sub highlight {
|
||||
sub highlight_line {
|
||||
my ($line, $prefix, $suffix) = @_;
|
||||
|
||||
return join('',
|
||||
|
@ -122,3 +154,20 @@ sub highlight {
|
|||
@{$line}[($suffix+1)..$#$line]
|
||||
);
|
||||
}
|
||||
|
||||
# Pairs are interesting to highlight only if we are going to end up
|
||||
# highlighting a subset (i.e., not the whole line). Otherwise, the highlighting
|
||||
# is just useless noise. We can detect this by finding either a matching prefix
|
||||
# or suffix (disregarding boring bits like whitespace and colorization).
|
||||
sub is_pair_interesting {
|
||||
my ($a, $pa, $sa, $b, $pb, $sb) = @_;
|
||||
my $prefix_a = join('', @$a[0..($pa-1)]);
|
||||
my $prefix_b = join('', @$b[0..($pb-1)]);
|
||||
my $suffix_a = join('', @$a[($sa+1)..$#$a]);
|
||||
my $suffix_b = join('', @$b[($sb+1)..$#$b]);
|
||||
|
||||
return $prefix_a !~ /^$COLOR*-$BORING*$/ ||
|
||||
$prefix_b !~ /^$COLOR*\+$BORING*$/ ||
|
||||
$suffix_a !~ /^$BORING*$/ ||
|
||||
$suffix_b !~ /^$BORING*$/;
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче