Convert pdf, doc and docx files to text by default

Converting PDF and Word files to text before diffing them allows an
easier comparison between changed files. This reintroduces some
functionality of Git for Windows 1.x.

The pdftotext tool exists both in the xpdf package and the poppler
package; we opted to include the xpdf one because it requires us to add
only its dependency libstdc++-6.dll in addition. Poppler's version would
require 23 additional dlls.

In Johannes' tests, this change increased the portable Git by a scant
701kB -- which he deems worth the benefit.

This fixes https://github.com/git-for-windows/git/issues/355

[jes: re-wrapped commit message, avoided double list entry for
astextplain (in git-extra) and unzip.exe (dependency of docx2txt) that
7-Zip would complain about]

Signed-off-by: Matthias Aßhauer <mha1993@live.de>
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
Matthias Aßhauer 2015-09-13 07:41:01 +02:00 коммит произвёл Johannes Schindelin
Родитель d81733f044
Коммит 4c5e4423b4
5 изменённых файлов: 57 добавлений и 3 удалений

Просмотреть файл

@ -18,6 +18,8 @@ pkgver() {
build() {
test $startdir/$pkgname.install -nt $startdir/$pkgname.install.in ||
sed -e "/^@@GITCONFIG@@$/r $startdir/gitconfig" -e "/^@@GITCONFIG@@$/d" \
-e "/^@@GITATTRIBUTES@@$/r $startdir/gitattributes" \
-e "/^@@GITATTRIBUTES@@$/d" \
<$startdir/$pkgname.install.in >$startdir/$pkgname.install
gcc -o create-shortcut.exe $startdir/create-shortcut.c -luuid -lole32
@ -50,4 +52,5 @@ package() {
install -m755 $startdir/bash_profile.sh $pkgdir/etc/profile.d
install -m644 $startdir/msys2-32.ico $pkgdir/usr/share/git
install -m644 $startdir/99-post-install-cleanup.post $pkgdir/etc/post-install
install -m755 $startdir/astextplain $pkgdir/$mingwdir/bin
}

30
git-extra/astextplain Normal file
Просмотреть файл

@ -0,0 +1,30 @@
#!/bin/sh
# astextplain - write a plain-text (UTF-8) rendering of a document to stdout.
#
# Minimalistic replacement for `run-mailcap --action=cat <file>`.
#
# Usage: astextplain <file>
# Exit:  0 on success; 1 on a wrong argument count or an unsupported file
#        type; non-zero if a converter without a fallback fails.

# Enable errexit here rather than in the shebang so that it still applies
# when the script is started as `sh astextplain <file>`.
set -e

if test "$#" != 1 ; then
	echo "Usage: astextplain <file>" 1>&2
	exit 1
fi

# Literal control characters for the sed expressions below. The previous
# patterns used the two-character sequences "^M" and "^L", which sed matched
# literally instead of as carriage return / form feed.
cr=$(printf '\r')
ff=$(printf '\f')

# XXX output encoding (UTF-8) hardcoded
case "$1" in
	*.doc | *.DOC | *.dot | *.DOT)
		# Capture the converter output first: in `antiword | sed || cat` the
		# pipeline status is sed's, so the fallback could never trigger.
		# (Command substitution drops trailing blank lines, which is
		# harmless for diffing.)
		if text=$(antiword -m UTF-8 "$1"); then
			# Strip trailing carriage returns.
			printf '%s\n' "$text" | sed "s/${cr}\$//"
		else
			# Conversion failed: fall back to the raw file contents.
			cat "$1"
		fi
		;;
	*.docx | *.DOCX)
		docx2txt.pl "$1" -
		;;
	*.pdf | *.PDF)
		# Options go before the input file, as in pdftotext's synopsis.
		# Two separate expressions are used because alternation with
		# ( | ) is not available in sed's basic regular expressions:
		# strip a trailing carriage return and a leading form feed
		# (page break) from every line.
		pdftotext -layout -enc UTF-8 "$1" - |
		sed -e "s/${cr}\$//" -e "s/^${ff}//"
		;;
	# TODO add rtf support
	*.rtf | *.RTF)
		# No converter yet; emit the file unchanged.
		cat "$1"
		;;
	*)
		echo "E: unsupported filetype $1" 1>&2
		exit 1
		;;
esac

exit 0

Просмотреть файл

@ -8,6 +8,12 @@ post_install () {
cat > /$dir/etc/gitconfig <<\GITCONFIG
@@GITCONFIG@@
GITCONFIG
test ! -d /$dir ||
test -f /$dir/etc/gitattributes ||
cat > /$dir/etc/gitattributes <<\GITATTRIBUTES
@@GITATTRIBUTES@@
GITATTRIBUTES
done
grep -q '^db_home: env windows' /etc/nsswitch.conf ||

10
git-extra/gitattributes Normal file
Просмотреть файл

@ -0,0 +1,10 @@
# Assign the "astextplain" diff attribute to Word (.doc/.docx/.dot), PDF and
# RTF files. Patterns are listed in both lower and upper case because each
# entry matches only the exact spelling given.
# NOTE(review): the matching diff driver is presumably configured in the
# system gitconfig shipped alongside this file -- confirm.
*.doc diff=astextplain
*.DOC diff=astextplain
*.docx diff=astextplain
*.DOCX diff=astextplain
*.dot diff=astextplain
*.DOT diff=astextplain
*.pdf diff=astextplain
*.PDF diff=astextplain
*.rtf diff=astextplain
*.RTF diff=astextplain

Просмотреть файл

@ -28,14 +28,15 @@ pacman_list () {
}
# Packages that have been added after Git SDK 1.0.0 was released...
pacman -S --needed --noconfirm mingw-w64-$ARCH-connect git-flow >&2 ||
pacman -S --needed --noconfirm mingw-w64-$ARCH-connect git-flow unzip docx2txt \
mingw-w64-$ARCH-antiword mingw-w64-$ARCH-xpdf >&2 ||
die "Could not install required packages"
pacman_list mingw-w64-$ARCH-git mingw-w64-$ARCH-git-doc-html \
git-extra ncurses mintty vim openssh winpty \
sed awk less grep gnupg tar findutils coreutils diffutils patch \
dos2unix which subversion mingw-w64-$ARCH-tk \
mingw-w64-$ARCH-connect git-flow "$@" |
mingw-w64-$ARCH-connect git-flow docx2txt mingw-w64-$ARCH-antiword "$@" |
grep -v -e '\.[acho]$' -e '\.l[ao]$' -e '/aclocal/' \
-e '/man/' -e '/pkgconfig/' -e '/emacs/' \
-e '^/usr/lib/python' -e '^/usr/lib/ruby' \
@ -71,7 +72,8 @@ grep --perl-regexp -v -e '^/usr/(lib|share)/terminfo/(?!.*/(cygwin|dumb|xterm.*)
sed 's/^\///'
test -z "$PACKAGE_VERSIONS_FILE" ||
pacman -Q filesystem dash rebase util-linux >>"$PACKAGE_VERSIONS_FILE"
pacman -Q filesystem dash rebase util-linux unzip \
mingw-w64-$ARCH-xpdf >>"$PACKAGE_VERSIONS_FILE"
cat <<EOF
etc/profile
@ -89,4 +91,7 @@ usr/bin/dash.exe
usr/bin/rebase.exe
usr/bin/rebaseall
usr/bin/getopt.exe
mingw$BITNESS/etc/gitattributes
mingw$BITNESS/bin/pdftotext.exe
mingw$BITNESS/bin/libstdc++-6.dll
EOF