convert: tighten the safe autocrlf handling

When a text file had been commited with CRLF and the file is commited
again, the CRLF are kept if .gitattributs has "text=auto".
This is done by analyzing the content of the blob stored in the index:
If a '\r' is found, Git assumes that the blob was commited with CRLF.

The simple search for a '\r' does not always work as expected:
A file is encoded in UTF-16 with CRLF and commited. Git treats it as binary.
Now the content is converted into UTF-8. At the next commit Git treats the
file as text, the CRLF should be converted into LF, but isn't.

Replace has_cr_in_index() with has_crlf_in_index(). When no '\r' is found,
0 is returned directly, this is the most common case.
If a '\r' is found, the content is analyzed more deeply.

Reported-By: Ashish Negi <ashishnegi33@gmail.com>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Torsten Bögershausen 2017-11-26 13:20:52 +01:00 коммит произвёл Junio C Hamano
Родитель fc849d8d6b
Коммит 86ff70a0f0
2 изменённых файлов: 85 добавлений и 10 удалений

Просмотреть файл

@ -220,18 +220,27 @@ static void check_safe_crlf(const char *path, enum crlf_action crlf_action,
}
}
static int has_cr_in_index(const struct index_state *istate, const char *path)
static int has_crlf_in_index(const struct index_state *istate, const char *path)
{
unsigned long sz;
void *data;
int has_cr;
const char *crp;
int has_crlf = 0;
data = read_blob_data_from_index(istate, path, &sz);
if (!data)
return 0;
has_cr = memchr(data, '\r', sz) != NULL;
crp = memchr(data, '\r', sz);
if (crp) {
unsigned int ret_stats;
ret_stats = gather_convert_stats(data, sz);
if (!(ret_stats & CONVERT_STAT_BITS_BIN) &&
(ret_stats & CONVERT_STAT_BITS_TXT_CRLF))
has_crlf = 1;
}
free(data);
return has_cr;
return has_crlf;
}
static int will_convert_lf_to_crlf(size_t len, struct text_stat *stats,
@ -290,7 +299,7 @@ static int crlf_to_git(const struct index_state *istate,
* cherry-pick.
*/
if ((checksafe != SAFE_CRLF_RENORMALIZE) &&
has_cr_in_index(istate, path))
has_crlf_in_index(istate, path))
convert_crlf_into_lf = 0;
}
if ((checksafe == SAFE_CRLF_WARN ||

Просмотреть файл

@ -43,19 +43,31 @@ create_gitattributes () {
} >.gitattributes
}
create_NNO_files () {
# Create 2 sets of files:
# The NNO files are "Not NOrmalized in the repo. We use CRLF_mix_LF and store
# it under different names for the different test cases, see ${pfx}
# Depending on .gitattributes they are normalized at the next commit (or not)
# The MIX files have different contents in the repo.
# Depending on its contents, the "new safer autocrlf" may kick in.
create_NNO_MIX_files () {
for crlf in false true input
do
for attr in "" auto text -text
do
for aeol in "" lf crlf
do
pfx=NNO_attr_${attr}_aeol_${aeol}_${crlf}
pfx=NNO_attr_${attr}_aeol_${aeol}_${crlf} &&
cp CRLF_mix_LF ${pfx}_LF.txt &&
cp CRLF_mix_LF ${pfx}_CRLF.txt &&
cp CRLF_mix_LF ${pfx}_CRLF_mix_LF.txt &&
cp CRLF_mix_LF ${pfx}_LF_mix_CR.txt &&
cp CRLF_mix_LF ${pfx}_CRLF_nul.txt
cp CRLF_mix_LF ${pfx}_CRLF_nul.txt &&
pfx=MIX_attr_${attr}_aeol_${aeol}_${crlf} &&
cp LF ${pfx}_LF.txt &&
cp CRLF ${pfx}_CRLF.txt &&
cp CRLF_mix_LF ${pfx}_CRLF_mix_LF.txt &&
cp LF_mix_CR ${pfx}_LF_mix_CR.txt &&
cp CRLF_nul ${pfx}_CRLF_nul.txt
done
done
done
@ -136,6 +148,49 @@ commit_chk_wrnNNO () {
'
}
# Commit a file with mixed line endings on top of different files
# in the index. Check for warnings
commit_MIX_chkwrn () {
attr=$1 ; shift
aeol=$1 ; shift
crlf=$1 ; shift
lfwarn=$1 ; shift
crlfwarn=$1 ; shift
lfmixcrlf=$1 ; shift
lfmixcr=$1 ; shift
crlfnul=$1 ; shift
pfx=MIX_attr_${attr}_aeol_${aeol}_${crlf}
#Commit file with CLRF_mix_LF on top of existing file
create_gitattributes "$attr" $aeol &&
for f in LF CRLF CRLF_mix_LF LF_mix_CR CRLF_nul
do
fname=${pfx}_$f.txt &&
cp CRLF_mix_LF $fname &&
printf Z >>"$fname" &&
git -c core.autocrlf=$crlf add $fname 2>"${pfx}_$f.err"
done
test_expect_success "commit file with mixed EOL crlf=$crlf attr=$attr LF" '
check_warning "$lfwarn" ${pfx}_LF.err
'
test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF" '
check_warning "$crlfwarn" ${pfx}_CRLF.err
'
test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF_mix_LF" '
check_warning "$lfmixcrlf" ${pfx}_CRLF_mix_LF.err
'
test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf LF_mix_cr" '
check_warning "$lfmixcr" ${pfx}_LF_mix_CR.err
'
test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF_nul" '
check_warning "$crlfnul" ${pfx}_CRLF_nul.err
'
}
stats_ascii () {
case "$1" in
LF)
@ -323,8 +378,8 @@ test_expect_success 'setup master' '
printf "\$Id: 0000000000000000000000000000000000000000 \$\r\nLINEONE\r\nLINETWO\rLINETHREE" >CRLF_mix_CR &&
printf "\$Id: 0000000000000000000000000000000000000000 \$\r\nLINEONEQ\r\nLINETWO\r\nLINETHREE" | q_to_nul >CRLF_nul &&
printf "\$Id: 0000000000000000000000000000000000000000 \$\nLINEONEQ\nLINETWO\nLINETHREE" | q_to_nul >LF_nul &&
create_NNO_files CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF &&
git -c core.autocrlf=false add NNO_*.txt &&
create_NNO_MIX_files CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF &&
git -c core.autocrlf=false add NNO_*.txt MIX_*.txt &&
git commit -m "mixed line endings" &&
test_tick
'
@ -385,6 +440,17 @@ test_expect_success 'commit files attr=crlf' '
commit_check_warn input "crlf" "LF_CRLF" "" "LF_CRLF" "LF_CRLF" ""
'
# Commit "CRLFmixLF" on top of these files already in the repo:
# LF, CRLF, CRLFmixLF LF_mix_CR CRLFNULL
# attr LF CRLF CRLFmixLF LF_mix_CR CRLFNUL
commit_MIX_chkwrn "" "" false "" "" "" "" ""
commit_MIX_chkwrn "" "" true "LF_CRLF" "" "" "LF_CRLF" "LF_CRLF"
commit_MIX_chkwrn "" "" input "CRLF_LF" "" "" "CRLF_LF" "CRLF_LF"
commit_MIX_chkwrn "auto" "" false "CRLF_LF" "" "" "CRLF_LF" "CRLF_LF"
commit_MIX_chkwrn "auto" "" true "LF_CRLF" "" "" "LF_CRLF" "LF_CRLF"
commit_MIX_chkwrn "auto" "" input "CRLF_LF" "" "" "CRLF_LF" "CRLF_LF"
# attr LF CRLF CRLFmixLF LF_mix_CR CRLFNUL
commit_chk_wrnNNO "" "" false "" "" "" "" ""
commit_chk_wrnNNO "" "" true LF_CRLF "" "" "" ""