зеркало из https://github.com/microsoft/git.git
convert: check for detectable errors in UTF encodings
Check that new content is valid with respect to the user defined 'working-tree-encoding' attribute. Signed-off-by: Lars Schneider <larsxschneider@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Родитель
107642fe26
Коммит
7a17918c34
61
convert.c
61
convert.c
|
@ -266,6 +266,64 @@ static int will_convert_lf_to_crlf(size_t len, struct text_stat *stats,
|
|||
|
||||
}
|
||||
|
||||
/*
 * Check new content against the user-defined 'working-tree-encoding'
 * for detectable byte order mark (BOM) errors.
 *
 * Only encoding names that start with "UTF" (compared without regard to
 * case) are inspected; any other encoding is accepted as is.
 *
 *   path          file name, used only in the messages
 *   enc           encoding name as configured for the file
 *   data, len     content to inspect
 *   die_on_error  if non-zero, a detected problem is reported with die();
 *                 otherwise it is reported with error() and the result
 *                 of error() is returned
 *
 * Returns 0 if no problem was detected.
 */
static int validate_encoding(const char *path, const char *enc,
		      const char *data, size_t len, int die_on_error)
{
	/* We only check for UTF here as UTF?? can be an alias for UTF-?? */
	if (istarts_with(enc, "UTF")) {
		/*
		 * Check for detectable errors in UTF encodings
		 */
		if (has_prohibited_utf_bom(enc, data, len)) {
			const char *error_msg = _(
				"BOM is prohibited in '%s' if encoded as %s");
			/*
			 * This advice is shown for UTF-??BE and UTF-??LE encodings.
			 * We cut off the last two characters of the encoding name
			 * to generate the encoding name suitable for BOMs.
			 */
			const char *advise_msg = _(
				"The file '%s' contains a byte order "
				"mark (BOM). Please use UTF-%s as "
				"working-tree-encoding.");
			const char *stripped = NULL;
			char *upper = xstrdup_toupper(enc);

			/*
			 * enc starts with "UTF", so the name has at least three
			 * characters and the index below cannot underflow
			 * (assuming xstrdup_toupper() keeps the length).
			 */
			upper[strlen(upper)-2] = '\0';
			/*
			 * Accept both the "UTF-16" and the dash-less "UTF16"
			 * spelling. The fallback must scan `upper`: `stripped`
			 * is still NULL when the first match has failed.
			 */
			if (!skip_prefix(upper, "UTF-", &stripped))
				skip_prefix(upper, "UTF", &stripped);
			/* `stripped` points into `upper`; use it before free() */
			advise(advise_msg, path, stripped);
			free(upper);
			if (die_on_error)
				die(error_msg, path, enc);
			else {
				return error(error_msg, path, enc);
			}

		} else if (is_missing_required_utf_bom(enc, data, len)) {
			const char *error_msg = _(
				"BOM is required in '%s' if encoded as %s");
			const char *advise_msg = _(
				"The file '%s' is missing a byte order "
				"mark (BOM). Please use UTF-%sBE or UTF-%sLE "
				"(depending on the byte order) as "
				"working-tree-encoding.");
			const char *stripped = NULL;
			char *upper = xstrdup_toupper(enc);

			/*
			 * Same prefix handling as above: fall back to the
			 * dash-less spelling by scanning `upper`, never the
			 * still-NULL `stripped`.
			 */
			if (!skip_prefix(upper, "UTF-", &stripped))
				skip_prefix(upper, "UTF", &stripped);
			/* `stripped` points into `upper`; use it before free() */
			advise(advise_msg, path, stripped, stripped);
			free(upper);
			if (die_on_error)
				die(error_msg, path, enc);
			else {
				return error(error_msg, path, enc);
			}
		}

	}
	return 0;
}
|
||||
|
||||
static const char *default_encoding = "UTF-8";
|
||||
|
||||
static int encode_to_git(const char *path, const char *src, size_t src_len,
|
||||
|
@ -291,6 +349,9 @@ static int encode_to_git(const char *path, const char *src, size_t src_len,
|
|||
if (!buf && !src)
|
||||
return 1;
|
||||
|
||||
if (validate_encoding(path, enc, src, src_len, die_on_error))
|
||||
return 0;
|
||||
|
||||
dst = reencode_string_len(src, src_len, default_encoding, enc,
|
||||
&dst_len);
|
||||
if (!dst) {
|
||||
|
|
|
@ -62,6 +62,52 @@ test_expect_success 'check $GIT_DIR/info/attributes support' '
|
|||
|
||||
for i in 16 32
|
||||
do
|
||||
test_expect_success "check prohibited UTF-${i} BOM" '
|
||||
test_when_finished "git reset --hard HEAD" &&
|
||||
|
||||
echo "*.utf${i}be text working-tree-encoding=utf-${i}be" >>.gitattributes &&
|
||||
echo "*.utf${i}le text working-tree-encoding=utf-${i}LE" >>.gitattributes &&
|
||||
|
||||
# Here we add UTF-16 (resp. UTF-32) files with a BOM (big/little-endian)
|
||||
# but we tell Git to treat them as UTF-16BE/UTF-16LE (resp. UTF-32BE/UTF-32LE).
|
||||
# In these cases the BOM is prohibited.
|
||||
cp bebom.utf${i}be.raw bebom.utf${i}be &&
|
||||
test_must_fail git add bebom.utf${i}be 2>err.out &&
|
||||
test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
|
||||
test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&
|
||||
|
||||
cp lebom.utf${i}le.raw lebom.utf${i}be &&
|
||||
test_must_fail git add lebom.utf${i}be 2>err.out &&
|
||||
test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
|
||||
test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&
|
||||
|
||||
cp bebom.utf${i}be.raw bebom.utf${i}le &&
|
||||
test_must_fail git add bebom.utf${i}le 2>err.out &&
|
||||
test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
|
||||
test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&
|
||||
|
||||
cp lebom.utf${i}le.raw lebom.utf${i}le &&
|
||||
test_must_fail git add lebom.utf${i}le 2>err.out &&
|
||||
test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
|
||||
test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out
|
||||
'
|
||||
|
||||
test_expect_success "check required UTF-${i} BOM" '
|
||||
test_when_finished "git reset --hard HEAD" &&
|
||||
|
||||
echo "*.utf${i} text working-tree-encoding=utf-${i}" >>.gitattributes &&
|
||||
|
||||
cp nobom.utf${i}be.raw nobom.utf${i} &&
|
||||
test_must_fail git add nobom.utf${i} 2>err.out &&
|
||||
test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
|
||||
test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out &&
|
||||
|
||||
cp nobom.utf${i}le.raw nobom.utf${i} &&
|
||||
test_must_fail git add nobom.utf${i} 2>err.out &&
|
||||
test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
|
||||
test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out
|
||||
'
|
||||
|
||||
test_expect_success "eol conversion for UTF-${i} encoded files on checkout" '
|
||||
test_when_finished "rm -f crlf.utf${i}.raw lf.utf${i}.raw" &&
|
||||
test_when_finished "git reset --hard HEAD^" &&
|
||||
|
@ -139,4 +185,20 @@ test_expect_success 'error if encoding round trip is not the same during refresh
|
|||
test_i18ngrep "error: .* overwritten by checkout:" err.out
|
||||
'
|
||||
|
||||
test_expect_success 'error if encoding garbage is already in Git' '
	BEFORE_STATE=$(git rev-parse HEAD) &&
	test_when_finished "git reset --hard $BEFORE_STATE" &&

	# Skip the UTF-16 filter for the added file.
	# This simulates a Git version that has no working-tree-encoding
	# support.
	cp nobom.utf16be.raw nonsense.utf16 &&
	TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16) &&
	git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16 &&

	# Run every git command in its own assignment so that a failure
	# breaks the &&-chain instead of being hidden inside the argument
	# list of another command. HEAD has not moved since BEFORE_STATE
	# was recorded, so it is reused as the parent commit.
	TREE=$(git write-tree) &&
	COMMIT=$(git commit-tree -p $BEFORE_STATE -m "plain commit" $TREE) &&
	git update-ref refs/heads/master $COMMIT &&

	git diff 2>err.out &&
	test_i18ngrep "error: BOM is required" err.out
'
|
||||
|
||||
test_done
|
||||
|
|
Загрузка…
Ссылка в новой задаче