зеркало из https://github.com/microsoft/git.git
Merge branch 'sp/safecrlf'
* sp/safecrlf: safecrlf: Add mechanism to warn about irreversible crlf conversions
This commit is contained in:
Коммит
2ac4b4b222
|
@ -139,6 +139,51 @@ core.autocrlf::
|
||||||
"text" (i.e. be subjected to the autocrlf mechanism) is
|
"text" (i.e. be subjected to the autocrlf mechanism) is
|
||||||
decided purely based on the contents.
|
decided purely based on the contents.
|
||||||
|
|
||||||
|
core.safecrlf::
|
||||||
|
If true, makes git check if converting `CRLF` as controlled by
|
||||||
|
`core.autocrlf` is reversible. Git will verify if a command
|
||||||
|
modifies a file in the work tree either directly or indirectly.
|
||||||
|
For example, committing a file followed by checking out the
|
||||||
|
same file should yield the original file in the work tree. If
|
||||||
|
this is not the case for the current setting of
|
||||||
|
`core.autocrlf`, git will reject the file. The variable can
|
||||||
|
be set to "warn", in which case git will only warn about an
|
||||||
|
irreversible conversion but continue the operation.
|
||||||
|
+
|
||||||
|
CRLF conversion bears a slight chance of corrupting data.
|
||||||
|
autocrlf=true will convert CRLF to LF during commit and LF to
|
||||||
|
CRLF during checkout. A file that contains a mixture of LF and
|
||||||
|
CRLF before the commit cannot be recreated by git. For text
|
||||||
|
files this is the right thing to do: it corrects line endings
|
||||||
|
such that we have only LF line endings in the repository.
|
||||||
|
But for binary files that are accidentally classified as text the
|
||||||
|
conversion can corrupt data.
|
||||||
|
+
|
||||||
|
If you recognize such corruption early you can easily fix it by
|
||||||
|
setting the conversion type explicitly in .gitattributes. Right
|
||||||
|
after committing you still have the original file in your work
|
||||||
|
tree and this file is not yet corrupted. You can explicitly tell
|
||||||
|
git that this file is binary and git will handle the file
|
||||||
|
appropriately.
|
||||||
|
+
|
||||||
|
Unfortunately, the desired effect of cleaning up text files with
|
||||||
|
mixed line endings and the undesired effect of corrupting binary
|
||||||
|
files cannot be distinguished. In both cases CRLFs are removed
|
||||||
|
in an irreversible way. For text files this is the right thing
|
||||||
|
to do because CRLFs are line endings, while for binary files
|
||||||
|
converting CRLFs corrupts data.
|
||||||
|
+
|
||||||
|
Note, this safety check does not mean that a checkout will generate a
|
||||||
|
file identical to the original file for a different setting of
|
||||||
|
`core.autocrlf`, but only for the current one. For example, a text
|
||||||
|
file with `LF` would be accepted with `core.autocrlf=input` and could
|
||||||
|
later be checked out with `core.autocrlf=true`, in which case the
|
||||||
|
resulting file would contain `CRLF`, although the original file
|
||||||
|
contained `LF`. However, in both work trees the line endings would be
|
||||||
|
consistent, that is either all `LF` or all `CRLF`, but never mixed. A
|
||||||
|
file with mixed line endings would be reported by the `core.safecrlf`
|
||||||
|
mechanism.
|
||||||
|
|
||||||
core.symlinks::
|
core.symlinks::
|
||||||
If false, symbolic links are checked out as small plain files that
|
If false, symbolic links are checked out as small plain files that
|
||||||
contain the link text. linkgit:git-update-index[1] and
|
contain the link text. linkgit:git-update-index[1] and
|
||||||
|
|
|
@ -133,6 +133,26 @@ When `core.autocrlf` is set to "input", line endings are
|
||||||
converted to LF upon checkin, but there is no conversion done
|
converted to LF upon checkin, but there is no conversion done
|
||||||
upon checkout.
|
upon checkout.
|
||||||
|
|
||||||
|
If `core.safecrlf` is set to "true" or "warn", git verifies if
|
||||||
|
the conversion is reversible for the current setting of
|
||||||
|
`core.autocrlf`. For "true", git rejects irreversible
|
||||||
|
conversions; for "warn", git only prints a warning but accepts
|
||||||
|
an irreversible conversion. The safety triggers to prevent such
|
||||||
|
a conversion done to the files in the work tree, but there are a
|
||||||
|
few exceptions. Even though...
|
||||||
|
|
||||||
|
- "git add" itself does not touch the files in the work tree, the
|
||||||
|
next checkout would, so the safety triggers;
|
||||||
|
|
||||||
|
- "git apply" to update a text file with a patch does touch the files
|
||||||
|
in the work tree, but the operation is about text files and CRLF
|
||||||
|
conversion is about fixing the line ending inconsistencies, so the
|
||||||
|
safety does not trigger;
|
||||||
|
|
||||||
|
- "git diff" itself does not touch the files in the work tree, it is
|
||||||
|
often run to inspect the changes you intend to next "git add". To
|
||||||
|
catch potential problems early, safety triggers.
|
||||||
|
|
||||||
|
|
||||||
`ident`
|
`ident`
|
||||||
^^^^^^^
|
^^^^^^^
|
||||||
|
|
|
@ -1430,7 +1430,7 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
|
||||||
case S_IFREG:
|
case S_IFREG:
|
||||||
if (strbuf_read_file(buf, path, st->st_size) != st->st_size)
|
if (strbuf_read_file(buf, path, st->st_size) != st->st_size)
|
||||||
return error("unable to open or read %s", path);
|
return error("unable to open or read %s", path);
|
||||||
convert_to_git(path, buf->buf, buf->len, buf);
|
convert_to_git(path, buf->buf, buf->len, buf, 0);
|
||||||
return 0;
|
return 0;
|
||||||
default:
|
default:
|
||||||
return -1;
|
return -1;
|
||||||
|
|
|
@ -2073,7 +2073,7 @@ static struct commit *fake_working_tree_commit(const char *path, const char *con
|
||||||
if (strbuf_read(&buf, 0, 0) < 0)
|
if (strbuf_read(&buf, 0, 0) < 0)
|
||||||
die("read error %s from stdin", strerror(errno));
|
die("read error %s from stdin", strerror(errno));
|
||||||
}
|
}
|
||||||
convert_to_git(path, buf.buf, buf.len, &buf);
|
convert_to_git(path, buf.buf, buf.len, &buf, 0);
|
||||||
origin->file.ptr = buf.buf;
|
origin->file.ptr = buf.buf;
|
||||||
origin->file.size = buf.len;
|
origin->file.size = buf.len;
|
||||||
pretend_sha1_file(buf.buf, buf.len, OBJ_BLOB, origin->blob_sha1);
|
pretend_sha1_file(buf.buf, buf.len, OBJ_BLOB, origin->blob_sha1);
|
||||||
|
|
11
cache.h
11
cache.h
|
@ -383,6 +383,14 @@ extern size_t packed_git_limit;
|
||||||
extern size_t delta_base_cache_limit;
|
extern size_t delta_base_cache_limit;
|
||||||
extern int auto_crlf;
|
extern int auto_crlf;
|
||||||
|
|
||||||
|
enum safe_crlf {
|
||||||
|
SAFE_CRLF_FALSE = 0,
|
||||||
|
SAFE_CRLF_FAIL = 1,
|
||||||
|
SAFE_CRLF_WARN = 2,
|
||||||
|
};
|
||||||
|
|
||||||
|
extern enum safe_crlf safe_crlf;
|
||||||
|
|
||||||
#define GIT_REPO_VERSION 0
|
#define GIT_REPO_VERSION 0
|
||||||
extern int repository_format_version;
|
extern int repository_format_version;
|
||||||
extern int check_repository_format(void);
|
extern int check_repository_format(void);
|
||||||
|
@ -691,7 +699,8 @@ extern void trace_argv_printf(const char **argv, const char *format, ...);
|
||||||
|
|
||||||
/* convert.c */
|
/* convert.c */
|
||||||
/* returns 1 if *dst was used */
|
/* returns 1 if *dst was used */
|
||||||
extern int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst);
|
extern int convert_to_git(const char *path, const char *src, size_t len,
|
||||||
|
struct strbuf *dst, enum safe_crlf checksafe);
|
||||||
extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
|
extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
|
||||||
|
|
||||||
/* add */
|
/* add */
|
||||||
|
|
9
config.c
9
config.c
|
@ -415,6 +415,15 @@ int git_default_config(const char *var, const char *value)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!strcmp(var, "core.safecrlf")) {
|
||||||
|
if (value && !strcasecmp(value, "warn")) {
|
||||||
|
safe_crlf = SAFE_CRLF_WARN;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
safe_crlf = git_config_bool(var, value);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (!strcmp(var, "user.name")) {
|
if (!strcmp(var, "user.name")) {
|
||||||
if (!value)
|
if (!value)
|
||||||
return config_error_nonbool(var);
|
return config_error_nonbool(var);
|
||||||
|
|
47
convert.c
47
convert.c
|
@ -85,8 +85,39 @@ static int is_binary(unsigned long size, struct text_stat *stats)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void check_safe_crlf(const char *path, int action,
|
||||||
|
struct text_stat *stats, enum safe_crlf checksafe)
|
||||||
|
{
|
||||||
|
if (!checksafe)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (action == CRLF_INPUT || auto_crlf <= 0) {
|
||||||
|
/*
|
||||||
|
* CRLFs would not be restored by checkout:
|
||||||
|
* check if we'd remove CRLFs
|
||||||
|
*/
|
||||||
|
if (stats->crlf) {
|
||||||
|
if (checksafe == SAFE_CRLF_WARN)
|
||||||
|
warning("CRLF will be replaced by LF in %s.", path);
|
||||||
|
else /* i.e. SAFE_CRLF_FAIL */
|
||||||
|
die("CRLF would be replaced by LF in %s.", path);
|
||||||
|
}
|
||||||
|
} else if (auto_crlf > 0) {
|
||||||
|
/*
|
||||||
|
* CRLFs would be added by checkout:
|
||||||
|
* check if we have "naked" LFs
|
||||||
|
*/
|
||||||
|
if (stats->lf != stats->crlf) {
|
||||||
|
if (checksafe == SAFE_CRLF_WARN)
|
||||||
|
warning("LF will be replaced by CRLF in %s", path);
|
||||||
|
else /* i.e. SAFE_CRLF_FAIL */
|
||||||
|
die("LF would be replaced by CRLF in %s", path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int crlf_to_git(const char *path, const char *src, size_t len,
|
static int crlf_to_git(const char *path, const char *src, size_t len,
|
||||||
struct strbuf *buf, int action)
|
struct strbuf *buf, int action, enum safe_crlf checksafe)
|
||||||
{
|
{
|
||||||
struct text_stat stats;
|
struct text_stat stats;
|
||||||
char *dst;
|
char *dst;
|
||||||
|
@ -95,9 +126,6 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
gather_stats(src, len, &stats);
|
gather_stats(src, len, &stats);
|
||||||
/* No CR? Nothing to convert, regardless. */
|
|
||||||
if (!stats.cr)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
if (action == CRLF_GUESS) {
|
if (action == CRLF_GUESS) {
|
||||||
/*
|
/*
|
||||||
|
@ -115,6 +143,12 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
check_safe_crlf(path, action, &stats, checksafe);
|
||||||
|
|
||||||
|
/* Optimization: No CR? Nothing to convert, regardless. */
|
||||||
|
if (!stats.cr)
|
||||||
|
return 0;
|
||||||
|
|
||||||
/* only grow if not in place */
|
/* only grow if not in place */
|
||||||
if (strbuf_avail(buf) + buf->len < len)
|
if (strbuf_avail(buf) + buf->len < len)
|
||||||
strbuf_grow(buf, len - buf->len);
|
strbuf_grow(buf, len - buf->len);
|
||||||
|
@ -536,7 +570,8 @@ static int git_path_check_ident(const char *path, struct git_attr_check *check)
|
||||||
return !!ATTR_TRUE(value);
|
return !!ATTR_TRUE(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst)
|
int convert_to_git(const char *path, const char *src, size_t len,
|
||||||
|
struct strbuf *dst, enum safe_crlf checksafe)
|
||||||
{
|
{
|
||||||
struct git_attr_check check[3];
|
struct git_attr_check check[3];
|
||||||
int crlf = CRLF_GUESS;
|
int crlf = CRLF_GUESS;
|
||||||
|
@ -558,7 +593,7 @@ int convert_to_git(const char *path, const char *src, size_t len, struct strbuf
|
||||||
src = dst->buf;
|
src = dst->buf;
|
||||||
len = dst->len;
|
len = dst->len;
|
||||||
}
|
}
|
||||||
ret |= crlf_to_git(path, src, len, dst, crlf);
|
ret |= crlf_to_git(path, src, len, dst, crlf, checksafe);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
src = dst->buf;
|
src = dst->buf;
|
||||||
len = dst->len;
|
len = dst->len;
|
||||||
|
|
2
diff.c
2
diff.c
|
@ -1631,7 +1631,7 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
|
||||||
* Convert from working tree format to canonical git format
|
* Convert from working tree format to canonical git format
|
||||||
*/
|
*/
|
||||||
strbuf_init(&buf, 0);
|
strbuf_init(&buf, 0);
|
||||||
if (convert_to_git(s->path, s->data, s->size, &buf)) {
|
if (convert_to_git(s->path, s->data, s->size, &buf, safe_crlf)) {
|
||||||
size_t size = 0;
|
size_t size = 0;
|
||||||
munmap(s->data, s->size);
|
munmap(s->data, s->size);
|
||||||
s->should_munmap = 0;
|
s->should_munmap = 0;
|
||||||
|
|
|
@ -35,6 +35,7 @@ int pager_use_color = 1;
|
||||||
const char *editor_program;
|
const char *editor_program;
|
||||||
const char *excludes_file;
|
const char *excludes_file;
|
||||||
int auto_crlf = 0; /* 1: both ways, -1: only when adding git objects */
|
int auto_crlf = 0; /* 1: both ways, -1: only when adding git objects */
|
||||||
|
enum safe_crlf safe_crlf = SAFE_CRLF_WARN;
|
||||||
unsigned whitespace_rule_cfg = WS_DEFAULT_RULE;
|
unsigned whitespace_rule_cfg = WS_DEFAULT_RULE;
|
||||||
|
|
||||||
/* This is set by setup_git_dir_gently() and/or git_default_config() */
|
/* This is set by setup_git_dir_gently() and/or git_default_config() */
|
||||||
|
|
|
@ -2358,7 +2358,8 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
|
||||||
if ((type == OBJ_BLOB) && S_ISREG(st->st_mode)) {
|
if ((type == OBJ_BLOB) && S_ISREG(st->st_mode)) {
|
||||||
struct strbuf nbuf;
|
struct strbuf nbuf;
|
||||||
strbuf_init(&nbuf, 0);
|
strbuf_init(&nbuf, 0);
|
||||||
if (convert_to_git(path, buf, size, &nbuf)) {
|
if (convert_to_git(path, buf, size, &nbuf,
|
||||||
|
write_object ? safe_crlf : 0)) {
|
||||||
munmap(buf, size);
|
munmap(buf, size);
|
||||||
buf = strbuf_detach(&nbuf, &size);
|
buf = strbuf_detach(&nbuf, &size);
|
||||||
re_allocated = 1;
|
re_allocated = 1;
|
||||||
|
|
|
@ -8,6 +8,10 @@ q_to_nul () {
|
||||||
tr Q '\000'
|
tr Q '\000'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
q_to_cr () {
|
||||||
|
tr Q '\015'
|
||||||
|
}
|
||||||
|
|
||||||
append_cr () {
|
append_cr () {
|
||||||
sed -e 's/$/Q/' | tr Q '\015'
|
sed -e 's/$/Q/' | tr Q '\015'
|
||||||
}
|
}
|
||||||
|
@ -42,6 +46,60 @@ test_expect_success setup '
|
||||||
echo happy.
|
echo happy.
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success 'safecrlf: autocrlf=input, all CRLF' '
|
||||||
|
|
||||||
|
git config core.autocrlf input &&
|
||||||
|
git config core.safecrlf true &&
|
||||||
|
|
||||||
|
for w in I am all CRLF; do echo $w; done | append_cr >allcrlf &&
|
||||||
|
! git add allcrlf
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'safecrlf: autocrlf=input, mixed LF/CRLF' '
|
||||||
|
|
||||||
|
git config core.autocrlf input &&
|
||||||
|
git config core.safecrlf true &&
|
||||||
|
|
||||||
|
for w in Oh here is CRLFQ in text; do echo $w; done | q_to_cr >mixed &&
|
||||||
|
! git add mixed
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'safecrlf: autocrlf=true, all LF' '
|
||||||
|
|
||||||
|
git config core.autocrlf true &&
|
||||||
|
git config core.safecrlf true &&
|
||||||
|
|
||||||
|
for w in I am all LF; do echo $w; done >alllf &&
|
||||||
|
! git add alllf
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'safecrlf: autocrlf=true mixed LF/CRLF' '
|
||||||
|
|
||||||
|
git config core.autocrlf true &&
|
||||||
|
git config core.safecrlf true &&
|
||||||
|
|
||||||
|
for w in Oh here is CRLFQ in text; do echo $w; done | q_to_cr >mixed &&
|
||||||
|
! git add mixed
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'safecrlf: print warning only once' '
|
||||||
|
|
||||||
|
git config core.autocrlf input &&
|
||||||
|
git config core.safecrlf warn &&
|
||||||
|
|
||||||
|
for w in I am all LF; do echo $w; done >doublewarn &&
|
||||||
|
git add doublewarn &&
|
||||||
|
git commit -m "nowarn" &&
|
||||||
|
for w in Oh here is CRLFQ in text; do echo $w; done | q_to_cr >doublewarn &&
|
||||||
|
test $(git add doublewarn 2>&1 | grep "CRLF will be replaced by LF" | wc -l) = 1
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'switch off autocrlf, safecrlf, reset HEAD' '
|
||||||
|
git config core.autocrlf false &&
|
||||||
|
git config core.safecrlf false &&
|
||||||
|
git reset --hard HEAD^
|
||||||
|
'
|
||||||
|
|
||||||
test_expect_success 'update with autocrlf=input' '
|
test_expect_success 'update with autocrlf=input' '
|
||||||
|
|
||||||
rm -f tmp one dir/two three &&
|
rm -f tmp one dir/two three &&
|
||||||
|
|
Загрузка…
Ссылка в новой задаче