diff --stat: mark any file larger than core.bigfilethreshold binary

Too large files may lead to failure to allocate memory. If it happens
here, it could impact quite a few commands that involve
diff. Moreover, too large files are inefficient to compare anyway (and
most likely non-text), so mark them binary and skip looking at their
content.

Noticed-by: Dale R. Worley <worley@alum.mit.edu>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Nguyễn Thái Ngọc Duy 2014-08-16 10:08:05 +07:00 коммит произвёл Junio C Hamano
Родитель 8e5dd3d654
Коммит 6bf3b81348
5 изменённых файлов: 27 добавлений и 11 удалений

Просмотреть файл

@ -499,7 +499,8 @@ core.bigFileThreshold::
Files larger than this size are stored deflated, without
attempting delta compression. Storing large files without
delta compression avoids excessive memory usage, at the
slight expense of increased disk usage.
slight expense of increased disk usage. Additionally files
larger than this size are always treated as binary.
+
Default is 512 MiB on all platforms. This should be reasonable
for most projects as source code and other text files can still

Просмотреть файл

@ -440,8 +440,8 @@ Unspecified::
A path to which the `diff` attribute is unspecified
first gets its contents inspected, and if it looks like
text, it is treated as text. Otherwise it would
generate `Binary files differ`.
text and is smaller than core.bigFileThreshold, it is treated
as text. Otherwise it would generate `Binary files differ`.
String::

26
diff.c
Просмотреть файл

@ -2188,8 +2188,8 @@ int diff_filespec_is_binary(struct diff_filespec *one)
one->is_binary = one->driver->binary;
else {
if (!one->data && DIFF_FILE_VALID(one))
diff_populate_filespec(one, 0);
if (one->data)
diff_populate_filespec(one, CHECK_BINARY);
if (one->is_binary == -1 && one->data)
one->is_binary = buffer_is_binary(one->data,
one->size);
if (one->is_binary == -1)
@ -2725,6 +2725,11 @@ int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
}
if (size_only)
return 0;
if ((flags & CHECK_BINARY) &&
s->size > big_file_threshold && s->is_binary == -1) {
s->is_binary = 1;
return 0;
}
fd = open(s->path, O_RDONLY);
if (fd < 0)
goto err_empty;
@ -2746,16 +2751,21 @@ int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
}
else {
enum object_type type;
if (size_only) {
if (size_only || (flags & CHECK_BINARY)) {
type = sha1_object_info(s->sha1, &s->size);
if (type < 0)
die("unable to read %s", sha1_to_hex(s->sha1));
} else {
s->data = read_sha1_file(s->sha1, &type, &s->size);
if (!s->data)
die("unable to read %s", sha1_to_hex(s->sha1));
s->should_free = 1;
if (size_only)
return 0;
if (s->size > big_file_threshold && s->is_binary == -1) {
s->is_binary = 1;
return 0;
}
}
s->data = read_sha1_file(s->sha1, &type, &s->size);
if (!s->data)
die("unable to read %s", sha1_to_hex(s->sha1));
s->should_free = 1;
}
return 0;
}

Просмотреть файл

@ -56,6 +56,7 @@ extern void fill_filespec(struct diff_filespec *, const unsigned char *,
int, unsigned short);
#define CHECK_SIZE_ONLY 1
#define CHECK_BINARY 2
extern int diff_populate_filespec(struct diff_filespec *, unsigned int);
extern void diff_free_filespec_data(struct diff_filespec *);
extern void diff_free_filespec_blob(struct diff_filespec *);

Просмотреть файл

@ -112,6 +112,10 @@ test_expect_success 'diff --raw' '
git diff --raw HEAD^
'
test_expect_success 'diff --stat' '
git diff --stat HEAD^ HEAD
'
test_expect_success 'hash-object' '
git hash-object large1
'