Teach a few codepaths to punt (instead of dying) when large blobs
that would not fit in core are involved in the operation.

* nd/large-blobs:
  diff: shortcut for diff'ing two binary SHA-1 objects
  diff --stat: mark any file larger than core.bigfilethreshold binary
  diff.c: allow to pass more flags to diff_populate_filespec
  sha1_file.c: do not die failing to malloc in unpack_compressed_entry
  wrapper.c: introduce gentle xmallocz that does not die()
This commit is contained in:
Junio C Hamano 2014-09-11 10:33:32 -07:00
Родитель 08ad26a63d 1aaf69e669
Коммит bedd3b4b7b
9 изменённых файлов: 125 добавлений и 37 удалений

Просмотреть файл

@ -499,7 +499,8 @@ core.bigFileThreshold::
Files larger than this size are stored deflated, without
attempting delta compression. Storing large files without
delta compression avoids excessive memory usage, at the
slight expense of increased disk usage.
slight expense of increased disk usage. Additionally files
larger than this size are always treated as binary.
+
Default is 512 MiB on all platforms. This should be reasonable
for most projects as source code and other text files can still

Просмотреть файл

@ -440,8 +440,8 @@ Unspecified::
A path to which the `diff` attribute is unspecified
first gets its contents inspected, and if it looks like
text, it is treated as text. Otherwise it would
generate `Binary files differ`.
text and is smaller than core.bigFileThreshold, it is treated
as text. Otherwise it would generate `Binary files differ`.
String::

46
diff.c
Просмотреть файл

@ -376,7 +376,7 @@ static unsigned long diff_filespec_size(struct diff_filespec *one)
{
if (!DIFF_FILE_VALID(one))
return 0;
diff_populate_filespec(one, 1);
diff_populate_filespec(one, CHECK_SIZE_ONLY);
return one->size;
}
@ -1910,11 +1910,11 @@ static void show_dirstat(struct diff_options *options)
diff_free_filespec_data(p->one);
diff_free_filespec_data(p->two);
} else if (DIFF_FILE_VALID(p->one)) {
diff_populate_filespec(p->one, 1);
diff_populate_filespec(p->one, CHECK_SIZE_ONLY);
copied = added = 0;
diff_free_filespec_data(p->one);
} else if (DIFF_FILE_VALID(p->two)) {
diff_populate_filespec(p->two, 1);
diff_populate_filespec(p->two, CHECK_SIZE_ONLY);
copied = 0;
added = p->two->size;
diff_free_filespec_data(p->two);
@ -2188,8 +2188,8 @@ int diff_filespec_is_binary(struct diff_filespec *one)
one->is_binary = one->driver->binary;
else {
if (!one->data && DIFF_FILE_VALID(one))
diff_populate_filespec(one, 0);
if (one->data)
diff_populate_filespec(one, CHECK_BINARY);
if (one->is_binary == -1 && one->data)
one->is_binary = buffer_is_binary(one->data,
one->size);
if (one->is_binary == -1)
@ -2324,6 +2324,19 @@ static void builtin_diff(const char *name_a,
} else if (!DIFF_OPT_TST(o, TEXT) &&
( (!textconv_one && diff_filespec_is_binary(one)) ||
(!textconv_two && diff_filespec_is_binary(two)) )) {
if (!one->data && !two->data &&
S_ISREG(one->mode) && S_ISREG(two->mode) &&
!DIFF_OPT_TST(o, BINARY)) {
if (!hashcmp(one->sha1, two->sha1)) {
if (must_show_header)
fprintf(o->file, "%s", header.buf);
goto free_ab_and_return;
}
fprintf(o->file, "%s", header.buf);
fprintf(o->file, "%sBinary files %s and %s differ\n",
line_prefix, lbl[0], lbl[1]);
goto free_ab_and_return;
}
if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
die("unable to read files to diff");
/* Quite common confusing case */
@ -2668,8 +2681,9 @@ static int diff_populate_gitlink(struct diff_filespec *s, int size_only)
* grab the data for the blob (or file) for our own in-core comparison.
* diff_filespec has data and size fields for this purpose.
*/
int diff_populate_filespec(struct diff_filespec *s, int size_only)
int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
{
int size_only = flags & CHECK_SIZE_ONLY;
int err = 0;
/*
* demote FAIL to WARN to allow inspecting the situation
@ -2724,6 +2738,11 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
}
if (size_only)
return 0;
if ((flags & CHECK_BINARY) &&
s->size > big_file_threshold && s->is_binary == -1) {
s->is_binary = 1;
return 0;
}
fd = open(s->path, O_RDONLY);
if (fd < 0)
goto err_empty;
@ -2745,17 +2764,22 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
}
else {
enum object_type type;
if (size_only) {
if (size_only || (flags & CHECK_BINARY)) {
type = sha1_object_info(s->sha1, &s->size);
if (type < 0)
die("unable to read %s", sha1_to_hex(s->sha1));
} else {
if (size_only)
return 0;
if (s->size > big_file_threshold && s->is_binary == -1) {
s->is_binary = 1;
return 0;
}
}
s->data = read_sha1_file(s->sha1, &type, &s->size);
if (!s->data)
die("unable to read %s", sha1_to_hex(s->sha1));
s->should_free = 1;
}
}
return 0;
}
@ -4688,8 +4712,8 @@ static int diff_filespec_check_stat_unmatch(struct diff_filepair *p)
!DIFF_FILE_VALID(p->two) ||
(p->one->sha1_valid && p->two->sha1_valid) ||
(p->one->mode != p->two->mode) ||
diff_populate_filespec(p->one, 1) ||
diff_populate_filespec(p->two, 1) ||
diff_populate_filespec(p->one, CHECK_SIZE_ONLY) ||
diff_populate_filespec(p->two, CHECK_SIZE_ONLY) ||
(p->one->size != p->two->size) ||
!diff_filespec_is_identical(p->one, p->two)) /* (2) */
p->skip_stat_unmatch_result = 1;

Просмотреть файл

@ -147,9 +147,11 @@ static int estimate_similarity(struct diff_filespec *src,
* is a possible size - we really should have a flag to
* say whether the size is valid or not!)
*/
if (!src->cnt_data && diff_populate_filespec(src, 1))
if (!src->cnt_data &&
diff_populate_filespec(src, CHECK_SIZE_ONLY))
return 0;
if (!dst->cnt_data && diff_populate_filespec(dst, 1))
if (!dst->cnt_data &&
diff_populate_filespec(dst, CHECK_SIZE_ONLY))
return 0;
max_size = ((src->size > dst->size) ? src->size : dst->size);

Просмотреть файл

@ -55,7 +55,9 @@ extern void free_filespec(struct diff_filespec *);
extern void fill_filespec(struct diff_filespec *, const unsigned char *,
int, unsigned short);
extern int diff_populate_filespec(struct diff_filespec *, int);
#define CHECK_SIZE_ONLY 1
#define CHECK_BINARY 2
extern int diff_populate_filespec(struct diff_filespec *, unsigned int);
extern void diff_free_filespec_data(struct diff_filespec *);
extern void diff_free_filespec_blob(struct diff_filespec *);
extern int diff_filespec_is_binary(struct diff_filespec *);

Просмотреть файл

@ -609,6 +609,7 @@ extern try_to_free_t set_try_to_free_routine(try_to_free_t);
extern char *xstrdup(const char *str);
extern void *xmalloc(size_t size);
extern void *xmallocz(size_t size);
extern void *xmallocz_gently(size_t size);
extern void *xmemdupz(const void *data, size_t len);
extern char *xstrndup(const char *str, size_t len);
extern void *xrealloc(void *ptr, size_t size);

Просмотреть файл

@ -1923,7 +1923,9 @@ static void *unpack_compressed_entry(struct packed_git *p,
git_zstream stream;
unsigned char *buffer, *in;
buffer = xmallocz(size);
buffer = xmallocz_gently(size);
if (!buffer)
return NULL;
memset(&stream, 0, sizeof(stream));
stream.next_out = buffer;
stream.avail_out = size + 1;

Просмотреть файл

@ -112,6 +112,20 @@ test_expect_success 'diff --raw' '
git diff --raw HEAD^
'
test_expect_success 'diff --stat' '
git diff --stat HEAD^ HEAD
'
test_expect_success 'diff' '
git diff HEAD^ HEAD >actual &&
grep "Binary files.*differ" actual
'
test_expect_success 'diff --cached' '
git diff --cached HEAD^ >actual &&
grep "Binary files.*differ" actual
'
test_expect_success 'hash-object' '
git hash-object large1
'
@ -163,4 +177,10 @@ test_expect_success 'zip achiving, deflate' '
git archive --format=zip HEAD >/dev/null
'
test_expect_success 'fsck' '
test_must_fail git fsck 2>err &&
n=$(grep "error: attempting to allocate .* over limit" err | wc -l) &&
test "$n" -gt 1
'
test_done

Просмотреть файл

@ -9,17 +9,24 @@ static void do_nothing(size_t size)
static void (*try_to_free_routine)(size_t size) = do_nothing;
static void memory_limit_check(size_t size)
static int memory_limit_check(size_t size, int gentle)
{
static int limit = -1;
if (limit == -1) {
const char *env = getenv("GIT_ALLOC_LIMIT");
limit = env ? atoi(env) * 1024 : 0;
}
if (limit && size > limit)
if (limit && size > limit) {
if (gentle) {
error("attempting to allocate %"PRIuMAX" over limit %d",
(intmax_t)size, limit);
return -1;
} else
die("attempting to allocate %"PRIuMAX" over limit %d",
(intmax_t)size, limit);
}
return 0;
}
try_to_free_t set_try_to_free_routine(try_to_free_t routine)
{
@ -42,11 +49,12 @@ char *xstrdup(const char *str)
return ret;
}
void *xmalloc(size_t size)
static void *do_xmalloc(size_t size, int gentle)
{
void *ret;
memory_limit_check(size);
if (memory_limit_check(size, gentle))
return NULL;
ret = malloc(size);
if (!ret && !size)
ret = malloc(1);
@ -55,9 +63,16 @@ void *xmalloc(size_t size)
ret = malloc(size);
if (!ret && !size)
ret = malloc(1);
if (!ret)
if (!ret) {
if (!gentle)
die("Out of memory, malloc failed (tried to allocate %lu bytes)",
(unsigned long)size);
else {
error("Out of memory, malloc failed (tried to allocate %lu bytes)",
(unsigned long)size);
return NULL;
}
}
}
#ifdef XMALLOC_POISON
memset(ret, 0xA5, size);
@ -65,16 +80,37 @@ void *xmalloc(size_t size)
return ret;
}
void *xmallocz(size_t size)
void *xmalloc(size_t size)
{
return do_xmalloc(size, 0);
}
static void *do_xmallocz(size_t size, int gentle)
{
void *ret;
if (unsigned_add_overflows(size, 1))
if (unsigned_add_overflows(size, 1)) {
if (gentle) {
error("Data too large to fit into virtual memory space.");
return NULL;
} else
die("Data too large to fit into virtual memory space.");
ret = xmalloc(size + 1);
}
ret = do_xmalloc(size + 1, gentle);
if (ret)
((char*)ret)[size] = 0;
return ret;
}
void *xmallocz(size_t size)
{
return do_xmallocz(size, 0);
}
void *xmallocz_gently(size_t size)
{
return do_xmallocz(size, 1);
}
/*
* xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of
* "data" to the allocated memory, zero terminates the allocated memory,
@ -96,7 +132,7 @@ void *xrealloc(void *ptr, size_t size)
{
void *ret;
memory_limit_check(size);
memory_limit_check(size, 0);
ret = realloc(ptr, size);
if (!ret && !size)
ret = realloc(ptr, 1);
@ -115,7 +151,7 @@ void *xcalloc(size_t nmemb, size_t size)
{
void *ret;
memory_limit_check(size * nmemb);
memory_limit_check(size * nmemb, 0);
ret = calloc(nmemb, size);
if (!ret && (!nmemb || !size))
ret = calloc(1, 1);