зеркало из https://github.com/microsoft/git.git
Merge branch 'nd/large-blobs'
Teach a few codepaths to punt (instead of dying) when large blobs that would not fit in core are involved in the operation.

* nd/large-blobs:
  diff: shortcut for diff'ing two binary SHA-1 objects
  diff --stat: mark any file larger than core.bigfilethreshold binary
  diff.c: allow to pass more flags to diff_populate_filespec
  sha1_file.c: do not die failing to malloc in unpack_compressed_entry
  wrapper.c: introduce gentle xmallocz that does not die()
This commit is contained in:
Коммит
bedd3b4b7b
|
@ -499,7 +499,8 @@ core.bigFileThreshold::
|
||||||
Files larger than this size are stored deflated, without
|
Files larger than this size are stored deflated, without
|
||||||
attempting delta compression. Storing large files without
|
attempting delta compression. Storing large files without
|
||||||
delta compression avoids excessive memory usage, at the
|
delta compression avoids excessive memory usage, at the
|
||||||
slight expense of increased disk usage.
|
slight expense of increased disk usage. Additionally files
|
||||||
|
larger than this size are always treated as binary.
|
||||||
+
|
+
|
||||||
Default is 512 MiB on all platforms. This should be reasonable
|
Default is 512 MiB on all platforms. This should be reasonable
|
||||||
for most projects as source code and other text files can still
|
for most projects as source code and other text files can still
|
||||||
|
|
|
@ -440,8 +440,8 @@ Unspecified::
|
||||||
|
|
||||||
A path to which the `diff` attribute is unspecified
|
A path to which the `diff` attribute is unspecified
|
||||||
first gets its contents inspected, and if it looks like
|
first gets its contents inspected, and if it looks like
|
||||||
text, it is treated as text. Otherwise it would
|
text and is smaller than core.bigFileThreshold, it is treated
|
||||||
generate `Binary files differ`.
|
as text. Otherwise it would generate `Binary files differ`.
|
||||||
|
|
||||||
String::
|
String::
|
||||||
|
|
||||||
|
|
52
diff.c
52
diff.c
|
@ -376,7 +376,7 @@ static unsigned long diff_filespec_size(struct diff_filespec *one)
|
||||||
{
|
{
|
||||||
if (!DIFF_FILE_VALID(one))
|
if (!DIFF_FILE_VALID(one))
|
||||||
return 0;
|
return 0;
|
||||||
diff_populate_filespec(one, 1);
|
diff_populate_filespec(one, CHECK_SIZE_ONLY);
|
||||||
return one->size;
|
return one->size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1910,11 +1910,11 @@ static void show_dirstat(struct diff_options *options)
|
||||||
diff_free_filespec_data(p->one);
|
diff_free_filespec_data(p->one);
|
||||||
diff_free_filespec_data(p->two);
|
diff_free_filespec_data(p->two);
|
||||||
} else if (DIFF_FILE_VALID(p->one)) {
|
} else if (DIFF_FILE_VALID(p->one)) {
|
||||||
diff_populate_filespec(p->one, 1);
|
diff_populate_filespec(p->one, CHECK_SIZE_ONLY);
|
||||||
copied = added = 0;
|
copied = added = 0;
|
||||||
diff_free_filespec_data(p->one);
|
diff_free_filespec_data(p->one);
|
||||||
} else if (DIFF_FILE_VALID(p->two)) {
|
} else if (DIFF_FILE_VALID(p->two)) {
|
||||||
diff_populate_filespec(p->two, 1);
|
diff_populate_filespec(p->two, CHECK_SIZE_ONLY);
|
||||||
copied = 0;
|
copied = 0;
|
||||||
added = p->two->size;
|
added = p->two->size;
|
||||||
diff_free_filespec_data(p->two);
|
diff_free_filespec_data(p->two);
|
||||||
|
@ -2188,8 +2188,8 @@ int diff_filespec_is_binary(struct diff_filespec *one)
|
||||||
one->is_binary = one->driver->binary;
|
one->is_binary = one->driver->binary;
|
||||||
else {
|
else {
|
||||||
if (!one->data && DIFF_FILE_VALID(one))
|
if (!one->data && DIFF_FILE_VALID(one))
|
||||||
diff_populate_filespec(one, 0);
|
diff_populate_filespec(one, CHECK_BINARY);
|
||||||
if (one->data)
|
if (one->is_binary == -1 && one->data)
|
||||||
one->is_binary = buffer_is_binary(one->data,
|
one->is_binary = buffer_is_binary(one->data,
|
||||||
one->size);
|
one->size);
|
||||||
if (one->is_binary == -1)
|
if (one->is_binary == -1)
|
||||||
|
@ -2324,6 +2324,19 @@ static void builtin_diff(const char *name_a,
|
||||||
} else if (!DIFF_OPT_TST(o, TEXT) &&
|
} else if (!DIFF_OPT_TST(o, TEXT) &&
|
||||||
( (!textconv_one && diff_filespec_is_binary(one)) ||
|
( (!textconv_one && diff_filespec_is_binary(one)) ||
|
||||||
(!textconv_two && diff_filespec_is_binary(two)) )) {
|
(!textconv_two && diff_filespec_is_binary(two)) )) {
|
||||||
|
if (!one->data && !two->data &&
|
||||||
|
S_ISREG(one->mode) && S_ISREG(two->mode) &&
|
||||||
|
!DIFF_OPT_TST(o, BINARY)) {
|
||||||
|
if (!hashcmp(one->sha1, two->sha1)) {
|
||||||
|
if (must_show_header)
|
||||||
|
fprintf(o->file, "%s", header.buf);
|
||||||
|
goto free_ab_and_return;
|
||||||
|
}
|
||||||
|
fprintf(o->file, "%s", header.buf);
|
||||||
|
fprintf(o->file, "%sBinary files %s and %s differ\n",
|
||||||
|
line_prefix, lbl[0], lbl[1]);
|
||||||
|
goto free_ab_and_return;
|
||||||
|
}
|
||||||
if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
|
if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
|
||||||
die("unable to read files to diff");
|
die("unable to read files to diff");
|
||||||
/* Quite common confusing case */
|
/* Quite common confusing case */
|
||||||
|
@ -2668,8 +2681,9 @@ static int diff_populate_gitlink(struct diff_filespec *s, int size_only)
|
||||||
* grab the data for the blob (or file) for our own in-core comparison.
|
* grab the data for the blob (or file) for our own in-core comparison.
|
||||||
* diff_filespec has data and size fields for this purpose.
|
* diff_filespec has data and size fields for this purpose.
|
||||||
*/
|
*/
|
||||||
int diff_populate_filespec(struct diff_filespec *s, int size_only)
|
int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
|
||||||
{
|
{
|
||||||
|
int size_only = flags & CHECK_SIZE_ONLY;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
/*
|
/*
|
||||||
* demote FAIL to WARN to allow inspecting the situation
|
* demote FAIL to WARN to allow inspecting the situation
|
||||||
|
@ -2724,6 +2738,11 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
|
||||||
}
|
}
|
||||||
if (size_only)
|
if (size_only)
|
||||||
return 0;
|
return 0;
|
||||||
|
if ((flags & CHECK_BINARY) &&
|
||||||
|
s->size > big_file_threshold && s->is_binary == -1) {
|
||||||
|
s->is_binary = 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
fd = open(s->path, O_RDONLY);
|
fd = open(s->path, O_RDONLY);
|
||||||
if (fd < 0)
|
if (fd < 0)
|
||||||
goto err_empty;
|
goto err_empty;
|
||||||
|
@ -2745,16 +2764,21 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
enum object_type type;
|
enum object_type type;
|
||||||
if (size_only) {
|
if (size_only || (flags & CHECK_BINARY)) {
|
||||||
type = sha1_object_info(s->sha1, &s->size);
|
type = sha1_object_info(s->sha1, &s->size);
|
||||||
if (type < 0)
|
if (type < 0)
|
||||||
die("unable to read %s", sha1_to_hex(s->sha1));
|
die("unable to read %s", sha1_to_hex(s->sha1));
|
||||||
} else {
|
if (size_only)
|
||||||
s->data = read_sha1_file(s->sha1, &type, &s->size);
|
return 0;
|
||||||
if (!s->data)
|
if (s->size > big_file_threshold && s->is_binary == -1) {
|
||||||
die("unable to read %s", sha1_to_hex(s->sha1));
|
s->is_binary = 1;
|
||||||
s->should_free = 1;
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
s->data = read_sha1_file(s->sha1, &type, &s->size);
|
||||||
|
if (!s->data)
|
||||||
|
die("unable to read %s", sha1_to_hex(s->sha1));
|
||||||
|
s->should_free = 1;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -4688,8 +4712,8 @@ static int diff_filespec_check_stat_unmatch(struct diff_filepair *p)
|
||||||
!DIFF_FILE_VALID(p->two) ||
|
!DIFF_FILE_VALID(p->two) ||
|
||||||
(p->one->sha1_valid && p->two->sha1_valid) ||
|
(p->one->sha1_valid && p->two->sha1_valid) ||
|
||||||
(p->one->mode != p->two->mode) ||
|
(p->one->mode != p->two->mode) ||
|
||||||
diff_populate_filespec(p->one, 1) ||
|
diff_populate_filespec(p->one, CHECK_SIZE_ONLY) ||
|
||||||
diff_populate_filespec(p->two, 1) ||
|
diff_populate_filespec(p->two, CHECK_SIZE_ONLY) ||
|
||||||
(p->one->size != p->two->size) ||
|
(p->one->size != p->two->size) ||
|
||||||
!diff_filespec_is_identical(p->one, p->two)) /* (2) */
|
!diff_filespec_is_identical(p->one, p->two)) /* (2) */
|
||||||
p->skip_stat_unmatch_result = 1;
|
p->skip_stat_unmatch_result = 1;
|
||||||
|
|
|
@ -147,9 +147,11 @@ static int estimate_similarity(struct diff_filespec *src,
|
||||||
* is a possible size - we really should have a flag to
|
* is a possible size - we really should have a flag to
|
||||||
* say whether the size is valid or not!)
|
* say whether the size is valid or not!)
|
||||||
*/
|
*/
|
||||||
if (!src->cnt_data && diff_populate_filespec(src, 1))
|
if (!src->cnt_data &&
|
||||||
|
diff_populate_filespec(src, CHECK_SIZE_ONLY))
|
||||||
return 0;
|
return 0;
|
||||||
if (!dst->cnt_data && diff_populate_filespec(dst, 1))
|
if (!dst->cnt_data &&
|
||||||
|
diff_populate_filespec(dst, CHECK_SIZE_ONLY))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
max_size = ((src->size > dst->size) ? src->size : dst->size);
|
max_size = ((src->size > dst->size) ? src->size : dst->size);
|
||||||
|
|
|
@ -55,7 +55,9 @@ extern void free_filespec(struct diff_filespec *);
|
||||||
extern void fill_filespec(struct diff_filespec *, const unsigned char *,
|
extern void fill_filespec(struct diff_filespec *, const unsigned char *,
|
||||||
int, unsigned short);
|
int, unsigned short);
|
||||||
|
|
||||||
extern int diff_populate_filespec(struct diff_filespec *, int);
|
#define CHECK_SIZE_ONLY 1
|
||||||
|
#define CHECK_BINARY 2
|
||||||
|
extern int diff_populate_filespec(struct diff_filespec *, unsigned int);
|
||||||
extern void diff_free_filespec_data(struct diff_filespec *);
|
extern void diff_free_filespec_data(struct diff_filespec *);
|
||||||
extern void diff_free_filespec_blob(struct diff_filespec *);
|
extern void diff_free_filespec_blob(struct diff_filespec *);
|
||||||
extern int diff_filespec_is_binary(struct diff_filespec *);
|
extern int diff_filespec_is_binary(struct diff_filespec *);
|
||||||
|
|
|
@ -609,6 +609,7 @@ extern try_to_free_t set_try_to_free_routine(try_to_free_t);
|
||||||
extern char *xstrdup(const char *str);
|
extern char *xstrdup(const char *str);
|
||||||
extern void *xmalloc(size_t size);
|
extern void *xmalloc(size_t size);
|
||||||
extern void *xmallocz(size_t size);
|
extern void *xmallocz(size_t size);
|
||||||
|
extern void *xmallocz_gently(size_t size);
|
||||||
extern void *xmemdupz(const void *data, size_t len);
|
extern void *xmemdupz(const void *data, size_t len);
|
||||||
extern char *xstrndup(const char *str, size_t len);
|
extern char *xstrndup(const char *str, size_t len);
|
||||||
extern void *xrealloc(void *ptr, size_t size);
|
extern void *xrealloc(void *ptr, size_t size);
|
||||||
|
|
|
@ -1923,7 +1923,9 @@ static void *unpack_compressed_entry(struct packed_git *p,
|
||||||
git_zstream stream;
|
git_zstream stream;
|
||||||
unsigned char *buffer, *in;
|
unsigned char *buffer, *in;
|
||||||
|
|
||||||
buffer = xmallocz(size);
|
buffer = xmallocz_gently(size);
|
||||||
|
if (!buffer)
|
||||||
|
return NULL;
|
||||||
memset(&stream, 0, sizeof(stream));
|
memset(&stream, 0, sizeof(stream));
|
||||||
stream.next_out = buffer;
|
stream.next_out = buffer;
|
||||||
stream.avail_out = size + 1;
|
stream.avail_out = size + 1;
|
||||||
|
|
|
@ -112,6 +112,20 @@ test_expect_success 'diff --raw' '
|
||||||
git diff --raw HEAD^
|
git diff --raw HEAD^
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success 'diff --stat' '
|
||||||
|
git diff --stat HEAD^ HEAD
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'diff' '
|
||||||
|
git diff HEAD^ HEAD >actual &&
|
||||||
|
grep "Binary files.*differ" actual
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'diff --cached' '
|
||||||
|
git diff --cached HEAD^ >actual &&
|
||||||
|
grep "Binary files.*differ" actual
|
||||||
|
'
|
||||||
|
|
||||||
test_expect_success 'hash-object' '
|
test_expect_success 'hash-object' '
|
||||||
git hash-object large1
|
git hash-object large1
|
||||||
'
|
'
|
||||||
|
@ -163,4 +177,10 @@ test_expect_success 'zip achiving, deflate' '
|
||||||
git archive --format=zip HEAD >/dev/null
|
git archive --format=zip HEAD >/dev/null
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success 'fsck' '
|
||||||
|
test_must_fail git fsck 2>err &&
|
||||||
|
n=$(grep "error: attempting to allocate .* over limit" err | wc -l) &&
|
||||||
|
test "$n" -gt 1
|
||||||
|
'
|
||||||
|
|
||||||
test_done
|
test_done
|
||||||
|
|
68
wrapper.c
68
wrapper.c
|
@ -9,16 +9,23 @@ static void do_nothing(size_t size)
|
||||||
|
|
||||||
static void (*try_to_free_routine)(size_t size) = do_nothing;
|
static void (*try_to_free_routine)(size_t size) = do_nothing;
|
||||||
|
|
||||||
static void memory_limit_check(size_t size)
|
static int memory_limit_check(size_t size, int gentle)
|
||||||
{
|
{
|
||||||
static int limit = -1;
|
static int limit = -1;
|
||||||
if (limit == -1) {
|
if (limit == -1) {
|
||||||
const char *env = getenv("GIT_ALLOC_LIMIT");
|
const char *env = getenv("GIT_ALLOC_LIMIT");
|
||||||
limit = env ? atoi(env) * 1024 : 0;
|
limit = env ? atoi(env) * 1024 : 0;
|
||||||
}
|
}
|
||||||
if (limit && size > limit)
|
if (limit && size > limit) {
|
||||||
die("attempting to allocate %"PRIuMAX" over limit %d",
|
if (gentle) {
|
||||||
(intmax_t)size, limit);
|
error("attempting to allocate %"PRIuMAX" over limit %d",
|
||||||
|
(intmax_t)size, limit);
|
||||||
|
return -1;
|
||||||
|
} else
|
||||||
|
die("attempting to allocate %"PRIuMAX" over limit %d",
|
||||||
|
(intmax_t)size, limit);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
try_to_free_t set_try_to_free_routine(try_to_free_t routine)
|
try_to_free_t set_try_to_free_routine(try_to_free_t routine)
|
||||||
|
@ -42,11 +49,12 @@ char *xstrdup(const char *str)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
void *xmalloc(size_t size)
|
static void *do_xmalloc(size_t size, int gentle)
|
||||||
{
|
{
|
||||||
void *ret;
|
void *ret;
|
||||||
|
|
||||||
memory_limit_check(size);
|
if (memory_limit_check(size, gentle))
|
||||||
|
return NULL;
|
||||||
ret = malloc(size);
|
ret = malloc(size);
|
||||||
if (!ret && !size)
|
if (!ret && !size)
|
||||||
ret = malloc(1);
|
ret = malloc(1);
|
||||||
|
@ -55,9 +63,16 @@ void *xmalloc(size_t size)
|
||||||
ret = malloc(size);
|
ret = malloc(size);
|
||||||
if (!ret && !size)
|
if (!ret && !size)
|
||||||
ret = malloc(1);
|
ret = malloc(1);
|
||||||
if (!ret)
|
if (!ret) {
|
||||||
die("Out of memory, malloc failed (tried to allocate %lu bytes)",
|
if (!gentle)
|
||||||
(unsigned long)size);
|
die("Out of memory, malloc failed (tried to allocate %lu bytes)",
|
||||||
|
(unsigned long)size);
|
||||||
|
else {
|
||||||
|
error("Out of memory, malloc failed (tried to allocate %lu bytes)",
|
||||||
|
(unsigned long)size);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#ifdef XMALLOC_POISON
|
#ifdef XMALLOC_POISON
|
||||||
memset(ret, 0xA5, size);
|
memset(ret, 0xA5, size);
|
||||||
|
@ -65,16 +80,37 @@ void *xmalloc(size_t size)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
void *xmallocz(size_t size)
|
void *xmalloc(size_t size)
|
||||||
|
{
|
||||||
|
return do_xmalloc(size, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void *do_xmallocz(size_t size, int gentle)
|
||||||
{
|
{
|
||||||
void *ret;
|
void *ret;
|
||||||
if (unsigned_add_overflows(size, 1))
|
if (unsigned_add_overflows(size, 1)) {
|
||||||
die("Data too large to fit into virtual memory space.");
|
if (gentle) {
|
||||||
ret = xmalloc(size + 1);
|
error("Data too large to fit into virtual memory space.");
|
||||||
((char*)ret)[size] = 0;
|
return NULL;
|
||||||
|
} else
|
||||||
|
die("Data too large to fit into virtual memory space.");
|
||||||
|
}
|
||||||
|
ret = do_xmalloc(size + 1, gentle);
|
||||||
|
if (ret)
|
||||||
|
((char*)ret)[size] = 0;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void *xmallocz(size_t size)
|
||||||
|
{
|
||||||
|
return do_xmallocz(size, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void *xmallocz_gently(size_t size)
|
||||||
|
{
|
||||||
|
return do_xmallocz(size, 1);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of
|
* xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of
|
||||||
* "data" to the allocated memory, zero terminates the allocated memory,
|
* "data" to the allocated memory, zero terminates the allocated memory,
|
||||||
|
@ -96,7 +132,7 @@ void *xrealloc(void *ptr, size_t size)
|
||||||
{
|
{
|
||||||
void *ret;
|
void *ret;
|
||||||
|
|
||||||
memory_limit_check(size);
|
memory_limit_check(size, 0);
|
||||||
ret = realloc(ptr, size);
|
ret = realloc(ptr, size);
|
||||||
if (!ret && !size)
|
if (!ret && !size)
|
||||||
ret = realloc(ptr, 1);
|
ret = realloc(ptr, 1);
|
||||||
|
@ -115,7 +151,7 @@ void *xcalloc(size_t nmemb, size_t size)
|
||||||
{
|
{
|
||||||
void *ret;
|
void *ret;
|
||||||
|
|
||||||
memory_limit_check(size * nmemb);
|
memory_limit_check(size * nmemb, 0);
|
||||||
ret = calloc(nmemb, size);
|
ret = calloc(nmemb, size);
|
||||||
if (!ret && (!nmemb || !size))
|
if (!ret && (!nmemb || !size))
|
||||||
ret = calloc(1, 1);
|
ret = calloc(1, 1);
|
||||||
|
|
Загрузка…
Ссылка в новой задаче