зеркало из https://github.com/microsoft/git.git
Merge branch 'sp/stream-clean-filter'
When running a required clean filter, we do not have to mmap the original before feeding the filter. Instead, stream the file contents directly to the filter and process its output.

* sp/stream-clean-filter:
  sha1_file: don't convert off_t to size_t too early to avoid potential die()
  convert: stream from fd to required clean filter to reduce used address space
  copy_fd(): do not close the input file descriptor
  mmap_limit: introduce GIT_MMAP_LIMIT to allow testing expected mmap size
  memory_limit: use git_env_ulong() to parse GIT_ALLOC_LIMIT
  config.c: add git_env_ulong() to parse environment variable
  convert: drop arguments other than 'path' from would_convert_to_git()
This commit is contained in:
Коммит
f0d8900175
1
cache.h
1
cache.h
|
@ -1324,6 +1324,7 @@ extern int git_config_rename_section_in_file(const char *, const char *, const c
|
||||||
extern const char *git_etc_gitconfig(void);
|
extern const char *git_etc_gitconfig(void);
|
||||||
extern int check_repository_format_version(const char *var, const char *value, void *cb);
|
extern int check_repository_format_version(const char *var, const char *value, void *cb);
|
||||||
extern int git_env_bool(const char *, int);
|
extern int git_env_bool(const char *, int);
|
||||||
|
extern unsigned long git_env_ulong(const char *, unsigned long);
|
||||||
extern int git_config_system(void);
|
extern int git_config_system(void);
|
||||||
extern int config_error_nonbool(const char *);
|
extern int config_error_nonbool(const char *);
|
||||||
#if defined(__GNUC__)
|
#if defined(__GNUC__)
|
||||||
|
|
16
config.c
16
config.c
|
@ -1139,12 +1139,28 @@ const char *git_etc_gitconfig(void)
|
||||||
return system_wide;
|
return system_wide;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Parse environment variable 'k' as a boolean (in various
|
||||||
|
* possible spellings); if missing, use the default value 'def'.
|
||||||
|
*/
|
||||||
int git_env_bool(const char *k, int def)
|
int git_env_bool(const char *k, int def)
|
||||||
{
|
{
|
||||||
const char *v = getenv(k);
|
const char *v = getenv(k);
|
||||||
return v ? git_config_bool(k, v) : def;
|
return v ? git_config_bool(k, v) : def;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Parse environment variable 'k' as ulong with possibly a unit
|
||||||
|
* suffix; if missing, use the default value 'val'.
|
||||||
|
*/
|
||||||
|
unsigned long git_env_ulong(const char *k, unsigned long val)
|
||||||
|
{
|
||||||
|
const char *v = getenv(k);
|
||||||
|
if (v && !git_parse_ulong(v, &val))
|
||||||
|
die("failed to parse %s", k);
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
int git_config_system(void)
|
int git_config_system(void)
|
||||||
{
|
{
|
||||||
return !git_env_bool("GIT_CONFIG_NOSYSTEM", 0);
|
return !git_env_bool("GIT_CONFIG_NOSYSTEM", 0);
|
||||||
|
|
55
convert.c
55
convert.c
|
@ -312,11 +312,12 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
|
||||||
struct filter_params {
|
struct filter_params {
|
||||||
const char *src;
|
const char *src;
|
||||||
unsigned long size;
|
unsigned long size;
|
||||||
|
int fd;
|
||||||
const char *cmd;
|
const char *cmd;
|
||||||
const char *path;
|
const char *path;
|
||||||
};
|
};
|
||||||
|
|
||||||
static int filter_buffer(int in, int out, void *data)
|
static int filter_buffer_or_fd(int in, int out, void *data)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Spawn cmd and feed the buffer contents through its stdin.
|
* Spawn cmd and feed the buffer contents through its stdin.
|
||||||
|
@ -354,7 +355,12 @@ static int filter_buffer(int in, int out, void *data)
|
||||||
|
|
||||||
sigchain_push(SIGPIPE, SIG_IGN);
|
sigchain_push(SIGPIPE, SIG_IGN);
|
||||||
|
|
||||||
write_err = (write_in_full(child_process.in, params->src, params->size) < 0);
|
if (params->src) {
|
||||||
|
write_err = (write_in_full(child_process.in, params->src, params->size) < 0);
|
||||||
|
} else {
|
||||||
|
write_err = copy_fd(params->fd, child_process.in);
|
||||||
|
}
|
||||||
|
|
||||||
if (close(child_process.in))
|
if (close(child_process.in))
|
||||||
write_err = 1;
|
write_err = 1;
|
||||||
if (write_err)
|
if (write_err)
|
||||||
|
@ -370,7 +376,7 @@ static int filter_buffer(int in, int out, void *data)
|
||||||
return (write_err || status);
|
return (write_err || status);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int apply_filter(const char *path, const char *src, size_t len,
|
static int apply_filter(const char *path, const char *src, size_t len, int fd,
|
||||||
struct strbuf *dst, const char *cmd)
|
struct strbuf *dst, const char *cmd)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -391,11 +397,12 @@ static int apply_filter(const char *path, const char *src, size_t len,
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
memset(&async, 0, sizeof(async));
|
memset(&async, 0, sizeof(async));
|
||||||
async.proc = filter_buffer;
|
async.proc = filter_buffer_or_fd;
|
||||||
async.data = ¶ms;
|
async.data = ¶ms;
|
||||||
async.out = -1;
|
async.out = -1;
|
||||||
params.src = src;
|
params.src = src;
|
||||||
params.size = len;
|
params.size = len;
|
||||||
|
params.fd = fd;
|
||||||
params.cmd = cmd;
|
params.cmd = cmd;
|
||||||
params.path = path;
|
params.path = path;
|
||||||
|
|
||||||
|
@ -746,6 +753,25 @@ static void convert_attrs(struct conv_attrs *ca, const char *path)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int would_convert_to_git_filter_fd(const char *path)
|
||||||
|
{
|
||||||
|
struct conv_attrs ca;
|
||||||
|
|
||||||
|
convert_attrs(&ca, path);
|
||||||
|
if (!ca.drv)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Apply a filter to an fd only if the filter is required to succeed.
|
||||||
|
* We must die if the filter fails, because the original data before
|
||||||
|
* filtering is not available.
|
||||||
|
*/
|
||||||
|
if (!ca.drv->required)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean);
|
||||||
|
}
|
||||||
|
|
||||||
int convert_to_git(const char *path, const char *src, size_t len,
|
int convert_to_git(const char *path, const char *src, size_t len,
|
||||||
struct strbuf *dst, enum safe_crlf checksafe)
|
struct strbuf *dst, enum safe_crlf checksafe)
|
||||||
{
|
{
|
||||||
|
@ -760,7 +786,7 @@ int convert_to_git(const char *path, const char *src, size_t len,
|
||||||
required = ca.drv->required;
|
required = ca.drv->required;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret |= apply_filter(path, src, len, dst, filter);
|
ret |= apply_filter(path, src, len, -1, dst, filter);
|
||||||
if (!ret && required)
|
if (!ret && required)
|
||||||
die("%s: clean filter '%s' failed", path, ca.drv->name);
|
die("%s: clean filter '%s' failed", path, ca.drv->name);
|
||||||
|
|
||||||
|
@ -777,6 +803,23 @@ int convert_to_git(const char *path, const char *src, size_t len,
|
||||||
return ret | ident_to_git(path, src, len, dst, ca.ident);
|
return ret | ident_to_git(path, src, len, dst, ca.ident);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void convert_to_git_filter_fd(const char *path, int fd, struct strbuf *dst,
|
||||||
|
enum safe_crlf checksafe)
|
||||||
|
{
|
||||||
|
struct conv_attrs ca;
|
||||||
|
convert_attrs(&ca, path);
|
||||||
|
|
||||||
|
assert(ca.drv);
|
||||||
|
assert(ca.drv->clean);
|
||||||
|
|
||||||
|
if (!apply_filter(path, NULL, 0, fd, dst, ca.drv->clean))
|
||||||
|
die("%s: clean filter '%s' failed", path, ca.drv->name);
|
||||||
|
|
||||||
|
ca.crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
|
||||||
|
crlf_to_git(path, dst->buf, dst->len, dst, ca.crlf_action, checksafe);
|
||||||
|
ident_to_git(path, dst->buf, dst->len, dst, ca.ident);
|
||||||
|
}
|
||||||
|
|
||||||
static int convert_to_working_tree_internal(const char *path, const char *src,
|
static int convert_to_working_tree_internal(const char *path, const char *src,
|
||||||
size_t len, struct strbuf *dst,
|
size_t len, struct strbuf *dst,
|
||||||
int normalizing)
|
int normalizing)
|
||||||
|
@ -810,7 +853,7 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ret_filter = apply_filter(path, src, len, dst, filter);
|
ret_filter = apply_filter(path, src, len, -1, dst, filter);
|
||||||
if (!ret_filter && required)
|
if (!ret_filter && required)
|
||||||
die("%s: smudge filter %s failed", path, ca.drv->name);
|
die("%s: smudge filter %s failed", path, ca.drv->name);
|
||||||
|
|
||||||
|
|
10
convert.h
10
convert.h
|
@ -40,11 +40,15 @@ extern int convert_to_working_tree(const char *path, const char *src,
|
||||||
size_t len, struct strbuf *dst);
|
size_t len, struct strbuf *dst);
|
||||||
extern int renormalize_buffer(const char *path, const char *src, size_t len,
|
extern int renormalize_buffer(const char *path, const char *src, size_t len,
|
||||||
struct strbuf *dst);
|
struct strbuf *dst);
|
||||||
static inline int would_convert_to_git(const char *path, const char *src,
|
static inline int would_convert_to_git(const char *path)
|
||||||
size_t len, enum safe_crlf checksafe)
|
|
||||||
{
|
{
|
||||||
return convert_to_git(path, src, len, NULL, checksafe);
|
return convert_to_git(path, NULL, 0, NULL, 0);
|
||||||
}
|
}
|
||||||
|
/* Precondition: would_convert_to_git_filter_fd(path) == true */
|
||||||
|
extern void convert_to_git_filter_fd(const char *path, int fd,
|
||||||
|
struct strbuf *dst,
|
||||||
|
enum safe_crlf checksafe);
|
||||||
|
extern int would_convert_to_git_filter_fd(const char *path);
|
||||||
|
|
||||||
/*****************************************************************
|
/*****************************************************************
|
||||||
*
|
*
|
||||||
|
|
26
copy.c
26
copy.c
|
@ -4,34 +4,17 @@ int copy_fd(int ifd, int ofd)
|
||||||
{
|
{
|
||||||
while (1) {
|
while (1) {
|
||||||
char buffer[8192];
|
char buffer[8192];
|
||||||
char *buf = buffer;
|
|
||||||
ssize_t len = xread(ifd, buffer, sizeof(buffer));
|
ssize_t len = xread(ifd, buffer, sizeof(buffer));
|
||||||
if (!len)
|
if (!len)
|
||||||
break;
|
break;
|
||||||
if (len < 0) {
|
if (len < 0) {
|
||||||
int read_error = errno;
|
|
||||||
close(ifd);
|
|
||||||
return error("copy-fd: read returned %s",
|
return error("copy-fd: read returned %s",
|
||||||
strerror(read_error));
|
strerror(errno));
|
||||||
}
|
|
||||||
while (len) {
|
|
||||||
int written = xwrite(ofd, buf, len);
|
|
||||||
if (written > 0) {
|
|
||||||
buf += written;
|
|
||||||
len -= written;
|
|
||||||
}
|
|
||||||
else if (!written) {
|
|
||||||
close(ifd);
|
|
||||||
return error("copy-fd: write returned 0");
|
|
||||||
} else {
|
|
||||||
int write_error = errno;
|
|
||||||
close(ifd);
|
|
||||||
return error("copy-fd: write returned %s",
|
|
||||||
strerror(write_error));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if (write_in_full(ofd, buffer, len) < 0)
|
||||||
|
return error("copy-fd: write returned %s",
|
||||||
|
strerror(errno));
|
||||||
}
|
}
|
||||||
close(ifd);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,6 +43,7 @@ int copy_file(const char *dst, const char *src, int mode)
|
||||||
return fdo;
|
return fdo;
|
||||||
}
|
}
|
||||||
status = copy_fd(fdi, fdo);
|
status = copy_fd(fdi, fdo);
|
||||||
|
close(fdi);
|
||||||
if (close(fdo) != 0)
|
if (close(fdo) != 0)
|
||||||
return error("%s: close error: %s", dst, strerror(errno));
|
return error("%s: close error: %s", dst, strerror(errno));
|
||||||
|
|
||||||
|
|
|
@ -224,8 +224,11 @@ int hold_lock_file_for_append(struct lock_file *lk, const char *path, int flags)
|
||||||
} else if (copy_fd(orig_fd, fd)) {
|
} else if (copy_fd(orig_fd, fd)) {
|
||||||
if (flags & LOCK_DIE_ON_ERROR)
|
if (flags & LOCK_DIE_ON_ERROR)
|
||||||
exit(128);
|
exit(128);
|
||||||
|
close(orig_fd);
|
||||||
close(fd);
|
close(fd);
|
||||||
return -1;
|
return -1;
|
||||||
|
} else {
|
||||||
|
close(orig_fd);
|
||||||
}
|
}
|
||||||
return fd;
|
return fd;
|
||||||
}
|
}
|
||||||
|
|
60
sha1_file.c
60
sha1_file.c
|
@ -663,10 +663,26 @@ void release_pack_memory(size_t need)
|
||||||
; /* nothing */
|
; /* nothing */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void mmap_limit_check(size_t length)
|
||||||
|
{
|
||||||
|
static size_t limit = 0;
|
||||||
|
if (!limit) {
|
||||||
|
limit = git_env_ulong("GIT_MMAP_LIMIT", 0);
|
||||||
|
if (!limit)
|
||||||
|
limit = SIZE_MAX;
|
||||||
|
}
|
||||||
|
if (length > limit)
|
||||||
|
die("attempting to mmap %"PRIuMAX" over limit %"PRIuMAX,
|
||||||
|
(uintmax_t)length, (uintmax_t)limit);
|
||||||
|
}
|
||||||
|
|
||||||
void *xmmap(void *start, size_t length,
|
void *xmmap(void *start, size_t length,
|
||||||
int prot, int flags, int fd, off_t offset)
|
int prot, int flags, int fd, off_t offset)
|
||||||
{
|
{
|
||||||
void *ret = mmap(start, length, prot, flags, fd, offset);
|
void *ret;
|
||||||
|
|
||||||
|
mmap_limit_check(length);
|
||||||
|
ret = mmap(start, length, prot, flags, fd, offset);
|
||||||
if (ret == MAP_FAILED) {
|
if (ret == MAP_FAILED) {
|
||||||
if (!length)
|
if (!length)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -3076,6 +3092,29 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int index_stream_convert_blob(unsigned char *sha1, int fd,
|
||||||
|
const char *path, unsigned flags)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
const int write_object = flags & HASH_WRITE_OBJECT;
|
||||||
|
struct strbuf sbuf = STRBUF_INIT;
|
||||||
|
|
||||||
|
assert(path);
|
||||||
|
assert(would_convert_to_git_filter_fd(path));
|
||||||
|
|
||||||
|
convert_to_git_filter_fd(path, fd, &sbuf,
|
||||||
|
write_object ? safe_crlf : SAFE_CRLF_FALSE);
|
||||||
|
|
||||||
|
if (write_object)
|
||||||
|
ret = write_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
|
||||||
|
sha1);
|
||||||
|
else
|
||||||
|
ret = hash_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
|
||||||
|
sha1);
|
||||||
|
strbuf_release(&sbuf);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static int index_pipe(unsigned char *sha1, int fd, enum object_type type,
|
static int index_pipe(unsigned char *sha1, int fd, enum object_type type,
|
||||||
const char *path, unsigned flags)
|
const char *path, unsigned flags)
|
||||||
{
|
{
|
||||||
|
@ -3141,15 +3180,22 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
|
||||||
enum object_type type, const char *path, unsigned flags)
|
enum object_type type, const char *path, unsigned flags)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
size_t size = xsize_t(st->st_size);
|
|
||||||
|
|
||||||
if (!S_ISREG(st->st_mode))
|
/*
|
||||||
|
* Call xsize_t() only when needed to avoid potentially unnecessary
|
||||||
|
* die() for large files.
|
||||||
|
*/
|
||||||
|
if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(path))
|
||||||
|
ret = index_stream_convert_blob(sha1, fd, path, flags);
|
||||||
|
else if (!S_ISREG(st->st_mode))
|
||||||
ret = index_pipe(sha1, fd, type, path, flags);
|
ret = index_pipe(sha1, fd, type, path, flags);
|
||||||
else if (size <= big_file_threshold || type != OBJ_BLOB ||
|
else if (st->st_size <= big_file_threshold || type != OBJ_BLOB ||
|
||||||
(path && would_convert_to_git(path, NULL, 0, 0)))
|
(path && would_convert_to_git(path)))
|
||||||
ret = index_core(sha1, fd, size, type, path, flags);
|
ret = index_core(sha1, fd, xsize_t(st->st_size), type, path,
|
||||||
|
flags);
|
||||||
else
|
else
|
||||||
ret = index_stream(sha1, fd, size, type, path, flags);
|
ret = index_stream(sha1, fd, xsize_t(st->st_size), type, path,
|
||||||
|
flags);
|
||||||
close(fd);
|
close(fd);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
@ -153,17 +153,23 @@ test_expect_success 'filter shell-escaped filenames' '
|
||||||
:
|
:
|
||||||
'
|
'
|
||||||
|
|
||||||
test_expect_success 'required filter success' '
|
test_expect_success 'required filter should filter data' '
|
||||||
git config filter.required.smudge cat &&
|
git config filter.required.smudge ./rot13.sh &&
|
||||||
git config filter.required.clean cat &&
|
git config filter.required.clean ./rot13.sh &&
|
||||||
git config filter.required.required true &&
|
git config filter.required.required true &&
|
||||||
|
|
||||||
echo "*.r filter=required" >.gitattributes &&
|
echo "*.r filter=required" >.gitattributes &&
|
||||||
|
|
||||||
echo test >test.r &&
|
cat test.o >test.r &&
|
||||||
git add test.r &&
|
git add test.r &&
|
||||||
|
|
||||||
rm -f test.r &&
|
rm -f test.r &&
|
||||||
git checkout -- test.r
|
git checkout -- test.r &&
|
||||||
|
cmp test.o test.r &&
|
||||||
|
|
||||||
|
./rot13.sh <test.o >expected &&
|
||||||
|
git cat-file blob :test.r >actual &&
|
||||||
|
cmp expected actual
|
||||||
'
|
'
|
||||||
|
|
||||||
test_expect_success 'required filter smudge failure' '
|
test_expect_success 'required filter smudge failure' '
|
||||||
|
@ -190,6 +196,14 @@ test_expect_success 'required filter clean failure' '
|
||||||
test_must_fail git add test.fc
|
test_must_fail git add test.fc
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success 'filtering large input to small output should use little memory' '
|
||||||
|
git config filter.devnull.clean "cat >/dev/null" &&
|
||||||
|
git config filter.devnull.required true &&
|
||||||
|
for i in $(test_seq 1 30); do printf "%1048576d" 1; done >30MB &&
|
||||||
|
echo "30MB filter=devnull" >.gitattributes &&
|
||||||
|
GIT_MMAP_LIMIT=1m GIT_ALLOC_LIMIT=1m git add 30MB
|
||||||
|
'
|
||||||
|
|
||||||
test_expect_success EXPENSIVE 'filter large file' '
|
test_expect_success EXPENSIVE 'filter large file' '
|
||||||
git config filter.largefile.smudge cat &&
|
git config filter.largefile.smudge cat &&
|
||||||
git config filter.largefile.clean cat &&
|
git config filter.largefile.clean cat &&
|
||||||
|
|
|
@ -13,7 +13,7 @@ test_expect_success setup '
|
||||||
echo X | dd of=large2 bs=1k seek=2000 &&
|
echo X | dd of=large2 bs=1k seek=2000 &&
|
||||||
echo X | dd of=large3 bs=1k seek=2000 &&
|
echo X | dd of=large3 bs=1k seek=2000 &&
|
||||||
echo Y | dd of=huge bs=1k seek=2500 &&
|
echo Y | dd of=huge bs=1k seek=2500 &&
|
||||||
GIT_ALLOC_LIMIT=1500 &&
|
GIT_ALLOC_LIMIT=1500k &&
|
||||||
export GIT_ALLOC_LIMIT
|
export GIT_ALLOC_LIMIT
|
||||||
'
|
'
|
||||||
|
|
||||||
|
|
19
wrapper.c
19
wrapper.c
|
@ -11,19 +11,20 @@ static void (*try_to_free_routine)(size_t size) = do_nothing;
|
||||||
|
|
||||||
static int memory_limit_check(size_t size, int gentle)
|
static int memory_limit_check(size_t size, int gentle)
|
||||||
{
|
{
|
||||||
static int limit = -1;
|
static size_t limit = 0;
|
||||||
if (limit == -1) {
|
if (!limit) {
|
||||||
const char *env = getenv("GIT_ALLOC_LIMIT");
|
limit = git_env_ulong("GIT_ALLOC_LIMIT", 0);
|
||||||
limit = env ? atoi(env) * 1024 : 0;
|
if (!limit)
|
||||||
|
limit = SIZE_MAX;
|
||||||
}
|
}
|
||||||
if (limit && size > limit) {
|
if (size > limit) {
|
||||||
if (gentle) {
|
if (gentle) {
|
||||||
error("attempting to allocate %"PRIuMAX" over limit %d",
|
error("attempting to allocate %"PRIuMAX" over limit %"PRIuMAX,
|
||||||
(intmax_t)size, limit);
|
(uintmax_t)size, (uintmax_t)limit);
|
||||||
return -1;
|
return -1;
|
||||||
} else
|
} else
|
||||||
die("attempting to allocate %"PRIuMAX" over limit %d",
|
die("attempting to allocate %"PRIuMAX" over limit %"PRIuMAX,
|
||||||
(intmax_t)size, limit);
|
(uintmax_t)size, (uintmax_t)limit);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Загрузка…
Ссылка в новой задаче