From 7cd17e80579c2bffd6245837175a6e1b12a78045 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:02:54 -0400 Subject: [PATCH 01/70] show-branch: avoid segfault with --reflog of unborn branch When no branch is given to the "--reflog" option, we resolve HEAD to get the default branch. However, if HEAD points to an unborn branch, resolve_ref returns NULL, and we later segfault trying to access it. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/show-branch.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/builtin/show-branch.c b/builtin/show-branch.c index 408ce70307..092b59b0b3 100644 --- a/builtin/show-branch.c +++ b/builtin/show-branch.c @@ -743,6 +743,8 @@ int cmd_show_branch(int ac, const char **av, const char *prefix) fake_av[1] = NULL; av = fake_av; ac = 1; + if (!*av) + die("no branches given, and HEAD is not valid"); } if (ac != 1) die("--reflog option needs one branch name"); From d270d7b7a2d631c3d11315f20bb0cf15e438dafa Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:03:05 -0400 Subject: [PATCH 02/70] mailsplit: fix FILE* leak in split_maildir If we encounter an error while splitting a maildir, we exit the function early, leaking the open filehandle. This isn't a big deal, since we exit the program soon after, but it's easy enough to be careful. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/mailsplit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/builtin/mailsplit.c b/builtin/mailsplit.c index 8e02ea109a..9de06e3cf7 100644 --- a/builtin/mailsplit.c +++ b/builtin/mailsplit.c @@ -150,6 +150,7 @@ static int split_maildir(const char *maildir, const char *dir, { char file[PATH_MAX]; char name[PATH_MAX]; + FILE *f = NULL; int ret = -1; int i; struct string_list list = STRING_LIST_INIT_DUP; @@ -160,7 +161,6 @@ static int split_maildir(const char *maildir, const char *dir, goto out; for (i = 0; i < list.nr; i++) { - FILE *f; snprintf(file, sizeof(file), "%s/%s", maildir, list.items[i].string); f = fopen(file, "r"); if (!f) { @@ -177,10 +177,13 @@ static int split_maildir(const char *maildir, const char *dir, split_one(f, name, 1); fclose(f); + f = NULL; } ret = skip; out: + if (f) + fclose(f); string_list_clear(&list, 1); return ret; } From 108332c7a04d309cfed872350ab5c8b8e198f944 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:03:49 -0400 Subject: [PATCH 03/70] archive-tar: fix minor indentation violation This looks like a simple omission from 8539070 (archive-tar: unindent write_tar_entry by one level, 2012-05-03). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- archive-tar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archive-tar.c b/archive-tar.c index 0d1e6bd754..b6b30bb577 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -233,7 +233,7 @@ static int write_tar_entry(struct archiver_args *args, size_t rest = pathlen - plen - 1; if (plen > 0 && rest <= sizeof(header.name)) { memcpy(header.prefix, path, plen); - memcpy(header.name, path + plen + 1, rest); + memcpy(header.name, path + plen + 1, rest); } else { sprintf(header.name, "%s.data", sha1_to_hex(sha1)); From fbe85e73ce425b25c15f0b1f3900a9bf895a9793 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:05:30 -0400 Subject: [PATCH 04/70] fsck: don't fsck alternates for connectivity-only check Commit 02976bf (fsck: introduce `git fsck --connectivity-only`, 2015-06-22) recently gave fsck an option to perform only a subset of the checks, by skipping the fsck_object_dir() call. However, it does so only for the local object directory, and we still do expensive checks on any alternate repos. We should skip them in this case, too. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fsck.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 079470342f..46c7235180 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -678,16 +678,17 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) git_config(fsck_config, NULL); fsck_head_link(); - if (!connectivity_only) + if (!connectivity_only) { fsck_object_dir(get_object_directory()); - prepare_alt_odb(); - for (alt = alt_odb_list; alt; alt = alt->next) { - char namebuf[PATH_MAX]; - int namelen = alt->name - alt->base; - memcpy(namebuf, alt->base, namelen); - namebuf[namelen - 1] = 0; - fsck_object_dir(namebuf); + prepare_alt_odb(); + for (alt = alt_odb_list; alt; alt = alt->next) { + char namebuf[PATH_MAX]; + int namelen = alt->name - alt->base; + memcpy(namebuf, alt->base, namelen); + namebuf[namelen - 1] = 0; + fsck_object_dir(namebuf); + } } if (check_full) { From 7b03c89ebd10396ac7569f0c8c4fa0b4efd4f7ed Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:05:37 -0400 Subject: [PATCH 05/70] add xsnprintf helper function There are a number of places in the code where we call sprintf(), with the assumption that the output will fit into the buffer. In many cases this is true (e.g., formatting a number into a large buffer), but it is hard to tell immediately from looking at the code. It would be nice if we had some run-time check to make sure that our assumption is correct (and to communicate to readers of the code that we are not blindly calling sprintf, but have actually thought about this case). This patch introduces xsnprintf, which behaves just like snprintf, except that it dies whenever the output is truncated. This acts as a sort of assert() for these cases, which can help find places where the assumption is violated (as opposed to truncating and proceeding, which may just silently give a wrong answer). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- git-compat-util.h | 3 +++ wrapper.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/git-compat-util.h b/git-compat-util.h index f649e81f11..348b9dcc1c 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -744,6 +744,9 @@ static inline size_t xsize_t(off_t len) return (size_t)len; } +__attribute__((format (printf, 3, 4))) +extern int xsnprintf(char *dst, size_t max, const char *fmt, ...); + /* in ctype.c, for kwset users */ extern const unsigned char tolower_trans_tbl[256]; diff --git a/wrapper.c b/wrapper.c index 0e22d43814..6fcaa4dc62 100644 --- a/wrapper.c +++ b/wrapper.c @@ -621,6 +621,22 @@ char *xgetcwd(void) return strbuf_detach(&sb, NULL); } +int xsnprintf(char *dst, size_t max, const char *fmt, ...) +{ + va_list ap; + int len; + + va_start(ap, fmt); + len = vsnprintf(dst, max, fmt, ap); + va_end(ap); + + if (len < 0) + die("BUG: your snprintf is broken"); + if (len >= max) + die("BUG: attempt to snprintf into too-small buffer"); + return len; +} + static int write_file_v(const char *path, int fatal, const char *fmt, va_list params) { From bb3788cebb814aa003941abcf484da872aa61412 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:05:40 -0400 Subject: [PATCH 06/70] add git_path_buf helper function If you have a function that uses git_path a lot, but would prefer to avoid the static buffers, it's useful to keep a single scratch buffer locally and reuse it for each call. You used to be able to do this with git_snpath: char buf[PATH_MAX]; foo(git_snpath(buf, sizeof(buf), "foo")); bar(git_snpath(buf, sizeof(buf), "bar")); but since 1a83c24, git_snpath has been replaced with strbuf_git_path. This is good, because it removes the arbitrary PATH_MAX limit. But using strbuf_git_path is more awkward for two reasons: 1. It adds to the buffer, rather than replacing it. This is consistent with other strbuf functions, but makes reuse of a single buffer more tedious. 2. It doesn't return the buffer, so you can't format as part of a function's arguments. The new git_path_buf solves both of these, so you can use it like: struct strbuf buf = STRBUF_INIT; foo(git_path_buf(&buf, "foo")); bar(git_path_buf(&buf, "bar")); strbuf_release(&buf); Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- cache.h | 2 ++ path.c | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/cache.h b/cache.h index 79066e57dc..e231e47121 100644 --- a/cache.h +++ b/cache.h @@ -723,6 +723,8 @@ extern char *mksnpath(char *buf, size_t n, const char *fmt, ...) __attribute__((format (printf, 3, 4))); extern void strbuf_git_path(struct strbuf *sb, const char *fmt, ...) __attribute__((format (printf, 2, 3))); +extern char *git_path_buf(struct strbuf *buf, const char *fmt, ...) + __attribute__((format (printf, 2, 3))); extern void strbuf_git_path_submodule(struct strbuf *sb, const char *path, const char *fmt, ...) __attribute__((format (printf, 3, 4))); diff --git a/path.c b/path.c index 95acbafa68..46a4d2714b 100644 --- a/path.c +++ b/path.c @@ -175,6 +175,16 @@ static void do_git_path(struct strbuf *buf, const char *fmt, va_list args) strbuf_cleanup_path(buf); } +char *git_path_buf(struct strbuf *buf, const char *fmt, ...) +{ + va_list args; + strbuf_reset(buf); + va_start(args, fmt); + do_git_path(buf, fmt, args); + va_end(args); + return buf->buf; +} + void strbuf_git_path(struct strbuf *sb, const char *fmt, ...) { va_list args; From 399ad553ce87fca77a9bc5a0e734a361a9e8a5a3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:05:43 -0400 Subject: [PATCH 07/70] strbuf: make strbuf_complete_line more generic The strbuf_complete_line function makes sure that a buffer ends in a newline. But we may want to do this for any character (e.g., "/" on the end of a path). Let's factor out a generic version, and keep strbuf_complete_line as a thin wrapper. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- strbuf.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/strbuf.h b/strbuf.h index aef2794651..43f27c3a69 100644 --- a/strbuf.h +++ b/strbuf.h @@ -491,10 +491,21 @@ extern void strbuf_add_lines(struct strbuf *sb, const char *prefix, const char * */ extern void strbuf_addstr_xml_quoted(struct strbuf *sb, const char *s); +/** + * "Complete" the contents of `sb` by ensuring that either it ends with the + * character `term`, or it is empty. This can be used, for example, + * to ensure that text ends with a newline, but without creating an empty + * blank line if there is no content in the first place. + */ +static inline void strbuf_complete(struct strbuf *sb, char term) +{ + if (sb->len && sb->buf[sb->len - 1] != term) + strbuf_addch(sb, term); +} + static inline void strbuf_complete_line(struct strbuf *sb) { - if (sb->len && sb->buf[sb->len - 1] != '\n') - strbuf_addch(sb, '\n'); + strbuf_complete(sb, '\n'); } extern int strbuf_branchname(struct strbuf *sb, const char *name); From af49c6d0918bf04aad89bd885a4eef5767a33d0e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:05:45 -0400 Subject: [PATCH 08/70] add reentrant variants of sha1_to_hex and find_unique_abbrev The sha1_to_hex and find_unique_abbrev functions always write into reusable static buffers. There are a few problems with this: - future calls overwrite our result. This is especially annoying with find_unique_abbrev, which does not have a ring of buffers, so you cannot even printf() a result that has two abbreviated sha1s. - if you want to put the result into another buffer, we often strcpy, which looks suspicious when auditing for overflows. This patch introduces sha1_to_hex_r and find_unique_abbrev_r, which write into a user-provided buffer. Of course this is just punting on the overflow-auditing, as the buffer obviously needs to be GIT_SHA1_HEXSZ + 1 bytes. But it is much easier to audit, since that is a well-known size. We retain the non-reentrant forms, which just become thin wrappers around the reentrant ones. This patch also adds a strbuf variant of find_unique_abbrev, which will be handy in later patches. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- cache.h | 31 ++++++++++++++++++++++++++++++- hex.c | 13 +++++++++---- sha1_name.c | 16 +++++++++++----- strbuf.c | 9 +++++++++ strbuf.h | 8 ++++++++ 5 files changed, 67 insertions(+), 10 deletions(-) diff --git a/cache.h b/cache.h index e231e47121..030b880257 100644 --- a/cache.h +++ b/cache.h @@ -785,7 +785,24 @@ extern char *sha1_pack_name(const unsigned char *sha1); */ extern char *sha1_pack_index_name(const unsigned char *sha1); -extern const char *find_unique_abbrev(const unsigned char *sha1, int); +/* + * Return an abbreviated sha1 unique within this repository's object database. + * The result will be at least `len` characters long, and will be NUL + * terminated. + * + * The non-`_r` version returns a static buffer which will be overwritten by + * subsequent calls. + * + * The `_r` variant writes to a buffer supplied by the caller, which must be at + * least `GIT_SHA1_HEXSZ + 1` bytes. The return value is the number of bytes + * written (excluding the NUL terminator). + * + * Note that while this version avoids the static buffer, it is not fully + * reentrant, as it calls into other non-reentrant git code. + */ +extern const char *find_unique_abbrev(const unsigned char *sha1, int len); +extern int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len); + extern const unsigned char null_sha1[GIT_SHA1_RAWSZ]; static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2) @@ -1067,6 +1084,18 @@ extern int for_each_abbrev(const char *prefix, each_abbrev_fn, void *); extern int get_sha1_hex(const char *hex, unsigned char *sha1); extern int get_oid_hex(const char *hex, struct object_id *sha1); +/* + * Convert a binary sha1 to its hex equivalent. The `_r` variant is reentrant, + * and writes the NUL-terminated output to the buffer `out`, which must be at + * least `GIT_SHA1_HEXSZ + 1` bytes, and returns a pointer to out for + * convenience. + * + * The non-`_r` variant returns a static buffer, but uses a ring of 4 + * buffers, making it safe to make multiple calls for a single statement, like: + * + * printf("%s -> %s", sha1_to_hex(one), sha1_to_hex(two)); + */ +extern char *sha1_to_hex_r(char *out, const unsigned char *sha1); extern char *sha1_to_hex(const unsigned char *sha1); /* static buffer result! */ extern char *oid_to_hex(const struct object_id *oid); /* same static buffer as sha1_to_hex */ diff --git a/hex.c b/hex.c index 899b74a08c..0519f853b2 100644 --- a/hex.c +++ b/hex.c @@ -61,12 +61,10 @@ int get_oid_hex(const char *hex, struct object_id *oid) return get_sha1_hex(hex, oid->hash); } -char *sha1_to_hex(const unsigned char *sha1) +char *sha1_to_hex_r(char *buffer, const unsigned char *sha1) { - static int bufno; - static char hexbuffer[4][GIT_SHA1_HEXSZ + 1]; static const char hex[] = "0123456789abcdef"; - char *buffer = hexbuffer[3 & ++bufno], *buf = buffer; + char *buf = buffer; int i; for (i = 0; i < GIT_SHA1_RAWSZ; i++) { @@ -79,6 +77,13 @@ char *sha1_to_hex(const unsigned char *sha1) return buffer; } +char *sha1_to_hex(const unsigned char *sha1) +{ + static int bufno; + static char hexbuffer[4][GIT_SHA1_HEXSZ + 1]; + return sha1_to_hex_r(hexbuffer[3 & ++bufno], sha1); +} + char *oid_to_hex(const struct object_id *oid) { return sha1_to_hex(oid->hash); diff --git a/sha1_name.c b/sha1_name.c index da6874c15e..c58b4771c0 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -368,14 +368,13 @@ int for_each_abbrev(const char *prefix, each_abbrev_fn fn, void *cb_data) return ds.ambiguous; } -const char *find_unique_abbrev(const unsigned char *sha1, int len) +int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len) { int status, exists; - static char hex[41]; - memcpy(hex, sha1_to_hex(sha1), 40); + sha1_to_hex_r(hex, sha1); if (len == 40 || !len) - return hex; + return 40; exists = has_sha1_file(sha1); while (len < 40) { unsigned char sha1_ret[20]; @@ -384,10 +383,17 @@ const char *find_unique_abbrev(const unsigned char *sha1, int len) ? !status : status == SHORT_NAME_NOT_FOUND) { hex[len] = 0; - return hex; + return len; } len++; } + return len; +} + +const char *find_unique_abbrev(const unsigned char *sha1, int len) +{ + static char hex[GIT_SHA1_HEXSZ + 1]; + find_unique_abbrev_r(hex, sha1, len); return hex; } diff --git a/strbuf.c b/strbuf.c index 29df55b1b0..f3c44fb3b3 100644 --- a/strbuf.c +++ b/strbuf.c @@ -743,3 +743,12 @@ void strbuf_addftime(struct strbuf *sb, const char *fmt, const struct tm *tm) } strbuf_setlen(sb, sb->len + len); } + +void strbuf_add_unique_abbrev(struct strbuf *sb, const unsigned char *sha1, + int abbrev_len) +{ + int r; + strbuf_grow(sb, GIT_SHA1_HEXSZ + 1); + r = find_unique_abbrev_r(sb->buf + sb->len, sha1, abbrev_len); + strbuf_setlen(sb, sb->len + r); +} diff --git a/strbuf.h b/strbuf.h index 43f27c3a69..0f9c8a72ba 100644 --- a/strbuf.h +++ b/strbuf.h @@ -474,6 +474,14 @@ static inline struct strbuf **strbuf_split(const struct strbuf *sb, */ extern void strbuf_list_free(struct strbuf **); +/** + * Add the abbreviation, as generated by find_unique_abbrev, of `sha1` to + * the strbuf `sb`. + */ +extern void strbuf_add_unique_abbrev(struct strbuf *sb, + const unsigned char *sha1, + int abbrev_len); + /** * Launch the user preferred editor to edit a file and fill the buffer * with the file's contents upon the user completing their editing. The From c1fd0809174a31edd17e97c1161e01907f41a4fc Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:05:48 -0400 Subject: [PATCH 09/70] fsck: use strbuf to generate alternate directories When fsck-ing alternates, we make a copy of the alternate directory in a fixed PATH_MAX buffer. We memcpy directly, without any check whether we are overflowing the buffer. This is OK if PATH_MAX is a true representation of the maximum path on the system, because any path here will have already been vetted by the alternates subsystem. But that is not true on every system, so we should be more careful. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fsck.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 46c7235180..a019f4a240 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -683,11 +683,12 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) prepare_alt_odb(); for (alt = alt_odb_list; alt; alt = alt->next) { - char namebuf[PATH_MAX]; - int namelen = alt->name - alt->base; - memcpy(namebuf, alt->base, namelen); - namebuf[namelen - 1] = 0; - fsck_object_dir(namebuf); + /* directory name, minus trailing slash */ + size_t namelen = alt->name - alt->base - 1; + struct strbuf name = STRBUF_INIT; + strbuf_add(&name, alt->base, namelen); + fsck_object_dir(name.buf); + strbuf_release(&name); } } From 1d895f194ff612057989f477dc106aa1c7ac2016 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:05:51 -0400 Subject: [PATCH 10/70] mailsplit: make PATH_MAX buffers dynamic There are several PATH_MAX-sized buffers in mailsplit, along with some questionable uses of sprintf. These are not really of security interest, as local mailsplit pathnames are not typically under control of an attacker, and you could generally only overflow a few numbers at the end of a path that approaches PATH_MAX (a longer path would choke mailsplit long before). But it does not hurt to be careful, and as a bonus we lift some limits for systems with too-small PATH_MAX varibles. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/mailsplit.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/builtin/mailsplit.c b/builtin/mailsplit.c index 9de06e3cf7..104277acc4 100644 --- a/builtin/mailsplit.c +++ b/builtin/mailsplit.c @@ -98,30 +98,37 @@ static int populate_maildir_list(struct string_list *list, const char *path) { DIR *dir; struct dirent *dent; - char name[PATH_MAX]; + char *name = NULL; char *subs[] = { "cur", "new", NULL }; char **sub; + int ret = -1; for (sub = subs; *sub; ++sub) { - snprintf(name, sizeof(name), "%s/%s", path, *sub); + free(name); + name = xstrfmt("%s/%s", path, *sub); if ((dir = opendir(name)) == NULL) { if (errno == ENOENT) continue; error("cannot opendir %s (%s)", name, strerror(errno)); - return -1; + goto out; } while ((dent = readdir(dir)) != NULL) { if (dent->d_name[0] == '.') continue; - snprintf(name, sizeof(name), "%s/%s", *sub, dent->d_name); + free(name); + name = xstrfmt("%s/%s", *sub, dent->d_name); string_list_insert(list, name); } closedir(dir); } - return 0; + ret = 0; + +out: + free(name); + return ret; } static int maildir_filename_cmp(const char *a, const char *b) @@ -148,8 +155,7 @@ static int maildir_filename_cmp(const char *a, const char *b) static int split_maildir(const char *maildir, const char *dir, int nr_prec, int skip) { - char file[PATH_MAX]; - char name[PATH_MAX]; + char *file = NULL; FILE *f = NULL; int ret = -1; int i; @@ -161,7 +167,11 @@ static int split_maildir(const char *maildir, const char *dir, goto out; for (i = 0; i < list.nr; i++) { - snprintf(file, sizeof(file), "%s/%s", maildir, list.items[i].string); + char *name; + + free(file); + file = xstrfmt("%s/%s", maildir, list.items[i].string); + f = fopen(file, "r"); if (!f) { error("cannot open mail %s (%s)", file, strerror(errno)); @@ -173,8 +183,9 @@ static int split_maildir(const char *maildir, const char *dir, goto out; } - sprintf(name, "%s/%0*d", dir, nr_prec, ++skip); + name = xstrfmt("%s/%0*d", dir, nr_prec, ++skip); split_one(f, name, 1); + free(name); fclose(f); f = NULL; @@ -184,6 +195,7 @@ static int split_maildir(const char *maildir, const char *dir, out: if (f) fclose(f); + free(file); string_list_clear(&list, 1); return ret; } @@ -191,7 +203,6 @@ out: static int split_mbox(const char *file, const char *dir, int allow_bare, int nr_prec, int skip) { - char name[PATH_MAX]; int ret = -1; int peek; @@ -218,8 +229,9 @@ static int split_mbox(const char *file, const char *dir, int allow_bare, } while (!file_done) { - sprintf(name, "%s/%0*d", dir, nr_prec, ++skip); + char *name = xstrfmt("%s/%0*d", dir, nr_prec, ++skip); file_done = split_one(f, name, allow_bare); + free(name); } if (f != stdin) From 0bb443fdd2518600f88434e2aad8f515546ee707 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:05:54 -0400 Subject: [PATCH 11/70] trace: use strbuf for quote_crnl output When we output GIT_TRACE_SETUP paths, we quote any meta-characters. But our buffer to hold the result is only PATH_MAX bytes, and we could double the size of the input path (if every character needs quoting). We could use a 2*PATH_MAX buffer, if we assume the input will never be more than PATH_MAX. But it's easier still to just switch to a strbuf and not worry about whether the input can exceed PATH_MAX or not. The original copied the "p2" pointer to "p1", advancing both. Since this gets rid of "p1", let's also drop "p2", whose name is now confusing. We can just advance the original "path" pointer. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- trace.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/trace.c b/trace.c index 7393926ebc..4aeea60973 100644 --- a/trace.c +++ b/trace.c @@ -277,25 +277,24 @@ void trace_performance_fl(const char *file, int line, uint64_t nanos, static const char *quote_crnl(const char *path) { - static char new_path[PATH_MAX]; - const char *p2 = path; - char *p1 = new_path; + static struct strbuf new_path = STRBUF_INIT; if (!path) return NULL; - while (*p2) { - switch (*p2) { - case '\\': *p1++ = '\\'; *p1++ = '\\'; break; - case '\n': *p1++ = '\\'; *p1++ = 'n'; break; - case '\r': *p1++ = '\\'; *p1++ = 'r'; break; + strbuf_reset(&new_path); + + while (*path) { + switch (*path) { + case '\\': strbuf_addstr(&new_path, "\\\\"); break; + case '\n': strbuf_addstr(&new_path, "\\n"); break; + case '\r': strbuf_addstr(&new_path, "\\r"); break; default: - *p1++ = *p2; + strbuf_addch(&new_path, *path); } - p2++; + path++; } - *p1 = '\0'; - return new_path; + return new_path.buf; } /* FIXME: move prefix to startup_info struct and get rid of this arg */ From 3131977de1476185209d4d56a0494f5b30ee5557 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:05:57 -0400 Subject: [PATCH 12/70] progress: store throughput display in a strbuf Coverity noticed that we strncpy() into a fixed-size buffer without making sure that it actually ended up NUL-terminated. This is unlikely to be a bug in practice, since throughput strings rarely hit 32 characters, but it would be nice to clean it up. The most obvious way to do so is to add a NUL-terminator. But instead, this patch switches the fixed-size buffer out for a strbuf. At first glance this seems much less efficient, until we realize that filling in the fixed-size buffer is done by writing into a strbuf and copying the result! By writing straight to the buffer, we actually end up more efficient: 1. We avoid an extra copy of the bytes. 2. Rather than malloc/free each time progress is shown, we can strbuf_reset and use the same buffer each time. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- progress.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/progress.c b/progress.c index 2e31bec60f..a3efcfd34c 100644 --- a/progress.c +++ b/progress.c @@ -25,7 +25,7 @@ struct throughput { unsigned int last_bytes[TP_IDX_MAX]; unsigned int last_misecs[TP_IDX_MAX]; unsigned int idx; - char display[32]; + struct strbuf display; }; struct progress { @@ -98,7 +98,7 @@ static int display(struct progress *progress, unsigned n, const char *done) } progress->last_value = n; - tp = (progress->throughput) ? progress->throughput->display : ""; + tp = (progress->throughput) ? progress->throughput->display.buf : ""; eol = done ? done : " \r"; if (progress->total) { unsigned percent = n * 100 / progress->total; @@ -129,6 +129,7 @@ static int display(struct progress *progress, unsigned n, const char *done) static void throughput_string(struct strbuf *buf, off_t total, unsigned int rate) { + strbuf_reset(buf); strbuf_addstr(buf, ", "); strbuf_humanise_bytes(buf, total); strbuf_addstr(buf, " | "); @@ -141,7 +142,6 @@ void display_throughput(struct progress *progress, off_t total) struct throughput *tp; uint64_t now_ns; unsigned int misecs, count, rate; - struct strbuf buf = STRBUF_INIT; if (!progress) return; @@ -154,6 +154,7 @@ void display_throughput(struct progress *progress, off_t total) if (tp) { tp->prev_total = tp->curr_total = total; tp->prev_ns = now_ns; + strbuf_init(&tp->display, 0); } return; } @@ -193,9 +194,7 @@ void display_throughput(struct progress *progress, off_t total) tp->last_misecs[tp->idx] = misecs; tp->idx = (tp->idx + 1) % TP_IDX_MAX; - throughput_string(&buf, total, rate); - strncpy(tp->display, buf.buf, sizeof(tp->display)); - strbuf_release(&buf); + throughput_string(&tp->display, total, rate); if (progress->last_value != -1 && progress_update) display(progress, progress->last_value, NULL); } @@ -250,12 +249,9 @@ void stop_progress_msg(struct progress **p_progress, const char *msg) bufp = (len < sizeof(buf)) ? buf : xmalloc(len + 1); if (tp) { - struct strbuf strbuf = STRBUF_INIT; unsigned int rate = !tp->avg_misecs ? 0 : tp->avg_bytes / tp->avg_misecs; - throughput_string(&strbuf, tp->curr_total, rate); - strncpy(tp->display, strbuf.buf, sizeof(tp->display)); - strbuf_release(&strbuf); + throughput_string(&tp->display, tp->curr_total, rate); } progress_update = 1; sprintf(bufp, ", %s.\n", msg); @@ -264,6 +260,8 @@ void stop_progress_msg(struct progress **p_progress, const char *msg) free(bufp); } clear_progress_signal(); + if (progress->throughput) + strbuf_release(&progress->throughput->display); free(progress->throughput); free(progress); } From 04724222d5aad4da48674e69aab715273f05dded Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:06:03 -0400 Subject: [PATCH 13/70] test-dump-cache-tree: avoid overflow of cache-tree name When dumping a cache-tree, we sprintf sub-tree names directly into a fixed-size buffer, which can overflow. We can trivially fix this by converting to xsnprintf to at least notice and die. This probably should handle arbitrary-sized names, but there's not much point. It's used only by the test scripts, so the trivial fix is enough. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- test-dump-cache-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test-dump-cache-tree.c b/test-dump-cache-tree.c index 54c0872fcb..bb53c0aa65 100644 --- a/test-dump-cache-tree.c +++ b/test-dump-cache-tree.c @@ -47,7 +47,7 @@ static int dump_cache_tree(struct cache_tree *it, struct cache_tree_sub *rdwn; rdwn = cache_tree_sub(ref, down->name); - sprintf(path, "%s%.*s/", pfx, down->namelen, down->name); + xsnprintf(path, sizeof(path), "%s%.*s/", pfx, down->namelen, down->name); if (dump_cache_tree(down->cache_tree, rdwn->cache_tree, path)) errs = 1; } From db85a8a9c2fb492d3cd528dbbcc52075c607cf79 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:06:06 -0400 Subject: [PATCH 14/70] compat/inet_ntop: fix off-by-one in inet_ntop4 Our compat inet_ntop4 function writes to a temporary buffer with snprintf, and then uses strcpy to put the result into the final "dst" buffer. We check the return value of snprintf against the size of "dst", but fail to account for the NUL terminator. As a result, we may overflow "dst" with a single NUL. In practice, this doesn't happen because the output of inet_ntop is limited, and we provide buffers that are way oversized. We can fix the off-by-one check easily, but while we are here let's also use strlcpy for increased safety, just in case there are other bugs lurking. As a side note, this compat code seems to be BSD-derived. Searching for "vixie inet_ntop" turns up NetBSD's latest version of the same code, which has an identical fix (and switches to strlcpy, too!). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- compat/inet_ntop.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compat/inet_ntop.c b/compat/inet_ntop.c index 90b7cc45f3..68307262be 100644 --- a/compat/inet_ntop.c +++ b/compat/inet_ntop.c @@ -53,11 +53,11 @@ inet_ntop4(const u_char *src, char *dst, size_t size) nprinted = snprintf(tmp, sizeof(tmp), fmt, src[0], src[1], src[2], src[3]); if (nprinted < 0) return (NULL); /* we assume "errno" was set by "snprintf()" */ - if ((size_t)nprinted > size) { + if ((size_t)nprinted >= size) { errno = ENOSPC; return (NULL); } - strcpy(dst, tmp); + strlcpy(dst, tmp, size); return (dst); } @@ -154,7 +154,7 @@ inet_ntop6(const u_char *src, char *dst, size_t size) errno = ENOSPC; return (NULL); } - strcpy(dst, tmp); + strlcpy(dst, tmp, size); return (dst); } #endif From 5096d4909f9b13c7a650d9dbb7c9702ea7413566 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:06:08 -0400 Subject: [PATCH 15/70] convert trivial sprintf / strcpy calls to xsnprintf We sometimes sprintf into fixed-size buffers when we know that the buffer is large enough to fit the input (either because it's a constant, or because it's numeric input that is bounded in size). Likewise with strcpy of constant strings. However, these sites make it hard to audit sprintf and strcpy calls for buffer overflows, as a reader has to cross-reference the size of the array with the input. Let's use xsnprintf instead, which communicates to a reader that we don't expect this to overflow (and catches the mistake in case we do). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- archive-tar.c | 2 +- builtin/gc.c | 2 +- builtin/init-db.c | 11 ++++++----- builtin/ls-tree.c | 9 +++++---- builtin/merge-index.c | 2 +- builtin/merge-recursive.c | 2 +- builtin/read-tree.c | 2 +- builtin/unpack-file.c | 2 +- compat/mingw.c | 8 +++++--- compat/winansi.c | 2 +- connect.c | 2 +- convert.c | 3 ++- daemon.c | 4 ++-- diff.c | 12 ++++++------ http-push.c | 2 +- http.c | 6 +++--- ll-merge.c | 12 ++++++------ refs.c | 8 ++++---- sideband.c | 4 ++-- strbuf.c | 4 ++-- 20 files changed, 52 insertions(+), 47 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index b6b30bb577..d543f93fc9 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -301,7 +301,7 @@ static int write_global_extended_header(struct archiver_args *args) memset(&header, 0, sizeof(header)); *header.typeflag = TYPEFLAG_GLOBAL_HEADER; mode = 0100666; - strcpy(header.name, "pax_global_header"); + xsnprintf(header.name, sizeof(header.name), "pax_global_header"); prepare_header(args, &header, mode, ext_header.len); write_blocked(&header, sizeof(header)); write_blocked(ext_header.buf, ext_header.len); diff --git a/builtin/gc.c b/builtin/gc.c index 0ad8d30b56..57584bc99f 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -194,7 +194,7 @@ static const char *lock_repo_for_gc(int force, pid_t* ret_pid) return NULL; if (gethostname(my_host, sizeof(my_host))) - strcpy(my_host, "unknown"); + xsnprintf(my_host, sizeof(my_host), "unknown"); pidfile_path = git_pathdup("gc.pid"); fd = hold_lock_file_for_update(&lock, pidfile_path, diff --git a/builtin/init-db.c b/builtin/init-db.c index 69323e186c..e7d0e31f46 100644 --- a/builtin/init-db.c +++ b/builtin/init-db.c @@ -262,7 +262,8 @@ static int create_default_files(const char *template_path) } /* This forces creation of new config file */ - sprintf(repo_version_string, "%d", GIT_REPO_VERSION); + xsnprintf(repo_version_string, sizeof(repo_version_string), + "%d", GIT_REPO_VERSION); git_config_set("core.repositoryformatversion", repo_version_string); path[len] = 0; @@ -414,13 +415,13 @@ int init_db(const char *template_dir, unsigned int flags) */ if (shared_repository < 0) /* force to the mode value */ - sprintf(buf, "0%o", -shared_repository); + xsnprintf(buf, sizeof(buf), "0%o", -shared_repository); else if (shared_repository == PERM_GROUP) - sprintf(buf, "%d", OLD_PERM_GROUP); + xsnprintf(buf, sizeof(buf), "%d", OLD_PERM_GROUP); else if (shared_repository == PERM_EVERYBODY) - sprintf(buf, "%d", OLD_PERM_EVERYBODY); + xsnprintf(buf, sizeof(buf), "%d", OLD_PERM_EVERYBODY); else - die("oops"); + die("BUG: invalid value for shared_repository"); git_config_set("core.sharedrepository", buf); git_config_set("receive.denyNonFastforwards", "true"); } diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c index 3b04a0f082..0e30d86230 100644 --- a/builtin/ls-tree.c +++ b/builtin/ls-tree.c @@ -96,12 +96,13 @@ static int show_tree(const unsigned char *sha1, struct strbuf *base, if (!strcmp(type, blob_type)) { unsigned long size; if (sha1_object_info(sha1, &size) == OBJ_BAD) - strcpy(size_text, "BAD"); + xsnprintf(size_text, sizeof(size_text), + "BAD"); else - snprintf(size_text, sizeof(size_text), - "%lu", size); + xsnprintf(size_text, sizeof(size_text), + "%lu", size); } else - strcpy(size_text, "-"); + xsnprintf(size_text, sizeof(size_text), "-"); printf("%06o %s %s %7s\t", mode, type, find_unique_abbrev(sha1, abbrev), size_text); diff --git a/builtin/merge-index.c b/builtin/merge-index.c index 1a1eafa6fd..1d66111917 100644 --- a/builtin/merge-index.c +++ b/builtin/merge-index.c @@ -23,7 +23,7 @@ static int merge_entry(int pos, const char *path) break; found++; strcpy(hexbuf[stage], sha1_to_hex(ce->sha1)); - sprintf(ownbuf[stage], "%o", ce->ce_mode); + xsnprintf(ownbuf[stage], sizeof(ownbuf[stage]), "%o", ce->ce_mode); arguments[stage] = hexbuf[stage]; arguments[stage + 4] = ownbuf[stage]; } while (++pos < active_nr); diff --git a/builtin/merge-recursive.c b/builtin/merge-recursive.c index a90f28f34d..491efd556e 100644 --- a/builtin/merge-recursive.c +++ b/builtin/merge-recursive.c @@ -14,7 +14,7 @@ static const char *better_branch_name(const char *branch) if (strlen(branch) != 40) return branch; - sprintf(githead_env, "GITHEAD_%s", branch); + xsnprintf(githead_env, sizeof(githead_env), "GITHEAD_%s", branch); name = getenv(githead_env); return name ? name : branch; } diff --git a/builtin/read-tree.c b/builtin/read-tree.c index 2379e11069..8c693e7568 100644 --- a/builtin/read-tree.c +++ b/builtin/read-tree.c @@ -90,7 +90,7 @@ static int debug_merge(const struct cache_entry * const *stages, debug_stage("index", stages[0], o); for (i = 1; i <= o->merge_size; i++) { char buf[24]; - sprintf(buf, "ent#%d", i); + xsnprintf(buf, sizeof(buf), "ent#%d", i); debug_stage(buf, stages[i], o); } return 0; diff --git a/builtin/unpack-file.c b/builtin/unpack-file.c index 19200291a2..6fc6bcdf7f 100644 --- a/builtin/unpack-file.c +++ b/builtin/unpack-file.c @@ -12,7 +12,7 @@ static char *create_temp_file(unsigned char *sha1) if (!buf || type != OBJ_BLOB) die("unable to read blob object %s", sha1_to_hex(sha1)); - strcpy(path, ".merge_file_XXXXXX"); + xsnprintf(path, sizeof(path), ".merge_file_XXXXXX"); fd = xmkstemp(path); if (write_in_full(fd, buf, size) != size) die_errno("unable to write temp-file"); diff --git a/compat/mingw.c b/compat/mingw.c index f74da235f5..a168800ae0 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2133,9 +2133,11 @@ int uname(struct utsname *buf) { DWORD v = GetVersion(); memset(buf, 0, sizeof(*buf)); - strcpy(buf->sysname, "Windows"); - sprintf(buf->release, "%u.%u", v & 0xff, (v >> 8) & 0xff); + xsnprintf(buf->sysname, sizeof(buf->sysname), "Windows"); + xsnprintf(buf->release, sizeof(buf->release), + "%u.%u", v & 0xff, (v >> 8) & 0xff); /* assuming NT variants only.. */ - sprintf(buf->version, "%u", (v >> 16) & 0x7fff); + xsnprintf(buf->version, sizeof(buf->version), + "%u", (v >> 16) & 0x7fff); return 0; } diff --git a/compat/winansi.c b/compat/winansi.c index efc5bb3a4b..ceff55bd67 100644 --- a/compat/winansi.c +++ b/compat/winansi.c @@ -539,7 +539,7 @@ void winansi_init(void) return; /* create a named pipe to communicate with the console thread */ - sprintf(name, "\\\\.\\pipe\\winansi%lu", GetCurrentProcessId()); + xsnprintf(name, sizeof(name), "\\\\.\\pipe\\winansi%lu", GetCurrentProcessId()); hwrite = CreateNamedPipe(name, PIPE_ACCESS_OUTBOUND, PIPE_TYPE_BYTE | PIPE_WAIT, 1, BUFFER_SIZE, 0, 0, NULL); if (hwrite == INVALID_HANDLE_VALUE) diff --git a/connect.c b/connect.c index c0144d859a..1d5c5e0055 100644 --- a/connect.c +++ b/connect.c @@ -332,7 +332,7 @@ static const char *ai_name(const struct addrinfo *ai) static char addr[NI_MAXHOST]; if (getnameinfo(ai->ai_addr, ai->ai_addrlen, addr, sizeof(addr), NULL, 0, NI_NUMERICHOST) != 0) - strcpy(addr, "(unknown)"); + xsnprintf(addr, sizeof(addr), "(unknown)"); return addr; } diff --git a/convert.c b/convert.c index f3bd3e93fb..814e814438 100644 --- a/convert.c +++ b/convert.c @@ -1289,7 +1289,8 @@ static struct stream_filter *ident_filter(const unsigned char *sha1) { struct ident_filter *ident = xmalloc(sizeof(*ident)); - sprintf(ident->ident, ": %s $", sha1_to_hex(sha1)); + xsnprintf(ident->ident, sizeof(ident->ident), + ": %s $", sha1_to_hex(sha1)); strbuf_init(&ident->left, 0); ident->filter.vtbl = &ident_vtbl; ident->state = 0; diff --git a/daemon.c b/daemon.c index f9eb296888..5218a3f1cc 100644 --- a/daemon.c +++ b/daemon.c @@ -901,7 +901,7 @@ static const char *ip2str(int family, struct sockaddr *sin, socklen_t len) inet_ntop(family, &((struct sockaddr_in*)sin)->sin_addr, ip, len); break; default: - strcpy(ip, ""); + xsnprintf(ip, sizeof(ip), ""); } return ip; } @@ -916,7 +916,7 @@ static int setup_named_sock(char *listen_addr, int listen_port, struct socketlis int gai; long flags; - sprintf(pbuf, "%d", listen_port); + xsnprintf(pbuf, sizeof(pbuf), "%d", listen_port); memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_UNSPEC; hints.ai_socktype = SOCK_STREAM; diff --git a/diff.c b/diff.c index 08508f6a20..788e371dec 100644 --- a/diff.c +++ b/diff.c @@ -2880,7 +2880,7 @@ static void prep_temp_blob(const char *path, struct diff_tempfile *temp, temp->name = get_tempfile_path(&temp->tempfile); strcpy(temp->hex, sha1_to_hex(sha1)); temp->hex[40] = 0; - sprintf(temp->mode, "%06o", mode); + xsnprintf(temp->mode, sizeof(temp->mode), "%06o", mode); strbuf_release(&buf); strbuf_release(&template); free(path_dup); @@ -2897,8 +2897,8 @@ static struct diff_tempfile *prepare_temp_file(const char *name, * a '+' entry produces this for file-1. */ temp->name = "/dev/null"; - strcpy(temp->hex, "."); - strcpy(temp->mode, "."); + xsnprintf(temp->hex, sizeof(temp->hex), "."); + xsnprintf(temp->mode, sizeof(temp->mode), "."); return temp; } @@ -2935,7 +2935,7 @@ static struct diff_tempfile *prepare_temp_file(const char *name, * !(one->sha1_valid), as long as * DIFF_FILE_VALID(one). */ - sprintf(temp->mode, "%06o", one->mode); + xsnprintf(temp->mode, sizeof(temp->mode), "%06o", one->mode); } return temp; } @@ -4081,9 +4081,9 @@ const char *diff_unique_abbrev(const unsigned char *sha1, int len) if (abblen < 37) { static char hex[41]; if (len < abblen && abblen <= len + 2) - sprintf(hex, "%s%.*s", abbrev, len+3-abblen, ".."); + xsnprintf(hex, sizeof(hex), "%s%.*s", abbrev, len+3-abblen, ".."); else - sprintf(hex, "%s...", abbrev); + xsnprintf(hex, sizeof(hex), "%s...", abbrev); return hex; } return sha1_to_hex(sha1); diff --git a/http-push.c b/http-push.c index c98dad23df..154e67bb05 100644 --- a/http-push.c +++ b/http-push.c @@ -881,7 +881,7 @@ static struct remote_lock *lock_remote(const char *path, long timeout) strbuf_addf(&out_buffer.buf, LOCK_REQUEST, escaped); free(escaped); - sprintf(timeout_header, "Timeout: Second-%ld", timeout); + xsnprintf(timeout_header, sizeof(timeout_header), "Timeout: Second-%ld", timeout); dav_headers = curl_slist_append(dav_headers, timeout_header); dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml"); diff --git a/http.c b/http.c index 9dce38025c..7b02259961 100644 --- a/http.c +++ b/http.c @@ -1104,7 +1104,7 @@ static void write_accept_language(struct strbuf *buf) decimal_places++, max_q *= 10) ; - sprintf(q_format, ";q=0.%%0%dd", decimal_places); + xsnprintf(q_format, sizeof(q_format), ";q=0.%%0%dd", decimal_places); strbuf_addstr(buf, "Accept-Language: "); @@ -1601,7 +1601,7 @@ struct http_pack_request *new_http_pack_request( fprintf(stderr, "Resuming fetch of pack %s at byte %ld\n", sha1_to_hex(target->sha1), prev_posn); - sprintf(range, "Range: bytes=%ld-", prev_posn); + xsnprintf(range, sizeof(range), "Range: bytes=%ld-", prev_posn); preq->range_header = curl_slist_append(NULL, range); curl_easy_setopt(preq->slot->curl, CURLOPT_HTTPHEADER, preq->range_header); @@ -1761,7 +1761,7 @@ struct http_object_request *new_http_object_request(const char *base_url, fprintf(stderr, "Resuming fetch of object %s at byte %ld\n", hex, prev_posn); - sprintf(range, "Range: bytes=%ld-", prev_posn); + xsnprintf(range, sizeof(range), "Range: bytes=%ld-", prev_posn); range_header = curl_slist_append(range_header, range); curl_easy_setopt(freq->slot->curl, CURLOPT_HTTPHEADER, range_header); diff --git a/ll-merge.c b/ll-merge.c index fc3c049594..56f73b3932 100644 --- a/ll-merge.c +++ b/ll-merge.c @@ -142,11 +142,11 @@ static struct ll_merge_driver ll_merge_drv[] = { { "union", "built-in union merge", ll_union_merge }, }; -static void create_temp(mmfile_t *src, char *path) +static void create_temp(mmfile_t *src, char *path, size_t len) { int fd; - strcpy(path, ".merge_file_XXXXXX"); + xsnprintf(path, len, ".merge_file_XXXXXX"); fd = xmkstemp(path); if (write_in_full(fd, src->ptr, src->size) != src->size) die_errno("unable to write temp-file"); @@ -187,10 +187,10 @@ static int ll_ext_merge(const struct ll_merge_driver *fn, result->ptr = NULL; result->size = 0; - create_temp(orig, temp[0]); - create_temp(src1, temp[1]); - create_temp(src2, temp[2]); - sprintf(temp[3], "%d", marker_size); + create_temp(orig, temp[0], sizeof(temp[0])); + create_temp(src1, temp[1], sizeof(temp[1])); + create_temp(src2, temp[2], sizeof(temp[2])); + xsnprintf(temp[3], sizeof(temp[3]), "%d", marker_size); strbuf_expand(&cmd, fn->cmdline, strbuf_expand_dict_cb, &dict); diff --git a/refs.c b/refs.c index 4e15f60d98..d5c8b2f51b 100644 --- a/refs.c +++ b/refs.c @@ -3326,10 +3326,10 @@ static int log_ref_write_fd(int fd, const unsigned char *old_sha1, msglen = msg ? strlen(msg) : 0; maxlen = strlen(committer) + msglen + 100; logrec = xmalloc(maxlen); - len = sprintf(logrec, "%s %s %s\n", - sha1_to_hex(old_sha1), - sha1_to_hex(new_sha1), - committer); + len = xsnprintf(logrec, maxlen, "%s %s %s\n", + sha1_to_hex(old_sha1), + sha1_to_hex(new_sha1), + committer); if (msglen) len += copy_msg(logrec + len - 1, msg) - 1; diff --git a/sideband.c b/sideband.c index 7f9dc229fb..fde8adc000 100644 --- a/sideband.c +++ b/sideband.c @@ -137,11 +137,11 @@ ssize_t send_sideband(int fd, int band, const char *data, ssize_t sz, int packet if (packet_max - 5 < n) n = packet_max - 5; if (0 <= band) { - sprintf(hdr, "%04x", n + 5); + xsnprintf(hdr, sizeof(hdr), "%04x", n + 5); hdr[4] = band; write_or_die(fd, hdr, 5); } else { - sprintf(hdr, "%04x", n + 4); + xsnprintf(hdr, sizeof(hdr), "%04x", n + 4); write_or_die(fd, hdr, 4); } write_or_die(fd, p, n); diff --git a/strbuf.c b/strbuf.c index f3c44fb3b3..107c45d291 100644 --- a/strbuf.c +++ b/strbuf.c @@ -245,8 +245,8 @@ void strbuf_add_commented_lines(struct strbuf *out, const char *buf, size_t size static char prefix2[2]; if (prefix1[0] != comment_line_char) { - sprintf(prefix1, "%c ", comment_line_char); - sprintf(prefix2, "%c", comment_line_char); + xsnprintf(prefix1, sizeof(prefix1), "%c ", comment_line_char); + xsnprintf(prefix2, sizeof(prefix2), "%c", comment_line_char); } add_lines(out, prefix1, prefix2, buf, size); } From f2f026752993054c1b712b6f4ae3ff167db38cbe Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:06:24 -0400 Subject: [PATCH 16/70] archive-tar: use xsnprintf for trivial formatting When we generate tar headers, we sprintf() values directly into a struct with the fixed-size header values. For the most part this is fine, as we are formatting small values (e.g., the octal format of "mode & 0x7777" is of fixed length). But it's still a good idea to use xsnprintf here. It communicates to readers what our expectation is, and it provides a run-time check that we are not overflowing the buffers. The one exception here is the mtime, which comes from the epoch time of the commit we are archiving. For sane values, this fits into the 12-byte value allocated in the header. But since git can handle 64-bit times, if I claim to be a visitor from the year 10,000 AD, I can overflow the buffer. This turns out to be harmless, as we simply overflow into the chksum field, which is then overwritten. This case is also best as an xsnprintf. It should never come up, short of extremely malformed dates, and in that case we are probably better off dying than silently truncating the date value (and we cannot expand the size of the buffer, since it is dictated by the ustar format). Our friends in the year 5138 (when we legitimately flip to a 12-digit epoch) can deal with that problem then. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- archive-tar.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index d543f93fc9..501ca97760 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -167,21 +167,21 @@ static void prepare_header(struct archiver_args *args, struct ustar_header *header, unsigned int mode, unsigned long size) { - sprintf(header->mode, "%07o", mode & 07777); - sprintf(header->size, "%011lo", S_ISREG(mode) ? size : 0); - sprintf(header->mtime, "%011lo", (unsigned long) args->time); + xsnprintf(header->mode, sizeof(header->mode), "%07o", mode & 07777); + xsnprintf(header->size, sizeof(header->size), "%011lo", S_ISREG(mode) ? size : 0); + xsnprintf(header->mtime, sizeof(header->mtime), "%011lo", (unsigned long) args->time); - sprintf(header->uid, "%07o", 0); - sprintf(header->gid, "%07o", 0); + xsnprintf(header->uid, sizeof(header->uid), "%07o", 0); + xsnprintf(header->gid, sizeof(header->gid), "%07o", 0); strlcpy(header->uname, "root", sizeof(header->uname)); strlcpy(header->gname, "root", sizeof(header->gname)); - sprintf(header->devmajor, "%07o", 0); - sprintf(header->devminor, "%07o", 0); + xsnprintf(header->devmajor, sizeof(header->devmajor), "%07o", 0); + xsnprintf(header->devminor, sizeof(header->devminor), "%07o", 0); memcpy(header->magic, "ustar", 6); memcpy(header->version, "00", 2); - sprintf(header->chksum, "%07o", ustar_header_chksum(header)); + snprintf(header->chksum, sizeof(header->chksum), "%07o", ustar_header_chksum(header)); } static int write_extended_header(struct archiver_args *args, @@ -193,7 +193,7 @@ static int write_extended_header(struct archiver_args *args, memset(&header, 0, sizeof(header)); *header.typeflag = TYPEFLAG_EXT_HEADER; mode = 0100666; - sprintf(header.name, "%s.paxheader", sha1_to_hex(sha1)); + xsnprintf(header.name, sizeof(header.name), "%s.paxheader", sha1_to_hex(sha1)); prepare_header(args, &header, mode, size); write_blocked(&header, sizeof(header)); write_blocked(buffer, size); @@ -235,8 +235,8 @@ static int write_tar_entry(struct archiver_args *args, memcpy(header.prefix, path, plen); memcpy(header.name, path + plen + 1, rest); } else { - sprintf(header.name, "%s.data", - sha1_to_hex(sha1)); + xsnprintf(header.name, sizeof(header.name), "%s.data", + sha1_to_hex(sha1)); strbuf_append_ext_header(&ext_header, "path", path, pathlen); } @@ -259,8 +259,8 @@ static int write_tar_entry(struct archiver_args *args, if (S_ISLNK(mode)) { if (size > sizeof(header.linkname)) { - sprintf(header.linkname, "see %s.paxheader", - sha1_to_hex(sha1)); + xsnprintf(header.linkname, sizeof(header.linkname), + "see %s.paxheader", sha1_to_hex(sha1)); strbuf_append_ext_header(&ext_header, "linkpath", buffer, size); } else From ef1286d3c0ba714c6c2ae87e14edf3c462aef114 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:06:42 -0400 Subject: [PATCH 17/70] use xsnprintf for generating git object headers We generally use 32-byte buffers to format git's "type size" header fields. These should not generally overflow unless you can produce some truly gigantic objects (and our types come from our internal array of constant strings). But it is a good idea to use xsnprintf to make sure this is the case. Note that we slightly modify the interface to write_sha1_file_prepare, which nows uses "hdrlen" as an "in" parameter as well as an "out" (on the way in it stores the allocated size of the header, and on the way out it returns the ultimate size of the header). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 2 +- bulk-checkin.c | 4 ++-- fast-import.c | 4 ++-- http-push.c | 2 +- sha1_file.c | 13 +++++++------ 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 3431de2362..1ad1bde696 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -441,7 +441,7 @@ static void *unpack_entry_data(unsigned long offset, unsigned long size, int hdrlen; if (!is_delta_type(type)) { - hdrlen = sprintf(hdr, "%s %lu", typename(type), size) + 1; + hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(type), size) + 1; git_SHA1_Init(&c); git_SHA1_Update(&c, hdr, hdrlen); } else diff --git a/bulk-checkin.c b/bulk-checkin.c index 7cffc3a579..4347f5c76a 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -200,8 +200,8 @@ static int deflate_to_pack(struct bulk_checkin_state *state, if (seekback == (off_t) -1) return error("cannot find the current offset"); - header_len = sprintf((char *)obuf, "%s %" PRIuMAX, - typename(type), (uintmax_t)size) + 1; + header_len = xsnprintf((char *)obuf, sizeof(obuf), "%s %" PRIuMAX, + typename(type), (uintmax_t)size) + 1; git_SHA1_Init(&ctx); git_SHA1_Update(&ctx, obuf, header_len); diff --git a/fast-import.c b/fast-import.c index 6c7c3c9b66..d0c25024cd 100644 --- a/fast-import.c +++ b/fast-import.c @@ -1035,8 +1035,8 @@ static int store_object( git_SHA_CTX c; git_zstream s; - hdrlen = sprintf((char *)hdr,"%s %lu", typename(type), - (unsigned long)dat->len) + 1; + hdrlen = xsnprintf((char *)hdr, sizeof(hdr), "%s %lu", + typename(type), (unsigned long)dat->len) + 1; git_SHA1_Init(&c); git_SHA1_Update(&c, hdr, hdrlen); git_SHA1_Update(&c, dat->buf, dat->len); diff --git a/http-push.c b/http-push.c index 154e67bb05..1f3788f634 100644 --- a/http-push.c +++ b/http-push.c @@ -361,7 +361,7 @@ static void start_put(struct transfer_request *request) git_zstream stream; unpacked = read_sha1_file(request->obj->sha1, &type, &len); - hdrlen = sprintf(hdr, "%s %lu", typename(type), len) + 1; + hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(type), len) + 1; /* Set it up */ git_deflate_init(&stream, zlib_compression_level); diff --git a/sha1_file.c b/sha1_file.c index d295a3225a..f106091474 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1464,7 +1464,7 @@ int check_sha1_signature(const unsigned char *sha1, void *map, return -1; /* Generate the header */ - hdrlen = sprintf(hdr, "%s %lu", typename(obj_type), size) + 1; + hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(obj_type), size) + 1; /* Sha1.. */ git_SHA1_Init(&c); @@ -2930,7 +2930,7 @@ static void write_sha1_file_prepare(const void *buf, unsigned long len, git_SHA_CTX c; /* Generate the header */ - *hdrlen = sprintf(hdr, "%s %lu", type, len)+1; + *hdrlen = xsnprintf(hdr, *hdrlen, "%s %lu", type, len)+1; /* Sha1.. */ git_SHA1_Init(&c); @@ -2993,7 +2993,7 @@ int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1) { char hdr[32]; - int hdrlen; + int hdrlen = sizeof(hdr); write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen); return 0; } @@ -3139,7 +3139,7 @@ static int freshen_packed_object(const unsigned char *sha1) int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1) { char hdr[32]; - int hdrlen; + int hdrlen = sizeof(hdr); /* Normally if we have it in the pack then we do not bother writing * it out into .git/objects/??/?{38} file. @@ -3157,7 +3157,8 @@ int hash_sha1_file_literally(const void *buf, unsigned long len, const char *typ int hdrlen, status = 0; /* type string, SP, %lu of the length plus NUL must fit this */ - header = xmalloc(strlen(type) + 32); + hdrlen = strlen(type) + 32; + header = xmalloc(hdrlen); write_sha1_file_prepare(buf, len, type, sha1, header, &hdrlen); if (!(flags & HASH_WRITE_OBJECT)) @@ -3185,7 +3186,7 @@ int force_object_loose(const unsigned char *sha1, time_t mtime) buf = read_packed_sha1(sha1, &type, &len); if (!buf) return error("cannot read sha1_file for %s", sha1_to_hex(sha1)); - hdrlen = sprintf(hdr, "%s %lu", typename(type), len) + 1; + hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(type), len) + 1; ret = write_loose_object(sha1, hdr, hdrlen, buf, len, mtime); free(buf); From c3bb0ac796c21490f478914441526817e4685606 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:06:44 -0400 Subject: [PATCH 18/70] find_short_object_filename: convert sprintf to xsnprintf We use sprintf() to format some hex data into a buffer. The buffer is clearly long enough, and using snprintf here is not necessary. And in fact, it does not really make anything easier to audit, as the size we feed to snprintf accounts for the magic extra 42 bytes found in each alt->name field of struct alternate_object_database (which is there exactly to do this formatting). Still, it is nice to remove an sprintf call and replace it with an xsnprintf and explanatory comment, which makes it easier to audit the code base for overflows. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_name.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sha1_name.c b/sha1_name.c index c58b4771c0..80753b6770 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -96,11 +96,15 @@ static void find_short_object_filename(int len, const char *hex_pfx, struct disa } fakeent->next = alt_odb_list; - sprintf(hex, "%.2s", hex_pfx); + xsnprintf(hex, sizeof(hex), "%.2s", hex_pfx); for (alt = fakeent; alt && !ds->ambiguous; alt = alt->next) { struct dirent *de; DIR *dir; - sprintf(alt->name, "%.2s/", hex_pfx); + /* + * every alt_odb struct has 42 extra bytes after the base + * for exactly this purpose + */ + xsnprintf(alt->name, 42, "%.2s/", hex_pfx); dir = opendir(alt->base); if (!dir) continue; From f5691aa640ae2c1c5f85037e093de7f310c0eac7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:06:46 -0400 Subject: [PATCH 19/70] stop_progress_msg: convert sprintf to xsnprintf The usual arguments for using xsnprintf over sprintf apply, but this case is a little tricky. We print to a fixed-size buffer if we have room, and otherwise to an allocated buffer. So there should be no overflow here, but it is still good to communicate our intention, as well as to check our earlier math for how much space the string will need. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- progress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/progress.c b/progress.c index a3efcfd34c..353bd37416 100644 --- a/progress.c +++ b/progress.c @@ -254,7 +254,7 @@ void stop_progress_msg(struct progress **p_progress, const char *msg) throughput_string(&tp->display, tp->curr_total, rate); } progress_update = 1; - sprintf(bufp, ", %s.\n", msg); + xsnprintf(bufp, len + 1, ", %s.\n", msg); display(progress, progress->last_value, bufp); if (buf != bufp) free(bufp); From 48bdf86995423736f36557d744841b08c8bf4e14 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:06:48 -0400 Subject: [PATCH 20/70] compat/hstrerror: convert sprintf to snprintf This is a trivially correct use of sprintf, as our error number should not be excessively long. But it's still nice to drop an sprintf call. Note that we cannot use xsnprintf here, because this is compat code which does not load git-compat-util.h. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- compat/hstrerror.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compat/hstrerror.c b/compat/hstrerror.c index 069c555da4..b85a2fa956 100644 --- a/compat/hstrerror.c +++ b/compat/hstrerror.c @@ -16,6 +16,6 @@ const char *githstrerror(int err) case TRY_AGAIN: return "Non-authoritative \"host not found\", or SERVERFAIL"; } - sprintf(buffer, "Name resolution error %d", err); + snprintf(buffer, sizeof(buffer), "Name resolution error %d", err); return buffer; } From 19bdd3e7e160a0b000c15d8bf6d33f4149e3f911 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:06:51 -0400 Subject: [PATCH 21/70] grep: use xsnprintf to format failure message This looks at first glance like the sprintf can overflow our buffer, but it's actually fine; the p->origin string is something constant and small, like "command line" or "-e option". Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- grep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/grep.c b/grep.c index b58c7c6434..6c68d5bd2e 100644 --- a/grep.c +++ b/grep.c @@ -306,9 +306,9 @@ static NORETURN void compile_regexp_failed(const struct grep_pat *p, char where[1024]; if (p->no) - sprintf(where, "In '%s' at %d, ", p->origin, p->no); + xsnprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no); else if (p->origin) - sprintf(where, "%s, ", p->origin); + xsnprintf(where, sizeof(where), "%s, ", p->origin); else where[0] = 0; From 330c8e26701c33f1d74dbe3c2692f29b5ed4ba5f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:06:53 -0400 Subject: [PATCH 22/70] entry.c: convert strcpy to xsnprintf This particular conversion is non-obvious, because nobody has passed our function the length of the destination buffer. However, the interface to checkout_entry specifies that the buffer must be at least TEMPORARY_FILENAME_LENGTH bytes long, so we can check that (meaning the existing code was not buggy, but merely worrisome to somebody reading it). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- entry.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/entry.c b/entry.c index 1eda8e9471..582c40071a 100644 --- a/entry.c +++ b/entry.c @@ -96,8 +96,8 @@ static int open_output_fd(char *path, const struct cache_entry *ce, int to_tempf { int symlink = (ce->ce_mode & S_IFMT) != S_IFREG; if (to_tempfile) { - strcpy(path, symlink - ? ".merge_link_XXXXXX" : ".merge_file_XXXXXX"); + xsnprintf(path, TEMPORARY_FILENAME_LENGTH, "%s", + symlink ? ".merge_link_XXXXXX" : ".merge_file_XXXXXX"); return mkstemp(path); } else { return create_file(path, !symlink ? ce->ce_mode : 0666); From 48bcc1c3cc09db1a6da0ce47460fae6e5f7edd4b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:06:55 -0400 Subject: [PATCH 23/70] add_packed_git: convert strcpy into xsnprintf We have the path "foo.idx", and we create a buffer big enough to hold "foo.pack" and "foo.keep", and then strcpy straight into it. This isn't a bug (we have enough space), but it's very hard to tell from the strcpy that this is so. Let's instead use strip_suffix to take off the ".idx", record the size of our allocation, and use xsnprintf to make sure we don't violate our assumptions. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- cache.h | 2 +- sha1_file.c | 21 +++++++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/cache.h b/cache.h index 030b880257..d206d64550 100644 --- a/cache.h +++ b/cache.h @@ -1309,7 +1309,7 @@ extern void close_pack_windows(struct packed_git *); extern void unuse_pack(struct pack_window **); extern void free_pack_by_name(const char *); extern void clear_delta_base_cache(void); -extern struct packed_git *add_packed_git(const char *, int, int); +extern struct packed_git *add_packed_git(const char *path, size_t path_len, int local); /* * Return the SHA-1 of the nth object within the specified packfile. diff --git a/sha1_file.c b/sha1_file.c index f106091474..592226eb7b 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1146,11 +1146,12 @@ static void try_to_free_pack_memory(size_t size) release_pack_memory(size); } -struct packed_git *add_packed_git(const char *path, int path_len, int local) +struct packed_git *add_packed_git(const char *path, size_t path_len, int local) { static int have_set_try_to_free_routine; struct stat st; - struct packed_git *p = alloc_packed_git(path_len + 2); + size_t alloc; + struct packed_git *p; if (!have_set_try_to_free_routine) { have_set_try_to_free_routine = 1; @@ -1161,18 +1162,22 @@ struct packed_git *add_packed_git(const char *path, int path_len, int local) * Make sure a corresponding .pack file exists and that * the index looks sane. */ - path_len -= strlen(".idx"); - if (path_len < 1) { - free(p); + if (!strip_suffix_mem(path, &path_len, ".idx")) return NULL; - } + + /* + * ".pack" is long enough to hold any suffix we're adding (and + * the use xsnprintf double-checks that) + */ + alloc = path_len + strlen(".pack") + 1; + p = alloc_packed_git(alloc); memcpy(p->pack_name, path, path_len); - strcpy(p->pack_name + path_len, ".keep"); + xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep"); if (!access(p->pack_name, F_OK)) p->pack_keep = 1; - strcpy(p->pack_name + path_len, ".pack"); + xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack"); if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) { free(p); return NULL; From 0cc41428596ec1cd3862918ef781793ef7346ba5 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:06:58 -0400 Subject: [PATCH 24/70] http-push: replace strcat with xsnprintf We account for these strcats in our initial allocation, but the code is confusing to follow and verify. Let's remember our original allocation length, and then xsnprintf can verify that we don't exceed it. Note that we can't just use xstrfmt here (which would be even cleaner) because the code tries to grow the buffer only when necessary. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- http-push.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/http-push.c b/http-push.c index 1f3788f634..37baff818f 100644 --- a/http-push.c +++ b/http-push.c @@ -786,21 +786,21 @@ xml_start_tag(void *userData, const char *name, const char **atts) { struct xml_ctx *ctx = (struct xml_ctx *)userData; const char *c = strchr(name, ':'); - int new_len; + int old_namelen, new_len; if (c == NULL) c = name; else c++; - new_len = strlen(ctx->name) + strlen(c) + 2; + old_namelen = strlen(ctx->name); + new_len = old_namelen + strlen(c) + 2; if (new_len > ctx->len) { ctx->name = xrealloc(ctx->name, new_len); ctx->len = new_len; } - strcat(ctx->name, "."); - strcat(ctx->name, c); + xsnprintf(ctx->name + old_namelen, ctx->len - old_namelen, ".%s", c); free(ctx->cdata); ctx->cdata = NULL; From b7115a350b5c01ce0ae7a8735e4235d4b2367b5f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:00 -0400 Subject: [PATCH 25/70] receive-pack: convert strncpy to xsnprintf This strncpy is pointless; we pass the strlen() of the src string, meaning that it works just like a memcpy. Worse, though, is that the size has no relation to the destination buffer, meaning it is a potential overflow. In practice, it's not. We pass only short constant strings like "warning: " and "error: ", which are much smaller than the destination buffer. We can make this much simpler by just using xsnprintf, which will check for overflow and return the size for our next vsnprintf, without us having to run a separate strlen(). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index e6b93d0264..04d2bdf3f3 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -280,10 +280,10 @@ static void rp_warning(const char *err, ...) __attribute__((format (printf, 1, 2 static void report_message(const char *prefix, const char *err, va_list params) { - int sz = strlen(prefix); + int sz; char msg[4096]; - strncpy(msg, prefix, sz); + sz = xsnprintf(msg, sizeof(msg), "%s", prefix); sz += vsnprintf(msg + sz, sizeof(msg) - sz, err, params); if (sz > (sizeof(msg) - 1)) sz = sizeof(msg) - 1; From 75faa45ae0230b321bf72027b2274315d7e14e34 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:03 -0400 Subject: [PATCH 26/70] replace trivial malloc + sprintf / strcpy calls with xstrfmt It's a common pattern to do: foo = xmalloc(strlen(one) + strlen(two) + 1 + 1); sprintf(foo, "%s %s", one, two); (or possibly some variant with strcpy()s or a more complicated length computation). We can switch these to use xstrfmt, which is shorter, involves less error-prone manual computation, and removes many sprintf and strcpy calls which make it harder to audit the code for real buffer overflows. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/apply.c | 5 +---- builtin/ls-remote.c | 8 ++------ builtin/name-rev.c | 13 +++++-------- environment.c | 7 ++----- imap-send.c | 5 ++--- reflog-walk.c | 7 +++---- remote.c | 7 +------ setup.c | 12 +++--------- unpack-trees.c | 4 +--- 9 files changed, 20 insertions(+), 48 deletions(-) diff --git a/builtin/apply.c b/builtin/apply.c index 4aa53f7fd8..094a20f489 100644 --- a/builtin/apply.c +++ b/builtin/apply.c @@ -698,10 +698,7 @@ static char *find_name_common(const char *line, const char *def, } if (root) { - char *ret = xmalloc(root_len + len + 1); - strcpy(ret, root); - memcpy(ret + root_len, start, len); - ret[root_len + len] = '\0'; + char *ret = xstrfmt("%s%.*s", root, len, start); return squash_slash(ret); } diff --git a/builtin/ls-remote.c b/builtin/ls-remote.c index 4554dbc8a9..5b6d679a63 100644 --- a/builtin/ls-remote.c +++ b/builtin/ls-remote.c @@ -93,12 +93,8 @@ int cmd_ls_remote(int argc, const char **argv, const char *prefix) if (argv[i]) { int j; pattern = xcalloc(argc - i + 1, sizeof(const char *)); - for (j = i; j < argc; j++) { - int len = strlen(argv[j]); - char *p = xmalloc(len + 3); - sprintf(p, "*/%s", argv[j]); - pattern[j - i] = p; - } + for (j = i; j < argc; j++) + pattern[j - i] = xstrfmt("*/%s", argv[j]); } remote = remote_get(dest); if (!remote) { diff --git a/builtin/name-rev.c b/builtin/name-rev.c index 248a3eb260..8a3a0cd61e 100644 --- a/builtin/name-rev.c +++ b/builtin/name-rev.c @@ -56,19 +56,16 @@ copy_data: parents = parents->next, parent_number++) { if (parent_number > 1) { int len = strlen(tip_name); - char *new_name = xmalloc(len + - 1 + decimal_length(generation) + /* ~ */ - 1 + 2 + /* ^NN */ - 1); + char *new_name; if (len > 2 && !strcmp(tip_name + len - 2, "^0")) len -= 2; if (generation > 0) - sprintf(new_name, "%.*s~%d^%d", len, tip_name, - generation, parent_number); + new_name = xstrfmt("%.*s~%d^%d", len, tip_name, + generation, parent_number); else - sprintf(new_name, "%.*s^%d", len, tip_name, - parent_number); + new_name = xstrfmt("%.*s^%d", len, tip_name, + parent_number); name_rev(parents->item, new_name, 0, distance + MERGE_TRAVERSAL_WEIGHT, 0); diff --git a/environment.c b/environment.c index a533aed630..c5b65f5e23 100644 --- a/environment.c +++ b/environment.c @@ -143,11 +143,8 @@ static char *git_path_from_env(const char *envvar, const char *git_dir, const char *path, int *fromenv) { const char *value = getenv(envvar); - if (!value) { - char *buf = xmalloc(strlen(git_dir) + strlen(path) + 2); - sprintf(buf, "%s/%s", git_dir, path); - return buf; - } + if (!value) + return xstrfmt("%s/%s", git_dir, path); if (fromenv) *fromenv = 1; return xstrdup(value); diff --git a/imap-send.c b/imap-send.c index 37ac4aa86a..e9faaeaf2a 100644 --- a/imap-send.c +++ b/imap-send.c @@ -889,9 +889,8 @@ static char *cram(const char *challenge_64, const char *user, const char *pass) } /* response: " " */ - resp_len = strlen(user) + 1 + strlen(hex) + 1; - response = xmalloc(resp_len); - sprintf(response, "%s %s", user, hex); + response = xstrfmt("%s %s", user, hex); + resp_len = strlen(response) + 1; response_64 = xmalloc(ENCODED_SIZE(resp_len) + 1); encoded_len = EVP_EncodeBlock((unsigned char *)response_64, diff --git a/reflog-walk.c b/reflog-walk.c index f8e743a23b..85b8a54241 100644 --- a/reflog-walk.c +++ b/reflog-walk.c @@ -56,12 +56,11 @@ static struct complete_reflogs *read_complete_reflog(const char *ref) } } if (reflogs->nr == 0) { - int len = strlen(ref); - char *refname = xmalloc(len + 12); - sprintf(refname, "refs/%s", ref); + char *refname = xstrfmt("refs/%s", ref); for_each_reflog_ent(refname, read_one_reflog, reflogs); if (reflogs->nr == 0) { - sprintf(refname, "refs/heads/%s", ref); + free(refname); + refname = xstrfmt("refs/heads/%s", ref); for_each_reflog_ent(refname, read_one_reflog, reflogs); } free(refname); diff --git a/remote.c b/remote.c index 26504b7447..5ab0f7f7a5 100644 --- a/remote.c +++ b/remote.c @@ -65,7 +65,6 @@ static int valid_remote(const struct remote *remote) static const char *alias_url(const char *url, struct rewrites *r) { int i, j; - char *ret; struct counted_string *longest; int longest_i; @@ -86,11 +85,7 @@ static const char *alias_url(const char *url, struct rewrites *r) if (!longest) return url; - ret = xmalloc(r->rewrite[longest_i]->baselen + - (strlen(url) - longest->len) + 1); - strcpy(ret, r->rewrite[longest_i]->base); - strcpy(ret + r->rewrite[longest_i]->baselen, url + longest->len); - return ret; + return xstrfmt("%s%s", r->rewrite[longest_i]->base, url + longest->len); } static void add_push_refspec(struct remote *remote, const char *ref) diff --git a/setup.c b/setup.c index a17c51e61d..2b64cbbbfa 100644 --- a/setup.c +++ b/setup.c @@ -99,10 +99,7 @@ char *prefix_path_gently(const char *prefix, int len, return NULL; } } else { - sanitized = xmalloc(len + strlen(path) + 1); - if (len) - memcpy(sanitized, prefix, len); - strcpy(sanitized + len, path); + sanitized = xstrfmt("%.*s%s", len, prefix, path); if (remaining_prefix) *remaining_prefix = len; if (normalize_path_copy_len(sanitized, sanitized, remaining_prefix)) { @@ -468,11 +465,8 @@ const char *read_gitfile_gently(const char *path, int *return_error_code) if (!is_absolute_path(dir) && (slash = strrchr(path, '/'))) { size_t pathlen = slash+1 - path; - size_t dirlen = pathlen + len - 8; - dir = xmalloc(dirlen + 1); - strncpy(dir, path, pathlen); - strncpy(dir + pathlen, buf + 8, len - 8); - dir[dirlen] = '\0'; + dir = xstrfmt("%.*s%.*s", (int)pathlen, path, + (int)(len - 8), buf + 8); free(buf); buf = dir; } diff --git a/unpack-trees.c b/unpack-trees.c index f932e80e86..8e2032f4e5 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1350,9 +1350,7 @@ static int verify_clean_subdirectory(const struct cache_entry *ce, * Then we need to make sure that we do not lose a locally * present file that is not ignored. */ - pathbuf = xmalloc(namelen + 2); - memcpy(pathbuf, ce->name, namelen); - strcpy(pathbuf+namelen, "/"); + pathbuf = xstrfmt("%.*s/", namelen, ce->name); memset(&d, 0, sizeof(d)); if (o->dir) From 3ec832c4b563f3dd1f23399c3bdede1168cc77e7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:05 -0400 Subject: [PATCH 27/70] config: use xstrfmt in normalize_value We xmalloc a fixed-size buffer and sprintf into it; this is OK because the size of our formatting types is finite, but that's not immediately clear to a reader auditing sprintf calls. Let's switch to xstrfmt, which is shorter and obviously correct. Note that just dropping the common xmalloc here causes gcc to complain with -Wmaybe-uninitialized. That's because if "types" does not match any of our known types, we never write anything into the "normalized" pointer. With the current code, gcc doesn't notice because we always return a valid pointer (just one which might point to uninitialized data, but the compiler doesn't know that). In other words, the current code is potentially buggy if new types are added without updating this spot. So let's take this opportunity to clean up the function a bit more. We can drop the "normalized" pointer entirely, and just return directly from each code path. And then add an assertion at the end in case we haven't covered any cases. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/config.c | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/builtin/config.c b/builtin/config.c index 71acc44143..adc772786a 100644 --- a/builtin/config.c +++ b/builtin/config.c @@ -246,8 +246,6 @@ free_strings: static char *normalize_value(const char *key, const char *value) { - char *normalized; - if (!value) return NULL; @@ -258,27 +256,21 @@ static char *normalize_value(const char *key, const char *value) * "~/foobar/" in the config file, and to expand the ~ * when retrieving the value. */ - normalized = xstrdup(value); - else { - normalized = xmalloc(64); - if (types == TYPE_INT) { - int64_t v = git_config_int64(key, value); - sprintf(normalized, "%"PRId64, v); - } - else if (types == TYPE_BOOL) - sprintf(normalized, "%s", - git_config_bool(key, value) ? "true" : "false"); - else if (types == TYPE_BOOL_OR_INT) { - int is_bool, v; - v = git_config_bool_or_int(key, value, &is_bool); - if (!is_bool) - sprintf(normalized, "%d", v); - else - sprintf(normalized, "%s", v ? "true" : "false"); - } + return xstrdup(value); + if (types == TYPE_INT) + return xstrfmt("%"PRId64, git_config_int64(key, value)); + if (types == TYPE_BOOL) + return xstrdup(git_config_bool(key, value) ? "true" : "false"); + if (types == TYPE_BOOL_OR_INT) { + int is_bool, v; + v = git_config_bool_or_int(key, value, &is_bool); + if (!is_bool) + return xstrfmt("%d", v); + else + return xstrdup(v ? "true" : "false"); } - return normalized; + die("BUG: cannot normalize type %d", types); } static int get_color_found; From 2805bb59708e1cf049445fb8b21de1a710a3a16c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:07 -0400 Subject: [PATCH 28/70] fetch: replace static buffer with xstrfmt We parse the INFINITE_DEPTH constant into a static, fixed-size buffer using sprintf. This buffer is sufficiently large for the current constant, but it's a suspicious pattern, as the constant is defined far away, and it's not immediately obvious that 12 bytes are large enough to hold it. We can just use xstrfmt here, which gets rid of any question of the buffer size. It also removes any concerns with object lifetime, which means we do not have to wonder why this buffer deep within a conditional is marked "static" (we never free our newly allocated result, of course, but that's OK; it's global that lasts the lifetime of the whole program anyway). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 9a3869f4ff..470372552c 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1156,11 +1156,8 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) die(_("--depth and --unshallow cannot be used together")); else if (!is_repository_shallow()) die(_("--unshallow on a complete repository does not make sense")); - else { - static char inf_depth[12]; - sprintf(inf_depth, "%d", INFINITE_DEPTH); - depth = inf_depth; - } + else + depth = xstrfmt("%d", INFINITE_DEPTH); } /* no need to be strict, transport_set_option() will validate it again */ From 9ae97018fb2e7f30ab92fdc2965d1dcff2c5c296 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:09 -0400 Subject: [PATCH 29/70] use strip_suffix and xstrfmt to replace suffix When we want to convert "foo.pack" to "foo.idx", we do it by duplicating the original string and then munging the bytes in place. Let's use strip_suffix and xstrfmt instead, which has several advantages: 1. It's more clear what the intent is. 2. It does not implicitly rely on the fact that strlen(".idx") <= strlen(".pack") to avoid an overflow. 3. We communicate the assumption that the input file ends with ".pack" (and get a run-time check that this is so). 4. We drop calls to strcpy, which makes auditing the code base easier. Likewise, we can do this to convert ".pack" to ".bitmap", avoiding some manual memory computation. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- http.c | 7 ++++--- pack-bitmap.c | 13 ++++--------- sha1_file.c | 6 ++++-- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/http.c b/http.c index 7b02259961..e0ff876cd9 100644 --- a/http.c +++ b/http.c @@ -1511,6 +1511,7 @@ int finish_http_pack_request(struct http_pack_request *preq) struct packed_git **lst; struct packed_git *p = preq->target; char *tmp_idx; + size_t len; struct child_process ip = CHILD_PROCESS_INIT; const char *ip_argv[8]; @@ -1524,9 +1525,9 @@ int finish_http_pack_request(struct http_pack_request *preq) lst = &((*lst)->next); *lst = (*lst)->next; - tmp_idx = xstrdup(preq->tmpfile); - strcpy(tmp_idx + strlen(tmp_idx) - strlen(".pack.temp"), - ".idx.temp"); + if (!strip_suffix(preq->tmpfile, ".pack.temp", &len)) + die("BUG: pack tmpfile does not end in .pack.temp?"); + tmp_idx = xstrfmt("%.*s.idx.temp", (int)len, preq->tmpfile); ip_argv[0] = "index-pack"; ip_argv[1] = "-o"; diff --git a/pack-bitmap.c b/pack-bitmap.c index 637770af81..7dfcb341d6 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -252,16 +252,11 @@ static int load_bitmap_entries_v1(struct bitmap_index *index) static char *pack_bitmap_filename(struct packed_git *p) { - char *idx_name; - int len; + size_t len; - len = strlen(p->pack_name) - strlen(".pack"); - idx_name = xmalloc(len + strlen(".bitmap") + 1); - - memcpy(idx_name, p->pack_name, len); - memcpy(idx_name + len, ".bitmap", strlen(".bitmap") + 1); - - return idx_name; + if (!strip_suffix(p->pack_name, ".pack", &len)) + die("BUG: pack_name does not end in .pack"); + return xstrfmt("%.*s.bitmap", (int)len, p->pack_name); } static int open_pack_bitmap_1(struct packed_git *packfile) diff --git a/sha1_file.c b/sha1_file.c index 592226eb7b..2be1afdde0 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -671,13 +671,15 @@ static int check_packed_git_idx(const char *path, struct packed_git *p) int open_pack_index(struct packed_git *p) { char *idx_name; + size_t len; int ret; if (p->index_data) return 0; - idx_name = xstrdup(p->pack_name); - strcpy(idx_name + strlen(idx_name) - strlen(".pack"), ".idx"); + if (!strip_suffix(p->pack_name, ".pack", &len)) + die("BUG: pack_name does not end in .pack"); + idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name); ret = check_packed_git_idx(idx_name, p); free(idx_name); return ret; From a5e03bf5c686e9f2da5e94e091e0ea531d40c6b8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:12 -0400 Subject: [PATCH 30/70] ref-filter: drop sprintf and strcpy calls The ref-filter code comes from for-each-ref, and inherited a number of raw sprintf and strcpy calls. These are generally all safe, as we custom-size the buffers, or are formatting numbers into sufficiently large buffers. But we can make the resulting code even simpler and more obviously correct by using some of our helper functions. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- ref-filter.c | 70 +++++++++++++++++----------------------------------- 1 file changed, 22 insertions(+), 48 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index f38dee4f60..1f718704e7 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -192,9 +192,7 @@ static int grab_objectname(const char *name, const unsigned char *sha1, struct atom_value *v) { if (!strcmp(name, "objectname")) { - char *s = xmalloc(41); - strcpy(s, sha1_to_hex(sha1)); - v->s = s; + v->s = xstrdup(sha1_to_hex(sha1)); return 1; } if (!strcmp(name, "objectname:short")) { @@ -219,10 +217,8 @@ static void grab_common_values(struct atom_value *val, int deref, struct object if (!strcmp(name, "objecttype")) v->s = typename(obj->type); else if (!strcmp(name, "objectsize")) { - char *s = xmalloc(40); - sprintf(s, "%lu", sz); v->ul = sz; - v->s = s; + v->s = xstrfmt("%lu", sz); } else if (deref) grab_objectname(name, obj->sha1, v); @@ -246,11 +242,8 @@ static void grab_tag_values(struct atom_value *val, int deref, struct object *ob v->s = tag->tag; else if (!strcmp(name, "type") && tag->tagged) v->s = typename(tag->tagged->type); - else if (!strcmp(name, "object") && tag->tagged) { - char *s = xmalloc(41); - strcpy(s, sha1_to_hex(tag->tagged->sha1)); - v->s = s; - } + else if (!strcmp(name, "object") && tag->tagged) + v->s = xstrdup(sha1_to_hex(tag->tagged->sha1)); } } @@ -268,32 +261,22 @@ static void grab_commit_values(struct atom_value *val, int deref, struct object if (deref) name++; if (!strcmp(name, "tree")) { - char *s = xmalloc(41); - strcpy(s, sha1_to_hex(commit->tree->object.sha1)); - v->s = s; + v->s = xstrdup(sha1_to_hex(commit->tree->object.sha1)); } - if (!strcmp(name, "numparent")) { - char *s = xmalloc(40); + else if (!strcmp(name, "numparent")) { v->ul = commit_list_count(commit->parents); - sprintf(s, "%lu", v->ul); - v->s = s; + v->s = xstrfmt("%lu", v->ul); } else if (!strcmp(name, "parent")) { - int num = commit_list_count(commit->parents); - int i; struct commit_list *parents; - char *s = xmalloc(41 * num + 1); - v->s = s; - for (i = 0, parents = commit->parents; - parents; - parents = parents->next, i = i + 41) { + struct strbuf s = STRBUF_INIT; + for (parents = commit->parents; parents; parents = parents->next) { struct commit *parent = parents->item; - strcpy(s+i, sha1_to_hex(parent->object.sha1)); - if (parents->next) - s[i+40] = ' '; + if (parents != commit->parents) + strbuf_addch(&s, ' '); + strbuf_addstr(&s, sha1_to_hex(parent->object.sha1)); } - if (!i) - *s = '\0'; + v->s = strbuf_detach(&s, NULL); } } } @@ -700,7 +683,6 @@ static void populate_value(struct ref_array_item *ref) else if (!strcmp(formatp, "track") && (starts_with(name, "upstream") || starts_with(name, "push"))) { - char buf[40]; if (stat_tracking_info(branch, &num_ours, &num_theirs, NULL)) @@ -708,17 +690,13 @@ static void populate_value(struct ref_array_item *ref) if (!num_ours && !num_theirs) v->s = ""; - else if (!num_ours) { - sprintf(buf, "[behind %d]", num_theirs); - v->s = xstrdup(buf); - } else if (!num_theirs) { - sprintf(buf, "[ahead %d]", num_ours); - v->s = xstrdup(buf); - } else { - sprintf(buf, "[ahead %d, behind %d]", - num_ours, num_theirs); - v->s = xstrdup(buf); - } + else if (!num_ours) + v->s = xstrfmt("[behind %d]", num_theirs); + else if (!num_theirs) + v->s = xstrfmt("[ahead %d]", num_ours); + else + v->s = xstrfmt("[ahead %d, behind %d]", + num_ours, num_theirs); continue; } else if (!strcmp(formatp, "trackshort") && (starts_with(name, "upstream") || @@ -745,12 +723,8 @@ static void populate_value(struct ref_array_item *ref) if (!deref) v->s = refname; - else { - int len = strlen(refname); - char *s = xmalloc(len + 4); - sprintf(s, "%s^{}", refname); - v->s = s; - } + else + v->s = xstrfmt("%s^{}", refname); } for (i = 0; i < used_atom_cnt; i++) { From acd47eec992160d76d5d4a2e98940de3fdc10552 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:14 -0400 Subject: [PATCH 31/70] help: drop prepend function in favor of xstrfmt This function predates xstrfmt, and its functionality is a subset. Let's just use xstrfmt. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/help.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/builtin/help.c b/builtin/help.c index 3422e73079..fba8c01e24 100644 --- a/builtin/help.c +++ b/builtin/help.c @@ -295,16 +295,6 @@ static int is_git_command(const char *s) is_in_cmdlist(&other_cmds, s); } -static const char *prepend(const char *prefix, const char *cmd) -{ - size_t pre_len = strlen(prefix); - size_t cmd_len = strlen(cmd); - char *p = xmalloc(pre_len + cmd_len + 1); - memcpy(p, prefix, pre_len); - strcpy(p + pre_len, cmd); - return p; -} - static const char *cmd_to_page(const char *git_cmd) { if (!git_cmd) @@ -312,9 +302,9 @@ static const char *cmd_to_page(const char *git_cmd) else if (starts_with(git_cmd, "git")) return git_cmd; else if (is_git_command(git_cmd)) - return prepend("git-", git_cmd); + return xstrfmt("git-%s", git_cmd); else - return prepend("git", git_cmd); + return xstrfmt("git%s", git_cmd); } static void setup_man_path(void) From c978610dc841efe300bf4d1ee61b24d78b13c4f4 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:16 -0400 Subject: [PATCH 32/70] mailmap: replace strcpy with xstrdup We want to make a copy of a string without any leading whitespace. To do so, we allocate a buffer large enough to hold the original, skip past the whitespace, then copy that. It's much simpler to just allocate after we've skipped, in which case we can just copy the remainder of the string, leaving no question of whether "len" is large enough. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- mailmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mailmap.c b/mailmap.c index 9e9589730f..f4a0f1cf27 100644 --- a/mailmap.c +++ b/mailmap.c @@ -162,11 +162,10 @@ static void read_mailmap_line(struct string_list *map, char *buffer, char *cp; free(*repo_abbrev); - *repo_abbrev = xmalloc(len); for (cp = buffer + abblen; isspace(*cp); cp++) ; /* nothing */ - strcpy(*repo_abbrev, cp); + *repo_abbrev = xstrdup(cp); } return; } From f28e3ab231d36c454445fe778ffb931920606109 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:18 -0400 Subject: [PATCH 33/70] read_branches_file: simplify string handling This function does a lot of manual string handling, and has some unnecessary limits. This patch cleans up a number of things: 1. Drop the arbitrary 1000-byte limit on the size of the remote name (we do not have such a limit in any of the other remote-reading mechanisms). 2. Replace fgets into a fixed-size buffer with a strbuf, eliminating any limits on the length of the URL. 3. Replace manual whitespace handling with strbuf_trim (since we now have a strbuf). This also gets rid of a call to strcpy, and the confusing reuse of the "p" pointer for multiple purposes. 4. We currently build up the refspecs over multiple strbuf calls. We do this to handle the fact that the URL "frag" may not be present. But rather than have multiple conditionals, let's just default "frag" to "master". This lets us format the refspecs with a single xstrfmt. It's shorter, and easier to see what the final string looks like. We also update the misleading comment in this area (the local branch is named after the remote name, not after the branch name on the remote side). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- remote.c | 54 ++++++++++++++++++++---------------------------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/remote.c b/remote.c index 5ab0f7f7a5..22a60fcc0b 100644 --- a/remote.c +++ b/remote.c @@ -293,56 +293,42 @@ static void read_remotes_file(struct remote *remote) static void read_branches_file(struct remote *remote) { char *frag; - struct strbuf branch = STRBUF_INIT; - int n = 1000; - FILE *f = fopen(git_path("branches/%.*s", n, remote->name), "r"); - char *s, *p; - int len; + struct strbuf buf = STRBUF_INIT; + FILE *f = fopen(git_path("branches/%s", remote->name), "r"); if (!f) return; - s = fgets(buffer, BUF_SIZE, f); - fclose(f); - if (!s) - return; - while (isspace(*s)) - s++; - if (!*s) + + strbuf_getline(&buf, f, '\n'); + strbuf_trim(&buf); + if (!buf.len) { + strbuf_release(&buf); return; + } + remote->origin = REMOTE_BRANCHES; - p = s + strlen(s); - while (isspace(p[-1])) - *--p = 0; - len = p - s; - p = xmalloc(len + 1); - strcpy(p, s); /* * The branches file would have URL and optionally * #branch specified. The "master" (or specified) branch is - * fetched and stored in the local branch of the same name. + * fetched and stored in the local branch matching the + * remote name. */ - frag = strchr(p, '#'); - if (frag) { + frag = strchr(buf.buf, '#'); + if (frag) *(frag++) = '\0'; - strbuf_addf(&branch, "refs/heads/%s", frag); - } else - strbuf_addstr(&branch, "refs/heads/master"); + else + frag = "master"; + + add_url_alias(remote, strbuf_detach(&buf, NULL)); + add_fetch_refspec(remote, xstrfmt("refs/heads/%s:refs/heads/%s", + frag, remote->name)); - strbuf_addf(&branch, ":refs/heads/%s", remote->name); - add_url_alias(remote, p); - add_fetch_refspec(remote, strbuf_detach(&branch, NULL)); /* * Cogito compatible push: push current HEAD to remote #branch * (master if missing) */ - strbuf_init(&branch, 0); - strbuf_addstr(&branch, "HEAD"); - if (frag) - strbuf_addf(&branch, ":refs/heads/%s", frag); - else - strbuf_addstr(&branch, ":refs/heads/master"); - add_push_refspec(remote, strbuf_detach(&branch, NULL)); + add_push_refspec(remote, xstrfmt("HEAD:refs/heads/%s", frag)); remote->fetch_tags = 1; /* always auto-follow */ } From 0e265a92a1d2b9275d638f696c65c9bbe747e78c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:20 -0400 Subject: [PATCH 34/70] read_remotes_file: simplify string handling The main motivation for this cleanup is to switch our line-reading to a strbuf, which removes the use of a fixed-size buffer (which limited the size of remote URLs). Since we have the strbuf, we can make use of strbuf_rtrim(). While we're here, we can also simplify the parsing of each line. First, we can use skip_prefix() to avoid some magic numbers. But second, we can avoid splitting the parsing and actions for each line into two stages. Right now we figure out which type of line we have, set an int to a magic number, skip any intermediate whitespace, and then act on the resulting value based on the magic number. Instead, let's factor the whitespace skipping into a function. That lets us avoid the magic numbers and keep the actions close to the parsing. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- remote.c | 55 ++++++++++++++++++------------------------------------- 1 file changed, 18 insertions(+), 37 deletions(-) diff --git a/remote.c b/remote.c index 22a60fcc0b..a01f13a211 100644 --- a/remote.c +++ b/remote.c @@ -54,9 +54,6 @@ static const char *pushremote_name; static struct rewrites rewrites; static struct rewrites rewrites_push; -#define BUF_SIZE (2048) -static char buffer[BUF_SIZE]; - static int valid_remote(const struct remote *remote) { return (!!remote->url) || (!!remote->foreign_vcs); @@ -243,50 +240,34 @@ static void add_instead_of(struct rewrite *rewrite, const char *instead_of) rewrite->instead_of_nr++; } +static const char *skip_spaces(const char *s) +{ + while (isspace(*s)) + s++; + return s; +} + static void read_remotes_file(struct remote *remote) { + struct strbuf buf = STRBUF_INIT; FILE *f = fopen(git_path("remotes/%s", remote->name), "r"); if (!f) return; remote->origin = REMOTE_REMOTES; - while (fgets(buffer, BUF_SIZE, f)) { - int value_list; - char *s, *p; + while (strbuf_getline(&buf, f, '\n') != EOF) { + const char *v; - if (starts_with(buffer, "URL:")) { - value_list = 0; - s = buffer + 4; - } else if (starts_with(buffer, "Push:")) { - value_list = 1; - s = buffer + 5; - } else if (starts_with(buffer, "Pull:")) { - value_list = 2; - s = buffer + 5; - } else - continue; + strbuf_rtrim(&buf); - while (isspace(*s)) - s++; - if (!*s) - continue; - - p = s + strlen(s); - while (isspace(p[-1])) - *--p = 0; - - switch (value_list) { - case 0: - add_url_alias(remote, xstrdup(s)); - break; - case 1: - add_push_refspec(remote, xstrdup(s)); - break; - case 2: - add_fetch_refspec(remote, xstrdup(s)); - break; - } + if (skip_prefix(buf.buf, "URL:", &v)) + add_url_alias(remote, xstrdup(skip_spaces(v))); + else if (skip_prefix(buf.buf, "Push:", &v)) + add_push_refspec(remote, xstrdup(skip_spaces(v))); + else if (skip_prefix(buf.buf, "Pull:", &v)) + add_fetch_refspec(remote, xstrdup(skip_spaces(v))); } + strbuf_release(&buf); fclose(f); } From 495127dbcbd53e89d7edee8db42bfa7e57c8a120 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:22 -0400 Subject: [PATCH 35/70] resolve_ref: use strbufs for internal buffers resolve_ref already uses a strbuf internally when generating pathnames, but it uses fixed-size buffers for storing the refname and symbolic refs. This means that you cannot actually point HEAD to a ref that is larger than 256 bytes. We can lift this limit by using strbufs here, too. Like sb_path, we pass the the buffers into our helper function, so that we can easily clean up all output paths. We can also drop the "unsafe" name from our helper function, as it no longer uses a single static buffer (but of course resolve_ref_unsafe is still unsafe, because the static buffers moved there). As a bonus, we also get to drop some strcpy calls between the two fixed buffers (that cannot currently overflow because the two buffers are sized identically). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- refs.c | 57 ++++++++++++++++++++++------------------- t/t1401-symbolic-ref.sh | 29 +++++++++++++++++++++ 2 files changed, 59 insertions(+), 27 deletions(-) diff --git a/refs.c b/refs.c index d5c8b2f51b..c2709de2a0 100644 --- a/refs.c +++ b/refs.c @@ -1579,16 +1579,15 @@ static int resolve_missing_loose_ref(const char *refname, } /* This function needs to return a meaningful errno on failure */ -static const char *resolve_ref_unsafe_1(const char *refname, - int resolve_flags, - unsigned char *sha1, - int *flags, - struct strbuf *sb_path) +static const char *resolve_ref_1(const char *refname, + int resolve_flags, + unsigned char *sha1, + int *flags, + struct strbuf *sb_refname, + struct strbuf *sb_path, + struct strbuf *sb_contents) { int depth = MAXDEPTH; - ssize_t len; - char buffer[256]; - static char refname_buffer[256]; int bad_name = 0; if (flags) @@ -1654,19 +1653,18 @@ static const char *resolve_ref_unsafe_1(const char *refname, /* Follow "normalized" - ie "refs/.." symlinks by hand */ if (S_ISLNK(st.st_mode)) { - len = readlink(path, buffer, sizeof(buffer)-1); - if (len < 0) { + strbuf_reset(sb_contents); + if (strbuf_readlink(sb_contents, path, 0) < 0) { if (errno == ENOENT || errno == EINVAL) /* inconsistent with lstat; retry */ goto stat_ref; else return NULL; } - buffer[len] = 0; - if (starts_with(buffer, "refs/") && - !check_refname_format(buffer, 0)) { - strcpy(refname_buffer, buffer); - refname = refname_buffer; + if (starts_with(sb_contents->buf, "refs/") && + !check_refname_format(sb_contents->buf, 0)) { + strbuf_swap(sb_refname, sb_contents); + refname = sb_refname->buf; if (flags) *flags |= REF_ISSYMREF; if (resolve_flags & RESOLVE_REF_NO_RECURSE) { @@ -1695,28 +1693,26 @@ static const char *resolve_ref_unsafe_1(const char *refname, else return NULL; } - len = read_in_full(fd, buffer, sizeof(buffer)-1); - if (len < 0) { + strbuf_reset(sb_contents); + if (strbuf_read(sb_contents, fd, 256) < 0) { int save_errno = errno; close(fd); errno = save_errno; return NULL; } close(fd); - while (len && isspace(buffer[len-1])) - len--; - buffer[len] = '\0'; + strbuf_rtrim(sb_contents); /* * Is it a symbolic ref? */ - if (!starts_with(buffer, "ref:")) { + if (!starts_with(sb_contents->buf, "ref:")) { /* * Please note that FETCH_HEAD has a second * line containing other data. */ - if (get_sha1_hex(buffer, sha1) || - (buffer[40] != '\0' && !isspace(buffer[40]))) { + if (get_sha1_hex(sb_contents->buf, sha1) || + (sb_contents->buf[40] != '\0' && !isspace(sb_contents->buf[40]))) { if (flags) *flags |= REF_ISBROKEN; errno = EINVAL; @@ -1731,10 +1727,12 @@ static const char *resolve_ref_unsafe_1(const char *refname, } if (flags) *flags |= REF_ISSYMREF; - buf = buffer + 4; + buf = sb_contents->buf + 4; while (isspace(*buf)) buf++; - refname = strcpy(refname_buffer, buf); + strbuf_reset(sb_refname); + strbuf_addstr(sb_refname, buf); + refname = sb_refname->buf; if (resolve_flags & RESOLVE_REF_NO_RECURSE) { hashclr(sha1); return refname; @@ -1756,10 +1754,15 @@ static const char *resolve_ref_unsafe_1(const char *refname, const char *resolve_ref_unsafe(const char *refname, int resolve_flags, unsigned char *sha1, int *flags) { + static struct strbuf sb_refname = STRBUF_INIT; + struct strbuf sb_contents = STRBUF_INIT; struct strbuf sb_path = STRBUF_INIT; - const char *ret = resolve_ref_unsafe_1(refname, resolve_flags, - sha1, flags, &sb_path); + const char *ret; + + ret = resolve_ref_1(refname, resolve_flags, sha1, flags, + &sb_refname, &sb_path, &sb_contents); strbuf_release(&sb_path); + strbuf_release(&sb_contents); return ret; } diff --git a/t/t1401-symbolic-ref.sh b/t/t1401-symbolic-ref.sh index 36378b0e3f..20b022ae33 100755 --- a/t/t1401-symbolic-ref.sh +++ b/t/t1401-symbolic-ref.sh @@ -63,4 +63,33 @@ test_expect_success 'symbolic-ref fails to delete real ref' ' ' reset_to_sane +test_expect_success 'create large ref name' ' + # make 256+ character ref; some systems may not handle that, + # so be gentle + long=0123456789abcdef && + long=$long/$long/$long/$long && + long=$long/$long/$long/$long && + long_ref=refs/heads/$long && + tree=$(git write-tree) && + commit=$(echo foo | git commit-tree $tree) && + if git update-ref $long_ref $commit; then + test_set_prereq LONG_REF + else + echo >&2 "long refs not supported" + fi +' + +test_expect_success LONG_REF 'symbolic-ref can point to large ref name' ' + git symbolic-ref HEAD $long_ref && + echo $long_ref >expect && + git symbolic-ref HEAD >actual && + test_cmp expect actual +' + +test_expect_success LONG_REF 'we can parse long symbolic ref' ' + echo $commit >expect && + git rev-parse --verify HEAD >actual && + test_cmp expect actual +' + test_done From 0cb9d6d6b63ad7fea4ca8363f7f1f921b1e16ec7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:25 -0400 Subject: [PATCH 36/70] upload-archive: convert sprintf to strbuf When we report an error to the client, we format it into a fixed-size buffer using vsprintf(). This can't actually overflow in practice, since we only format a very tame subset of strings (mostly strerror() output). However, it's hard to tell immediately, so let's just use a strbuf so readers do not have to wonder. We do add an allocation here, but the performance is not important; the next step is to call die() anyway. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/upload-archive.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/builtin/upload-archive.c b/builtin/upload-archive.c index 32ab94cd06..dbfe14f3fe 100644 --- a/builtin/upload-archive.c +++ b/builtin/upload-archive.c @@ -49,15 +49,14 @@ int cmd_upload_archive_writer(int argc, const char **argv, const char *prefix) __attribute__((format (printf, 1, 2))) static void error_clnt(const char *fmt, ...) { - char buf[1024]; + struct strbuf buf = STRBUF_INIT; va_list params; - int len; va_start(params, fmt); - len = vsprintf(buf, fmt, params); + strbuf_vaddf(&buf, fmt, params); va_end(params); - send_sideband(1, 3, buf, len, LARGE_PACKET_MAX); - die("sent error to the client: %s", buf); + send_sideband(1, 3, buf.buf, buf.len, LARGE_PACKET_MAX); + die("sent error to the client: %s", buf.buf); } static ssize_t process_input(int child_fd, int band) From df1ed03a6fabf58b63b26950c71d36eff4aa25e8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:27 -0400 Subject: [PATCH 37/70] remote-ext: simplify git pkt-line generation We format a pkt-line into a heap buffer, which requires manual computation of the required size, and uses some bare sprintf calls. We could use a strbuf instead, which would take care of the computation for us. But it's even easier still to use packet_write(). Besides handling the formatting and writing for us, it fixes two things: 1. Our manual max-size check used 0xFFFF, while technically LARGE_PACKET_MAX is slightly smaller than this. 2. Our packet will now be output as part of GIT_TRACE_PACKET debugging. Unfortunately packet_write() does not let us build up the buffer progressively, so we do have to repeat ourselves a little depending on the "vhost" setting, but the end result is still far more readable than the original. Since there were no tests covering this feature at all, we'll add a few into t5802. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/remote-ext.c | 34 +++++----------------------------- t/t5802-connect-helper.sh | 28 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 29 deletions(-) diff --git a/builtin/remote-ext.c b/builtin/remote-ext.c index 3b8c22cc75..e3cd25d580 100644 --- a/builtin/remote-ext.c +++ b/builtin/remote-ext.c @@ -1,6 +1,7 @@ #include "builtin.h" #include "transport.h" #include "run-command.h" +#include "pkt-line.h" /* * URL syntax: @@ -142,36 +143,11 @@ static const char **parse_argv(const char *arg, const char *service) static void send_git_request(int stdin_fd, const char *serv, const char *repo, const char *vhost) { - size_t bufferspace; - size_t wpos = 0; - char *buffer; - - /* - * Request needs 12 bytes extra if there is vhost (xxxx \0host=\0) and - * 6 bytes extra (xxxx \0) if there is no vhost. - */ - if (vhost) - bufferspace = strlen(serv) + strlen(repo) + strlen(vhost) + 12; + if (!vhost) + packet_write(stdin_fd, "%s %s%c", serv, repo, 0); else - bufferspace = strlen(serv) + strlen(repo) + 6; - - if (bufferspace > 0xFFFF) - die("Request too large to send"); - buffer = xmalloc(bufferspace); - - /* Make the packet. */ - wpos = sprintf(buffer, "%04x%s %s%c", (unsigned)bufferspace, - serv, repo, 0); - - /* Add vhost if any. */ - if (vhost) - sprintf(buffer + wpos, "host=%s%c", vhost, 0); - - /* Send the request */ - if (write_in_full(stdin_fd, buffer, bufferspace) < 0) - die_errno("Failed to send request"); - - free(buffer); + packet_write(stdin_fd, "%s %s%chost=%s%c", serv, repo, 0, + vhost, 0); } static int run_child(const char *arg, const char *service) diff --git a/t/t5802-connect-helper.sh b/t/t5802-connect-helper.sh index 878faf2b63..b7a7f9d588 100755 --- a/t/t5802-connect-helper.sh +++ b/t/t5802-connect-helper.sh @@ -69,4 +69,32 @@ test_expect_success 'update backfilled tag without primary transfer' ' test_cmp expect actual ' + +test_expect_success 'set up fake git-daemon' ' + mkdir remote && + git init --bare remote/one.git && + mkdir remote/host && + git init --bare remote/host/two.git && + write_script fake-daemon <<-\EOF && + git daemon --inetd \ + --informative-errors \ + --export-all \ + --base-path="$TRASH_DIRECTORY/remote" \ + --interpolated-path="$TRASH_DIRECTORY/remote/%H%D" \ + "$TRASH_DIRECTORY/remote" + EOF + export TRASH_DIRECTORY && + PATH=$TRASH_DIRECTORY:$PATH +' + +test_expect_success 'ext command can connect to git daemon (no vhost)' ' + rm -rf dst && + git clone "ext::fake-daemon %G/one.git" dst +' + +test_expect_success 'ext command can connect to git daemon (vhost)' ' + rm -rf dst && + git clone "ext::fake-daemon %G/two.git %Vhost" dst +' + test_done From 7d0581a9abe733d8880113370c4d956b50f5bd9f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:29 -0400 Subject: [PATCH 38/70] http-push: use strbuf instead of fwrite_buffer The http-push code defines an fwrite_buffer function for use as a curl callback; it just writes to a strbuf. There's no reason we need to use it ourselves, as we know we have a strbuf. This lets us format directly into it, rather than dealing with an extra temporary buffer (which required manual length computation). While we're here, let's also remove the literal tabs from the source in favor of "\t", which is more visually obvious. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- http-push.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/http-push.c b/http-push.c index 37baff818f..e501c282e6 100644 --- a/http-push.c +++ b/http-push.c @@ -1459,8 +1459,6 @@ static void add_remote_info_ref(struct remote_ls_ctx *ls) { struct strbuf *buf = (struct strbuf *)ls->userData; struct object *o; - int len; - char *ref_info; struct ref *ref; ref = alloc_ref(ls->dentry_name); @@ -1484,23 +1482,14 @@ static void add_remote_info_ref(struct remote_ls_ctx *ls) return; } - len = strlen(ls->dentry_name) + 42; - ref_info = xcalloc(len + 1, 1); - sprintf(ref_info, "%s %s\n", - sha1_to_hex(ref->old_sha1), ls->dentry_name); - fwrite_buffer(ref_info, 1, len, buf); - free(ref_info); + strbuf_addf(buf, "%s\t%s\n", + sha1_to_hex(ref->old_sha1), ls->dentry_name); if (o->type == OBJ_TAG) { o = deref_tag(o, ls->dentry_name, 0); - if (o) { - len = strlen(ls->dentry_name) + 45; - ref_info = xcalloc(len + 1, 1); - sprintf(ref_info, "%s %s^{}\n", - sha1_to_hex(o->sha1), ls->dentry_name); - fwrite_buffer(ref_info, 1, len, buf); - free(ref_info); - } + if (o) + strbuf_addf(buf, "%s\t%s^{}\n", + sha1_to_hex(o->sha1), ls->dentry_name); } free(ref); } From 54ba4c5fa2d7de216ca090ac2e657728462c81d5 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:31 -0400 Subject: [PATCH 39/70] http-walker: store url in a strbuf We do an unchecked sprintf directly into our url buffer. This doesn't overflow because we know that it was sized for "$base/objects/info/http-alternates", and we are writing "$base/objects/info/alternates", which must be smaller. But that is not immediately obvious to a reader who is looking for buffer overflows. Let's switch to a strbuf, so that we do not have to think about this issue at all. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- http-walker.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/http-walker.c b/http-walker.c index 88da5468e7..2c721f0c30 100644 --- a/http-walker.c +++ b/http-walker.c @@ -29,7 +29,7 @@ struct object_request { struct alternates_request { struct walker *walker; const char *base; - char *url; + struct strbuf *url; struct strbuf *buffer; struct active_request_slot *slot; int http_specific; @@ -195,10 +195,11 @@ static void process_alternates_response(void *callback_data) /* Try reusing the slot to get non-http alternates */ alt_req->http_specific = 0; - sprintf(alt_req->url, "%s/objects/info/alternates", - base); + strbuf_reset(alt_req->url); + strbuf_addf(alt_req->url, "%s/objects/info/alternates", + base); curl_easy_setopt(slot->curl, CURLOPT_URL, - alt_req->url); + alt_req->url->buf); active_requests++; slot->in_use = 1; if (slot->finished != NULL) @@ -312,7 +313,7 @@ static void process_alternates_response(void *callback_data) static void fetch_alternates(struct walker *walker, const char *base) { struct strbuf buffer = STRBUF_INIT; - char *url; + struct strbuf url = STRBUF_INIT; struct active_request_slot *slot; struct alternates_request alt_req; struct walker_data *cdata = walker->data; @@ -338,7 +339,7 @@ static void fetch_alternates(struct walker *walker, const char *base) if (walker->get_verbosely) fprintf(stderr, "Getting alternates list for %s\n", base); - url = xstrfmt("%s/objects/info/http-alternates", base); + strbuf_addf(&url, "%s/objects/info/http-alternates", base); /* * Use a callback to process the result, since another request @@ -351,10 +352,10 @@ static void fetch_alternates(struct walker *walker, const char *base) curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer); curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer); - curl_easy_setopt(slot->curl, CURLOPT_URL, url); + curl_easy_setopt(slot->curl, CURLOPT_URL, url.buf); alt_req.base = base; - alt_req.url = url; + alt_req.url = &url; alt_req.buffer = &buffer; alt_req.http_specific = 1; alt_req.slot = slot; @@ -365,7 +366,7 @@ static void fetch_alternates(struct walker *walker, const char *base) cdata->got_alternates = -1; strbuf_release(&buffer); - free(url); + strbuf_release(&url); } static int fetch_indices(struct walker *walker, struct alt_base *repo) From ac5190cc48bd75586566ccc052304d40bbc63147 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:34 -0400 Subject: [PATCH 40/70] sha1_get_pack_name: use a strbuf We do some manual memory computation here, and there's no check that our 60 is not overflowed by the raw sprintf (it isn't, because the "which" parameter is never longer than "pack"). We can simplify this greatly with a strbuf. Technically the end result is not identical, as the original took care not to rewrite the object directory on each call for performance reasons. We could do that here, too (by saving the baselen and resetting to it), but it's not worth the complexity; this function is not called a lot (generally once per packfile that we open). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_file.c | 39 ++++++++++----------------------------- 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/sha1_file.c b/sha1_file.c index 2be1afdde0..c26fdcbd88 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -208,44 +208,25 @@ const char *sha1_file_name(const unsigned char *sha1) * provided by the caller. which should be "pack" or "idx". */ static char *sha1_get_pack_name(const unsigned char *sha1, - char **name, char **base, const char *which) + struct strbuf *buf, + const char *which) { - static const char hex[] = "0123456789abcdef"; - char *buf; - int i; - - if (!*base) { - const char *sha1_file_directory = get_object_directory(); - int len = strlen(sha1_file_directory); - *base = xmalloc(len + 60); - sprintf(*base, "%s/pack/pack-1234567890123456789012345678901234567890.%s", - sha1_file_directory, which); - *name = *base + len + 11; - } - - buf = *name; - - for (i = 0; i < 20; i++) { - unsigned int val = *sha1++; - *buf++ = hex[val >> 4]; - *buf++ = hex[val & 0xf]; - } - - return *base; + strbuf_reset(buf); + strbuf_addf(buf, "%s/pack/pack-%s.%s", get_object_directory(), + sha1_to_hex(sha1), which); + return buf->buf; } char *sha1_pack_name(const unsigned char *sha1) { - static char *name, *base; - - return sha1_get_pack_name(sha1, &name, &base, "pack"); + static struct strbuf buf = STRBUF_INIT; + return sha1_get_pack_name(sha1, &buf, "pack"); } char *sha1_pack_index_name(const unsigned char *sha1) { - static char *name, *base; - - return sha1_get_pack_name(sha1, &name, &base, "idx"); + static struct strbuf buf = STRBUF_INIT; + return sha1_get_pack_name(sha1, &buf, "idx"); } struct alternate_object_database *alt_odb_list; From e2b021eb5b5ce2f4be3c10f1f8063981b1a53053 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 4 Oct 2015 23:43:14 -0400 Subject: [PATCH 41/70] precompose_utf8: drop unused variable The result of iconv is assigned to a variable, but we never use it (instead, we check errno and whether the function consumed all bytes). Let's drop the assignment, as it triggers gcc's -Wunused-but-set-variable. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- compat/precompose_utf8.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/compat/precompose_utf8.c b/compat/precompose_utf8.c index 95fe849e42..044c68617b 100644 --- a/compat/precompose_utf8.c +++ b/compat/precompose_utf8.c @@ -139,9 +139,8 @@ struct dirent_prec_psx *precompose_utf8_readdir(PREC_DIR *prec_dir) size_t inleft = namelenz; char *outpos = &prec_dir->dirent_nfc->d_name[0]; size_t outsz = prec_dir->dirent_nfc->max_name_len; - size_t cnt; errno = 0; - cnt = iconv(prec_dir->ic_precompose, &cp, &inleft, &outpos, &outsz); + iconv(prec_dir->ic_precompose, &cp, &inleft, &outpos, &outsz); if (errno || inleft) { /* * iconv() failed and errno could be E2BIG, EILSEQ, EINVAL, EBADF From fdf729661a777d8bd598f40055d92b2df5601332 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 4 Oct 2015 23:45:26 -0400 Subject: [PATCH 42/70] probe_utf8_pathname_composition: use internal strbuf When we are initializing a .git directory, we may call probe_utf8_pathname_composition to detect utf8 mangling. We pass in a path buffer for it to use, and it blindly strcpy()s into it, not knowing whether the buffer is large enough to hold the result or not. In practice this isn't a big deal, because the buffer we pass in already contains "$GIT_DIR/config", and we append only a few extra bytes to it. But we can easily do the right thing just by calling git_path_buf ourselves. Technically this results in a different pathname (before we appended our utf8 characters to the "config" path, and now they get their own files in $GIT_DIR), but that should not matter for our purposes. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/init-db.c | 2 +- compat/precompose_utf8.c | 18 ++++++++++-------- compat/precompose_utf8.h | 2 +- git-compat-util.h | 2 +- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/builtin/init-db.c b/builtin/init-db.c index e7d0e31f46..89addda4fd 100644 --- a/builtin/init-db.c +++ b/builtin/init-db.c @@ -312,7 +312,7 @@ static int create_default_files(const char *template_path) strcpy(path + len, "CoNfIg"); if (!access(path, F_OK)) git_config_set("core.ignorecase", "true"); - probe_utf8_pathname_composition(path, len); + probe_utf8_pathname_composition(); } return reinit; diff --git a/compat/precompose_utf8.c b/compat/precompose_utf8.c index 044c68617b..079070ff1d 100644 --- a/compat/precompose_utf8.c +++ b/compat/precompose_utf8.c @@ -36,24 +36,26 @@ static size_t has_non_ascii(const char *s, size_t maxlen, size_t *strlen_c) } -void probe_utf8_pathname_composition(char *path, int len) +void probe_utf8_pathname_composition(void) { + struct strbuf path = STRBUF_INIT; static const char *auml_nfc = "\xc3\xa4"; static const char *auml_nfd = "\x61\xcc\x88"; int output_fd; if (precomposed_unicode != -1) return; /* We found it defined in the global config, respect it */ - strcpy(path + len, auml_nfc); - output_fd = open(path, O_CREAT|O_EXCL|O_RDWR, 0600); + git_path_buf(&path, "%s", auml_nfc); + output_fd = open(path.buf, O_CREAT|O_EXCL|O_RDWR, 0600); if (output_fd >= 0) { close(output_fd); - strcpy(path + len, auml_nfd); - precomposed_unicode = access(path, R_OK) ? 0 : 1; + git_path_buf(&path, "%s", auml_nfd); + precomposed_unicode = access(path.buf, R_OK) ? 0 : 1; git_config_set("core.precomposeunicode", precomposed_unicode ? "true" : "false"); - strcpy(path + len, auml_nfc); - if (unlink(path)) - die_errno(_("failed to unlink '%s'"), path); + git_path_buf(&path, "%s", auml_nfc); + if (unlink(path.buf)) + die_errno(_("failed to unlink '%s'"), path.buf); } + strbuf_release(&path); } diff --git a/compat/precompose_utf8.h b/compat/precompose_utf8.h index 3b73585fc5..a94e7c4342 100644 --- a/compat/precompose_utf8.h +++ b/compat/precompose_utf8.h @@ -27,7 +27,7 @@ typedef struct { } PREC_DIR; void precompose_argv(int argc, const char **argv); -void probe_utf8_pathname_composition(char *, int); +void probe_utf8_pathname_composition(void); PREC_DIR *precompose_utf8_opendir(const char *dirname); struct dirent_prec_psx *precompose_utf8_readdir(PREC_DIR *dirp); diff --git a/git-compat-util.h b/git-compat-util.h index 348b9dcc1c..9a3e5591cb 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -229,7 +229,7 @@ typedef unsigned long uintptr_t; #else #define precompose_str(in,i_nfd2nfc) #define precompose_argv(c,v) -#define probe_utf8_pathname_composition(a,b) +#define probe_utf8_pathname_composition() #endif #ifdef MKDIR_WO_TRAILING_SLASH From 9c28390bda4bb86f48a3145417c1cb1892782c47 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 4 Oct 2015 23:46:04 -0400 Subject: [PATCH 43/70] init: use strbufs to store paths The init code predates strbufs, and uses PATH_MAX-sized buffers along with many manual checks on intermediate sizes (some of which make magic assumptions, such as that init will not create a path inside .git longer than 50 characters). We can simplify this greatly by using strbufs, which drops some hard-to-verify strcpy calls in favor of git_path_buf. While we're in the area, let's also convert existing calls to git_path to the safer git_path_buf (our existing calls were passed to pretty tame functions, and so were not a problem, but it's easy to be consistent and safe here). Note that we had an explicit test that "git init" rejects long template directories. This comes from 32d1776 (init: Do not segfault on big GIT_TEMPLATE_DIR environment variable, 2009-04-18). We can drop the test_must_fail here, as we now accept this and need only confirm that we don't segfault, which was the original point of the test. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/init-db.c | 172 ++++++++++++++++++++-------------------------- t/t0001-init.sh | 4 +- 2 files changed, 76 insertions(+), 100 deletions(-) diff --git a/builtin/init-db.c b/builtin/init-db.c index 89addda4fd..f59f40768e 100644 --- a/builtin/init-db.c +++ b/builtin/init-db.c @@ -36,10 +36,11 @@ static void safe_create_dir(const char *dir, int share) die(_("Could not make %s writable by group"), dir); } -static void copy_templates_1(char *path, int baselen, - char *template, int template_baselen, +static void copy_templates_1(struct strbuf *path, struct strbuf *template, DIR *dir) { + size_t path_baselen = path->len; + size_t template_baselen = template->len; struct dirent *de; /* Note: if ".git/hooks" file exists in the repository being @@ -49,77 +50,64 @@ static void copy_templates_1(char *path, int baselen, * with the way the namespace under .git/ is organized, should * be really carefully chosen. */ - safe_create_dir(path, 1); + safe_create_dir(path->buf, 1); while ((de = readdir(dir)) != NULL) { struct stat st_git, st_template; - int namelen; int exists = 0; + strbuf_setlen(path, path_baselen); + strbuf_setlen(template, template_baselen); + if (de->d_name[0] == '.') continue; - namelen = strlen(de->d_name); - if ((PATH_MAX <= baselen + namelen) || - (PATH_MAX <= template_baselen + namelen)) - die(_("insanely long template name %s"), de->d_name); - memcpy(path + baselen, de->d_name, namelen+1); - memcpy(template + template_baselen, de->d_name, namelen+1); - if (lstat(path, &st_git)) { + strbuf_addstr(path, de->d_name); + strbuf_addstr(template, de->d_name); + if (lstat(path->buf, &st_git)) { if (errno != ENOENT) - die_errno(_("cannot stat '%s'"), path); + die_errno(_("cannot stat '%s'"), path->buf); } else exists = 1; - if (lstat(template, &st_template)) - die_errno(_("cannot stat template '%s'"), template); + if (lstat(template->buf, &st_template)) + die_errno(_("cannot stat template '%s'"), template->buf); if (S_ISDIR(st_template.st_mode)) { - DIR *subdir = opendir(template); - int baselen_sub = baselen + namelen; - int template_baselen_sub = template_baselen + namelen; + DIR *subdir = opendir(template->buf); if (!subdir) - die_errno(_("cannot opendir '%s'"), template); - path[baselen_sub++] = - template[template_baselen_sub++] = '/'; - path[baselen_sub] = - template[template_baselen_sub] = 0; - copy_templates_1(path, baselen_sub, - template, template_baselen_sub, - subdir); + die_errno(_("cannot opendir '%s'"), template->buf); + strbuf_addch(path, '/'); + strbuf_addch(template, '/'); + copy_templates_1(path, template, subdir); closedir(subdir); } else if (exists) continue; else if (S_ISLNK(st_template.st_mode)) { - char lnk[256]; - int len; - len = readlink(template, lnk, sizeof(lnk)); - if (len < 0) - die_errno(_("cannot readlink '%s'"), template); - if (sizeof(lnk) <= len) - die(_("insanely long symlink %s"), template); - lnk[len] = 0; - if (symlink(lnk, path)) - die_errno(_("cannot symlink '%s' '%s'"), lnk, path); + struct strbuf lnk = STRBUF_INIT; + if (strbuf_readlink(&lnk, template->buf, 0) < 0) + die_errno(_("cannot readlink '%s'"), template->buf); + if (symlink(lnk.buf, path->buf)) + die_errno(_("cannot symlink '%s' '%s'"), + lnk.buf, path->buf); + strbuf_release(&lnk); } else if (S_ISREG(st_template.st_mode)) { - if (copy_file(path, template, st_template.st_mode)) - die_errno(_("cannot copy '%s' to '%s'"), template, - path); + if (copy_file(path->buf, template->buf, st_template.st_mode)) + die_errno(_("cannot copy '%s' to '%s'"), + template->buf, path->buf); } else - error(_("ignoring template %s"), template); + error(_("ignoring template %s"), template->buf); } } static void copy_templates(const char *template_dir) { - char path[PATH_MAX]; - char template_path[PATH_MAX]; - int template_len; + struct strbuf path = STRBUF_INIT; + struct strbuf template_path = STRBUF_INIT; + size_t template_len; DIR *dir; - const char *git_dir = get_git_dir(); - int len = strlen(git_dir); char *to_free = NULL; if (!template_dir) @@ -132,26 +120,23 @@ static void copy_templates(const char *template_dir) free(to_free); return; } - template_len = strlen(template_dir); - if (PATH_MAX <= (template_len+strlen("/config"))) - die(_("insanely long template path %s"), template_dir); - strcpy(template_path, template_dir); - if (template_path[template_len-1] != '/') { - template_path[template_len++] = '/'; - template_path[template_len] = 0; - } - dir = opendir(template_path); + + strbuf_addstr(&template_path, template_dir); + strbuf_complete(&template_path, '/'); + template_len = template_path.len; + + dir = opendir(template_path.buf); if (!dir) { warning(_("templates not found %s"), template_dir); goto free_return; } /* Make sure that template is from the correct vintage */ - strcpy(template_path + template_len, "config"); + strbuf_addstr(&template_path, "config"); repository_format_version = 0; git_config_from_file(check_repository_format_version, - template_path, NULL); - template_path[template_len] = 0; + template_path.buf, NULL); + strbuf_setlen(&template_path, template_len); if (repository_format_version && repository_format_version != GIT_REPO_VERSION) { @@ -162,17 +147,15 @@ static void copy_templates(const char *template_dir) goto close_free_return; } - memcpy(path, git_dir, len); - if (len && path[len - 1] != '/') - path[len++] = '/'; - path[len] = 0; - copy_templates_1(path, len, - template_path, template_len, - dir); + strbuf_addstr(&path, get_git_dir()); + strbuf_complete(&path, '/'); + copy_templates_1(&path, &template_path, dir); close_free_return: closedir(dir); free_return: free(to_free); + strbuf_release(&path); + strbuf_release(&template_path); } static int git_init_db_config(const char *k, const char *v, void *cb) @@ -199,28 +182,20 @@ static int needs_work_tree_config(const char *git_dir, const char *work_tree) static int create_default_files(const char *template_path) { - const char *git_dir = get_git_dir(); - unsigned len = strlen(git_dir); - static char path[PATH_MAX]; struct stat st1; + struct strbuf buf = STRBUF_INIT; + char *path; char repo_version_string[10]; char junk[2]; int reinit; int filemode; - if (len > sizeof(path)-50) - die(_("insane git directory %s"), git_dir); - memcpy(path, git_dir, len); - - if (len && path[len-1] != '/') - path[len++] = '/'; - /* * Create .git/refs/{heads,tags} */ - safe_create_dir(git_path("refs"), 1); - safe_create_dir(git_path("refs/heads"), 1); - safe_create_dir(git_path("refs/tags"), 1); + safe_create_dir(git_path_buf(&buf, "refs"), 1); + safe_create_dir(git_path_buf(&buf, "refs/heads"), 1); + safe_create_dir(git_path_buf(&buf, "refs/tags"), 1); /* Just look for `init.templatedir` */ git_config(git_init_db_config, NULL); @@ -244,16 +219,16 @@ static int create_default_files(const char *template_path) */ if (shared_repository) { adjust_shared_perm(get_git_dir()); - adjust_shared_perm(git_path("refs")); - adjust_shared_perm(git_path("refs/heads")); - adjust_shared_perm(git_path("refs/tags")); + adjust_shared_perm(git_path_buf(&buf, "refs")); + adjust_shared_perm(git_path_buf(&buf, "refs/heads")); + adjust_shared_perm(git_path_buf(&buf, "refs/tags")); } /* * Create the default symlink from ".git/HEAD" to the "master" * branch, if it does not exist yet. */ - strcpy(path + len, "HEAD"); + path = git_path_buf(&buf, "HEAD"); reinit = (!access(path, R_OK) || readlink(path, junk, sizeof(junk)-1) != -1); if (!reinit) { @@ -266,10 +241,8 @@ static int create_default_files(const char *template_path) "%d", GIT_REPO_VERSION); git_config_set("core.repositoryformatversion", repo_version_string); - path[len] = 0; - strcpy(path + len, "config"); - /* Check filemode trustability */ + path = git_path_buf(&buf, "config"); filemode = TEST_FILEMODE; if (TEST_FILEMODE && !lstat(path, &st1)) { struct stat st2; @@ -290,14 +263,13 @@ static int create_default_files(const char *template_path) /* allow template config file to override the default */ if (log_all_ref_updates == -1) git_config_set("core.logallrefupdates", "true"); - if (needs_work_tree_config(git_dir, work_tree)) + if (needs_work_tree_config(get_git_dir(), work_tree)) git_config_set("core.worktree", work_tree); } if (!reinit) { /* Check if symlink is supported in the work tree */ - path[len] = 0; - strcpy(path + len, "tXXXXXX"); + path = git_path_buf(&buf, "tXXXXXX"); if (!close(xmkstemp(path)) && !unlink(path) && !symlink("testing", path) && @@ -308,31 +280,35 @@ static int create_default_files(const char *template_path) git_config_set("core.symlinks", "false"); /* Check if the filesystem is case-insensitive */ - path[len] = 0; - strcpy(path + len, "CoNfIg"); + path = git_path_buf(&buf, "CoNfIg"); if (!access(path, F_OK)) git_config_set("core.ignorecase", "true"); probe_utf8_pathname_composition(); } + strbuf_release(&buf); return reinit; } static void create_object_directory(void) { - const char *object_directory = get_object_directory(); - int len = strlen(object_directory); - char *path = xmalloc(len + 40); + struct strbuf path = STRBUF_INIT; + size_t baselen; - memcpy(path, object_directory, len); + strbuf_addstr(&path, get_object_directory()); + baselen = path.len; - safe_create_dir(object_directory, 1); - strcpy(path+len, "/pack"); - safe_create_dir(path, 1); - strcpy(path+len, "/info"); - safe_create_dir(path, 1); + safe_create_dir(path.buf, 1); - free(path); + strbuf_setlen(&path, baselen); + strbuf_addstr(&path, "/pack"); + safe_create_dir(path.buf, 1); + + strbuf_setlen(&path, baselen); + strbuf_addstr(&path, "/info"); + safe_create_dir(path.buf, 1); + + strbuf_release(&path); } int set_git_dir_init(const char *git_dir, const char *real_git_dir, diff --git a/t/t0001-init.sh b/t/t0001-init.sh index 7de8d85ee8..f91bbcfc85 100755 --- a/t/t0001-init.sh +++ b/t/t0001-init.sh @@ -202,8 +202,8 @@ test_expect_success 'init honors global core.sharedRepository' ' x$(git config -f shared-honor-global/.git/config core.sharedRepository) ' -test_expect_success 'init rejects insanely long --template' ' - test_must_fail git init --template=$(printf "x%09999dx" 1) test +test_expect_success 'init allows insanely long --template' ' + git init --template=$(printf "x%09999dx" 1) test ' test_expect_success 'init creates a new directory' ' From 6c31c22cebe9f0b117fd93ee7792d88e82aaa61e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:38 -0400 Subject: [PATCH 44/70] apply: convert root string to strbuf We use manual computation and strcpy to allocate the "root" variable. This would be much simpler using xstrfmt. But since we store the length, too, we can just use a strbuf, which handles that for us. Note that we stop distinguishing between "no root" and "empty root" in some cases, but that's OK; the results are the same (e.g., inserting an empty string is a noop). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/apply.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/builtin/apply.c b/builtin/apply.c index 094a20f489..deb1364fa8 100644 --- a/builtin/apply.c +++ b/builtin/apply.c @@ -77,8 +77,7 @@ static enum ws_ignore { static const char *patch_input_file; -static const char *root; -static int root_len; +static struct strbuf root = STRBUF_INIT; static int read_stdin = 1; static int options; @@ -494,8 +493,8 @@ static char *find_name_gnu(const char *line, const char *def, int p_value) } strbuf_remove(&name, 0, cp - name.buf); - if (root) - strbuf_insert(&name, 0, root, root_len); + if (root.len) + strbuf_insert(&name, 0, root.buf, root.len); return squash_slash(strbuf_detach(&name, NULL)); } @@ -697,8 +696,8 @@ static char *find_name_common(const char *line, const char *def, return squash_slash(xstrdup(def)); } - if (root) { - char *ret = xstrfmt("%s%.*s", root, len, start); + if (root.len) { + char *ret = xstrfmt("%s%.*s", root.buf, len, start); return squash_slash(ret); } @@ -1274,8 +1273,8 @@ static int parse_git_header(const char *line, int len, unsigned int size, struct * the default name from the header. */ patch->def_name = git_header_name(line, len); - if (patch->def_name && root) { - char *s = xstrfmt("%s%s", root, patch->def_name); + if (patch->def_name && root.len) { + char *s = xstrfmt("%s%s", root.buf, patch->def_name); free(patch->def_name); patch->def_name = s; } @@ -4498,14 +4497,9 @@ static int option_parse_whitespace(const struct option *opt, static int option_parse_directory(const struct option *opt, const char *arg, int unset) { - root_len = strlen(arg); - if (root_len && arg[root_len - 1] != '/') { - char *new_root; - root = new_root = xmalloc(root_len + 2); - strcpy(new_root, arg); - strcpy(new_root + root_len++, "/"); - } else - root = arg; + strbuf_reset(&root); + strbuf_addstr(&root, arg); + strbuf_complete(&root, '/'); return 0; } From bd22d4ffbc10052fef1a6c52aec066ee64236340 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:40 -0400 Subject: [PATCH 45/70] transport: use strbufs for status table "quickref" strings We generate range strings like "1234abcd...5678efab" for use in the the fetch and push status tables. We use fixed-size buffers along with strcat to do so. These aren't buggy, as our manual size computation is correct, but there's nothing checking that this is so. Let's switch them to strbufs instead, which are obviously correct, and make it easier to audit the code base for problematic calls to strcat(). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch.c | 22 ++++++++++++---------- transport.c | 13 +++++++------ 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 470372552c..841880ec2c 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -528,36 +528,38 @@ static int update_local_ref(struct ref *ref, } if (in_merge_bases(current, updated)) { - char quickref[83]; + struct strbuf quickref = STRBUF_INIT; int r; - strcpy(quickref, find_unique_abbrev(current->object.sha1, DEFAULT_ABBREV)); - strcat(quickref, ".."); - strcat(quickref, find_unique_abbrev(ref->new_sha1, DEFAULT_ABBREV)); + strbuf_add_unique_abbrev(&quickref, current->object.sha1, DEFAULT_ABBREV); + strbuf_addstr(&quickref, ".."); + strbuf_add_unique_abbrev(&quickref, ref->new_sha1, DEFAULT_ABBREV); if ((recurse_submodules != RECURSE_SUBMODULES_OFF) && (recurse_submodules != RECURSE_SUBMODULES_ON)) check_for_new_submodule_commits(ref->new_sha1); r = s_update_ref("fast-forward", ref, 1); strbuf_addf(display, "%c %-*s %-*s -> %s%s", r ? '!' : ' ', - TRANSPORT_SUMMARY_WIDTH, quickref, + TRANSPORT_SUMMARY_WIDTH, quickref.buf, REFCOL_WIDTH, remote, pretty_ref, r ? _(" (unable to update local ref)") : ""); + strbuf_release(&quickref); return r; } else if (force || ref->force) { - char quickref[84]; + struct strbuf quickref = STRBUF_INIT; int r; - strcpy(quickref, find_unique_abbrev(current->object.sha1, DEFAULT_ABBREV)); - strcat(quickref, "..."); - strcat(quickref, find_unique_abbrev(ref->new_sha1, DEFAULT_ABBREV)); + strbuf_add_unique_abbrev(&quickref, current->object.sha1, DEFAULT_ABBREV); + strbuf_addstr(&quickref, "..."); + strbuf_add_unique_abbrev(&quickref, ref->new_sha1, DEFAULT_ABBREV); if ((recurse_submodules != RECURSE_SUBMODULES_OFF) && (recurse_submodules != RECURSE_SUBMODULES_ON)) check_for_new_submodule_commits(ref->new_sha1); r = s_update_ref("forced-update", ref, 1); strbuf_addf(display, "%c %-*s %-*s -> %s (%s)", r ? '!' : '+', - TRANSPORT_SUMMARY_WIDTH, quickref, + TRANSPORT_SUMMARY_WIDTH, quickref.buf, REFCOL_WIDTH, remote, pretty_ref, r ? _("unable to update local ref") : _("forced update")); + strbuf_release(&quickref); return r; } else { strbuf_addf(display, "! %-*s %-*s -> %s %s", diff --git a/transport.c b/transport.c index 2d51348f3f..3b47d493d1 100644 --- a/transport.c +++ b/transport.c @@ -654,23 +654,24 @@ static void print_ok_ref_status(struct ref *ref, int porcelain) "[new branch]"), ref, ref->peer_ref, NULL, porcelain); else { - char quickref[84]; + struct strbuf quickref = STRBUF_INIT; char type; const char *msg; - strcpy(quickref, status_abbrev(ref->old_sha1)); + strbuf_addstr(&quickref, status_abbrev(ref->old_sha1)); if (ref->forced_update) { - strcat(quickref, "..."); + strbuf_addstr(&quickref, "..."); type = '+'; msg = "forced update"; } else { - strcat(quickref, ".."); + strbuf_addstr(&quickref, ".."); type = ' '; msg = NULL; } - strcat(quickref, status_abbrev(ref->new_sha1)); + strbuf_addstr(&quickref, status_abbrev(ref->new_sha1)); - print_ref_status(type, quickref, ref, ref->peer_ref, msg, porcelain); + print_ref_status(type, quickref.buf, ref, ref->peer_ref, msg, porcelain); + strbuf_release(&quickref); } } From b4600fbe071898068ef30be2766ee75880ec308b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:43 -0400 Subject: [PATCH 46/70] merge-recursive: convert malloc / strcpy to strbuf This would be a fairly routine use of xstrfmt, except that we need to remember the length of the result to pass to cache_name_pos. So just use a strbuf, which makes this simple. As a bonus, this gets rid of confusing references to "pathlen+1". The "1" is for the trailing slash we added, but that is automatically accounted for in the strbuf's len parameter. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- merge-recursive.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/merge-recursive.c b/merge-recursive.c index 44d85bea4b..a5e74d85fd 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -630,25 +630,24 @@ static char *unique_path(struct merge_options *o, const char *path, const char * static int dir_in_way(const char *path, int check_working_copy) { - int pos, pathlen = strlen(path); - char *dirpath = xmalloc(pathlen + 2); + int pos; + struct strbuf dirpath = STRBUF_INIT; struct stat st; - strcpy(dirpath, path); - dirpath[pathlen] = '/'; - dirpath[pathlen+1] = '\0'; + strbuf_addstr(&dirpath, path); + strbuf_addch(&dirpath, '/'); - pos = cache_name_pos(dirpath, pathlen+1); + pos = cache_name_pos(dirpath.buf, dirpath.len); if (pos < 0) pos = -1 - pos; if (pos < active_nr && - !strncmp(dirpath, active_cache[pos]->name, pathlen+1)) { - free(dirpath); + !strncmp(dirpath.buf, active_cache[pos]->name, dirpath.len)) { + strbuf_release(&dirpath); return 1; } - free(dirpath); + strbuf_release(&dirpath); return check_working_copy && !lstat(path, &st) && S_ISDIR(st.st_mode); } From e9ba678175da28607d57043e1363c6252880dd7f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:45 -0400 Subject: [PATCH 47/70] enter_repo: convert fixed-size buffers to strbufs We use two PATH_MAX-sized buffers to represent the repo path, and must make sure not to overflow them. We do take care to check the lengths, but the logic is rather hard to follow, as we use several magic numbers (e.g., "PATH_MAX - 10"). And in fact you _can_ overflow the buffer if you have a ".git" file with an extremely long path in it. By switching to strbufs, these problems all go away. We do, however, retain the check that the initial input we get is no larger than PATH_MAX. This function is an entry point for untrusted repo names from the network, and it's a good idea to keep a sanity check (both to avoid allocating arbitrary amounts of memory, and also as a layer of defense against any downstream users of the names). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- path.c | 57 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/path.c b/path.c index 46a4d2714b..60e0390906 100644 --- a/path.c +++ b/path.c @@ -391,8 +391,8 @@ return_null: */ const char *enter_repo(const char *path, int strict) { - static char used_path[PATH_MAX]; - static char validated_path[PATH_MAX]; + static struct strbuf validated_path = STRBUF_INIT; + static struct strbuf used_path = STRBUF_INIT; if (!path) return NULL; @@ -407,46 +407,47 @@ const char *enter_repo(const char *path, int strict) while ((1 < len) && (path[len-1] == '/')) len--; + /* + * We can handle arbitrary-sized buffers, but this remains as a + * sanity check on untrusted input. + */ if (PATH_MAX <= len) return NULL; - strncpy(used_path, path, len); used_path[len] = 0 ; - strcpy(validated_path, used_path); - if (used_path[0] == '~') { - char *newpath = expand_user_path(used_path); - if (!newpath || (PATH_MAX - 10 < strlen(newpath))) { - free(newpath); + strbuf_reset(&used_path); + strbuf_reset(&validated_path); + strbuf_add(&used_path, path, len); + strbuf_add(&validated_path, path, len); + + if (used_path.buf[0] == '~') { + char *newpath = expand_user_path(used_path.buf); + if (!newpath) return NULL; - } - /* - * Copy back into the static buffer. A pity - * since newpath was not bounded, but other - * branches of the if are limited by PATH_MAX - * anyway. - */ - strcpy(used_path, newpath); free(newpath); + strbuf_attach(&used_path, newpath, strlen(newpath), + strlen(newpath)); } - else if (PATH_MAX - 10 < len) - return NULL; - len = strlen(used_path); for (i = 0; suffix[i]; i++) { struct stat st; - strcpy(used_path + len, suffix[i]); - if (!stat(used_path, &st) && + size_t baselen = used_path.len; + strbuf_addstr(&used_path, suffix[i]); + if (!stat(used_path.buf, &st) && (S_ISREG(st.st_mode) || - (S_ISDIR(st.st_mode) && is_git_directory(used_path)))) { - strcat(validated_path, suffix[i]); + (S_ISDIR(st.st_mode) && is_git_directory(used_path.buf)))) { + strbuf_addstr(&validated_path, suffix[i]); break; } + strbuf_setlen(&used_path, baselen); } if (!suffix[i]) return NULL; - gitfile = read_gitfile(used_path) ; - if (gitfile) - strcpy(used_path, gitfile); - if (chdir(used_path)) + gitfile = read_gitfile(used_path.buf) ; + if (gitfile) { + strbuf_reset(&used_path); + strbuf_addstr(&used_path, gitfile); + } + if (chdir(used_path.buf)) return NULL; - path = validated_path; + path = validated_path.buf; } else if (chdir(path)) return NULL; From 4635768809885bb1c063bc9f9eee38e413f85f0d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:47 -0400 Subject: [PATCH 48/70] remove_leading_path: use a strbuf for internal storage This function strcpy's directly into a PATH_MAX-sized buffer. There's only one caller, which feeds the git_dir into it, so it's not easy to trigger in practice (even if you fed a large $GIT_DIR through the environment or .git file, it would have to actually exist and be accessible on the filesystem to get to this point). We can fix it by moving to a strbuf. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- path.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/path.c b/path.c index 60e0390906..c597473e62 100644 --- a/path.c +++ b/path.c @@ -632,7 +632,7 @@ const char *relative_path(const char *in, const char *prefix, */ const char *remove_leading_path(const char *in, const char *prefix) { - static char buf[PATH_MAX + 1]; + static struct strbuf buf = STRBUF_INIT; int i = 0, j = 0; if (!prefix || !prefix[0]) @@ -661,11 +661,13 @@ const char *remove_leading_path(const char *in, const char *prefix) return in; while (is_dir_sep(in[j])) j++; + + strbuf_reset(&buf); if (!in[j]) - strcpy(buf, "."); + strbuf_addstr(&buf, "."); else - strcpy(buf, in + j); - return buf; + strbuf_addstr(&buf, in + j); + return buf.buf; } /* From d4b3d11a03c5733a37656ca2f23171be6efad7d3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:49 -0400 Subject: [PATCH 49/70] write_loose_object: convert to strbuf When creating a loose object tempfile, we use a fixed PATH_MAX-sized buffer, and strcpy directly into it. This isn't buggy, because we do a rough check of the size, but there's no verification that our guesstimate of the required space is enough (in fact, it's several bytes too big for the current naming scheme). Let's switch to a strbuf, which makes this much easier to verify. The allocation overhead should be negligible, since we are replacing a static buffer with a static strbuf, and we'll only need to allocate on the first call. While we're here, we can also document a subtle interaction with mkstemp that would be easy to overlook. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_file.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/sha1_file.c b/sha1_file.c index c26fdcbd88..4211af1d89 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -3011,29 +3011,31 @@ static inline int directory_size(const char *filename) * We want to avoid cross-directory filename renames, because those * can have problems on various filesystems (FAT, NFS, Coda). */ -static int create_tmpfile(char *buffer, size_t bufsiz, const char *filename) +static int create_tmpfile(struct strbuf *tmp, const char *filename) { int fd, dirlen = directory_size(filename); - if (dirlen + 20 > bufsiz) { - errno = ENAMETOOLONG; - return -1; - } - memcpy(buffer, filename, dirlen); - strcpy(buffer + dirlen, "tmp_obj_XXXXXX"); - fd = git_mkstemp_mode(buffer, 0444); + strbuf_reset(tmp); + strbuf_add(tmp, filename, dirlen); + strbuf_addstr(tmp, "tmp_obj_XXXXXX"); + fd = git_mkstemp_mode(tmp->buf, 0444); if (fd < 0 && dirlen && errno == ENOENT) { - /* Make sure the directory exists */ - memcpy(buffer, filename, dirlen); - buffer[dirlen-1] = 0; - if (mkdir(buffer, 0777) && errno != EEXIST) + /* + * Make sure the directory exists; note that the contents + * of the buffer are undefined after mkstemp returns an + * error, so we have to rewrite the whole buffer from + * scratch. + */ + strbuf_reset(tmp); + strbuf_add(tmp, filename, dirlen - 1); + if (mkdir(tmp->buf, 0777) && errno != EEXIST) return -1; - if (adjust_shared_perm(buffer)) + if (adjust_shared_perm(tmp->buf)) return -1; /* Try again */ - strcpy(buffer + dirlen - 1, "/tmp_obj_XXXXXX"); - fd = git_mkstemp_mode(buffer, 0444); + strbuf_addstr(tmp, "/tmp_obj_XXXXXX"); + fd = git_mkstemp_mode(tmp->buf, 0444); } return fd; } @@ -3046,10 +3048,10 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen, git_zstream stream; git_SHA_CTX c; unsigned char parano_sha1[20]; - static char tmp_file[PATH_MAX]; + static struct strbuf tmp_file = STRBUF_INIT; const char *filename = sha1_file_name(sha1); - fd = create_tmpfile(tmp_file, sizeof(tmp_file), filename); + fd = create_tmpfile(&tmp_file, filename); if (fd < 0) { if (errno == EACCES) return error("insufficient permission for adding an object to repository database %s", get_object_directory()); @@ -3098,12 +3100,12 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen, struct utimbuf utb; utb.actime = mtime; utb.modtime = mtime; - if (utime(tmp_file, &utb) < 0) + if (utime(tmp_file.buf, &utb) < 0) warning("failed utime() on %s: %s", - tmp_file, strerror(errno)); + tmp_file.buf, strerror(errno)); } - return finalize_object_file(tmp_file, filename); + return finalize_object_file(tmp_file.buf, filename); } static int freshen_loose_object(const unsigned char *sha1) From 43bb66ae0b2f1e379b73e3f0f697ca0d93e2a57b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:52 -0400 Subject: [PATCH 50/70] diagnose_invalid_index_path: use strbuf to avoid strcpy/strcat We dynamically allocate a buffer and then strcpy and strcat into it. This isn't buggy, but we'd prefer to avoid these suspicious functions. This would be a good candidate for converstion to xstrfmt, but we need to record the length for dealing with index entries. A strbuf handles that for us. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_name.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/sha1_name.c b/sha1_name.c index 80753b6770..3242c5ea46 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -1293,8 +1293,7 @@ static void diagnose_invalid_index_path(int stage, const struct cache_entry *ce; int pos; unsigned namelen = strlen(filename); - unsigned fullnamelen; - char *fullname; + struct strbuf fullname = STRBUF_INIT; if (!prefix) prefix = ""; @@ -1314,21 +1313,19 @@ static void diagnose_invalid_index_path(int stage, } /* Confusion between relative and absolute filenames? */ - fullnamelen = namelen + strlen(prefix); - fullname = xmalloc(fullnamelen + 1); - strcpy(fullname, prefix); - strcat(fullname, filename); - pos = cache_name_pos(fullname, fullnamelen); + strbuf_addstr(&fullname, prefix); + strbuf_addstr(&fullname, filename); + pos = cache_name_pos(fullname.buf, fullname.len); if (pos < 0) pos = -pos - 1; if (pos < active_nr) { ce = active_cache[pos]; - if (ce_namelen(ce) == fullnamelen && - !memcmp(ce->name, fullname, fullnamelen)) + if (ce_namelen(ce) == fullname.len && + !memcmp(ce->name, fullname.buf, fullname.len)) die("Path '%s' is in the index, but not '%s'.\n" "Did you mean ':%d:%s' aka ':%d:./%s'?", - fullname, filename, - ce_stage(ce), fullname, + fullname.buf, filename, + ce_stage(ce), fullname.buf, ce_stage(ce), filename); } @@ -1338,7 +1335,7 @@ static void diagnose_invalid_index_path(int stage, die("Path '%s' does not exist (neither on disk nor in the index).", filename); - free(fullname); + strbuf_release(&fullname); } From 984a43b902f005520e27268fecd10b9b517c0b17 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:54 -0400 Subject: [PATCH 51/70] fetch-pack: use argv_array for index-pack / unpack-objects This cleans up a magic number that must be kept in sync with the rest of the code (the number of argv slots). It also lets us drop some fixed buffers and an sprintf (since we can now use argv_array_pushf). We do still have to keep one fixed buffer for calling gethostname, but at least now the size computations for it are much simpler. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- fetch-pack.c | 56 +++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/fetch-pack.c b/fetch-pack.c index 820251a8d8..2dabee97b2 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -681,11 +681,10 @@ static int get_pack(struct fetch_pack_args *args, int xd[2], char **pack_lockfile) { struct async demux; - const char *argv[22]; - char keep_arg[256]; - char hdr_arg[256]; - const char **av, *cmd_name; int do_keep = args->keep_pack; + const char *cmd_name; + struct pack_header header; + int pass_header = 0; struct child_process cmd = CHILD_PROCESS_INIT; int ret; @@ -705,17 +704,11 @@ static int get_pack(struct fetch_pack_args *args, else demux.out = xd[0]; - cmd.argv = argv; - av = argv; - *hdr_arg = 0; if (!args->keep_pack && unpack_limit) { - struct pack_header header; if (read_pack_header(demux.out, &header)) die("protocol error: bad pack header"); - snprintf(hdr_arg, sizeof(hdr_arg), - "--pack_header=%"PRIu32",%"PRIu32, - ntohl(header.hdr_version), ntohl(header.hdr_entries)); + pass_header = 1; if (ntohl(header.hdr_entries) < unpack_limit) do_keep = 0; else @@ -723,44 +716,49 @@ static int get_pack(struct fetch_pack_args *args, } if (alternate_shallow_file) { - *av++ = "--shallow-file"; - *av++ = alternate_shallow_file; + argv_array_push(&cmd.args, "--shallow-file"); + argv_array_push(&cmd.args, alternate_shallow_file); } if (do_keep) { if (pack_lockfile) cmd.out = -1; - *av++ = cmd_name = "index-pack"; - *av++ = "--stdin"; + cmd_name = "index-pack"; + argv_array_push(&cmd.args, cmd_name); + argv_array_push(&cmd.args, "--stdin"); if (!args->quiet && !args->no_progress) - *av++ = "-v"; + argv_array_push(&cmd.args, "-v"); if (args->use_thin_pack) - *av++ = "--fix-thin"; + argv_array_push(&cmd.args, "--fix-thin"); if (args->lock_pack || unpack_limit) { - int s = sprintf(keep_arg, - "--keep=fetch-pack %"PRIuMAX " on ", (uintmax_t) getpid()); - if (gethostname(keep_arg + s, sizeof(keep_arg) - s)) - strcpy(keep_arg + s, "localhost"); - *av++ = keep_arg; + char hostname[256]; + if (gethostname(hostname, sizeof(hostname))) + xsnprintf(hostname, sizeof(hostname), "localhost"); + argv_array_pushf(&cmd.args, + "--keep=fetch-pack %"PRIuMAX " on %s", + (uintmax_t)getpid(), hostname); } if (args->check_self_contained_and_connected) - *av++ = "--check-self-contained-and-connected"; + argv_array_push(&cmd.args, "--check-self-contained-and-connected"); } else { - *av++ = cmd_name = "unpack-objects"; + cmd_name = "unpack-objects"; + argv_array_push(&cmd.args, cmd_name); if (args->quiet || args->no_progress) - *av++ = "-q"; + argv_array_push(&cmd.args, "-q"); args->check_self_contained_and_connected = 0; } - if (*hdr_arg) - *av++ = hdr_arg; + + if (pass_header) + argv_array_pushf(&cmd.args, "--pack_header=%"PRIu32",%"PRIu32, + ntohl(header.hdr_version), + ntohl(header.hdr_entries)); if (fetch_fsck_objects >= 0 ? fetch_fsck_objects : transfer_fsck_objects >= 0 ? transfer_fsck_objects : 0) - *av++ = "--strict"; - *av++ = NULL; + argv_array_push(&cmd.args, "--strict"); cmd.in = demux.out; cmd.git_cmd = 1; From a0355f6bcde6fb62d9629927b2e241353b38aeef Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:56 -0400 Subject: [PATCH 52/70] http-push: use an argv_array for setup_revisions This drops the magic number for the fixed-size argv arrays, so we do not have to wonder if we are overflowing it. We can also drop some confusing sha1_to_hex memory allocation (which seems to predate the ring of buffers allowing multiple calls), and get rid of an unchecked sprintf call. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- http-push.c | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/http-push.c b/http-push.c index e501c282e6..43a9036594 100644 --- a/http-push.c +++ b/http-push.c @@ -10,6 +10,7 @@ #include "remote.h" #include "list-objects.h" #include "sigchain.h" +#include "argv-array.h" #ifdef EXPAT_NEEDS_XMLPARSE_H #include @@ -1856,9 +1857,7 @@ int main(int argc, char **argv) new_refs = 0; for (ref = remote_refs; ref; ref = ref->next) { char old_hex[60], *new_hex; - const char *commit_argv[5]; - int commit_argc; - char *new_sha1_hex, *old_sha1_hex; + struct argv_array commit_argv = ARGV_ARRAY_INIT; if (!ref->peer_ref) continue; @@ -1937,27 +1936,15 @@ int main(int argc, char **argv) } /* Set up revision info for this refspec */ - commit_argc = 3; - new_sha1_hex = xstrdup(sha1_to_hex(ref->new_sha1)); - old_sha1_hex = NULL; - commit_argv[1] = "--objects"; - commit_argv[2] = new_sha1_hex; - if (!push_all && !is_null_sha1(ref->old_sha1)) { - old_sha1_hex = xmalloc(42); - sprintf(old_sha1_hex, "^%s", - sha1_to_hex(ref->old_sha1)); - commit_argv[3] = old_sha1_hex; - commit_argc++; - } - commit_argv[commit_argc] = NULL; + argv_array_push(&commit_argv, ""); /* ignored */ + argv_array_push(&commit_argv, "--objects"); + argv_array_push(&commit_argv, sha1_to_hex(ref->new_sha1)); + if (!push_all && !is_null_sha1(ref->old_sha1)) + argv_array_pushf(&commit_argv, "^%s", + sha1_to_hex(ref->old_sha1)); init_revisions(&revs, setup_git_directory()); - setup_revisions(commit_argc, commit_argv, &revs, NULL); + setup_revisions(commit_argv.argc, commit_argv.argv, &revs, NULL); revs.edge_hint = 0; /* just in case */ - free(new_sha1_hex); - if (old_sha1_hex) { - free(old_sha1_hex); - commit_argv[1] = NULL; - } /* Generate a list of objects that need to be pushed */ pushing = 0; @@ -1986,6 +1973,7 @@ int main(int argc, char **argv) printf("%s %s\n", !rc ? "ok" : "error", ref->name); unlock_remote(ref_lock); check_locks(); + argv_array_clear(&commit_argv); } /* Update remote server info if appropriate */ From 0b282cc4b2ceaf2f7f3376f0303f63e9d02fdc91 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:58 -0400 Subject: [PATCH 53/70] stat_tracking_info: convert to argv_array In addition to dropping the magic number for the fixed-size argv, we can also drop a fixed-length buffer and some strcpy's into it. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- remote.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/remote.c b/remote.c index a01f13a211..1101f82eaf 100644 --- a/remote.c +++ b/remote.c @@ -8,6 +8,7 @@ #include "tag.h" #include "string-list.h" #include "mergesort.h" +#include "argv-array.h" enum map_direction { FROM_SRC, FROM_DST }; @@ -1997,10 +1998,9 @@ int stat_tracking_info(struct branch *branch, int *num_ours, int *num_theirs, { unsigned char sha1[20]; struct commit *ours, *theirs; - char symmetric[84]; struct rev_info revs; - const char *rev_argv[10], *base; - int rev_argc; + const char *base; + struct argv_array argv = ARGV_ARRAY_INIT; /* Cannot stat unless we are marked to build on top of somebody else. */ base = branch_get_upstream(branch, NULL); @@ -2029,19 +2029,15 @@ int stat_tracking_info(struct branch *branch, int *num_ours, int *num_theirs, } /* Run "rev-list --left-right ours...theirs" internally... */ - rev_argc = 0; - rev_argv[rev_argc++] = NULL; - rev_argv[rev_argc++] = "--left-right"; - rev_argv[rev_argc++] = symmetric; - rev_argv[rev_argc++] = "--"; - rev_argv[rev_argc] = NULL; - - strcpy(symmetric, sha1_to_hex(ours->object.sha1)); - strcpy(symmetric + 40, "..."); - strcpy(symmetric + 43, sha1_to_hex(theirs->object.sha1)); + argv_array_push(&argv, ""); /* ignored */ + argv_array_push(&argv, "--left-right"); + argv_array_pushf(&argv, "%s...%s", + sha1_to_hex(ours->object.sha1), + sha1_to_hex(theirs->object.sha1)); + argv_array_push(&argv, "--"); init_revisions(&revs, NULL); - setup_revisions(rev_argc, rev_argv, &revs, NULL); + setup_revisions(argv.argc, argv.argv, &revs, NULL); if (prepare_revision_walk(&revs)) die("revision walk setup failed"); @@ -2061,6 +2057,8 @@ int stat_tracking_info(struct branch *branch, int *num_ours, int *num_theirs, /* clear object flags smudged by the above traversal */ clear_commit_marks(ours, ALL_REV_FLAGS); clear_commit_marks(theirs, ALL_REV_FLAGS); + + argv_array_clear(&argv); return 0; } From f063d38b808e184675e5b0931ea0167a013b3fcb Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:00 -0400 Subject: [PATCH 54/70] daemon: use cld->env_array when re-spawning This avoids an ugly strcat into a fixed-size buffer. It's not wrong (the buffer is plenty large enough for an IPv6 address plus some minor formatting), but it takes some effort to verify that. Unfortunately we are still stuck with some fixed-size buffers to hold the output of inet_ntop. But at least we now pass very easy-to-verify parameters, rather than doing a manual computation to account for other data in the buffer. As a side effect, this also fixes the case where we might pass an uninitialized portbuf buffer through the environment. This probably couldn't happen in practice, as it would mean that addr->sa_family was neither AF_INET nor AF_INET6 (and that is all we are listening on). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- daemon.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/daemon.c b/daemon.c index 5218a3f1cc..56679a15fe 100644 --- a/daemon.c +++ b/daemon.c @@ -811,8 +811,6 @@ static char **cld_argv; static void handle(int incoming, struct sockaddr *addr, socklen_t addrlen) { struct child_process cld = CHILD_PROCESS_INIT; - char addrbuf[300] = "REMOTE_ADDR=", portbuf[300]; - char *env[] = { addrbuf, portbuf, NULL }; if (max_connections && live_children >= max_connections) { kill_some_child(); @@ -826,27 +824,23 @@ static void handle(int incoming, struct sockaddr *addr, socklen_t addrlen) } if (addr->sa_family == AF_INET) { + char buf[128] = ""; struct sockaddr_in *sin_addr = (void *) addr; - inet_ntop(addr->sa_family, &sin_addr->sin_addr, addrbuf + 12, - sizeof(addrbuf) - 12); - snprintf(portbuf, sizeof(portbuf), "REMOTE_PORT=%d", - ntohs(sin_addr->sin_port)); + inet_ntop(addr->sa_family, &sin_addr->sin_addr, buf, sizeof(buf)); + argv_array_pushf(&cld.env_array, "REMOTE_ADDR=%s", buf); + argv_array_pushf(&cld.env_array, "REMOTE_PORT=%d", + ntohs(sin_addr->sin_port)); #ifndef NO_IPV6 } else if (addr->sa_family == AF_INET6) { + char buf[128] = ""; struct sockaddr_in6 *sin6_addr = (void *) addr; - - char *buf = addrbuf + 12; - *buf++ = '['; *buf = '\0'; /* stpcpy() is cool */ - inet_ntop(AF_INET6, &sin6_addr->sin6_addr, buf, - sizeof(addrbuf) - 13); - strcat(buf, "]"); - - snprintf(portbuf, sizeof(portbuf), "REMOTE_PORT=%d", - ntohs(sin6_addr->sin6_port)); + inet_ntop(AF_INET6, &sin6_addr->sin6_addr, buf, sizeof(buf)); + argv_array_pushf(&cld.env_array, "REMOTE_ADDR=[%s]", buf); + argv_array_pushf(&cld.env_array, "REMOTE_PORT=%d", + ntohs(sin6_addr->sin6_port)); #endif } - cld.env = (const char **)env; cld.argv = (const char **)cld_argv; cld.in = incoming; cld.out = dup(incoming); From d59f765ac9b3d6fc2e6bea262222b80493055f12 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:03 -0400 Subject: [PATCH 55/70] use sha1_to_hex_r() instead of strcpy Before sha1_to_hex_r() existed, a simple way to get hex sha1 into a buffer was with: strcpy(buf, sha1_to_hex(sha1)); This isn't wrong (assuming the buf is 41 characters), but it makes auditing the code base for bad strcpy() calls harder, as these become false positives. Let's convert them to sha1_to_hex_r(), and likewise for some calls to find_unique_abbrev(). While we're here, we'll double-check that all of the buffers are correctly sized, and use the more obvious GIT_SHA1_HEXSZ constant. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/blame.c | 8 ++++---- builtin/merge-index.c | 4 ++-- builtin/merge.c | 20 ++++++++++---------- builtin/receive-pack.c | 15 +++++++++------ builtin/rev-list.c | 4 ++-- diff.c | 9 ++++----- 6 files changed, 31 insertions(+), 29 deletions(-) diff --git a/builtin/blame.c b/builtin/blame.c index 4db01c195c..e253ac0dcb 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1867,9 +1867,9 @@ static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent, int cnt; const char *cp; struct origin *suspect = ent->suspect; - char hex[41]; + char hex[GIT_SHA1_HEXSZ + 1]; - strcpy(hex, sha1_to_hex(suspect->commit->object.sha1)); + sha1_to_hex_r(hex, suspect->commit->object.sha1); printf("%s %d %d %d\n", hex, ent->s_lno + 1, @@ -1905,11 +1905,11 @@ static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt) const char *cp; struct origin *suspect = ent->suspect; struct commit_info ci; - char hex[41]; + char hex[GIT_SHA1_HEXSZ + 1]; int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP); get_commit_info(suspect->commit, &ci, 1); - strcpy(hex, sha1_to_hex(suspect->commit->object.sha1)); + sha1_to_hex_r(hex, suspect->commit->object.sha1); cp = nth_line(sb, ent->lno); for (cnt = 0; cnt < ent->num_lines; cnt++) { diff --git a/builtin/merge-index.c b/builtin/merge-index.c index 1d66111917..1c3427c36c 100644 --- a/builtin/merge-index.c +++ b/builtin/merge-index.c @@ -9,7 +9,7 @@ static int merge_entry(int pos, const char *path) { int found; const char *arguments[] = { pgm, "", "", "", path, "", "", "", NULL }; - char hexbuf[4][60]; + char hexbuf[4][GIT_SHA1_HEXSZ + 1]; char ownbuf[4][60]; if (pos >= active_nr) @@ -22,7 +22,7 @@ static int merge_entry(int pos, const char *path) if (strcmp(ce->name, path)) break; found++; - strcpy(hexbuf[stage], sha1_to_hex(ce->sha1)); + sha1_to_hex_r(hexbuf[stage], ce->sha1); xsnprintf(ownbuf[stage], sizeof(ownbuf[stage]), "%o", ce->ce_mode); arguments[stage] = hexbuf[stage]; arguments[stage + 4] = ownbuf[stage]; diff --git a/builtin/merge.c b/builtin/merge.c index a0edacab20..a0a9328292 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -1319,13 +1319,13 @@ int cmd_merge(int argc, const char **argv, const char *prefix) if (verify_signatures) { for (p = remoteheads; p; p = p->next) { struct commit *commit = p->item; - char hex[41]; + char hex[GIT_SHA1_HEXSZ + 1]; struct signature_check signature_check; memset(&signature_check, 0, sizeof(signature_check)); check_commit_signature(commit, &signature_check); - strcpy(hex, find_unique_abbrev(commit->object.sha1, DEFAULT_ABBREV)); + find_unique_abbrev_r(hex, commit->object.sha1, DEFAULT_ABBREV); switch (signature_check.result) { case 'G': break; @@ -1415,15 +1415,15 @@ int cmd_merge(int argc, const char **argv, const char *prefix) /* Again the most common case of merging one remote. */ struct strbuf msg = STRBUF_INIT; struct commit *commit; - char hex[41]; - strcpy(hex, find_unique_abbrev(head_commit->object.sha1, DEFAULT_ABBREV)); - - if (verbosity >= 0) - printf(_("Updating %s..%s\n"), - hex, - find_unique_abbrev(remoteheads->item->object.sha1, - DEFAULT_ABBREV)); + if (verbosity >= 0) { + char from[GIT_SHA1_HEXSZ + 1], to[GIT_SHA1_HEXSZ + 1]; + find_unique_abbrev_r(from, head_commit->object.sha1, + DEFAULT_ABBREV); + find_unique_abbrev_r(to, remoteheads->item->object.sha1, + DEFAULT_ABBREV); + printf(_("Updating %s..%s\n"), from, to); + } strbuf_addstr(&msg, "Fast-forward"); if (have_message) strbuf_addstr(&msg, diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 04d2bdf3f3..39eb0646fd 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1071,8 +1071,11 @@ static void check_aliased_update(struct command *cmd, struct string_list *list) const char *dst_name; struct string_list_item *item; struct command *dst_cmd; - unsigned char sha1[20]; - char cmd_oldh[41], cmd_newh[41], dst_oldh[41], dst_newh[41]; + unsigned char sha1[GIT_SHA1_RAWSZ]; + char cmd_oldh[GIT_SHA1_HEXSZ + 1], + cmd_newh[GIT_SHA1_HEXSZ + 1], + dst_oldh[GIT_SHA1_HEXSZ + 1], + dst_newh[GIT_SHA1_HEXSZ + 1]; int flag; strbuf_addf(&buf, "%s%s", get_git_namespace(), cmd->ref_name); @@ -1103,10 +1106,10 @@ static void check_aliased_update(struct command *cmd, struct string_list *list) dst_cmd->skip_update = 1; - strcpy(cmd_oldh, find_unique_abbrev(cmd->old_sha1, DEFAULT_ABBREV)); - strcpy(cmd_newh, find_unique_abbrev(cmd->new_sha1, DEFAULT_ABBREV)); - strcpy(dst_oldh, find_unique_abbrev(dst_cmd->old_sha1, DEFAULT_ABBREV)); - strcpy(dst_newh, find_unique_abbrev(dst_cmd->new_sha1, DEFAULT_ABBREV)); + find_unique_abbrev_r(cmd_oldh, cmd->old_sha1, DEFAULT_ABBREV); + find_unique_abbrev_r(cmd_newh, cmd->new_sha1, DEFAULT_ABBREV); + find_unique_abbrev_r(dst_oldh, dst_cmd->old_sha1, DEFAULT_ABBREV); + find_unique_abbrev_r(dst_newh, dst_cmd->new_sha1, DEFAULT_ABBREV); rp_error("refusing inconsistent update between symref '%s' (%s..%s) and" " its target '%s' (%s..%s)", cmd->ref_name, cmd_oldh, cmd_newh, diff --git a/builtin/rev-list.c b/builtin/rev-list.c index d80d1ed359..491d298fa2 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -217,7 +217,7 @@ static void print_var_int(const char *var, int val) static int show_bisect_vars(struct rev_list_info *info, int reaches, int all) { int cnt, flags = info->flags; - char hex[41] = ""; + char hex[GIT_SHA1_HEXSZ + 1] = ""; struct commit_list *tried; struct rev_info *revs = info->revs; @@ -242,7 +242,7 @@ static int show_bisect_vars(struct rev_list_info *info, int reaches, int all) cnt = reaches; if (revs->commits) - strcpy(hex, sha1_to_hex(revs->commits->item->object.sha1)); + sha1_to_hex_r(hex, revs->commits->item->object.sha1); if (flags & BISECT_SHOW_ALL) { traverse_commit_list(revs, show_commit, show_object, info); diff --git a/diff.c b/diff.c index 788e371dec..2a37378ec8 100644 --- a/diff.c +++ b/diff.c @@ -322,7 +322,7 @@ static struct diff_tempfile { */ const char *name; - char hex[41]; + char hex[GIT_SHA1_HEXSZ + 1]; char mode[10]; /* @@ -2878,8 +2878,7 @@ static void prep_temp_blob(const char *path, struct diff_tempfile *temp, die_errno("unable to write temp-file"); close_tempfile(&temp->tempfile); temp->name = get_tempfile_path(&temp->tempfile); - strcpy(temp->hex, sha1_to_hex(sha1)); - temp->hex[40] = 0; + sha1_to_hex_r(temp->hex, sha1); xsnprintf(temp->mode, sizeof(temp->mode), "%06o", mode); strbuf_release(&buf); strbuf_release(&template); @@ -2926,9 +2925,9 @@ static struct diff_tempfile *prepare_temp_file(const char *name, /* we can borrow from the file in the work tree */ temp->name = name; if (!one->sha1_valid) - strcpy(temp->hex, sha1_to_hex(null_sha1)); + sha1_to_hex_r(temp->hex, null_sha1); else - strcpy(temp->hex, sha1_to_hex(one->sha1)); + sha1_to_hex_r(temp->hex, one->sha1); /* Even though we may sometimes borrow the * contents from the work tree, we always want * one->mode. mode is trustworthy even when From 2b87d3a89633fb6078203a77d2610175f94cef95 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:05 -0400 Subject: [PATCH 56/70] drop strcpy in favor of raw sha1_to_hex In some cases where we strcpy() the result of sha1_to_hex(), there's no need; the result goes directly into a printf statement, and we can simply pass the return value from sha1_to_hex() directly. When this code was originally written, sha1_to_hex used a single buffer, and it was not safe to use it twice within a single expression. That changed as of dcb3450 (sha1_to_hex() usage cleanup, 2006-05-03), but this code was never updated. History-dug-by: Eric Sunshine Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- http-push.c | 6 ++---- walker.c | 5 ++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/http-push.c b/http-push.c index 43a9036594..48f39b7f71 100644 --- a/http-push.c +++ b/http-push.c @@ -1856,7 +1856,6 @@ int main(int argc, char **argv) new_refs = 0; for (ref = remote_refs; ref; ref = ref->next) { - char old_hex[60], *new_hex; struct argv_array commit_argv = ARGV_ARRAY_INIT; if (!ref->peer_ref) @@ -1911,13 +1910,12 @@ int main(int argc, char **argv) } hashcpy(ref->new_sha1, ref->peer_ref->new_sha1); new_refs++; - strcpy(old_hex, sha1_to_hex(ref->old_sha1)); - new_hex = sha1_to_hex(ref->new_sha1); fprintf(stderr, "updating '%s'", ref->name); if (strcmp(ref->name, ref->peer_ref->name)) fprintf(stderr, " using '%s'", ref->peer_ref->name); - fprintf(stderr, "\n from %s\n to %s\n", old_hex, new_hex); + fprintf(stderr, "\n from %s\n to %s\n", + sha1_to_hex(ref->old_sha1), sha1_to_hex(ref->new_sha1)); if (dry_run) { if (helper_status) printf("ok %s\n", ref->name); diff --git a/walker.c b/walker.c index 44a936c1cf..cdeb63f319 100644 --- a/walker.c +++ b/walker.c @@ -17,10 +17,9 @@ void walker_say(struct walker *walker, const char *fmt, const char *hex) static void report_missing(const struct object *obj) { - char missing_hex[41]; - strcpy(missing_hex, sha1_to_hex(obj->sha1)); fprintf(stderr, "Cannot obtain needed %s %s\n", - obj->type ? typename(obj->type): "object", missing_hex); + obj->type ? typename(obj->type): "object", + sha1_to_hex(obj->sha1)); if (!is_null_sha1(current_commit_sha1)) fprintf(stderr, "while processing commit %s.\n", sha1_to_hex(current_commit_sha1)); From cbc8feeaf97db39ff7220c4d1b03e0c3fdd2401c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:07 -0400 Subject: [PATCH 57/70] color: add overflow checks for parsing colors Our color parsing is designed to never exceed COLOR_MAXLEN bytes. But the relationship between that hand-computed number and the parsing code is not at all obvious, and we merely hope that it has been computed correctly for all cases. Let's mark the expected "end" pointer for the destination buffer and make sure that we do not exceed it. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- color.c | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/color.c b/color.c index 9027352ad7..22782f8ba3 100644 --- a/color.c +++ b/color.c @@ -150,22 +150,24 @@ int color_parse(const char *value, char *dst) * already have the ANSI escape code in it. "out" should have enough * space in it to fit any color. */ -static char *color_output(char *out, const struct color *c, char type) +static char *color_output(char *out, int len, const struct color *c, char type) { switch (c->type) { case COLOR_UNSPECIFIED: case COLOR_NORMAL: break; case COLOR_ANSI: + if (len < 2) + die("BUG: color parsing ran out of space"); *out++ = type; *out++ = '0' + c->value; break; case COLOR_256: - out += sprintf(out, "%c8;5;%d", type, c->value); + out += xsnprintf(out, len, "%c8;5;%d", type, c->value); break; case COLOR_RGB: - out += sprintf(out, "%c8;2;%d;%d;%d", type, - c->red, c->green, c->blue); + out += xsnprintf(out, len, "%c8;2;%d;%d;%d", type, + c->red, c->green, c->blue); break; } return out; @@ -180,12 +182,13 @@ int color_parse_mem(const char *value, int value_len, char *dst) { const char *ptr = value; int len = value_len; + char *end = dst + COLOR_MAXLEN; unsigned int attr = 0; struct color fg = { COLOR_UNSPECIFIED }; struct color bg = { COLOR_UNSPECIFIED }; if (!strncasecmp(value, "reset", len)) { - strcpy(dst, GIT_COLOR_RESET); + xsnprintf(dst, end - dst, GIT_COLOR_RESET); return 0; } @@ -224,12 +227,19 @@ int color_parse_mem(const char *value, int value_len, char *dst) goto bad; } +#undef OUT +#define OUT(x) do { \ + if (dst == end) \ + die("BUG: color parsing ran out of space"); \ + *dst++ = (x); \ +} while(0) + if (attr || !color_empty(&fg) || !color_empty(&bg)) { int sep = 0; int i; - *dst++ = '\033'; - *dst++ = '['; + OUT('\033'); + OUT('['); for (i = 0; attr; i++) { unsigned bit = (1 << i); @@ -237,27 +247,28 @@ int color_parse_mem(const char *value, int value_len, char *dst) continue; attr &= ~bit; if (sep++) - *dst++ = ';'; - dst += sprintf(dst, "%d", i); + OUT(';'); + dst += xsnprintf(dst, end - dst, "%d", i); } if (!color_empty(&fg)) { if (sep++) - *dst++ = ';'; + OUT(';'); /* foreground colors are all in the 3x range */ - dst = color_output(dst, &fg, '3'); + dst = color_output(dst, end - dst, &fg, '3'); } if (!color_empty(&bg)) { if (sep++) - *dst++ = ';'; + OUT(';'); /* background colors are all in the 4x range */ - dst = color_output(dst, &bg, '4'); + dst = color_output(dst, end - dst, &bg, '4'); } - *dst++ = 'm'; + OUT('m'); } - *dst = 0; + OUT(0); return 0; bad: return error(_("invalid color value: %.*s"), value_len, value); +#undef OUT } int git_config_colorbool(const char *var, const char *value) From 6f687c21c07bfb83a63049640cba5fb82c174198 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:09 -0400 Subject: [PATCH 58/70] use alloc_ref rather than hand-allocating "struct ref" This saves us some manual computation, and eliminates a call to strcpy. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch.c | 3 +-- remote-curl.c | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 841880ec2c..ed84963a57 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -639,8 +639,7 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, continue; if (rm->peer_ref) { - ref = xcalloc(1, sizeof(*ref) + strlen(rm->peer_ref->name) + 1); - strcpy(ref->name, rm->peer_ref->name); + ref = alloc_ref(rm->peer_ref->name); hashcpy(ref->old_sha1, rm->peer_ref->old_sha1); hashcpy(ref->new_sha1, rm->old_sha1); ref->force = rm->peer_ref->force; diff --git a/remote-curl.c b/remote-curl.c index 71fbbb694f..cc7a8a66fa 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -168,10 +168,7 @@ static struct ref *parse_info_refs(struct discovery *heads) url.buf); data[i] = 0; ref_name = mid + 1; - ref = xmalloc(sizeof(struct ref) + - strlen(ref_name) + 1); - memset(ref, 0, sizeof(struct ref)); - strcpy(ref->name, ref_name); + ref = alloc_ref(ref_name); get_sha1_hex(start, ref->old_sha1); if (!refs) refs = ref; From c7ab0ba3405dc6bc8ade1296ef070a5a89660e76 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:12 -0400 Subject: [PATCH 59/70] avoid sprintf and strcpy with flex arrays When we are allocating a struct with a FLEX_ARRAY member, we generally compute the size of the array and then sprintf or strcpy into it. Normally we could improve a dynamic allocation like this by using xstrfmt, but it doesn't work here; we have to account for the size of the rest of the struct. But we can improve things a bit by storing the length that we use for the allocation, and then feeding it to xsnprintf or memcpy, which makes it more obvious that we are not writing more than the allocated number of bytes. It would be nice if we had some kind of helper for allocating generic flex arrays, but it doesn't work that well: - the call signature is a little bit unwieldy: d = flex_struct(sizeof(*d), offsetof(d, path), fmt, ...); You need offsetof here instead of just writing to the end of the base size, because we don't know how the struct is packed (partially this is because FLEX_ARRAY might not be zero, though we can account for that; but the size of the struct may actually be rounded up for alignment, and we can't know that). - some sites do clever things, like over-allocating because they know they will write larger things into the buffer later (e.g., struct packed_git here). So we're better off to just write out each allocation (or add type-specific helpers, though many of these are one-off allocations anyway). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- archive.c | 5 +++-- builtin/blame.c | 5 +++-- fast-import.c | 6 ++++-- refs.c | 8 ++++---- sha1_file.c | 5 +++-- submodule.c | 6 ++++-- 6 files changed, 21 insertions(+), 14 deletions(-) diff --git a/archive.c b/archive.c index 01b0899b3f..4ac86c8373 100644 --- a/archive.c +++ b/archive.c @@ -171,13 +171,14 @@ static void queue_directory(const unsigned char *sha1, unsigned mode, int stage, struct archiver_context *c) { struct directory *d; - d = xmallocz(sizeof(*d) + base->len + 1 + strlen(filename)); + size_t len = base->len + 1 + strlen(filename) + 1; + d = xmalloc(sizeof(*d) + len); d->up = c->bottom; d->baselen = base->len; d->mode = mode; d->stage = stage; c->bottom = d; - d->len = sprintf(d->path, "%.*s%s/", (int)base->len, base->buf, filename); + d->len = xsnprintf(d->path, len, "%.*s%s/", (int)base->len, base->buf, filename); hashcpy(d->oid.hash, sha1); } diff --git a/builtin/blame.c b/builtin/blame.c index e253ac0dcb..e70fb6dac3 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -459,12 +459,13 @@ static void queue_blames(struct scoreboard *sb, struct origin *porigin, static struct origin *make_origin(struct commit *commit, const char *path) { struct origin *o; - o = xcalloc(1, sizeof(*o) + strlen(path) + 1); + size_t pathlen = strlen(path) + 1; + o = xcalloc(1, sizeof(*o) + pathlen); o->commit = commit; o->refcnt = 1; o->next = commit->util; commit->util = o; - strcpy(o->path, path); + memcpy(o->path, path, pathlen); /* includes NUL */ return o; } diff --git a/fast-import.c b/fast-import.c index d0c25024cd..895c6b4a7e 100644 --- a/fast-import.c +++ b/fast-import.c @@ -863,13 +863,15 @@ static void start_packfile(void) { static char tmp_file[PATH_MAX]; struct packed_git *p; + int namelen; struct pack_header hdr; int pack_fd; pack_fd = odb_mkstemp(tmp_file, sizeof(tmp_file), "pack/tmp_pack_XXXXXX"); - p = xcalloc(1, sizeof(*p) + strlen(tmp_file) + 2); - strcpy(p->pack_name, tmp_file); + namelen = strlen(tmp_file) + 2; + p = xcalloc(1, sizeof(*p) + namelen); + xsnprintf(p->pack_name, namelen, "%s", tmp_file); p->pack_fd = pack_fd; p->do_not_close = 1; pack_file = sha1fd(pack_fd, p->pack_name); diff --git a/refs.c b/refs.c index c2709de2a0..9937a40484 100644 --- a/refs.c +++ b/refs.c @@ -2695,7 +2695,7 @@ static int pack_if_possible_fn(struct ref_entry *entry, void *cb_data) int namelen = strlen(entry->name) + 1; struct ref_to_prune *n = xcalloc(1, sizeof(*n) + namelen); hashcpy(n->sha1, entry->u.value.oid.hash); - strcpy(n->name, entry->name); + memcpy(n->name, entry->name, namelen); /* includes NUL */ n->next = cb->ref_to_prune; cb->ref_to_prune = n; } @@ -3984,10 +3984,10 @@ void ref_transaction_free(struct ref_transaction *transaction) static struct ref_update *add_update(struct ref_transaction *transaction, const char *refname) { - size_t len = strlen(refname); - struct ref_update *update = xcalloc(1, sizeof(*update) + len + 1); + size_t len = strlen(refname) + 1; + struct ref_update *update = xcalloc(1, sizeof(*update) + len); - strcpy((char *)update->refname, refname); + memcpy((char *)update->refname, refname, len); /* includes NUL */ ALLOC_GROW(transaction->updates, transaction->nr + 1, transaction->alloc); transaction->updates[transaction->nr++] = update; return update; diff --git a/sha1_file.c b/sha1_file.c index 4211af1d89..cc3de244eb 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1180,9 +1180,10 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local) struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path) { const char *path = sha1_pack_name(sha1); - struct packed_git *p = alloc_packed_git(strlen(path) + 1); + int alloc = strlen(path) + 1; + struct packed_git *p = alloc_packed_git(alloc); - strcpy(p->pack_name, path); + memcpy(p->pack_name, path, alloc); /* includes NUL */ hashcpy(p->sha1, sha1); if (check_packed_git_idx(idx_path, p)) { free(p); diff --git a/submodule.c b/submodule.c index 245ed4dfbb..c480ed53b4 100644 --- a/submodule.c +++ b/submodule.c @@ -122,6 +122,7 @@ static int add_submodule_odb(const char *path) struct strbuf objects_directory = STRBUF_INIT; struct alternate_object_database *alt_odb; int ret = 0; + int alloc; const char *git_dir; strbuf_addf(&objects_directory, "%s/.git", path); @@ -142,9 +143,10 @@ static int add_submodule_odb(const char *path) objects_directory.len)) goto done; - alt_odb = xmalloc(objects_directory.len + 42 + sizeof(*alt_odb)); + alloc = objects_directory.len + 42; /* for "12/345..." sha1 */ + alt_odb = xmalloc(sizeof(*alt_odb) + alloc); alt_odb->next = alt_odb_list; - strcpy(alt_odb->base, objects_directory.buf); + xsnprintf(alt_odb->base, alloc, "%s", objects_directory.buf); alt_odb->name = alt_odb->base + objects_directory.len; alt_odb->name[2] = '/'; alt_odb->name[40] = '\0'; From b26cb7c777f8fcca5ec26cc6a7f9eba85378104f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:14 -0400 Subject: [PATCH 60/70] receive-pack: simplify keep_arg computation To generate "--keep=receive-pack $pid on $host", we write progressively into a single buffer, which requires keeping track of how much we've written so far. But since the result is destined to go into our argv array, we can simply use argv_array_pushf. Unfortunately we still have to have a fixed-size buffer for the gethostname() call, but at least it now doesn't involve any extra size computation. And as a bonus, we drop an sprintf and a strcpy call. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 39eb0646fd..bcb624bc05 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1524,15 +1524,18 @@ static const char *unpack(int err_fd, struct shallow_info *si) if (status) return "unpack-objects abnormal exit"; } else { - int s; - char keep_arg[256]; - - s = sprintf(keep_arg, "--keep=receive-pack %"PRIuMAX" on ", (uintmax_t) getpid()); - if (gethostname(keep_arg + s, sizeof(keep_arg) - s)) - strcpy(keep_arg + s, "localhost"); + char hostname[256]; argv_array_pushl(&child.args, "index-pack", - "--stdin", hdr_arg, keep_arg, NULL); + "--stdin", hdr_arg, NULL); + + if (gethostname(hostname, sizeof(hostname))) + xsnprintf(hostname, sizeof(hostname), "localhost"); + argv_array_pushf(&child.args, + "--keep=receive-pack %"PRIuMAX" on %s", + (uintmax_t)getpid(), + hostname); + if (fsck_objects) argv_array_pushf(&child.args, "--strict%s", fsck_msg_types.buf); From 4c9ac3bfaad9e32a7a98178d1f01779a3698144f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:16 -0400 Subject: [PATCH 61/70] help: clean up kfmclient munging When we are going to launch "/path/to/konqueror", we instead rewrite this into "/path/to/kfmclient" by duplicating the original string and writing over the ending bits. This can be done more obviously with strip_suffix and xstrfmt. Note that we also fix a subtle bug with the "filename" parameter, which is passed as argv[0] to the child. If the user has configured a program name with no directory component, we always pass the string "kfmclient", even if your program is called something else. But if you give a full path, we give the basename of that path. But more bizarrely, if we rewrite "konqueror" to "kfmclient", we still pass "konqueror". The history of this function doesn't reveal anything interesting, so it looks like just an oversight from combining the suffix-munging with the basename-finding. Let's just call basename on the munged path, which produces consistent results (if you gave a program, whether a full path or not, we pass its basename). Probably this doesn't matter at all in practice, but it makes the code slightly less confusing to read. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/help.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/builtin/help.c b/builtin/help.c index fba8c01e24..e1650ab3f9 100644 --- a/builtin/help.c +++ b/builtin/help.c @@ -140,17 +140,10 @@ static void exec_man_konqueror(const char *path, const char *page) /* It's simpler to launch konqueror using kfmclient. */ if (path) { - const char *file = strrchr(path, '/'); - if (file && !strcmp(file + 1, "konqueror")) { - char *new = xstrdup(path); - char *dest = strrchr(new, '/'); - - /* strlen("konqueror") == strlen("kfmclient") */ - strcpy(dest + 1, "kfmclient"); - path = new; - } - if (file) - filename = file; + size_t len; + if (strip_suffix(path, "/konqueror", &len)) + path = xstrfmt("%.*s/kfmclient", (int)len, path); + filename = basename((char *)path); } else path = "kfmclient"; strbuf_addf(&man_page, "man:%s(1)", page); From 34fa79a6cde56d6d428ab0d3160cb094ebad3305 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:19 -0400 Subject: [PATCH 62/70] prefer memcpy to strcpy When we already know the length of a string (e.g., because we just malloc'd to fit it), it's nicer to use memcpy than strcpy, as it makes it more obvious that we are not going to overflow the buffer (because the size we pass matches the size in the allocation). This also eliminates calls to strcpy, which make auditing the code base harder. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- compat/nedmalloc/nedmalloc.c | 5 +++-- fast-import.c | 5 +++-- revision.c | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/compat/nedmalloc/nedmalloc.c b/compat/nedmalloc/nedmalloc.c index 609ebba125..a0a16eb1bb 100644 --- a/compat/nedmalloc/nedmalloc.c +++ b/compat/nedmalloc/nedmalloc.c @@ -957,8 +957,9 @@ char *strdup(const char *s1) { char *s2 = 0; if (s1) { - s2 = malloc(strlen(s1) + 1); - strcpy(s2, s1); + size_t len = strlen(s1) + 1; + s2 = malloc(len); + memcpy(s2, s1, len); } return s2; } diff --git a/fast-import.c b/fast-import.c index 895c6b4a7e..cf6d8bc0ce 100644 --- a/fast-import.c +++ b/fast-import.c @@ -644,8 +644,9 @@ static void *pool_calloc(size_t count, size_t size) static char *pool_strdup(const char *s) { - char *r = pool_alloc(strlen(s) + 1); - strcpy(r, s); + size_t len = strlen(s) + 1; + char *r = pool_alloc(len); + memcpy(r, s, len); return r; } diff --git a/revision.c b/revision.c index af2a18ed74..22364636d1 100644 --- a/revision.c +++ b/revision.c @@ -38,7 +38,7 @@ char *path_name(const struct name_path *path, const char *name) } n = xmalloc(len); m = n + len - (nlen + 1); - strcpy(m, name); + memcpy(m, name, nlen + 1); for (p = path; p; p = p->up) { if (p->elem_len) { m -= p->elem_len + 1; From 7ce4fb948c6d196a376a37840cb80ae95b5897ec Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:21 -0400 Subject: [PATCH 63/70] color: add color_set helper for copying raw colors To set up default colors, we sometimes strcpy() from the default string literals into our color buffers. This isn't a bug (assuming the destination is COLOR_MAXLEN bytes), but makes it harder to audit the code for problematic strcpy calls. Let's introduce a color_set which copies under the assumption that there are COLOR_MAXLEN bytes in the destination (of course you can call it on a smaller buffer, so this isn't providing a huge amount of safety, but it's more convenient than calling xsnprintf yourself). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- color.c | 5 +++++ color.h | 7 +++++++ grep.c | 32 ++++++++++++++++---------------- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/color.c b/color.c index 22782f8ba3..8f85153d0d 100644 --- a/color.c +++ b/color.c @@ -145,6 +145,11 @@ int color_parse(const char *value, char *dst) return color_parse_mem(value, strlen(value), dst); } +void color_set(char *dst, const char *color_bytes) +{ + xsnprintf(dst, COLOR_MAXLEN, "%s", color_bytes); +} + /* * Write the ANSI color codes for "c" to "out"; the string should * already have the ANSI escape code in it. "out" should have enough diff --git a/color.h b/color.h index 7fe77fb55e..e155d13f78 100644 --- a/color.h +++ b/color.h @@ -75,6 +75,13 @@ extern int color_stdout_is_tty; int git_color_config(const char *var, const char *value, void *cb); int git_color_default_config(const char *var, const char *value, void *cb); +/* + * Set the color buffer (which must be COLOR_MAXLEN bytes) + * to the raw color bytes; this is useful for initializing + * default color variables. + */ +void color_set(char *dst, const char *color_bytes); + int git_config_colorbool(const char *var, const char *value); int want_color(int var); int color_parse(const char *value, char *dst); diff --git a/grep.c b/grep.c index 6c68d5bd2e..7b2b96a437 100644 --- a/grep.c +++ b/grep.c @@ -31,14 +31,14 @@ void init_grep_defaults(void) opt->max_depth = -1; opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED; opt->extended_regexp_option = 0; - strcpy(opt->color_context, ""); - strcpy(opt->color_filename, ""); - strcpy(opt->color_function, ""); - strcpy(opt->color_lineno, ""); - strcpy(opt->color_match_context, GIT_COLOR_BOLD_RED); - strcpy(opt->color_match_selected, GIT_COLOR_BOLD_RED); - strcpy(opt->color_selected, ""); - strcpy(opt->color_sep, GIT_COLOR_CYAN); + color_set(opt->color_context, ""); + color_set(opt->color_filename, ""); + color_set(opt->color_function, ""); + color_set(opt->color_lineno, ""); + color_set(opt->color_match_context, GIT_COLOR_BOLD_RED); + color_set(opt->color_match_selected, GIT_COLOR_BOLD_RED); + color_set(opt->color_selected, ""); + color_set(opt->color_sep, GIT_COLOR_CYAN); opt->color = -1; } @@ -151,14 +151,14 @@ void grep_init(struct grep_opt *opt, const char *prefix) opt->regflags = def->regflags; opt->relative = def->relative; - strcpy(opt->color_context, def->color_context); - strcpy(opt->color_filename, def->color_filename); - strcpy(opt->color_function, def->color_function); - strcpy(opt->color_lineno, def->color_lineno); - strcpy(opt->color_match_context, def->color_match_context); - strcpy(opt->color_match_selected, def->color_match_selected); - strcpy(opt->color_selected, def->color_selected); - strcpy(opt->color_sep, def->color_sep); + color_set(opt->color_context, def->color_context); + color_set(opt->color_filename, def->color_filename); + color_set(opt->color_function, def->color_function); + color_set(opt->color_lineno, def->color_lineno); + color_set(opt->color_match_context, def->color_match_context); + color_set(opt->color_match_selected, def->color_match_selected); + color_set(opt->color_selected, def->color_selected); + color_set(opt->color_sep, def->color_sep); } void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt) From 02e32b7debbcbe5910c11a515801751b349577d7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:24 -0400 Subject: [PATCH 64/70] notes: document length of fanout path with a constant We know that a fanned-out sha1 in a notes tree cannot be more than "aa/bb/cc/...", and we have an assert() to confirm that. But let's factor out that length into a constant so we can be sure it is used consistently. And even though we assert() earlier, let's replace a strcpy with xsnprintf, so it is clear to a reader that all cases are covered. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- notes.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/notes.c b/notes.c index eacd2a61da..db77922130 100644 --- a/notes.c +++ b/notes.c @@ -539,6 +539,9 @@ static unsigned char determine_fanout(struct int_node *tree, unsigned char n, return fanout + 1; } +/* hex SHA1 + 19 * '/' + NUL */ +#define FANOUT_PATH_MAX 40 + 19 + 1 + static void construct_path_with_fanout(const unsigned char *sha1, unsigned char fanout, char *path) { @@ -551,7 +554,7 @@ static void construct_path_with_fanout(const unsigned char *sha1, path[i++] = '/'; fanout--; } - strcpy(path + i, hex_sha1 + j); + xsnprintf(path + i, FANOUT_PATH_MAX - i, "%s", hex_sha1 + j); } static int for_each_note_helper(struct notes_tree *t, struct int_node *tree, @@ -562,7 +565,7 @@ static int for_each_note_helper(struct notes_tree *t, struct int_node *tree, void *p; int ret = 0; struct leaf_node *l; - static char path[40 + 19 + 1]; /* hex SHA1 + 19 * '/' + NUL */ + static char path[FANOUT_PATH_MAX]; fanout = determine_fanout(tree, n, fanout); for (i = 0; i < 16; i++) { @@ -595,7 +598,7 @@ redo: /* invoke callback with subtree */ unsigned int path_len = l->key_sha1[19] * 2 + fanout; - assert(path_len < 40 + 19); + assert(path_len < FANOUT_PATH_MAX - 1); construct_path_with_fanout(l->key_sha1, fanout, path); /* Create trailing slash, if needed */ From eddda371449ba925d91d04c615d084adf1b43a33 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:26 -0400 Subject: [PATCH 65/70] convert strncpy to memcpy strncpy is known to be a confusing function because of its termination semantics. These calls are all correct, but it takes some examination to see why. In particular, every one of them expects to copy up to the length limit, and then makes some arrangement for terminating the result. We can just use memcpy, along with noting explicitly how the result is terminated (if it is not already obvious). That should make it more clear to a reader that we are doing the right thing. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/help.c | 4 ++-- fast-import.c | 2 +- tag.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/builtin/help.c b/builtin/help.c index e1650ab3f9..1cd0c1ee44 100644 --- a/builtin/help.c +++ b/builtin/help.c @@ -176,7 +176,7 @@ static void add_man_viewer(const char *name) while (*p) p = &((*p)->next); *p = xcalloc(1, (sizeof(**p) + len + 1)); - strncpy((*p)->name, name, len); + memcpy((*p)->name, name, len); /* NUL-terminated by xcalloc */ } static int supported_man_viewer(const char *name, size_t len) @@ -192,7 +192,7 @@ static void do_add_man_viewer_info(const char *name, { struct man_viewer_info_list *new = xcalloc(1, sizeof(*new) + len + 1); - strncpy(new->name, name, len); + memcpy(new->name, name, len); /* NUL-terminated by xcalloc */ new->info = xstrdup(value); new->next = man_viewer_info_list; man_viewer_info_list = new; diff --git a/fast-import.c b/fast-import.c index cf6d8bc0ce..4d01efcb9d 100644 --- a/fast-import.c +++ b/fast-import.c @@ -703,7 +703,7 @@ static struct atom_str *to_atom(const char *s, unsigned short len) c = pool_alloc(sizeof(struct atom_str) + len + 1); c->str_len = len; - strncpy(c->str_dat, s, len); + memcpy(c->str_dat, s, len); c->str_dat[len] = 0; c->next_atom = atom_table[hc]; atom_table[hc] = c; diff --git a/tag.c b/tag.c index 5b0ac62ed8..5b2a06d92b 100644 --- a/tag.c +++ b/tag.c @@ -82,7 +82,7 @@ int parse_tag_buffer(struct tag *item, const void *data, unsigned long size) nl = memchr(bufptr, '\n', tail - bufptr); if (!nl || sizeof(type) <= (nl - bufptr)) return -1; - strncpy(type, bufptr, nl - bufptr); + memcpy(type, bufptr, nl - bufptr); type[nl - bufptr] = '\0'; bufptr = nl + 1; From 144e4cf7092ee8cff44e9c7600aaa7515ad6a78f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:28 -0400 Subject: [PATCH 66/70] fsck: drop inode-sorting code Fsck tries to access loose objects in order of inode number, with the hope that this would make cold cache access faster on a spinning disk. This dates back to 7e8c174 (fsck-cache: sort entries by inode number, 2005-05-02), which predates the invention of packfiles. These days, there's not much point in trying to optimize cold cache for a large number of loose objects. You are much better off to simply pack the objects, which will reduce the disk footprint _and_ provide better locality of data access. So while you can certainly construct pathological cases where this code might help, it is not worth the trouble anymore. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fsck.c | 70 ++------------------------------------------------ 1 file changed, 2 insertions(+), 68 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index a019f4a240..73c35963b8 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -39,14 +39,6 @@ static int show_dangling = 1; #define ERROR_REACHABLE 02 #define ERROR_PACK 04 -#ifdef NO_D_INO_IN_DIRENT -#define SORT_DIRENT 0 -#define DIRENT_SORT_HINT(de) 0 -#else -#define SORT_DIRENT 1 -#define DIRENT_SORT_HINT(de) ((de)->d_ino) -#endif - static int fsck_config(const char *var, const char *value, void *cb) { if (strcmp(var, "fsck.skiplist") == 0) { @@ -373,64 +365,6 @@ static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type, return fsck_obj(obj); } -/* - * This is the sorting chunk size: make it reasonably - * big so that we can sort well.. - */ -#define MAX_SHA1_ENTRIES (1024) - -struct sha1_entry { - unsigned long ino; - unsigned char sha1[20]; -}; - -static struct { - unsigned long nr; - struct sha1_entry *entry[MAX_SHA1_ENTRIES]; -} sha1_list; - -static int ino_compare(const void *_a, const void *_b) -{ - const struct sha1_entry *a = _a, *b = _b; - unsigned long ino1 = a->ino, ino2 = b->ino; - return ino1 < ino2 ? -1 : ino1 > ino2 ? 1 : 0; -} - -static void fsck_sha1_list(void) -{ - int i, nr = sha1_list.nr; - - if (SORT_DIRENT) - qsort(sha1_list.entry, nr, - sizeof(struct sha1_entry *), ino_compare); - for (i = 0; i < nr; i++) { - struct sha1_entry *entry = sha1_list.entry[i]; - unsigned char *sha1 = entry->sha1; - - sha1_list.entry[i] = NULL; - if (fsck_sha1(sha1)) - errors_found |= ERROR_OBJECT; - free(entry); - } - sha1_list.nr = 0; -} - -static void add_sha1_list(unsigned char *sha1, unsigned long ino) -{ - struct sha1_entry *entry = xmalloc(sizeof(*entry)); - int nr; - - entry->ino = ino; - hashcpy(entry->sha1, sha1); - nr = sha1_list.nr; - if (nr == MAX_SHA1_ENTRIES) { - fsck_sha1_list(); - nr = 0; - } - sha1_list.entry[nr] = entry; - sha1_list.nr = ++nr; -} - static inline int is_loose_object_file(struct dirent *de, char *name, unsigned char *sha1) { @@ -459,7 +393,8 @@ static void fsck_dir(int i, char *path) if (is_dot_or_dotdot(de->d_name)) continue; if (is_loose_object_file(de, name, sha1)) { - add_sha1_list(sha1, DIRENT_SORT_HINT(de)); + if (fsck_sha1(sha1)) + errors_found |= ERROR_OBJECT; continue; } if (starts_with(de->d_name, "tmp_obj_")) @@ -573,7 +508,6 @@ static void fsck_object_dir(const char *path) display_progress(progress, i+1); } stop_progress(&progress); - fsck_sha1_list(); } static int fsck_head_link(void) From e23a91b04702fb3d7a85195ef52bdeacd3ee9433 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:30 -0400 Subject: [PATCH 67/70] Makefile: drop D_INO_IN_DIRENT build knob Now that fsck has dropped its inode-sorting, there are no longer any users of this knob, and it can go away. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Makefile | 5 ----- config.mak.uname | 3 --- configure.ac | 7 ------- 3 files changed, 15 deletions(-) diff --git a/Makefile b/Makefile index 8d5df7ea1e..2f350cae35 100644 --- a/Makefile +++ b/Makefile @@ -74,8 +74,6 @@ all:: # Define HAVE_PATHS_H if you have paths.h and want to use the default PATH # it specifies. # -# Define NO_D_INO_IN_DIRENT if you don't have d_ino in your struct dirent. -# # Define NO_D_TYPE_IN_DIRENT if your platform defines DT_UNKNOWN but lacks # d_type in struct dirent (Cygwin 1.5, fixed in Cygwin 1.7). # @@ -1160,9 +1158,6 @@ endif ifdef NO_D_TYPE_IN_DIRENT BASIC_CFLAGS += -DNO_D_TYPE_IN_DIRENT endif -ifdef NO_D_INO_IN_DIRENT - BASIC_CFLAGS += -DNO_D_INO_IN_DIRENT -endif ifdef NO_GECOS_IN_PWENT BASIC_CFLAGS += -DNO_GECOS_IN_PWENT endif diff --git a/config.mak.uname b/config.mak.uname index 943c43965e..f34dcaad20 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -166,7 +166,6 @@ endif ifeq ($(uname_O),Cygwin) ifeq ($(shell expr "$(uname_R)" : '1\.[1-6]\.'),4) NO_D_TYPE_IN_DIRENT = YesPlease - NO_D_INO_IN_DIRENT = YesPlease NO_STRCASESTR = YesPlease NO_MEMMEM = YesPlease NO_MKSTEMPS = YesPlease @@ -370,7 +369,6 @@ ifeq ($(uname_S),Windows) NO_POSIX_GOODIES = UnfortunatelyYes NATIVE_CRLF = YesPlease DEFAULT_HELP_FORMAT = html - NO_D_INO_IN_DIRENT = YesPlease CC = compat/vcbuild/scripts/clink.pl AR = compat/vcbuild/scripts/lib.pl @@ -520,7 +518,6 @@ ifneq (,$(findstring MINGW,$(uname_S))) NO_INET_NTOP = YesPlease NO_POSIX_GOODIES = UnfortunatelyYes DEFAULT_HELP_FORMAT = html - NO_D_INO_IN_DIRENT = YesPlease COMPAT_CFLAGS += -D__USE_MINGW_ACCESS -D_USE_32BIT_TIME_T -DNOGDI -Icompat -Icompat/win32 COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\" COMPAT_OBJS += compat/mingw.o compat/winansi.o \ diff --git a/configure.ac b/configure.ac index 14012fad72..3fcca618d2 100644 --- a/configure.ac +++ b/configure.ac @@ -767,13 +767,6 @@ elif test x$ac_cv_member_struct_stat_st_mtim_tv_nsec != xyes; then GIT_CONF_SUBST([NO_NSEC]) fi # -# Define NO_D_INO_IN_DIRENT if you don't have d_ino in your struct dirent. -AC_CHECK_MEMBER(struct dirent.d_ino, -[NO_D_INO_IN_DIRENT=], -[NO_D_INO_IN_DIRENT=YesPlease], -[#include ]) -GIT_CONF_SUBST([NO_D_INO_IN_DIRENT]) -# # Define NO_D_TYPE_IN_DIRENT if your platform defines DT_UNKNOWN but lacks # d_type in struct dirent (latest Cygwin -- will be fixed soonish). AC_CHECK_MEMBER(struct dirent.d_type, From f0766bf94ec7a398a5a524053ea5334883c16a2e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:33 -0400 Subject: [PATCH 68/70] fsck: use for_each_loose_file_in_objdir Since 27e1e22 (prune: factor out loose-object directory traversal, 2014-10-15), we now have a generic callback system for iterating over the loose object directories. This is used by prune, count-objects, etc. We did not convert git-fsck at the time because it implemented an inode-sorting scheme that was not part of the generic code. Now that the inode-sorting code is gone, we can reuse the generic code. The result is shorter, hopefully more readable, and drops some unchecked sprintf calls. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fsck.c | 70 +++++++++++++++++--------------------------------- 1 file changed, 24 insertions(+), 46 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 73c35963b8..d50efd5e8c 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -365,45 +365,6 @@ static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type, return fsck_obj(obj); } -static inline int is_loose_object_file(struct dirent *de, - char *name, unsigned char *sha1) -{ - if (strlen(de->d_name) != 38) - return 0; - memcpy(name + 2, de->d_name, 39); - return !get_sha1_hex(name, sha1); -} - -static void fsck_dir(int i, char *path) -{ - DIR *dir = opendir(path); - struct dirent *de; - char name[100]; - - if (!dir) - return; - - if (verbose) - fprintf(stderr, "Checking directory %s\n", path); - - sprintf(name, "%02x", i); - while ((de = readdir(dir)) != NULL) { - unsigned char sha1[20]; - - if (is_dot_or_dotdot(de->d_name)) - continue; - if (is_loose_object_file(de, name, sha1)) { - if (fsck_sha1(sha1)) - errors_found |= ERROR_OBJECT; - continue; - } - if (starts_with(de->d_name, "tmp_obj_")) - continue; - fprintf(stderr, "bad sha1 file: %s/%s\n", path, de->d_name); - } - closedir(dir); -} - static int default_refs; static void fsck_handle_reflog_sha1(const char *refname, unsigned char *sha1) @@ -491,9 +452,28 @@ static void get_default_heads(void) } } +static int fsck_loose(const unsigned char *sha1, const char *path, void *data) +{ + if (fsck_sha1(sha1)) + errors_found |= ERROR_OBJECT; + return 0; +} + +static int fsck_cruft(const char *basename, const char *path, void *data) +{ + if (!starts_with(basename, "tmp_obj_")) + fprintf(stderr, "bad sha1 file: %s\n", path); + return 0; +} + +static int fsck_subdir(int nr, const char *path, void *progress) +{ + display_progress(progress, nr + 1); + return 0; +} + static void fsck_object_dir(const char *path) { - int i; struct progress *progress = NULL; if (verbose) @@ -501,12 +481,10 @@ static void fsck_object_dir(const char *path) if (show_progress) progress = start_progress(_("Checking object directories"), 256); - for (i = 0; i < 256; i++) { - static char dir[4096]; - sprintf(dir, "%s/%02x", path, i); - fsck_dir(i, dir); - display_progress(progress, i+1); - } + + for_each_loose_file_in_objdir(path, fsck_loose, fsck_cruft, fsck_subdir, + progress); + display_progress(progress, 256); stop_progress(&progress); } From 00b6c178c3ab475098f7a0bc63b2df2da508020c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:35 -0400 Subject: [PATCH 69/70] use strbuf_complete to conditionally append slash When working with paths in strbufs, we frequently want to ensure that a directory contains a trailing slash before appending to it. We can shorten this code (and make the intent more obvious) by calling strbuf_complete. Most of these cases are trivially identical conversions, but there are two things to note: - in a few cases we did not check that the strbuf is non-empty (which would lead to an out-of-bounds memory access). These were generally not triggerable in practice, either from earlier assertions, or typically because we would have just fed the strbuf to opendir(), which would choke on an empty path. - in a few cases we indexed the buffer with "original_len" or similar, rather than the current sb->len, and it is not immediately obvious from the diff that they are the same. In all of these cases, I manually verified that the strbuf does not change between the assignment and the strbuf_complete call. This does not convert cases which look like: if (sb->len && !is_dir_sep(sb->buf[sb->len - 1])) strbuf_addch(sb, '/'); as those are obviously semantically different. Some of these cases arguably should be doing that, but that is out of scope for this change, which aims purely for cleanup with no behavior change (and at least it will make such sites easier to find and examine in the future, as we can grep for strbuf_complete). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/clean.c | 6 ++---- builtin/log.c | 3 +-- diff-no-index.c | 6 ++---- dir.c | 6 ++---- path.c | 3 +-- refs.c | 3 +-- url.c | 3 +-- 7 files changed, 10 insertions(+), 20 deletions(-) diff --git a/builtin/clean.c b/builtin/clean.c index df53def63f..d7acb94a95 100644 --- a/builtin/clean.c +++ b/builtin/clean.c @@ -159,8 +159,7 @@ static int is_git_repository(struct strbuf *path) int gitfile_error; size_t orig_path_len = path->len; assert(orig_path_len != 0); - if (path->buf[orig_path_len - 1] != '/') - strbuf_addch(path, '/'); + strbuf_complete(path, '/'); strbuf_addstr(path, ".git"); if (read_gitfile_gently(path->buf, &gitfile_error) || is_git_directory(path->buf)) ret = 1; @@ -206,8 +205,7 @@ static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag, return res; } - if (path->buf[original_len - 1] != '/') - strbuf_addch(path, '/'); + strbuf_complete(path, '/'); len = path->len; while ((e = readdir(dir)) != NULL) { diff --git a/builtin/log.c b/builtin/log.c index a491d3dea0..dda671d975 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -796,8 +796,7 @@ static int reopen_stdout(struct commit *commit, const char *subject, if (filename.len >= PATH_MAX - FORMAT_PATCH_NAME_MAX - suffix_len) return error(_("name of output directory is too long")); - if (filename.buf[filename.len - 1] != '/') - strbuf_addch(&filename, '/'); + strbuf_complete(&filename, '/'); } if (rev->numbered_files) diff --git a/diff-no-index.c b/diff-no-index.c index 0320605a84..8e0fd270b5 100644 --- a/diff-no-index.c +++ b/diff-no-index.c @@ -136,15 +136,13 @@ static int queue_diff(struct diff_options *o, if (name1) { strbuf_addstr(&buffer1, name1); - if (buffer1.len && buffer1.buf[buffer1.len - 1] != '/') - strbuf_addch(&buffer1, '/'); + strbuf_complete(&buffer1, '/'); len1 = buffer1.len; } if (name2) { strbuf_addstr(&buffer2, name2); - if (buffer2.len && buffer2.buf[buffer2.len - 1] != '/') - strbuf_addch(&buffer2, '/'); + strbuf_complete(&buffer2, '/'); len2 = buffer2.len; } diff --git a/dir.c b/dir.c index 7b25634832..79fdad8425 100644 --- a/dir.c +++ b/dir.c @@ -1519,8 +1519,7 @@ static enum path_treatment treat_path_fast(struct dir_struct *dir, } strbuf_addstr(path, cdir->ucd->name); /* treat_one_path() does this before it calls treat_directory() */ - if (path->buf[path->len - 1] != '/') - strbuf_addch(path, '/'); + strbuf_complete(path, '/'); if (cdir->ucd->check_only) /* * check_only is set as a result of treat_directory() getting @@ -2126,8 +2125,7 @@ static int remove_dir_recurse(struct strbuf *path, int flag, int *kept_up) else return -1; } - if (path->buf[original_len - 1] != '/') - strbuf_addch(path, '/'); + strbuf_complete(path, '/'); len = path->len; while ((e = readdir(dir)) != NULL) { diff --git a/path.c b/path.c index c597473e62..c105a9e083 100644 --- a/path.c +++ b/path.c @@ -240,8 +240,7 @@ static void do_submodule_path(struct strbuf *buf, const char *path, const char *git_dir; strbuf_addstr(buf, path); - if (buf->len && buf->buf[buf->len - 1] != '/') - strbuf_addch(buf, '/'); + strbuf_complete(buf, '/'); strbuf_addstr(buf, ".git"); git_dir = read_gitfile(buf->buf); diff --git a/refs.c b/refs.c index 9937a40484..b2a922945d 100644 --- a/refs.c +++ b/refs.c @@ -2193,8 +2193,7 @@ int for_each_glob_ref_in(each_ref_fn fn, const char *pattern, if (!has_glob_specials(pattern)) { /* Append implied '/' '*' if not present. */ - if (real_pattern.buf[real_pattern.len - 1] != '/') - strbuf_addch(&real_pattern, '/'); + strbuf_complete(&real_pattern, '/'); /* No need to check for '*', there is none. */ strbuf_addch(&real_pattern, '*'); } diff --git a/url.c b/url.c index 7ca2a69e10..2d89ad190c 100644 --- a/url.c +++ b/url.c @@ -120,8 +120,7 @@ char *url_decode_parameter_value(const char **query) void end_url_with_slash(struct strbuf *buf, const char *url) { strbuf_addstr(buf, url); - if (buf->len && buf->buf[buf->len - 1] != '/') - strbuf_addch(buf, '/'); + strbuf_complete(buf, '/'); } void str_end_url_with_slash(const char *url, char **dest) { From 34e02deb60b4db22243d47846eb926de9e0d1cf9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:37 -0400 Subject: [PATCH 70/70] name-rev: use strip_suffix to avoid magic numbers The manual size computations here are correct, but using strip_suffix makes that obvious, and hopefully communicates the intent of the code more clearly. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/name-rev.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/builtin/name-rev.c b/builtin/name-rev.c index 8a3a0cd61e..0377fc1142 100644 --- a/builtin/name-rev.c +++ b/builtin/name-rev.c @@ -55,16 +55,15 @@ copy_data: parents; parents = parents->next, parent_number++) { if (parent_number > 1) { - int len = strlen(tip_name); + size_t len; char *new_name; - if (len > 2 && !strcmp(tip_name + len - 2, "^0")) - len -= 2; + strip_suffix(tip_name, "^0", &len); if (generation > 0) - new_name = xstrfmt("%.*s~%d^%d", len, tip_name, + new_name = xstrfmt("%.*s~%d^%d", (int)len, tip_name, generation, parent_number); else - new_name = xstrfmt("%.*s^%d", len, tip_name, + new_name = xstrfmt("%.*s^%d", (int)len, tip_name, parent_number); name_rev(parents->item, new_name, 0,