From 3921a0b3c3441d189f838656b30fbafa25e9eec4 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Mon, 21 Aug 2017 15:24:30 -0600 Subject: [PATCH 1/3] perf: add test for writing the index A performance test for writing the index to be able to determine if changes to allocating ondisk structure help. Signed-off-by: Kevin Willford Signed-off-by: Junio C Hamano --- Makefile | 1 + t/helper/test-write-cache.c | 23 +++++++++++++++++++++++ t/perf/p0007-write-cache.sh | 29 +++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+) create mode 100644 t/helper/test-write-cache.c create mode 100755 t/perf/p0007-write-cache.sh diff --git a/Makefile b/Makefile index 461c845d33..6e9b80751e 100644 --- a/Makefile +++ b/Makefile @@ -655,6 +655,7 @@ TEST_PROGRAMS_NEED_X += test-parse-options TEST_PROGRAMS_NEED_X += test-path-utils TEST_PROGRAMS_NEED_X += test-prio-queue TEST_PROGRAMS_NEED_X += test-read-cache +TEST_PROGRAMS_NEED_X += test-write-cache TEST_PROGRAMS_NEED_X += test-ref-store TEST_PROGRAMS_NEED_X += test-regex TEST_PROGRAMS_NEED_X += test-revision-walking diff --git a/t/helper/test-write-cache.c b/t/helper/test-write-cache.c new file mode 100644 index 0000000000..b7ee039669 --- /dev/null +++ b/t/helper/test-write-cache.c @@ -0,0 +1,23 @@ +#include "cache.h" +#include "lockfile.h" + +static struct lock_file index_lock; + +int cmd_main(int argc, const char **argv) +{ + int i, cnt = 1, lockfd; + if (argc == 2) + cnt = strtol(argv[1], NULL, 0); + setup_git_directory(); + read_cache(); + for (i = 0; i < cnt; i++) { + lockfd = hold_locked_index(&index_lock, LOCK_DIE_ON_ERROR); + if (0 <= lockfd) { + write_locked_index(&the_index, &index_lock, COMMIT_LOCK); + } else { + rollback_lock_file(&index_lock); + } + } + + return 0; +} diff --git a/t/perf/p0007-write-cache.sh b/t/perf/p0007-write-cache.sh new file mode 100755 index 0000000000..261fe92fd9 --- /dev/null +++ b/t/perf/p0007-write-cache.sh @@ -0,0 +1,29 @@ +#!/bin/sh + +test_description="Tests performance of writing the index" + +. ./perf-lib.sh + +test_perf_default_repo + +test_expect_success "setup repo" ' + if git rev-parse --verify refs/heads/p0006-ballast^{commit} + then + echo Assuming synthetic repo from many-files.sh + git config --local core.sparsecheckout 1 + cat >.git/info/sparse-checkout <<-EOF + /* + !ballast/* + EOF + else + echo Assuming non-synthetic repo... + fi && + nr_files=$(git ls-files | wc -l) +' + +count=3 +test_perf "write_locked_index $count times ($nr_files files)" " + test-write-cache $count +" + +test_done From b50386c7c038ccea8f70d2a66ac78f189240ef05 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Mon, 21 Aug 2017 15:24:31 -0600 Subject: [PATCH 2/3] read-cache: fix memory leak in do_write_index The previous_name_buf was never getting released when there was an error in ce_write_entry or allow was false and execution was returned to the caller. Signed-off-by: Kevin Willford Signed-off-by: Junio C Hamano --- read-cache.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/read-cache.c b/read-cache.c index acfb028f48..47220cc30d 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2192,7 +2192,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, int newfd = tempfile->fd; git_SHA_CTX c; struct cache_header hdr; - int i, err, removed, extended, hdr_version; + int i, err = 0, removed, extended, hdr_version; struct cache_entry **cache = istate->cache; int entries = istate->cache_nr; struct stat st; @@ -2247,15 +2247,21 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, if (allow) warning(msg, ce->name); else - return error(msg, ce->name); + err = error(msg, ce->name); drop_cache_tree = 1; } if (ce_write_entry(&c, newfd, ce, previous_name) < 0) - return -1; + err = -1; + + if (err) + break; } strbuf_release(&previous_name_buf); + if (err) + return err; + /* Write extension data here */ if (!strip_extensions && istate->split_index) { struct strbuf sb = STRBUF_INIT; From ce012deb98650b5b7bba58d534fed27628e34234 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Mon, 21 Aug 2017 15:24:32 -0600 Subject: [PATCH 3/3] read-cache: avoid allocating every ondisk entry when writing When writing the index for each entry an ondisk struct will be allocated and freed in ce_write_entry. We can do better by using a ondisk struct on the stack for each entry. This is accomplished by using a stack ondisk_cache_entry_extended outside looping through the entries in do_write_index. Only the fixed fields of this struct are used when writing and depending on whether it is extended or not the flags2 field will be written. The name field is not used and instead the cache_entry name field is used directly when writing out the name. Because ce_write is using a buffer and memcpy to fill the buffer before flushing to disk, we don't have to worry about doing multiple ce_write calls. Running the p0007-write-cache.sh tests would save anywhere between 3-7% when the index had over a million entries with no performance degradation on small repos. Signed-off-by: Kevin Willford Signed-off-by: Junio C Hamano --- read-cache.c | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/read-cache.c b/read-cache.c index 47220cc30d..694bed8d82 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1499,6 +1499,7 @@ struct ondisk_cache_entry_extended { }; /* These are only used for v3 or lower */ +#define align_padding_size(size, len) ((size + (len) + 8) & ~7) - (size + len) #define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 8) & ~7) #define ondisk_cache_entry_size(len) align_flex_name(ondisk_cache_entry,len) #define ondisk_cache_entry_extended_size(len) align_flex_name(ondisk_cache_entry_extended,len) @@ -2032,7 +2033,7 @@ static void ce_smudge_racily_clean_entry(struct cache_entry *ce) } /* Copy miscellaneous fields but not the name */ -static char *copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk, +static void copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk, struct cache_entry *ce) { short flags; @@ -2056,32 +2057,35 @@ static char *copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk, struct ondisk_cache_entry_extended *ondisk2; ondisk2 = (struct ondisk_cache_entry_extended *)ondisk; ondisk2->flags2 = htons((ce->ce_flags & CE_EXTENDED_FLAGS) >> 16); - return ondisk2->name; - } - else { - return ondisk->name; } } static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce, - struct strbuf *previous_name) + struct strbuf *previous_name, struct ondisk_cache_entry *ondisk) { int size; - struct ondisk_cache_entry *ondisk; int saved_namelen = saved_namelen; /* compiler workaround */ - char *name; int result; + static unsigned char padding[8] = { 0x00 }; if (ce->ce_flags & CE_STRIP_NAME) { saved_namelen = ce_namelen(ce); ce->ce_namelen = 0; } + if (ce->ce_flags & CE_EXTENDED) + size = offsetof(struct ondisk_cache_entry_extended, name); + else + size = offsetof(struct ondisk_cache_entry, name); + if (!previous_name) { - size = ondisk_ce_size(ce); - ondisk = xcalloc(1, size); - name = copy_cache_entry_to_ondisk(ondisk, ce); - memcpy(name, ce->name, ce_namelen(ce)); + int len = ce_namelen(ce); + copy_cache_entry_to_ondisk(ondisk, ce); + result = ce_write(c, fd, ondisk, size); + if (!result) + result = ce_write(c, fd, ce->name, len); + if (!result) + result = ce_write(c, fd, padding, align_padding_size(size, len)); } else { int common, to_remove, prefix_size; unsigned char to_remove_vi[16]; @@ -2094,16 +2098,12 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce, to_remove = previous_name->len - common; prefix_size = encode_varint(to_remove, to_remove_vi); - if (ce->ce_flags & CE_EXTENDED) - size = offsetof(struct ondisk_cache_entry_extended, name); - else - size = offsetof(struct ondisk_cache_entry, name); - size += prefix_size + (ce_namelen(ce) - common + 1); - - ondisk = xcalloc(1, size); - name = copy_cache_entry_to_ondisk(ondisk, ce); - memcpy(name, to_remove_vi, prefix_size); - memcpy(name + prefix_size, ce->name + common, ce_namelen(ce) - common); + copy_cache_entry_to_ondisk(ondisk, ce); + result = ce_write(c, fd, ondisk, size); + if (!result) + result = ce_write(c, fd, to_remove_vi, prefix_size); + if (!result) + result = ce_write(c, fd, ce->name + common, ce_namelen(ce) - common + 1); strbuf_splice(previous_name, common, to_remove, ce->name + common, ce_namelen(ce) - common); @@ -2113,8 +2113,6 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce, ce->ce_flags &= ~CE_STRIP_NAME; } - result = ce_write(c, fd, ondisk, size); - free(ondisk); return result; } @@ -2196,6 +2194,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, struct cache_entry **cache = istate->cache; int entries = istate->cache_nr; struct stat st; + struct ondisk_cache_entry_extended ondisk; struct strbuf previous_name_buf = STRBUF_INIT, *previous_name; int drop_cache_tree = 0; @@ -2232,6 +2231,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, return -1; previous_name = (hdr_version == 4) ? &previous_name_buf : NULL; + for (i = 0; i < entries; i++) { struct cache_entry *ce = cache[i]; if (ce->ce_flags & CE_REMOVE) @@ -2251,7 +2251,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, drop_cache_tree = 1; } - if (ce_write_entry(&c, newfd, ce, previous_name) < 0) + if (ce_write_entry(&c, newfd, ce, previous_name, (struct ondisk_cache_entry *)&ondisk) < 0) err = -1; if (err)