From cc817ca3ef2267c21af9589a7f92190a3659906c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 22 Jun 2017 20:19:48 +0200 Subject: [PATCH 1/4] sha1_name: cache readdir(3) results in find_short_object_filename() Read each loose object subdirectory at most once when looking for unique abbreviated hashes. This speeds up commands like "git log --pretty=%h" considerably, which previously caused one readdir(3) call for each candidate, even for subdirectories that were visited before. The new cache is kept until the program ends and never invalidated. The same is already true for pack indexes. The inherent racy nature of finding unique short hashes makes it still fit for this purpose -- a conflicting new object may be added at any time. Tasks with higher consistency requirements should not use it, though. The cached object names are stored in an oid_array, which is quite compact. The bitmap for remembering which subdir was already read is stored as a char array, with one char per directory -- that's not quite as compact, but really simple and incurs only an overhead equivalent to 11 hashes after all. Suggested-by: Jeff King Helped-by: Jeff King Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- cache.h | 17 +++++++++++++++++ sha1_file.c | 12 ++++++------ sha1_name.c | 52 +++++++++++++++++++++++++++++++--------------------- 3 files changed, 54 insertions(+), 27 deletions(-) diff --git a/cache.h b/cache.h index e1f0e182ad..1ad914084b 100644 --- a/cache.h +++ b/cache.h @@ -11,6 +11,7 @@ #include "string-list.h" #include "pack-revindex.h" #include "hash.h" +#include "sha1-array.h" #ifndef platform_SHA_CTX /* @@ -1579,6 +1580,16 @@ extern struct alternate_object_database { struct strbuf scratch; size_t base_len; + /* + * Used to store the results of readdir(3) calls when searching + * for unique abbreviated hashes. This cache is never + * invalidated, thus it's racy and not necessarily accurate. + * That's fine for its purpose; don't use it for tasks requiring + * greater accuracy! + */ + char loose_objects_subdir_seen[256]; + struct oid_array loose_objects_cache; + char path[FLEX_ARRAY]; } *alt_odb_list; extern void prepare_alt_odb(void); @@ -1797,6 +1808,12 @@ typedef int each_loose_cruft_fn(const char *basename, typedef int each_loose_subdir_fn(int nr, const char *path, void *data); +int for_each_file_in_obj_subdir(int subdir_nr, + struct strbuf *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data); int for_each_loose_file_in_objdir(const char *path, each_loose_object_fn obj_cb, each_loose_cruft_fn cruft_cb, diff --git a/sha1_file.c b/sha1_file.c index 59a4ed2ed3..5e0ee2b68b 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -3735,12 +3735,12 @@ void assert_sha1_type(const unsigned char *sha1, enum object_type expect) typename(expect)); } -static int for_each_file_in_obj_subdir(int subdir_nr, - struct strbuf *path, - each_loose_object_fn obj_cb, - each_loose_cruft_fn cruft_cb, - each_loose_subdir_fn subdir_cb, - void *data) +int for_each_file_in_obj_subdir(int subdir_nr, + struct strbuf *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data) { size_t baselen = path->len; DIR *dir = opendir(path->buf); diff --git a/sha1_name.c b/sha1_name.c index 8eec9f7c1b..76cb76a844 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -77,10 +77,19 @@ static void update_candidates(struct disambiguate_state *ds, const struct object /* otherwise, current can be discarded and candidate is still good */ } +static int append_loose_object(const struct object_id *oid, const char *path, + void *data) +{ + oid_array_append(data, oid); + return 0; +} + +static int match_sha(unsigned, const unsigned char *, const unsigned char *); + static void find_short_object_filename(struct disambiguate_state *ds) { + int subdir_nr = ds->bin_pfx.hash[0]; struct alternate_object_database *alt; - char hex[GIT_MAX_HEXSZ]; static struct alternate_object_database *fakeent; if (!fakeent) { @@ -95,29 +104,30 @@ static void find_short_object_filename(struct disambiguate_state *ds) } fakeent->next = alt_odb_list; - xsnprintf(hex, sizeof(hex), "%.2s", ds->hex_pfx); for (alt = fakeent; alt && !ds->ambiguous; alt = alt->next) { - struct strbuf *buf = alt_scratch_buf(alt); - struct dirent *de; - DIR *dir; + int pos; - strbuf_addf(buf, "%.2s/", ds->hex_pfx); - dir = opendir(buf->buf); - if (!dir) - continue; - - while (!ds->ambiguous && (de = readdir(dir)) != NULL) { - struct object_id oid; - - if (strlen(de->d_name) != GIT_SHA1_HEXSZ - 2) - continue; - if (memcmp(de->d_name, ds->hex_pfx + 2, ds->len - 2)) - continue; - memcpy(hex + 2, de->d_name, GIT_SHA1_HEXSZ - 2); - if (!get_oid_hex(hex, &oid)) - update_candidates(ds, &oid); + if (!alt->loose_objects_subdir_seen[subdir_nr]) { + struct strbuf *buf = alt_scratch_buf(alt); + strbuf_addf(buf, "%02x/", subdir_nr); + for_each_file_in_obj_subdir(subdir_nr, buf, + append_loose_object, + NULL, NULL, + &alt->loose_objects_cache); + alt->loose_objects_subdir_seen[subdir_nr] = 1; + } + + pos = oid_array_lookup(&alt->loose_objects_cache, &ds->bin_pfx); + if (pos < 0) + pos = -1 - pos; + while (!ds->ambiguous && pos < alt->loose_objects_cache.nr) { + const struct object_id *oid; + oid = alt->loose_objects_cache.oid + pos; + if (!match_sha(ds->len, ds->bin_pfx.hash, oid->hash)) + break; + update_candidates(ds, oid); + pos++; } - closedir(dir); } } From 5a5bd5765ac5d180bf540ba7c6540a9f3dc16f4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 24 Jun 2017 14:12:07 +0200 Subject: [PATCH 2/4] p4205: add perf test script for pretty log formats Add simple performance tests for expanded log format placeholders. Suggested-by: Jeff King Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- t/perf/p4205-log-pretty-formats.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100755 t/perf/p4205-log-pretty-formats.sh diff --git a/t/perf/p4205-log-pretty-formats.sh b/t/perf/p4205-log-pretty-formats.sh new file mode 100755 index 0000000000..7c26f4f337 --- /dev/null +++ b/t/perf/p4205-log-pretty-formats.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +test_description='Tests the performance of various pretty format placeholders' + +. ./perf-lib.sh + +test_perf_default_repo + +for format in %H %h %T %t %P %p %h-%h-%h +do + test_perf "log with $format" " + git log --format=\"$format\" >/dev/null + " +done + +test_done From 0375f472d484041f9b1e5550b57d69286b3322e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 24 Jun 2017 14:12:30 +0200 Subject: [PATCH 3/4] sha1_file: let for_each_file_in_obj_subdir() handle subdir names The function for_each_file_in_obj_subdir() takes a object subdirectory number and expects the name of the same subdirectory to be included in the path strbuf. Avoid this redundancy by letting the function append the hexadecimal subdirectory name itself. This makes it a bit easier and safer to use the function -- it becomes impossible to specify different subdirectories in subdir_nr and path. Suggested-by: Jeff King Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- sha1_file.c | 22 ++++++++++++++-------- sha1_name.c | 1 - 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/sha1_file.c b/sha1_file.c index 5e0ee2b68b..98ce85acf9 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -3742,15 +3742,22 @@ int for_each_file_in_obj_subdir(int subdir_nr, each_loose_subdir_fn subdir_cb, void *data) { - size_t baselen = path->len; - DIR *dir = opendir(path->buf); + size_t origlen, baselen; + DIR *dir; struct dirent *de; int r = 0; + origlen = path->len; + strbuf_complete(path, '/'); + strbuf_addf(path, "%02x", subdir_nr); + baselen = path->len; + + dir = opendir(path->buf); if (!dir) { - if (errno == ENOENT) - return 0; - return error_errno("unable to open %s", path->buf); + if (errno != ENOENT) + r = error_errno("unable to open %s", path->buf); + strbuf_setlen(path, origlen); + return r; } while ((de = readdir(dir))) { @@ -3788,6 +3795,8 @@ int for_each_file_in_obj_subdir(int subdir_nr, if (!r && subdir_cb) r = subdir_cb(subdir_nr, path->buf, data); + strbuf_setlen(path, origlen); + return r; } @@ -3797,15 +3806,12 @@ int for_each_loose_file_in_objdir_buf(struct strbuf *path, each_loose_subdir_fn subdir_cb, void *data) { - size_t baselen = path->len; int r = 0; int i; for (i = 0; i < 256; i++) { - strbuf_addf(path, "/%02x", i); r = for_each_file_in_obj_subdir(i, path, obj_cb, cruft_cb, subdir_cb, data); - strbuf_setlen(path, baselen); if (r) break; } diff --git a/sha1_name.c b/sha1_name.c index 76cb76a844..8de0e2d3b4 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -109,7 +109,6 @@ static void find_short_object_filename(struct disambiguate_state *ds) if (!alt->loose_objects_subdir_seen[subdir_nr]) { struct strbuf *buf = alt_scratch_buf(alt); - strbuf_addf(buf, "%02x/", subdir_nr); for_each_file_in_obj_subdir(subdir_nr, buf, append_loose_object, NULL, NULL, From 70c49050d4a16a7e2990e4d3c91d9d12f62e631e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 24 Jun 2017 16:09:39 +0200 Subject: [PATCH 4/4] sha1_file: guard against invalid loose subdirectory numbers Loose object subdirectories have hexadecimal names based on the first byte of the hash of contained objects, thus their numerical representation can range from 0 (0x00) to 255 (0xff). Change the type of the corresponding variable in for_each_file_in_obj_subdir() and associated callback functions to unsigned int and add a range check. Suggested-by: Jeff King Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- builtin/fsck.c | 2 +- builtin/prune-packed.c | 2 +- builtin/prune.c | 2 +- cache.h | 4 ++-- sha1_file.c | 5 ++++- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index b5e13a4556..2686951381 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -537,7 +537,7 @@ static int fsck_cruft(const char *basename, const char *path, void *data) return 0; } -static int fsck_subdir(int nr, const char *path, void *progress) +static int fsck_subdir(unsigned int nr, const char *path, void *progress) { display_progress(progress, nr + 1); return 0; diff --git a/builtin/prune-packed.c b/builtin/prune-packed.c index c026299e78..ac978ad401 100644 --- a/builtin/prune-packed.c +++ b/builtin/prune-packed.c @@ -10,7 +10,7 @@ static const char * const prune_packed_usage[] = { static struct progress *progress; -static int prune_subdir(int nr, const char *path, void *data) +static int prune_subdir(unsigned int nr, const char *path, void *data) { int *opts = data; display_progress(progress, nr + 1); diff --git a/builtin/prune.c b/builtin/prune.c index 42633e0c6e..ea208c97f8 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -68,7 +68,7 @@ static int prune_cruft(const char *basename, const char *path, void *data) return 0; } -static int prune_subdir(int nr, const char *path, void *data) +static int prune_subdir(unsigned int nr, const char *path, void *data) { if (!show_only) rmdir(path); diff --git a/cache.h b/cache.h index 1ad914084b..7f7ec5d56d 100644 --- a/cache.h +++ b/cache.h @@ -1805,10 +1805,10 @@ typedef int each_loose_object_fn(const struct object_id *oid, typedef int each_loose_cruft_fn(const char *basename, const char *path, void *data); -typedef int each_loose_subdir_fn(int nr, +typedef int each_loose_subdir_fn(unsigned int nr, const char *path, void *data); -int for_each_file_in_obj_subdir(int subdir_nr, +int for_each_file_in_obj_subdir(unsigned int subdir_nr, struct strbuf *path, each_loose_object_fn obj_cb, each_loose_cruft_fn cruft_cb, diff --git a/sha1_file.c b/sha1_file.c index 98ce85acf9..77050a3801 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -3735,7 +3735,7 @@ void assert_sha1_type(const unsigned char *sha1, enum object_type expect) typename(expect)); } -int for_each_file_in_obj_subdir(int subdir_nr, +int for_each_file_in_obj_subdir(unsigned int subdir_nr, struct strbuf *path, each_loose_object_fn obj_cb, each_loose_cruft_fn cruft_cb, @@ -3747,6 +3747,9 @@ int for_each_file_in_obj_subdir(int subdir_nr, struct dirent *de; int r = 0; + if (subdir_nr > 0xff) + BUG("invalid loose object subdirectory: %x", subdir_nr); + origlen = path->len; strbuf_complete(path, '/'); strbuf_addf(path, "%02x", subdir_nr);