зеркало из https://github.com/microsoft/git.git
read-cache.c: optimize reading index format v4
Index format v4 requires some more computation to assemble a path based on a previous one. The current code is not very efficient because - it doubles memory copy, we assemble the final path in a temporary first before putting it back to a cache_entry - strbuf_remove() in expand_name_field() is not exactly a good fit for stripping a part at the end, _setlen() would do the same job and is much cheaper. - the open-coded loop to find the end of the string in expand_name_field() can't beat an optimized strlen() This patch avoids the temporary buffer and writes directly to the new cache_entry, which addresses the first two points. The last point could also be avoided if the total string length fits in the first 12 bits of ce_flags, if not we fall back to strlen(). Running "test-tool read-cache 100" on webkit.git (275k files), reading v2 only takes 4.226 seconds, while v4 takes 5.711 seconds, 35% more time. The patch reduces read time on v4 to 4.319 seconds. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Родитель
fe8321ec05
Коммит
252d079cbd
124
read-cache.c
124
read-cache.c
|
@ -1713,63 +1713,24 @@ int read_index(struct index_state *istate)
|
|||
return read_index_from(istate, get_index_file(), get_git_dir());
|
||||
}
|
||||
|
||||
static struct cache_entry *cache_entry_from_ondisk(struct mem_pool *mem_pool,
|
||||
static struct cache_entry *create_from_disk(struct index_state *istate,
|
||||
struct ondisk_cache_entry *ondisk,
|
||||
unsigned int flags,
|
||||
const char *name,
|
||||
size_t len)
|
||||
unsigned long *ent_size,
|
||||
const struct cache_entry *previous_ce)
|
||||
{
|
||||
struct cache_entry *ce = mem_pool__ce_alloc(mem_pool, len);
|
||||
|
||||
ce->ce_stat_data.sd_ctime.sec = get_be32(&ondisk->ctime.sec);
|
||||
ce->ce_stat_data.sd_mtime.sec = get_be32(&ondisk->mtime.sec);
|
||||
ce->ce_stat_data.sd_ctime.nsec = get_be32(&ondisk->ctime.nsec);
|
||||
ce->ce_stat_data.sd_mtime.nsec = get_be32(&ondisk->mtime.nsec);
|
||||
ce->ce_stat_data.sd_dev = get_be32(&ondisk->dev);
|
||||
ce->ce_stat_data.sd_ino = get_be32(&ondisk->ino);
|
||||
ce->ce_mode = get_be32(&ondisk->mode);
|
||||
ce->ce_stat_data.sd_uid = get_be32(&ondisk->uid);
|
||||
ce->ce_stat_data.sd_gid = get_be32(&ondisk->gid);
|
||||
ce->ce_stat_data.sd_size = get_be32(&ondisk->size);
|
||||
ce->ce_flags = flags & ~CE_NAMEMASK;
|
||||
ce->ce_namelen = len;
|
||||
ce->index = 0;
|
||||
hashcpy(ce->oid.hash, ondisk->sha1);
|
||||
memcpy(ce->name, name, len);
|
||||
ce->name[len] = '\0';
|
||||
return ce;
|
||||
}
|
||||
|
||||
/*
|
||||
struct cache_entry *ce;
|
||||
size_t len;
|
||||
const char *name;
|
||||
unsigned int flags;
|
||||
size_t copy_len;
|
||||
/*
|
||||
* Adjacent cache entries tend to share the leading paths, so it makes
|
||||
* sense to only store the differences in later entries. In the v4
|
||||
* on-disk format of the index, each on-disk cache entry stores the
|
||||
* number of bytes to be stripped from the end of the previous name,
|
||||
* and the bytes to append to the result, to come up with its name.
|
||||
*/
|
||||
static unsigned long expand_name_field(struct strbuf *name, const char *cp_)
|
||||
{
|
||||
const unsigned char *ep, *cp = (const unsigned char *)cp_;
|
||||
size_t len = decode_varint(&cp);
|
||||
|
||||
if (name->len < len)
|
||||
die("malformed name field in the index");
|
||||
strbuf_remove(name, name->len - len, len);
|
||||
for (ep = cp; *ep; ep++)
|
||||
; /* find the end */
|
||||
strbuf_add(name, cp, ep - cp);
|
||||
return (const char *)ep + 1 - cp_;
|
||||
}
|
||||
|
||||
static struct cache_entry *create_from_disk(struct mem_pool *mem_pool,
|
||||
struct ondisk_cache_entry *ondisk,
|
||||
unsigned long *ent_size,
|
||||
struct strbuf *previous_name)
|
||||
{
|
||||
struct cache_entry *ce;
|
||||
size_t len;
|
||||
const char *name;
|
||||
unsigned int flags;
|
||||
int expand_name_field = istate->version == 4;
|
||||
|
||||
/* On-disk flags are just 16 bits */
|
||||
flags = get_be16(&ondisk->flags);
|
||||
|
@ -1789,21 +1750,54 @@ static struct cache_entry *create_from_disk(struct mem_pool *mem_pool,
|
|||
else
|
||||
name = ondisk->name;
|
||||
|
||||
if (!previous_name) {
|
||||
/* v3 and earlier */
|
||||
if (len == CE_NAMEMASK)
|
||||
if (expand_name_field) {
|
||||
const unsigned char *cp = (const unsigned char *)name;
|
||||
size_t strip_len, previous_len;
|
||||
|
||||
previous_len = previous_ce ? previous_ce->ce_namelen : 0;
|
||||
strip_len = decode_varint(&cp);
|
||||
if (previous_len < strip_len) {
|
||||
if (previous_ce)
|
||||
die(_("malformed name field in the index, near path '%s'"),
|
||||
previous_ce->name);
|
||||
else
|
||||
die(_("malformed name field in the index in the first path"));
|
||||
}
|
||||
copy_len = previous_len - strip_len;
|
||||
name = (const char *)cp;
|
||||
}
|
||||
|
||||
if (len == CE_NAMEMASK) {
|
||||
len = strlen(name);
|
||||
ce = cache_entry_from_ondisk(mem_pool, ondisk, flags, name, len);
|
||||
if (expand_name_field)
|
||||
len += copy_len;
|
||||
}
|
||||
|
||||
*ent_size = ondisk_ce_size(ce);
|
||||
ce = mem_pool__ce_alloc(istate->ce_mem_pool, len);
|
||||
|
||||
ce->ce_stat_data.sd_ctime.sec = get_be32(&ondisk->ctime.sec);
|
||||
ce->ce_stat_data.sd_mtime.sec = get_be32(&ondisk->mtime.sec);
|
||||
ce->ce_stat_data.sd_ctime.nsec = get_be32(&ondisk->ctime.nsec);
|
||||
ce->ce_stat_data.sd_mtime.nsec = get_be32(&ondisk->mtime.nsec);
|
||||
ce->ce_stat_data.sd_dev = get_be32(&ondisk->dev);
|
||||
ce->ce_stat_data.sd_ino = get_be32(&ondisk->ino);
|
||||
ce->ce_mode = get_be32(&ondisk->mode);
|
||||
ce->ce_stat_data.sd_uid = get_be32(&ondisk->uid);
|
||||
ce->ce_stat_data.sd_gid = get_be32(&ondisk->gid);
|
||||
ce->ce_stat_data.sd_size = get_be32(&ondisk->size);
|
||||
ce->ce_flags = flags & ~CE_NAMEMASK;
|
||||
ce->ce_namelen = len;
|
||||
ce->index = 0;
|
||||
hashcpy(ce->oid.hash, ondisk->sha1);
|
||||
|
||||
if (expand_name_field) {
|
||||
if (copy_len)
|
||||
memcpy(ce->name, previous_ce->name, copy_len);
|
||||
memcpy(ce->name + copy_len, name, len + 1 - copy_len);
|
||||
*ent_size = (name - ((char *)ondisk)) + len + 1 - copy_len;
|
||||
} else {
|
||||
unsigned long consumed;
|
||||
consumed = expand_name_field(previous_name, name);
|
||||
ce = cache_entry_from_ondisk(mem_pool, ondisk, flags,
|
||||
previous_name->buf,
|
||||
previous_name->len);
|
||||
|
||||
*ent_size = (name - ((char *)ondisk)) + consumed;
|
||||
memcpy(ce->name, name, len + 1);
|
||||
*ent_size = ondisk_ce_size(ce);
|
||||
}
|
||||
return ce;
|
||||
}
|
||||
|
@ -1898,7 +1892,7 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist)
|
|||
struct cache_header *hdr;
|
||||
void *mmap;
|
||||
size_t mmap_size;
|
||||
struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
|
||||
const struct cache_entry *previous_ce = NULL;
|
||||
|
||||
if (istate->initialized)
|
||||
return istate->cache_nr;
|
||||
|
@ -1936,11 +1930,9 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist)
|
|||
istate->initialized = 1;
|
||||
|
||||
if (istate->version == 4) {
|
||||
previous_name = &previous_name_buf;
|
||||
mem_pool_init(&istate->ce_mem_pool,
|
||||
estimate_cache_size_from_compressed(istate->cache_nr));
|
||||
} else {
|
||||
previous_name = NULL;
|
||||
mem_pool_init(&istate->ce_mem_pool,
|
||||
estimate_cache_size(mmap_size, istate->cache_nr));
|
||||
}
|
||||
|
@ -1952,12 +1944,12 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist)
|
|||
unsigned long consumed;
|
||||
|
||||
disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
|
||||
ce = create_from_disk(istate->ce_mem_pool, disk_ce, &consumed, previous_name);
|
||||
ce = create_from_disk(istate, disk_ce, &consumed, previous_ce);
|
||||
set_index_entry(istate, i, ce);
|
||||
|
||||
src_offset += consumed;
|
||||
previous_ce = ce;
|
||||
}
|
||||
strbuf_release(&previous_name_buf);
|
||||
istate->timestamp.sec = st.st_mtime;
|
||||
istate->timestamp.nsec = ST_MTIME_NSEC(st);
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче