Merge branch 'ds/find-unique-abbrev-optim'

Optimize the code to find shortest unique prefix of object names.

* ds/find-unique-abbrev-optim:
  sha1_name: minimize OID comparisons during disambiguation
  sha1_name: parse less while finding common prefix
  sha1_name: unroll len loop in find_unique_abbrev_r()
  p4211-line-log.sh: add log --online --raw --parents perf test
This commit is contained in:
Junio C Hamano 2017-11-06 14:24:25 +09:00
Родитель fb4cd88ad4 0e87b85683
Коммит cb52b49db5
2 изменённых файлов: 123 добавлений и 16 удалений

Просмотреть файл

@ -153,7 +153,9 @@ static void unique_in_pack(struct packed_git *p,
uint32_t num, last, i, first = 0;
const struct object_id *current = NULL;
open_pack_index(p);
if (open_pack_index(p) || !p->num_objects)
return;
num = p->num_objects;
last = num;
while (first < last) {
@ -474,10 +476,104 @@ static unsigned msb(unsigned long val)
return r;
}
struct min_abbrev_data {
unsigned int init_len;
unsigned int cur_len;
char *hex;
const unsigned char *hash;
};
static inline char get_hex_char_from_oid(const struct object_id *oid,
unsigned int pos)
{
static const char hex[] = "0123456789abcdef";
if ((pos & 1) == 0)
return hex[oid->hash[pos >> 1] >> 4];
else
return hex[oid->hash[pos >> 1] & 0xf];
}
static int extend_abbrev_len(const struct object_id *oid, void *cb_data)
{
struct min_abbrev_data *mad = cb_data;
unsigned int i = mad->init_len;
while (mad->hex[i] && mad->hex[i] == get_hex_char_from_oid(oid, i))
i++;
if (i < GIT_MAX_RAWSZ && i >= mad->cur_len)
mad->cur_len = i + 1;
return 0;
}
static void find_abbrev_len_for_pack(struct packed_git *p,
struct min_abbrev_data *mad)
{
int match = 0;
uint32_t num, last, first = 0;
struct object_id oid;
if (open_pack_index(p) || !p->num_objects)
return;
num = p->num_objects;
last = num;
while (first < last) {
uint32_t mid = first + (last - first) / 2;
const unsigned char *current;
int cmp;
current = nth_packed_object_sha1(p, mid);
cmp = hashcmp(mad->hash, current);
if (!cmp) {
match = 1;
first = mid;
break;
}
if (cmp > 0) {
first = mid + 1;
continue;
}
last = mid;
}
/*
* first is now the position in the packfile where we would insert
* mad->hash if it does not exist (or the position of mad->hash if
* it does exist). Hence, we consider a maximum of three objects
* nearby for the abbreviation length.
*/
mad->init_len = 0;
if (!match) {
nth_packed_object_oid(&oid, p, first);
extend_abbrev_len(&oid, mad);
} else if (first < num - 1) {
nth_packed_object_oid(&oid, p, first + 1);
extend_abbrev_len(&oid, mad);
}
if (first > 0) {
nth_packed_object_oid(&oid, p, first - 1);
extend_abbrev_len(&oid, mad);
}
mad->init_len = mad->cur_len;
}
static void find_abbrev_len_packed(struct min_abbrev_data *mad)
{
struct packed_git *p;
prepare_packed_git();
for (p = packed_git; p; p = p->next)
find_abbrev_len_for_pack(p, mad);
}
int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
{
int status, exists;
struct disambiguate_state ds;
struct min_abbrev_data mad;
struct object_id oid_ret;
if (len < 0) {
unsigned long count = approximate_object_count();
/*
@ -503,19 +599,26 @@ int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
sha1_to_hex_r(hex, sha1);
if (len == GIT_SHA1_HEXSZ || !len)
return GIT_SHA1_HEXSZ;
exists = has_sha1_file(sha1);
while (len < GIT_SHA1_HEXSZ) {
struct object_id oid_ret;
status = get_short_oid(hex, len, &oid_ret, GET_OID_QUIETLY);
if (exists
? !status
: status == SHORT_NAME_NOT_FOUND) {
hex[len] = 0;
return len;
}
len++;
}
return len;
mad.init_len = len;
mad.cur_len = len;
mad.hex = hex;
mad.hash = sha1;
find_abbrev_len_packed(&mad);
if (init_object_disambiguation(hex, mad.cur_len, &ds) < 0)
return -1;
ds.fn = extend_abbrev_len;
ds.always_call_fn = 1;
ds.cb_data = (void *)&mad;
find_short_object_filename(&ds);
(void)finish_object_disambiguation(&ds, &oid_ret);
hex[mad.cur_len] = 0;
return mad.cur_len;
}
const char *find_unique_abbrev(const unsigned char *sha1, int len)

Просмотреть файл

@ -31,4 +31,8 @@ test_perf 'git log -L (renames on)' '
git log -M -L 1:"$file" >/dev/null
'
test_perf 'git log --oneline --raw --parents' '
git log --oneline --raw --parents >/dev/null
'
test_done