pack-objects: refer to delta objects by index instead of pointer

These delta pointers always point to elements of the objects[] array
in struct packing_data. That array can hold at most 4G objects because
its size, nr_objects, is a uint32_t, so we can address its elements
with uint32_t indexes instead of pointers. On 64-bit architectures
(8 bytes per pointer) this saves 4 bytes per pointer.

Convert these delta pointers to indexes. Since we need to handle NULL
pointers as well, the index is shifted by one [1].

[1] This means we can only index 2^32-2 objects even though nr_objects
    could contain 2^32-1 objects. It should not be a problem in
    practice because when we grow objects[], nr_alloc would probably
    blow up long before nr_objects hits the wall.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Nguyễn Thái Ngọc Duy 2018-04-14 17:35:06 +02:00 коммит произвёл Junio C Hamano
Родитель 43fa44fa3b
Коммит 898eba5e63
2 изменённых файлов: 125 добавлений и 59 удалений

Просмотреть файл

@ -32,6 +32,12 @@
#include "object-store.h" #include "object-store.h"
#define IN_PACK(obj) oe_in_pack(&to_pack, obj) #define IN_PACK(obj) oe_in_pack(&to_pack, obj)
/*
 * Shorthands for the delta accessors, all bound to the file-wide
 * "to_pack" packing data. The getters yield a struct object_entry
 * pointer (NULL when the link is unset); the setters take the entry
 * to link to, or NULL to clear the link.
 */
#define DELTA(obj) oe_delta(&to_pack, obj)
#define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
#define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
#define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
#define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
#define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
static const char *pack_usage[] = { static const char *pack_usage[] = {
N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"), N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
@ -129,10 +135,11 @@ static void *get_delta(struct object_entry *entry)
buf = read_object_file(&entry->idx.oid, &type, &size); buf = read_object_file(&entry->idx.oid, &type, &size);
if (!buf) if (!buf)
die("unable to read %s", oid_to_hex(&entry->idx.oid)); die("unable to read %s", oid_to_hex(&entry->idx.oid));
base_buf = read_object_file(&entry->delta->idx.oid, &type, &base_size); base_buf = read_object_file(&DELTA(entry)->idx.oid, &type,
&base_size);
if (!base_buf) if (!base_buf)
die("unable to read %s", die("unable to read %s",
oid_to_hex(&entry->delta->idx.oid)); oid_to_hex(&DELTA(entry)->idx.oid));
delta_buf = diff_delta(base_buf, base_size, delta_buf = diff_delta(base_buf, base_size,
buf, size, &delta_size, 0); buf, size, &delta_size, 0);
if (!delta_buf || delta_size != entry->delta_size) if (!delta_buf || delta_size != entry->delta_size)
@ -288,12 +295,12 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
size = entry->delta_size; size = entry->delta_size;
buf = entry->delta_data; buf = entry->delta_data;
entry->delta_data = NULL; entry->delta_data = NULL;
type = (allow_ofs_delta && entry->delta->idx.offset) ? type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
OBJ_OFS_DELTA : OBJ_REF_DELTA; OBJ_OFS_DELTA : OBJ_REF_DELTA;
} else { } else {
buf = get_delta(entry); buf = get_delta(entry);
size = entry->delta_size; size = entry->delta_size;
type = (allow_ofs_delta && entry->delta->idx.offset) ? type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
OBJ_OFS_DELTA : OBJ_REF_DELTA; OBJ_OFS_DELTA : OBJ_REF_DELTA;
} }
@ -317,7 +324,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
* encoding of the relative offset for the delta * encoding of the relative offset for the delta
* base from this object's position in the pack. * base from this object's position in the pack.
*/ */
off_t ofs = entry->idx.offset - entry->delta->idx.offset; off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
unsigned pos = sizeof(dheader) - 1; unsigned pos = sizeof(dheader) - 1;
dheader[pos] = ofs & 127; dheader[pos] = ofs & 127;
while (ofs >>= 7) while (ofs >>= 7)
@ -343,7 +350,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
return 0; return 0;
} }
hashwrite(f, header, hdrlen); hashwrite(f, header, hdrlen);
hashwrite(f, entry->delta->idx.oid.hash, 20); hashwrite(f, DELTA(entry)->idx.oid.hash, 20);
hdrlen += 20; hdrlen += 20;
} else { } else {
if (limit && hdrlen + datalen + 20 >= limit) { if (limit && hdrlen + datalen + 20 >= limit) {
@ -379,8 +386,8 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
dheader[MAX_PACK_OBJECT_HEADER]; dheader[MAX_PACK_OBJECT_HEADER];
unsigned hdrlen; unsigned hdrlen;
if (entry->delta) if (DELTA(entry))
type = (allow_ofs_delta && entry->delta->idx.offset) ? type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
OBJ_OFS_DELTA : OBJ_REF_DELTA; OBJ_OFS_DELTA : OBJ_REF_DELTA;
hdrlen = encode_in_pack_object_header(header, sizeof(header), hdrlen = encode_in_pack_object_header(header, sizeof(header),
type, entry->size); type, entry->size);
@ -408,7 +415,7 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
} }
if (type == OBJ_OFS_DELTA) { if (type == OBJ_OFS_DELTA) {
off_t ofs = entry->idx.offset - entry->delta->idx.offset; off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
unsigned pos = sizeof(dheader) - 1; unsigned pos = sizeof(dheader) - 1;
dheader[pos] = ofs & 127; dheader[pos] = ofs & 127;
while (ofs >>= 7) while (ofs >>= 7)
@ -427,7 +434,7 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
return 0; return 0;
} }
hashwrite(f, header, hdrlen); hashwrite(f, header, hdrlen);
hashwrite(f, entry->delta->idx.oid.hash, 20); hashwrite(f, DELTA(entry)->idx.oid.hash, 20);
hdrlen += 20; hdrlen += 20;
reused_delta++; reused_delta++;
} else { } else {
@ -467,13 +474,13 @@ static off_t write_object(struct hashfile *f,
else else
limit = pack_size_limit - write_offset; limit = pack_size_limit - write_offset;
if (!entry->delta) if (!DELTA(entry))
usable_delta = 0; /* no delta */ usable_delta = 0; /* no delta */
else if (!pack_size_limit) else if (!pack_size_limit)
usable_delta = 1; /* unlimited packfile */ usable_delta = 1; /* unlimited packfile */
else if (entry->delta->idx.offset == (off_t)-1) else if (DELTA(entry)->idx.offset == (off_t)-1)
usable_delta = 0; /* base was written to another pack */ usable_delta = 0; /* base was written to another pack */
else if (entry->delta->idx.offset) else if (DELTA(entry)->idx.offset)
usable_delta = 1; /* base already exists in this pack */ usable_delta = 1; /* base already exists in this pack */
else else
usable_delta = 0; /* base could end up in another pack */ usable_delta = 0; /* base could end up in another pack */
@ -489,7 +496,7 @@ static off_t write_object(struct hashfile *f,
/* ... but pack split may override that */ /* ... but pack split may override that */
else if (oe_type(entry) != entry->in_pack_type) else if (oe_type(entry) != entry->in_pack_type)
to_reuse = 0; /* pack has delta which is unusable */ to_reuse = 0; /* pack has delta which is unusable */
else if (entry->delta) else if (DELTA(entry))
to_reuse = 0; /* we want to pack afresh */ to_reuse = 0; /* we want to pack afresh */
else else
to_reuse = 1; /* we have it in-pack undeltified, to_reuse = 1; /* we have it in-pack undeltified,
@ -541,12 +548,12 @@ static enum write_one_status write_one(struct hashfile *f,
} }
/* if we are deltified, write out base object first. */ /* if we are deltified, write out base object first. */
if (e->delta) { if (DELTA(e)) {
e->idx.offset = 1; /* now recurse */ e->idx.offset = 1; /* now recurse */
switch (write_one(f, e->delta, offset)) { switch (write_one(f, DELTA(e), offset)) {
case WRITE_ONE_RECURSIVE: case WRITE_ONE_RECURSIVE:
/* we cannot depend on this one */ /* we cannot depend on this one */
e->delta = NULL; SET_DELTA(e, NULL);
break; break;
default: default:
break; break;
@ -608,34 +615,34 @@ static void add_descendants_to_write_order(struct object_entry **wo,
/* add this node... */ /* add this node... */
add_to_write_order(wo, endp, e); add_to_write_order(wo, endp, e);
/* all its siblings... */ /* all its siblings... */
for (s = e->delta_sibling; s; s = s->delta_sibling) { for (s = DELTA_SIBLING(e); s; s = DELTA_SIBLING(s)) {
add_to_write_order(wo, endp, s); add_to_write_order(wo, endp, s);
} }
} }
/* drop down a level to add left subtree nodes if possible */ /* drop down a level to add left subtree nodes if possible */
if (e->delta_child) { if (DELTA_CHILD(e)) {
add_to_order = 1; add_to_order = 1;
e = e->delta_child; e = DELTA_CHILD(e);
} else { } else {
add_to_order = 0; add_to_order = 0;
/* our sibling might have some children, it is next */ /* our sibling might have some children, it is next */
if (e->delta_sibling) { if (DELTA_SIBLING(e)) {
e = e->delta_sibling; e = DELTA_SIBLING(e);
continue; continue;
} }
/* go back to our parent node */ /* go back to our parent node */
e = e->delta; e = DELTA(e);
while (e && !e->delta_sibling) { while (e && !DELTA_SIBLING(e)) {
/* we're on the right side of a subtree, keep /* we're on the right side of a subtree, keep
* going up until we can go right again */ * going up until we can go right again */
e = e->delta; e = DELTA(e);
} }
if (!e) { if (!e) {
/* done- we hit our original root node */ /* done- we hit our original root node */
return; return;
} }
/* pass it off to sibling at this level */ /* pass it off to sibling at this level */
e = e->delta_sibling; e = DELTA_SIBLING(e);
} }
}; };
} }
@ -646,7 +653,7 @@ static void add_family_to_write_order(struct object_entry **wo,
{ {
struct object_entry *root; struct object_entry *root;
for (root = e; root->delta; root = root->delta) for (root = e; DELTA(root); root = DELTA(root))
; /* nothing */ ; /* nothing */
add_descendants_to_write_order(wo, endp, root); add_descendants_to_write_order(wo, endp, root);
} }
@ -661,8 +668,8 @@ static struct object_entry **compute_write_order(void)
for (i = 0; i < to_pack.nr_objects; i++) { for (i = 0; i < to_pack.nr_objects; i++) {
objects[i].tagged = 0; objects[i].tagged = 0;
objects[i].filled = 0; objects[i].filled = 0;
objects[i].delta_child = NULL; SET_DELTA_CHILD(&objects[i], NULL);
objects[i].delta_sibling = NULL; SET_DELTA_SIBLING(&objects[i], NULL);
} }
/* /*
@ -672,11 +679,11 @@ static struct object_entry **compute_write_order(void)
*/ */
for (i = to_pack.nr_objects; i > 0;) { for (i = to_pack.nr_objects; i > 0;) {
struct object_entry *e = &objects[--i]; struct object_entry *e = &objects[--i];
if (!e->delta) if (!DELTA(e))
continue; continue;
/* Mark me as the first child */ /* Mark me as the first child */
e->delta_sibling = e->delta->delta_child; e->delta_sibling_idx = DELTA(e)->delta_child_idx;
e->delta->delta_child = e; SET_DELTA_CHILD(DELTA(e), e);
} }
/* /*
@ -1493,10 +1500,10 @@ static void check_object(struct object_entry *entry)
* circular deltas. * circular deltas.
*/ */
oe_set_type(entry, entry->in_pack_type); oe_set_type(entry, entry->in_pack_type);
entry->delta = base_entry; SET_DELTA(entry, base_entry);
entry->delta_size = entry->size; entry->delta_size = entry->size;
entry->delta_sibling = base_entry->delta_child; entry->delta_sibling_idx = base_entry->delta_child_idx;
base_entry->delta_child = entry; SET_DELTA_CHILD(base_entry, entry);
unuse_pack(&w_curs); unuse_pack(&w_curs);
return; return;
} }
@ -1567,17 +1574,19 @@ static int pack_offset_sort(const void *_a, const void *_b)
*/ */
static void drop_reused_delta(struct object_entry *entry) static void drop_reused_delta(struct object_entry *entry)
{ {
struct object_entry **p = &entry->delta->delta_child; unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx;
struct object_info oi = OBJECT_INFO_INIT; struct object_info oi = OBJECT_INFO_INIT;
enum object_type type; enum object_type type;
while (*p) { while (*idx) {
if (*p == entry) struct object_entry *oe = &to_pack.objects[*idx - 1];
*p = (*p)->delta_sibling;
if (oe == entry)
*idx = oe->delta_sibling_idx;
else else
p = &(*p)->delta_sibling; idx = &oe->delta_sibling_idx;
} }
entry->delta = NULL; SET_DELTA(entry, NULL);
entry->depth = 0; entry->depth = 0;
oi.sizep = &entry->size; oi.sizep = &entry->size;
@ -1617,7 +1626,7 @@ static void break_delta_chains(struct object_entry *entry)
for (cur = entry, total_depth = 0; for (cur = entry, total_depth = 0;
cur; cur;
cur = cur->delta, total_depth++) { cur = DELTA(cur), total_depth++) {
if (cur->dfs_state == DFS_DONE) { if (cur->dfs_state == DFS_DONE) {
/* /*
* We've already seen this object and know it isn't * We've already seen this object and know it isn't
@ -1642,7 +1651,7 @@ static void break_delta_chains(struct object_entry *entry)
* it's not a delta, we're done traversing, but we'll mark it * it's not a delta, we're done traversing, but we'll mark it
* done to save time on future traversals. * done to save time on future traversals.
*/ */
if (!cur->delta) { if (!DELTA(cur)) {
cur->dfs_state = DFS_DONE; cur->dfs_state = DFS_DONE;
break; break;
} }
@ -1665,7 +1674,7 @@ static void break_delta_chains(struct object_entry *entry)
* We keep all commits in the chain that we examined. * We keep all commits in the chain that we examined.
*/ */
cur->dfs_state = DFS_ACTIVE; cur->dfs_state = DFS_ACTIVE;
if (cur->delta->dfs_state == DFS_ACTIVE) { if (DELTA(cur)->dfs_state == DFS_ACTIVE) {
drop_reused_delta(cur); drop_reused_delta(cur);
cur->dfs_state = DFS_DONE; cur->dfs_state = DFS_DONE;
break; break;
@ -1680,7 +1689,7 @@ static void break_delta_chains(struct object_entry *entry)
* an extra "next" pointer to keep going after we reset cur->delta. * an extra "next" pointer to keep going after we reset cur->delta.
*/ */
for (cur = entry; cur; cur = next) { for (cur = entry; cur; cur = next) {
next = cur->delta; next = DELTA(cur);
/* /*
* We should have a chain of zero or more ACTIVE states down to * We should have a chain of zero or more ACTIVE states down to
@ -1865,7 +1874,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
/* Now some size filtering heuristics. */ /* Now some size filtering heuristics. */
trg_size = trg_entry->size; trg_size = trg_entry->size;
if (!trg_entry->delta) { if (!DELTA(trg_entry)) {
max_size = trg_size/2 - 20; max_size = trg_size/2 - 20;
ref_depth = 1; ref_depth = 1;
} else { } else {
@ -1939,7 +1948,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
if (!delta_buf) if (!delta_buf)
return 0; return 0;
if (trg_entry->delta) { if (DELTA(trg_entry)) {
/* Prefer only shallower same-sized deltas. */ /* Prefer only shallower same-sized deltas. */
if (delta_size == trg_entry->delta_size && if (delta_size == trg_entry->delta_size &&
src->depth + 1 >= trg->depth) { src->depth + 1 >= trg->depth) {
@ -1968,7 +1977,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
free(delta_buf); free(delta_buf);
} }
trg_entry->delta = src_entry; SET_DELTA(trg_entry, src_entry);
trg_entry->delta_size = delta_size; trg_entry->delta_size = delta_size;
trg->depth = src->depth + 1; trg->depth = src->depth + 1;
@ -1977,13 +1986,13 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
static unsigned int check_delta_limit(struct object_entry *me, unsigned int n) static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
{ {
struct object_entry *child = me->delta_child; struct object_entry *child = DELTA_CHILD(me);
unsigned int m = n; unsigned int m = n;
while (child) { while (child) {
unsigned int c = check_delta_limit(child, n + 1); unsigned int c = check_delta_limit(child, n + 1);
if (m < c) if (m < c)
m = c; m = c;
child = child->delta_sibling; child = DELTA_SIBLING(child);
} }
return m; return m;
} }
@ -2052,7 +2061,7 @@ static void find_deltas(struct object_entry **list, unsigned *list_size,
* otherwise they would become too deep. * otherwise they would become too deep.
*/ */
max_depth = depth; max_depth = depth;
if (entry->delta_child) { if (DELTA_CHILD(entry)) {
max_depth -= check_delta_limit(entry, 0); max_depth -= check_delta_limit(entry, 0);
if (max_depth <= 0) if (max_depth <= 0)
goto next; goto next;
@ -2102,7 +2111,7 @@ static void find_deltas(struct object_entry **list, unsigned *list_size,
* depth, leaving it in the window is pointless. we * depth, leaving it in the window is pointless. we
* should evict it first. * should evict it first.
*/ */
if (entry->delta && max_depth <= n->depth) if (DELTA(entry) && max_depth <= n->depth)
continue; continue;
/* /*
@ -2110,7 +2119,7 @@ static void find_deltas(struct object_entry **list, unsigned *list_size,
* currently deltified object, to keep it longer. It will * currently deltified object, to keep it longer. It will
* be the first base object to be attempted next. * be the first base object to be attempted next.
*/ */
if (entry->delta) { if (DELTA(entry)) {
struct unpacked swap = array[best_base]; struct unpacked swap = array[best_base];
int dist = (window + idx - best_base) % window; int dist = (window + idx - best_base) % window;
int dst = best_base; int dst = best_base;
@ -2431,7 +2440,7 @@ static void prepare_pack(int window, int depth)
for (i = 0; i < to_pack.nr_objects; i++) { for (i = 0; i < to_pack.nr_objects; i++) {
struct object_entry *entry = to_pack.objects + i; struct object_entry *entry = to_pack.objects + i;
if (entry->delta) if (DELTA(entry))
/* This happens if we decided to reuse existing /* This happens if we decided to reuse existing
* delta from a pack. "reuse_delta &&" is implied. * delta from a pack. "reuse_delta &&" is implied.
*/ */

Просмотреть файл

@ -70,11 +70,11 @@ struct object_entry {
unsigned long size; /* uncompressed size */ unsigned long size; /* uncompressed size */
unsigned in_pack_idx:OE_IN_PACK_BITS; /* already in pack */ unsigned in_pack_idx:OE_IN_PACK_BITS; /* already in pack */
off_t in_pack_offset; off_t in_pack_offset;
struct object_entry *delta; /* delta base object */ uint32_t delta_idx; /* delta base object */
struct object_entry *delta_child; /* deltified objects who bases me */ uint32_t delta_child_idx; /* deltified objects who bases me */
struct object_entry *delta_sibling; /* other deltified objects who uint32_t delta_sibling_idx; /* other deltified objects who
* uses the same base as me * uses the same base as me
*/ */
void *delta_data; /* cached delta (uncompressed) */ void *delta_data; /* cached delta (uncompressed) */
unsigned long delta_size; /* delta data size (uncompressed) */ unsigned long delta_size; /* delta data size (uncompressed) */
unsigned long z_delta_size; /* delta data size (compressed) */ unsigned long z_delta_size; /* delta data size (compressed) */
@ -194,4 +194,61 @@ static inline void oe_set_in_pack(struct packing_data *pack,
pack->in_pack[e - pack->objects] = p; pack->in_pack[e - pack->objects] = p;
} }
/*
 * Look up the delta base of "e".
 *
 * delta_idx stores the base's position in pack->objects[] plus one,
 * so a stored value of 0 means "no delta base" and yields NULL.
 */
static inline struct object_entry *oe_delta(
		const struct packing_data *pack,
		const struct object_entry *e)
{
	uint32_t slot = e->delta_idx;

	if (!slot)
		return NULL;
	return pack->objects + (slot - 1);
}
/*
 * Record "delta" as the delta base of "e".
 *
 * The base is stored as its offset within pack->objects[] plus one;
 * passing NULL stores 0, which marks "e" as having no delta base.
 */
static inline void oe_set_delta(struct packing_data *pack,
				struct object_entry *e,
				struct object_entry *delta)
{
	e->delta_idx = delta ? (delta - pack->objects) + 1 : 0;
}
/*
 * Look up the entry recorded in e->delta_child_idx.
 *
 * The field stores a position in pack->objects[] plus one, so a
 * stored value of 0 means "no child" and yields NULL.
 */
static inline struct object_entry *oe_delta_child(
		const struct packing_data *pack,
		const struct object_entry *e)
{
	uint32_t slot = e->delta_child_idx;

	if (!slot)
		return NULL;
	return pack->objects + (slot - 1);
}
/*
 * Record "delta" in e->delta_child_idx.
 *
 * The entry is stored as its offset within pack->objects[] plus one;
 * passing NULL stores 0, which clears the child link.
 */
static inline void oe_set_delta_child(struct packing_data *pack,
				      struct object_entry *e,
				      struct object_entry *delta)
{
	e->delta_child_idx = delta ? (delta - pack->objects) + 1 : 0;
}
/*
 * Look up the entry recorded in e->delta_sibling_idx.
 *
 * The field stores a position in pack->objects[] plus one, so a
 * stored value of 0 means "no sibling" and yields NULL.
 */
static inline struct object_entry *oe_delta_sibling(
		const struct packing_data *pack,
		const struct object_entry *e)
{
	uint32_t slot = e->delta_sibling_idx;

	if (!slot)
		return NULL;
	return pack->objects + (slot - 1);
}
/*
 * Record "delta" in e->delta_sibling_idx.
 *
 * The entry is stored as its offset within pack->objects[] plus one;
 * passing NULL stores 0, which clears the sibling link.
 */
static inline void oe_set_delta_sibling(struct packing_data *pack,
					struct object_entry *e,
					struct object_entry *delta)
{
	e->delta_sibling_idx = delta ? (delta - pack->objects) + 1 : 0;
}
#endif #endif