Merge branch 'ew/many-alternate-optim'

Optimization for repositories with many alternate object store.

* ew/many-alternate-optim:
  oidtree: a crit-bit tree for odb_loose_cache
  oidcpy_with_padding: constify `src' arg
  make object_directory.loose_objects_subdir_seen a bitmap
  avoid strlen via strbuf_addstr in link_alt_odb_entry
  speed up alt_odb_usable() with many alternates
This commit is contained in:
Junio C Hamano 2021-07-28 13:17:57 -07:00
Родитель 14793a4e37 92d8ed8ac1
Коммит e5cc59c77c
16 изменённых файлов: 535 добавлений и 52 удалений

Просмотреть файл

@ -726,6 +726,7 @@ TEST_BUILTINS_OBJS += test-mergesort.o
TEST_BUILTINS_OBJS += test-mktemp.o TEST_BUILTINS_OBJS += test-mktemp.o
TEST_BUILTINS_OBJS += test-oid-array.o TEST_BUILTINS_OBJS += test-oid-array.o
TEST_BUILTINS_OBJS += test-oidmap.o TEST_BUILTINS_OBJS += test-oidmap.o
TEST_BUILTINS_OBJS += test-oidtree.o
TEST_BUILTINS_OBJS += test-online-cpus.o TEST_BUILTINS_OBJS += test-online-cpus.o
TEST_BUILTINS_OBJS += test-parse-options.o TEST_BUILTINS_OBJS += test-parse-options.o
TEST_BUILTINS_OBJS += test-parse-pathspec-file.o TEST_BUILTINS_OBJS += test-parse-pathspec-file.o
@ -850,6 +851,7 @@ LIB_OBJS += branch.o
LIB_OBJS += bulk-checkin.o LIB_OBJS += bulk-checkin.o
LIB_OBJS += bundle.o LIB_OBJS += bundle.o
LIB_OBJS += cache-tree.o LIB_OBJS += cache-tree.o
LIB_OBJS += cbtree.o
LIB_OBJS += chdir-notify.o LIB_OBJS += chdir-notify.o
LIB_OBJS += checkout.o LIB_OBJS += checkout.o
LIB_OBJS += chunk-format.o LIB_OBJS += chunk-format.o
@ -945,6 +947,7 @@ LIB_OBJS += object.o
LIB_OBJS += oid-array.o LIB_OBJS += oid-array.o
LIB_OBJS += oidmap.o LIB_OBJS += oidmap.o
LIB_OBJS += oidset.o LIB_OBJS += oidset.o
LIB_OBJS += oidtree.o
LIB_OBJS += pack-bitmap-write.o LIB_OBJS += pack-bitmap-write.o
LIB_OBJS += pack-bitmap.o LIB_OBJS += pack-bitmap.o
LIB_OBJS += pack-check.o LIB_OBJS += pack-check.o

167
cbtree.c Normal file
Просмотреть файл

@ -0,0 +1,167 @@
/*
* crit-bit tree implementation, does no allocations internally
* For more information on crit-bit trees: https://cr.yp.to/critbit.html
* Based on Adam Langley's adaptation of Dan Bernstein's public domain code
* git clone https://github.com/agl/critbit.git
*/
#include "cbtree.h"
static struct cb_node *cb_node_of(const void *p)
{
return (struct cb_node *)((uintptr_t)p - 1);
}
/* locate the best match, does not do a final comparision */
static struct cb_node *cb_internal_best_match(struct cb_node *p,
const uint8_t *k, size_t klen)
{
while (1 & (uintptr_t)p) {
struct cb_node *q = cb_node_of(p);
uint8_t c = q->byte < klen ? k[q->byte] : 0;
size_t direction = (1 + (q->otherbits | c)) >> 8;
p = q->child[direction];
}
return p;
}
/* returns NULL if successful, existing cb_node if duplicate */
struct cb_node *cb_insert(struct cb_tree *t, struct cb_node *node, size_t klen)
{
size_t newbyte, newotherbits;
uint8_t c;
int newdirection;
struct cb_node **wherep, *p;
assert(!((uintptr_t)node & 1)); /* allocations must be aligned */
if (!t->root) { /* insert into empty tree */
t->root = node;
return NULL; /* success */
}
/* see if a node already exists */
p = cb_internal_best_match(t->root, node->k, klen);
/* find first differing byte */
for (newbyte = 0; newbyte < klen; newbyte++) {
if (p->k[newbyte] != node->k[newbyte])
goto different_byte_found;
}
return p; /* element exists, let user deal with it */
different_byte_found:
newotherbits = p->k[newbyte] ^ node->k[newbyte];
newotherbits |= newotherbits >> 1;
newotherbits |= newotherbits >> 2;
newotherbits |= newotherbits >> 4;
newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255;
c = p->k[newbyte];
newdirection = (1 + (newotherbits | c)) >> 8;
node->byte = newbyte;
node->otherbits = newotherbits;
node->child[1 - newdirection] = node;
/* find a place to insert it */
wherep = &t->root;
for (;;) {
struct cb_node *q;
size_t direction;
p = *wherep;
if (!(1 & (uintptr_t)p))
break;
q = cb_node_of(p);
if (q->byte > newbyte)
break;
if (q->byte == newbyte && q->otherbits > newotherbits)
break;
c = q->byte < klen ? node->k[q->byte] : 0;
direction = (1 + (q->otherbits | c)) >> 8;
wherep = q->child + direction;
}
node->child[newdirection] = *wherep;
*wherep = (struct cb_node *)(1 + (uintptr_t)node);
return NULL; /* success */
}
struct cb_node *cb_lookup(struct cb_tree *t, const uint8_t *k, size_t klen)
{
struct cb_node *p = cb_internal_best_match(t->root, k, klen);
return p && !memcmp(p->k, k, klen) ? p : NULL;
}
struct cb_node *cb_unlink(struct cb_tree *t, const uint8_t *k, size_t klen)
{
struct cb_node **wherep = &t->root;
struct cb_node **whereq = NULL;
struct cb_node *q = NULL;
size_t direction = 0;
uint8_t c;
struct cb_node *p = t->root;
if (!p) return NULL; /* empty tree, nothing to delete */
/* traverse to find best match, keeping link to parent */
while (1 & (uintptr_t)p) {
whereq = wherep;
q = cb_node_of(p);
c = q->byte < klen ? k[q->byte] : 0;
direction = (1 + (q->otherbits | c)) >> 8;
wherep = q->child + direction;
p = *wherep;
}
if (memcmp(p->k, k, klen))
return NULL; /* no match, nothing unlinked */
/* found an exact match */
if (whereq) /* update parent */
*whereq = q->child[1 - direction];
else
t->root = NULL;
return p;
}
static enum cb_next cb_descend(struct cb_node *p, cb_iter fn, void *arg)
{
if (1 & (uintptr_t)p) {
struct cb_node *q = cb_node_of(p);
enum cb_next n = cb_descend(q->child[0], fn, arg);
return n == CB_BREAK ? n : cb_descend(q->child[1], fn, arg);
} else {
return fn(p, arg);
}
}
void cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen,
cb_iter fn, void *arg)
{
struct cb_node *p = t->root;
struct cb_node *top = p;
size_t i = 0;
if (!p) return; /* empty tree */
/* Walk tree, maintaining top pointer */
while (1 & (uintptr_t)p) {
struct cb_node *q = cb_node_of(p);
uint8_t c = q->byte < klen ? kpfx[q->byte] : 0;
size_t direction = (1 + (q->otherbits | c)) >> 8;
p = q->child[direction];
if (q->byte < klen)
top = p;
}
for (i = 0; i < klen; i++) {
if (p->k[i] != kpfx[i])
return; /* "best" match failed */
}
cb_descend(top, fn, arg);
}

56
cbtree.h Normal file
Просмотреть файл

@ -0,0 +1,56 @@
/*
* crit-bit tree implementation, does no allocations internally
* For more information on crit-bit trees: https://cr.yp.to/critbit.html
* Based on Adam Langley's adaptation of Dan Bernstein's public domain code
* git clone https://github.com/agl/critbit.git
*
* This is adapted to store arbitrary data (not just NUL-terminated C strings
* and allocates no memory internally. The user needs to allocate
* "struct cb_node" and fill cb_node.k[] with arbitrary match data
* for memcmp.
* If "klen" is variable, then it should be embedded into "c_node.k[]"
* Recursion is bound by the maximum value of "klen" used.
*/
#ifndef CBTREE_H
#define CBTREE_H
#include "git-compat-util.h"
struct cb_node;
struct cb_node {
struct cb_node *child[2];
/*
* n.b. uint32_t for `byte' is excessive for OIDs,
* we may consider shorter variants if nothing else gets stored.
*/
uint32_t byte;
uint8_t otherbits;
uint8_t k[FLEX_ARRAY]; /* arbitrary data */
};
struct cb_tree {
struct cb_node *root;
};
enum cb_next {
CB_CONTINUE = 0,
CB_BREAK = 1
};
#define CBTREE_INIT { .root = NULL }
static inline void cb_init(struct cb_tree *t)
{
t->root = NULL;
}
struct cb_node *cb_lookup(struct cb_tree *, const uint8_t *k, size_t klen);
struct cb_node *cb_insert(struct cb_tree *, struct cb_node *, size_t klen);
struct cb_node *cb_unlink(struct cb_tree *t, const uint8_t *k, size_t klen);
typedef enum cb_next (*cb_iter)(struct cb_node *, void *arg);
void cb_each(struct cb_tree *, const uint8_t *kpfx, size_t klen,
cb_iter, void *arg);
#endif /* CBTREE_H */

10
dir.c
Просмотреть файл

@ -78,11 +78,21 @@ int fspathcmp(const char *a, const char *b)
return ignore_case ? strcasecmp(a, b) : strcmp(a, b); return ignore_case ? strcasecmp(a, b) : strcmp(a, b);
} }
int fspatheq(const char *a, const char *b)
{
return !fspathcmp(a, b);
}
int fspathncmp(const char *a, const char *b, size_t count) int fspathncmp(const char *a, const char *b, size_t count)
{ {
return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count); return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
} }
unsigned int fspathhash(const char *str)
{
return ignore_case ? strihash(str) : strhash(str);
}
int git_fnmatch(const struct pathspec_item *item, int git_fnmatch(const struct pathspec_item *item,
const char *pattern, const char *string, const char *pattern, const char *string,
int prefix) int prefix)

2
dir.h
Просмотреть файл

@ -489,7 +489,9 @@ int remove_dir_recursively(struct strbuf *path, int flag);
int remove_path(const char *path); int remove_path(const char *path);
int fspathcmp(const char *a, const char *b); int fspathcmp(const char *a, const char *b);
int fspatheq(const char *a, const char *b);
int fspathncmp(const char *a, const char *b, size_t count); int fspathncmp(const char *a, const char *b, size_t count);
unsigned int fspathhash(const char *str);
/* /*
* The prefix part of pattern must not contains wildcards. * The prefix part of pattern must not contains wildcards.

2
hash.h
Просмотреть файл

@ -265,7 +265,7 @@ static inline void oidcpy(struct object_id *dst, const struct object_id *src)
/* Like oidcpy() but zero-pads the unused bytes in dst's hash array. */ /* Like oidcpy() but zero-pads the unused bytes in dst's hash array. */
static inline void oidcpy_with_padding(struct object_id *dst, static inline void oidcpy_with_padding(struct object_id *dst,
struct object_id *src) const struct object_id *src)
{ {
size_t hashsz; size_t hashsz;

Просмотреть файл

@ -517,9 +517,9 @@ const char *loose_object_path(struct repository *r, struct strbuf *buf,
*/ */
static int alt_odb_usable(struct raw_object_store *o, static int alt_odb_usable(struct raw_object_store *o,
struct strbuf *path, struct strbuf *path,
const char *normalized_objdir) const char *normalized_objdir, khiter_t *pos)
{ {
struct object_directory *odb; int r;
/* Detect cases where alternate disappeared */ /* Detect cases where alternate disappeared */
if (!is_directory(path->buf)) { if (!is_directory(path->buf)) {
@ -533,14 +533,20 @@ static int alt_odb_usable(struct raw_object_store *o,
* Prevent the common mistake of listing the same * Prevent the common mistake of listing the same
* thing twice, or object directory itself. * thing twice, or object directory itself.
*/ */
for (odb = o->odb; odb; odb = odb->next) { if (!o->odb_by_path) {
if (!fspathcmp(path->buf, odb->path)) khiter_t p;
return 0;
}
if (!fspathcmp(path->buf, normalized_objdir))
return 0;
return 1; o->odb_by_path = kh_init_odb_path_map();
assert(!o->odb->next);
p = kh_put_odb_path_map(o->odb_by_path, o->odb->path, &r);
assert(r == 1); /* never used */
kh_value(o->odb_by_path, p) = o->odb;
}
if (fspatheq(path->buf, normalized_objdir))
return 0;
*pos = kh_put_odb_path_map(o->odb_by_path, path->buf, &r);
/* r: 0 = exists, 1 = never used, 2 = deleted */
return r == 0 ? 0 : 1;
} }
/* /*
@ -561,17 +567,18 @@ static int alt_odb_usable(struct raw_object_store *o,
static void read_info_alternates(struct repository *r, static void read_info_alternates(struct repository *r,
const char *relative_base, const char *relative_base,
int depth); int depth);
static int link_alt_odb_entry(struct repository *r, const char *entry, static int link_alt_odb_entry(struct repository *r, const struct strbuf *entry,
const char *relative_base, int depth, const char *normalized_objdir) const char *relative_base, int depth, const char *normalized_objdir)
{ {
struct object_directory *ent; struct object_directory *ent;
struct strbuf pathbuf = STRBUF_INIT; struct strbuf pathbuf = STRBUF_INIT;
khiter_t pos;
if (!is_absolute_path(entry) && relative_base) { if (!is_absolute_path(entry->buf) && relative_base) {
strbuf_realpath(&pathbuf, relative_base, 1); strbuf_realpath(&pathbuf, relative_base, 1);
strbuf_addch(&pathbuf, '/'); strbuf_addch(&pathbuf, '/');
} }
strbuf_addstr(&pathbuf, entry); strbuf_addbuf(&pathbuf, entry);
if (strbuf_normalize_path(&pathbuf) < 0 && relative_base) { if (strbuf_normalize_path(&pathbuf) < 0 && relative_base) {
error(_("unable to normalize alternate object path: %s"), error(_("unable to normalize alternate object path: %s"),
@ -587,23 +594,25 @@ static int link_alt_odb_entry(struct repository *r, const char *entry,
while (pathbuf.len && pathbuf.buf[pathbuf.len - 1] == '/') while (pathbuf.len && pathbuf.buf[pathbuf.len - 1] == '/')
strbuf_setlen(&pathbuf, pathbuf.len - 1); strbuf_setlen(&pathbuf, pathbuf.len - 1);
if (!alt_odb_usable(r->objects, &pathbuf, normalized_objdir)) { if (!alt_odb_usable(r->objects, &pathbuf, normalized_objdir, &pos)) {
strbuf_release(&pathbuf); strbuf_release(&pathbuf);
return -1; return -1;
} }
CALLOC_ARRAY(ent, 1); CALLOC_ARRAY(ent, 1);
ent->path = xstrdup(pathbuf.buf); /* pathbuf.buf is already in r->objects->odb_by_path */
ent->path = strbuf_detach(&pathbuf, NULL);
/* add the alternate entry */ /* add the alternate entry */
*r->objects->odb_tail = ent; *r->objects->odb_tail = ent;
r->objects->odb_tail = &(ent->next); r->objects->odb_tail = &(ent->next);
ent->next = NULL; ent->next = NULL;
assert(r->objects->odb_by_path);
kh_value(r->objects->odb_by_path, pos) = ent;
/* recursively add alternates */ /* recursively add alternates */
read_info_alternates(r, pathbuf.buf, depth + 1); read_info_alternates(r, ent->path, depth + 1);
strbuf_release(&pathbuf);
return 0; return 0;
} }
@ -660,7 +669,7 @@ static void link_alt_odb_entries(struct repository *r, const char *alt,
alt = parse_alt_odb_entry(alt, sep, &entry); alt = parse_alt_odb_entry(alt, sep, &entry);
if (!entry.len) if (!entry.len)
continue; continue;
link_alt_odb_entry(r, entry.buf, link_alt_odb_entry(r, &entry,
relative_base, depth, objdirbuf.buf); relative_base, depth, objdirbuf.buf);
} }
strbuf_release(&entry); strbuf_release(&entry);
@ -1178,7 +1187,7 @@ static int quick_has_loose(struct repository *r,
prepare_alt_odb(r); prepare_alt_odb(r);
for (odb = r->objects->odb; odb; odb = odb->next) { for (odb = r->objects->odb; odb; odb = odb->next) {
if (oid_array_lookup(odb_loose_cache(odb, oid), oid) >= 0) if (oidtree_contains(odb_loose_cache(odb, oid), oid))
return 1; return 1;
} }
return 0; return 0;
@ -2454,39 +2463,45 @@ int for_each_loose_object(each_loose_object_fn cb, void *data,
static int append_loose_object(const struct object_id *oid, const char *path, static int append_loose_object(const struct object_id *oid, const char *path,
void *data) void *data)
{ {
oid_array_append(data, oid); oidtree_insert(data, oid);
return 0; return 0;
} }
struct oid_array *odb_loose_cache(struct object_directory *odb, struct oidtree *odb_loose_cache(struct object_directory *odb,
const struct object_id *oid) const struct object_id *oid)
{ {
int subdir_nr = oid->hash[0]; int subdir_nr = oid->hash[0];
struct strbuf buf = STRBUF_INIT; struct strbuf buf = STRBUF_INIT;
size_t word_bits = bitsizeof(odb->loose_objects_subdir_seen[0]);
size_t word_index = subdir_nr / word_bits;
size_t mask = 1 << (subdir_nr % word_bits);
uint32_t *bitmap;
if (subdir_nr < 0 || if (subdir_nr < 0 ||
subdir_nr >= ARRAY_SIZE(odb->loose_objects_subdir_seen)) subdir_nr >= bitsizeof(odb->loose_objects_subdir_seen))
BUG("subdir_nr out of range"); BUG("subdir_nr out of range");
if (odb->loose_objects_subdir_seen[subdir_nr]) bitmap = &odb->loose_objects_subdir_seen[word_index];
return &odb->loose_objects_cache[subdir_nr]; if (*bitmap & mask)
return odb->loose_objects_cache;
if (!odb->loose_objects_cache) {
ALLOC_ARRAY(odb->loose_objects_cache, 1);
oidtree_init(odb->loose_objects_cache);
}
strbuf_addstr(&buf, odb->path); strbuf_addstr(&buf, odb->path);
for_each_file_in_obj_subdir(subdir_nr, &buf, for_each_file_in_obj_subdir(subdir_nr, &buf,
append_loose_object, append_loose_object,
NULL, NULL, NULL, NULL,
&odb->loose_objects_cache[subdir_nr]); odb->loose_objects_cache);
odb->loose_objects_subdir_seen[subdir_nr] = 1; *bitmap |= mask;
strbuf_release(&buf); strbuf_release(&buf);
return &odb->loose_objects_cache[subdir_nr]; return odb->loose_objects_cache;
} }
void odb_clear_loose_cache(struct object_directory *odb) void odb_clear_loose_cache(struct object_directory *odb)
{ {
int i; oidtree_clear(odb->loose_objects_cache);
FREE_AND_NULL(odb->loose_objects_cache);
for (i = 0; i < ARRAY_SIZE(odb->loose_objects_cache); i++)
oid_array_clear(&odb->loose_objects_cache[i]);
memset(&odb->loose_objects_subdir_seen, 0, memset(&odb->loose_objects_subdir_seen, 0,
sizeof(odb->loose_objects_subdir_seen)); sizeof(odb->loose_objects_subdir_seen));
} }

Просмотреть файл

@ -87,27 +87,21 @@ static void update_candidates(struct disambiguate_state *ds, const struct object
static int match_hash(unsigned, const unsigned char *, const unsigned char *); static int match_hash(unsigned, const unsigned char *, const unsigned char *);
static enum cb_next match_prefix(const struct object_id *oid, void *arg)
{
struct disambiguate_state *ds = arg;
/* no need to call match_hash, oidtree_each did prefix match */
update_candidates(ds, oid);
return ds->ambiguous ? CB_BREAK : CB_CONTINUE;
}
static void find_short_object_filename(struct disambiguate_state *ds) static void find_short_object_filename(struct disambiguate_state *ds)
{ {
struct object_directory *odb; struct object_directory *odb;
for (odb = ds->repo->objects->odb; odb && !ds->ambiguous; odb = odb->next) { for (odb = ds->repo->objects->odb; odb && !ds->ambiguous; odb = odb->next)
int pos; oidtree_each(odb_loose_cache(odb, &ds->bin_pfx),
struct oid_array *loose_objects; &ds->bin_pfx, ds->len, match_prefix, ds);
loose_objects = odb_loose_cache(odb, &ds->bin_pfx);
pos = oid_array_lookup(loose_objects, &ds->bin_pfx);
if (pos < 0)
pos = -1 - pos;
while (!ds->ambiguous && pos < loose_objects->nr) {
const struct object_id *oid;
oid = loose_objects->oid + pos;
if (!match_hash(ds->len, ds->bin_pfx.hash, oid->hash))
break;
update_candidates(ds, oid);
pos++;
}
}
} }
static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b) static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b)

Просмотреть файл

@ -7,6 +7,9 @@
#include "oid-array.h" #include "oid-array.h"
#include "strbuf.h" #include "strbuf.h"
#include "thread-utils.h" #include "thread-utils.h"
#include "khash.h"
#include "dir.h"
#include "oidtree.h"
struct object_directory { struct object_directory {
struct object_directory *next; struct object_directory *next;
@ -20,8 +23,8 @@ struct object_directory {
* *
* Be sure to call odb_load_loose_cache() before using. * Be sure to call odb_load_loose_cache() before using.
*/ */
char loose_objects_subdir_seen[256]; uint32_t loose_objects_subdir_seen[8]; /* 256 bits */
struct oid_array loose_objects_cache[256]; struct oidtree *loose_objects_cache;
/* /*
* Path to the alternative object store. If this is a relative path, * Path to the alternative object store. If this is a relative path,
@ -30,6 +33,9 @@ struct object_directory {
char *path; char *path;
}; };
KHASH_INIT(odb_path_map, const char * /* key: odb_path */,
struct object_directory *, 1, fspathhash, fspatheq);
void prepare_alt_odb(struct repository *r); void prepare_alt_odb(struct repository *r);
char *compute_alternate_path(const char *path, struct strbuf *err); char *compute_alternate_path(const char *path, struct strbuf *err);
typedef int alt_odb_fn(struct object_directory *, void *); typedef int alt_odb_fn(struct object_directory *, void *);
@ -54,7 +60,7 @@ void add_to_alternates_memory(const char *dir);
* Populate and return the loose object cache array corresponding to the * Populate and return the loose object cache array corresponding to the
* given object ID. * given object ID.
*/ */
struct oid_array *odb_loose_cache(struct object_directory *odb, struct oidtree *odb_loose_cache(struct object_directory *odb,
const struct object_id *oid); const struct object_id *oid);
/* Empty the loose object cache for the specified object directory. */ /* Empty the loose object cache for the specified object directory. */
@ -116,6 +122,8 @@ struct raw_object_store {
*/ */
struct object_directory *odb; struct object_directory *odb;
struct object_directory **odb_tail; struct object_directory **odb_tail;
kh_odb_path_map_t *odb_by_path;
int loaded_alternates; int loaded_alternates;
/* /*

Просмотреть файл

@ -511,6 +511,8 @@ static void free_object_directories(struct raw_object_store *o)
free_object_directory(o->odb); free_object_directory(o->odb);
o->odb = next; o->odb = next;
} }
kh_destroy_odb_path_map(o->odb_by_path);
o->odb_by_path = NULL;
} }
void raw_object_store_clear(struct raw_object_store *o) void raw_object_store_clear(struct raw_object_store *o)

104
oidtree.c Normal file
Просмотреть файл

@ -0,0 +1,104 @@
/*
* A wrapper around cbtree which stores oids
* May be used to replace oid-array for prefix (abbreviation) matches
*/
#include "oidtree.h"
#include "alloc.h"
#include "hash.h"
struct oidtree_node {
/* n.k[] is used to store "struct object_id" */
struct cb_node n;
};
struct oidtree_iter_data {
oidtree_iter fn;
void *arg;
size_t *last_nibble_at;
int algo;
uint8_t last_byte;
};
void oidtree_init(struct oidtree *ot)
{
cb_init(&ot->tree);
mem_pool_init(&ot->mem_pool, 0);
}
void oidtree_clear(struct oidtree *ot)
{
if (ot) {
mem_pool_discard(&ot->mem_pool, 0);
oidtree_init(ot);
}
}
void oidtree_insert(struct oidtree *ot, const struct object_id *oid)
{
struct oidtree_node *on;
if (!oid->algo)
BUG("oidtree_insert requires oid->algo");
on = mem_pool_alloc(&ot->mem_pool, sizeof(*on) + sizeof(*oid));
oidcpy_with_padding((struct object_id *)on->n.k, oid);
/*
* n.b. Current callers won't get us duplicates, here. If a
* future caller causes duplicates, there'll be a a small leak
* that won't be freed until oidtree_clear. Currently it's not
* worth maintaining a free list
*/
cb_insert(&ot->tree, &on->n, sizeof(*oid));
}
int oidtree_contains(struct oidtree *ot, const struct object_id *oid)
{
struct object_id k;
size_t klen = sizeof(k);
oidcpy_with_padding(&k, oid);
if (oid->algo == GIT_HASH_UNKNOWN)
klen -= sizeof(oid->algo);
/* cb_lookup relies on memcmp on the struct, so order matters: */
klen += BUILD_ASSERT_OR_ZERO(offsetof(struct object_id, hash) <
offsetof(struct object_id, algo));
return cb_lookup(&ot->tree, (const uint8_t *)&k, klen) ? 1 : 0;
}
static enum cb_next iter(struct cb_node *n, void *arg)
{
struct oidtree_iter_data *x = arg;
const struct object_id *oid = (const struct object_id *)n->k;
if (x->algo != GIT_HASH_UNKNOWN && x->algo != oid->algo)
return CB_CONTINUE;
if (x->last_nibble_at) {
if ((oid->hash[*x->last_nibble_at] ^ x->last_byte) & 0xf0)
return CB_CONTINUE;
}
return x->fn(oid, x->arg);
}
void oidtree_each(struct oidtree *ot, const struct object_id *oid,
size_t oidhexsz, oidtree_iter fn, void *arg)
{
size_t klen = oidhexsz / 2;
struct oidtree_iter_data x = { 0 };
assert(oidhexsz <= GIT_MAX_HEXSZ);
x.fn = fn;
x.arg = arg;
x.algo = oid->algo;
if (oidhexsz & 1) {
x.last_byte = oid->hash[klen];
x.last_nibble_at = &klen;
}
cb_each(&ot->tree, (const uint8_t *)oid, klen, iter, &x);
}

22
oidtree.h Normal file
Просмотреть файл

@ -0,0 +1,22 @@
#ifndef OIDTREE_H
#define OIDTREE_H
#include "cbtree.h"
#include "hash.h"
#include "mem-pool.h"
struct oidtree {
struct cb_tree tree;
struct mem_pool mem_pool;
};
void oidtree_init(struct oidtree *);
void oidtree_clear(struct oidtree *);
void oidtree_insert(struct oidtree *, const struct object_id *);
int oidtree_contains(struct oidtree *, const struct object_id *);
typedef enum cb_next (*oidtree_iter)(const struct object_id *, void *data);
void oidtree_each(struct oidtree *, const struct object_id *,
size_t oidhexsz, oidtree_iter, void *data);
#endif /* OIDTREE_H */

49
t/helper/test-oidtree.c Normal file
Просмотреть файл

@ -0,0 +1,49 @@
#include "test-tool.h"
#include "cache.h"
#include "oidtree.h"
static enum cb_next print_oid(const struct object_id *oid, void *data)
{
puts(oid_to_hex(oid));
return CB_CONTINUE;
}
int cmd__oidtree(int argc, const char **argv)
{
struct oidtree ot;
struct strbuf line = STRBUF_INIT;
int nongit_ok;
int algo = GIT_HASH_UNKNOWN;
oidtree_init(&ot);
setup_git_directory_gently(&nongit_ok);
while (strbuf_getline(&line, stdin) != EOF) {
const char *arg;
struct object_id oid;
if (skip_prefix(line.buf, "insert ", &arg)) {
if (get_oid_hex_any(arg, &oid) == GIT_HASH_UNKNOWN)
die("insert not a hexadecimal oid: %s", arg);
algo = oid.algo;
oidtree_insert(&ot, &oid);
} else if (skip_prefix(line.buf, "contains ", &arg)) {
if (get_oid_hex(arg, &oid))
die("contains not a hexadecimal oid: %s", arg);
printf("%d\n", oidtree_contains(&ot, &oid));
} else if (skip_prefix(line.buf, "each ", &arg)) {
char buf[GIT_MAX_HEXSZ + 1] = { '0' };
memset(&oid, 0, sizeof(oid));
memcpy(buf, arg, strlen(arg));
buf[hash_algos[algo].hexsz] = '\0';
get_oid_hex_any(buf, &oid);
oid.algo = algo;
oidtree_each(&ot, &oid, strlen(arg), print_oid, NULL);
} else if (!strcmp(line.buf, "clear")) {
oidtree_clear(&ot);
} else {
die("unknown command: %s", line.buf);
}
}
return 0;
}

Просмотреть файл

@ -43,6 +43,7 @@ static struct test_cmd cmds[] = {
{ "mktemp", cmd__mktemp }, { "mktemp", cmd__mktemp },
{ "oid-array", cmd__oid_array }, { "oid-array", cmd__oid_array },
{ "oidmap", cmd__oidmap }, { "oidmap", cmd__oidmap },
{ "oidtree", cmd__oidtree },
{ "online-cpus", cmd__online_cpus }, { "online-cpus", cmd__online_cpus },
{ "parse-options", cmd__parse_options }, { "parse-options", cmd__parse_options },
{ "parse-pathspec-file", cmd__parse_pathspec_file }, { "parse-pathspec-file", cmd__parse_pathspec_file },

Просмотреть файл

@ -32,6 +32,7 @@ int cmd__match_trees(int argc, const char **argv);
int cmd__mergesort(int argc, const char **argv); int cmd__mergesort(int argc, const char **argv);
int cmd__mktemp(int argc, const char **argv); int cmd__mktemp(int argc, const char **argv);
int cmd__oidmap(int argc, const char **argv); int cmd__oidmap(int argc, const char **argv);
int cmd__oidtree(int argc, const char **argv);
int cmd__online_cpus(int argc, const char **argv); int cmd__online_cpus(int argc, const char **argv);
int cmd__parse_options(int argc, const char **argv); int cmd__parse_options(int argc, const char **argv);
int cmd__parse_pathspec_file(int argc, const char** argv); int cmd__parse_pathspec_file(int argc, const char** argv);

49
t/t0069-oidtree.sh Executable file
Просмотреть файл

@ -0,0 +1,49 @@
#!/bin/sh
test_description='basic tests for the oidtree implementation'
. ./test-lib.sh
maxhexsz=$(test_oid hexsz)
echoid () {
prefix="${1:+$1 }"
shift
while test $# -gt 0
do
shortoid="$1"
shift
difference=$(($maxhexsz - ${#shortoid}))
printf "%s%s%0${difference}d\\n" "$prefix" "$shortoid" "0"
done
}
test_expect_success 'oidtree insert and contains' '
cat >expect <<-\EOF &&
0
0
0
1
1
0
EOF
{
echoid insert 444 1 2 3 4 5 a b c d e &&
echoid contains 44 441 440 444 4440 4444
echo clear
} | test-tool oidtree >actual &&
test_cmp expect actual
'
test_expect_success 'oidtree each' '
echoid "" 123 321 321 >expect &&
{
echoid insert f 9 8 123 321 a b c d e
echo each 12300
echo each 3211
echo each 3210
echo each 32100
echo clear
} | test-tool oidtree >actual &&
test_cmp expect actual
'
test_done