зеркало из https://github.com/microsoft/git.git
add oidset API
This is similar to many of our uses of sha1-array, but it overcomes one limitation of a sha1-array: when you are de-duplicating a large input with relatively few unique entries, sha1-array uses 20 bytes per non-unique entry, whereas this set uses memory linear in the number of unique entries (albeit a few more than 20 bytes per entry due to hashmap overhead). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Родитель
41a078c60b
Коммит
29c2bd5fa8
1
Makefile
1
Makefile
|
@ -781,6 +781,7 @@ LIB_OBJS += notes-cache.o
|
|||
LIB_OBJS += notes-merge.o
|
||||
LIB_OBJS += notes-utils.o
|
||||
LIB_OBJS += object.o
|
||||
LIB_OBJS += oidset.o
|
||||
LIB_OBJS += pack-bitmap.o
|
||||
LIB_OBJS += pack-bitmap-write.o
|
||||
LIB_OBJS += pack-check.o
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
#include "cache.h"
|
||||
#include "oidset.h"
|
||||
|
||||
struct oidset_entry {
|
||||
struct hashmap_entry hash;
|
||||
struct object_id oid;
|
||||
};
|
||||
|
||||
static int oidset_hashcmp(const void *va, const void *vb,
|
||||
const void *vkey)
|
||||
{
|
||||
const struct oidset_entry *a = va, *b = vb;
|
||||
const struct object_id *key = vkey;
|
||||
return oidcmp(&a->oid, key ? key : &b->oid);
|
||||
}
|
||||
|
||||
int oidset_contains(const struct oidset *set, const struct object_id *oid)
|
||||
{
|
||||
struct hashmap_entry key;
|
||||
|
||||
if (!set->map.cmpfn)
|
||||
return 0;
|
||||
|
||||
hashmap_entry_init(&key, sha1hash(oid->hash));
|
||||
return !!hashmap_get(&set->map, &key, oid);
|
||||
}
|
||||
|
||||
int oidset_insert(struct oidset *set, const struct object_id *oid)
|
||||
{
|
||||
struct oidset_entry *entry;
|
||||
|
||||
if (!set->map.cmpfn)
|
||||
hashmap_init(&set->map, oidset_hashcmp, 0);
|
||||
|
||||
if (oidset_contains(set, oid))
|
||||
return 1;
|
||||
|
||||
entry = xmalloc(sizeof(*entry));
|
||||
hashmap_entry_init(&entry->hash, sha1hash(oid->hash));
|
||||
oidcpy(&entry->oid, oid);
|
||||
|
||||
hashmap_add(&set->map, entry);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void oidset_clear(struct oidset *set)
|
||||
{
|
||||
hashmap_free(&set->map, 1);
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
#ifndef OIDSET_H
|
||||
#define OIDSET_H
|
||||
|
||||
/**
|
||||
* This API is similar to sha1-array, in that it maintains a set of object ids
|
||||
* in a memory-efficient way. The major differences are:
|
||||
*
|
||||
* 1. It uses a hash, so we can do online duplicate removal, rather than
|
||||
* sort-and-uniq at the end. This can reduce memory footprint if you have
|
||||
* a large list of oids with many duplicates.
|
||||
*
|
||||
* 2. The per-unique-oid memory footprint is slightly higher due to hash
|
||||
* table overhead.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A single oidset; should be zero-initialized (or use OIDSET_INIT).
|
||||
*/
|
||||
struct oidset {
|
||||
struct hashmap map;
|
||||
};
|
||||
|
||||
#define OIDSET_INIT { { NULL } }
|
||||
|
||||
/**
|
||||
* Returns true iff `set` contains `oid`.
|
||||
*/
|
||||
int oidset_contains(const struct oidset *set, const struct object_id *oid);
|
||||
|
||||
/**
|
||||
* Insert the oid into the set; a copy is made, so "oid" does not need
|
||||
* to persist after this function is called.
|
||||
*
|
||||
* Returns 1 if the oid was already in the set, 0 otherwise. This can be used
|
||||
* to perform an efficient check-and-add.
|
||||
*/
|
||||
int oidset_insert(struct oidset *set, const struct object_id *oid);
|
||||
|
||||
/**
|
||||
* Remove all entries from the oidset, freeing any resources associated with
|
||||
* it.
|
||||
*/
|
||||
void oidset_clear(struct oidset *set);
|
||||
|
||||
#endif /* OIDSET_H */
|
Загрузка…
Ссылка в новой задаче