зеркало из https://github.com/microsoft/git.git
Add string-specific memory pool
Intern strings so they can be compared by address and stored without wasting space. This library uses the macros in the obj_pool.h and trp.h to create a memory pool for strings and expose an API for handling them. [rr: added API docs] [jn: with some API simplifications, new documentation and tests] Signed-off-by: David Barr <david.barr@cordelta.com> Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Родитель
951f316470
Коммит
1d73b52f5b
|
@ -173,6 +173,7 @@
|
|||
/test-run-command
|
||||
/test-sha1
|
||||
/test-sigchain
|
||||
/test-string-pool
|
||||
/test-treap
|
||||
/common-cmds.h
|
||||
*.tar.gz
|
||||
|
|
9
Makefile
9
Makefile
|
@ -415,6 +415,7 @@ TEST_PROGRAMS_NEED_X += test-path-utils
|
|||
TEST_PROGRAMS_NEED_X += test-run-command
|
||||
TEST_PROGRAMS_NEED_X += test-sha1
|
||||
TEST_PROGRAMS_NEED_X += test-sigchain
|
||||
TEST_PROGRAMS_NEED_X += test-string-pool
|
||||
TEST_PROGRAMS_NEED_X += test-treap
|
||||
TEST_PROGRAMS_NEED_X += test-index-version
|
||||
|
||||
|
@ -1742,7 +1743,7 @@ ifndef NO_CURL
|
|||
endif
|
||||
XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
|
||||
xdiff/xmerge.o xdiff/xpatience.o
|
||||
VCSSVN_OBJS =
|
||||
VCSSVN_OBJS = vcs-svn/string_pool.o
|
||||
OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS)
|
||||
|
||||
dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d)
|
||||
|
@ -1867,7 +1868,7 @@ xdiff-interface.o $(XDIFF_OBJS): \
|
|||
xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
|
||||
|
||||
$(VCSSVN_OBJS): \
|
||||
vcs-svn/obj_pool.h vcs-svn/trp.h
|
||||
vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h
|
||||
endif
|
||||
|
||||
exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \
|
||||
|
@ -2018,10 +2019,12 @@ test-delta$X: diff-delta.o patch-delta.o
|
|||
|
||||
test-parse-options$X: parse-options.o
|
||||
|
||||
test-string-pool$X: vcs-svn/lib.a
|
||||
|
||||
.PRECIOUS: $(TEST_OBJS)
|
||||
|
||||
test-%$X: test-%.o $(GITLIBS)
|
||||
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
|
||||
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(filter %.a,$^) $(LIBS)
|
||||
|
||||
check-sha1:: test-sha1$X
|
||||
./test-sha1.sh
|
||||
|
|
|
@ -76,6 +76,22 @@ test_expect_success 'obj pool: high-water mark' '
|
|||
test_cmp expected actual
|
||||
'
|
||||
|
||||
test_expect_success 'string pool' '
|
||||
echo a does not equal b >expected.differ &&
|
||||
echo a equals a >expected.match &&
|
||||
echo equals equals equals >expected.matchmore &&
|
||||
|
||||
test-string-pool "a,--b" >actual.differ &&
|
||||
test-string-pool "a,a" >actual.match &&
|
||||
test-string-pool "equals-equals" >actual.matchmore &&
|
||||
test_must_fail test-string-pool a,a,a &&
|
||||
test_must_fail test-string-pool a &&
|
||||
|
||||
test_cmp expected.differ actual.differ &&
|
||||
test_cmp expected.match actual.match &&
|
||||
test_cmp expected.matchmore actual.matchmore
|
||||
'
|
||||
|
||||
test_expect_success 'treap sort' '
|
||||
cat <<-\EOF >unsorted &&
|
||||
68
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
/*
|
||||
* test-string-pool.c: code to exercise the svn importer's string pool
|
||||
*/
|
||||
|
||||
#include "git-compat-util.h"
|
||||
#include "vcs-svn/string_pool.h"
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
const uint32_t unequal = pool_intern("does not equal");
|
||||
const uint32_t equal = pool_intern("equals");
|
||||
uint32_t buf[3];
|
||||
uint32_t n;
|
||||
|
||||
if (argc != 2)
|
||||
usage("test-string-pool <string>,<string>");
|
||||
|
||||
n = pool_tok_seq(3, buf, ",-", argv[1]);
|
||||
if (n >= 3)
|
||||
die("too many strings");
|
||||
if (n <= 1)
|
||||
die("too few strings");
|
||||
|
||||
buf[2] = buf[1];
|
||||
buf[1] = (buf[0] == buf[2]) ? equal : unequal;
|
||||
pool_print_seq(3, buf, ' ', stdout);
|
||||
fputc('\n', stdout);
|
||||
|
||||
pool_reset();
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,102 @@
|
|||
/*
|
||||
* Licensed under a two-clause BSD-style license.
|
||||
* See LICENSE for details.
|
||||
*/
|
||||
|
||||
#include "git-compat-util.h"
|
||||
#include "trp.h"
|
||||
#include "obj_pool.h"
|
||||
#include "string_pool.h"
|
||||
|
||||
static struct trp_root tree = { ~0 };
|
||||
|
||||
struct node {
|
||||
uint32_t offset;
|
||||
struct trp_node children;
|
||||
};
|
||||
|
||||
/* Two memory pools: one for struct node, and another for strings */
|
||||
obj_pool_gen(node, struct node, 4096)
|
||||
obj_pool_gen(string, char, 4096)
|
||||
|
||||
static char *node_value(struct node *node)
|
||||
{
|
||||
return node ? string_pointer(node->offset) : NULL;
|
||||
}
|
||||
|
||||
static int node_cmp(struct node *a, struct node *b)
|
||||
{
|
||||
return strcmp(node_value(a), node_value(b));
|
||||
}
|
||||
|
||||
/* Build a Treap from the node structure (a trp_node w/ offset) */
|
||||
trp_gen(static, tree_, struct node, children, node, node_cmp);
|
||||
|
||||
const char *pool_fetch(uint32_t entry)
|
||||
{
|
||||
return node_value(node_pointer(entry));
|
||||
}
|
||||
|
||||
uint32_t pool_intern(const char *key)
|
||||
{
|
||||
/* Canonicalize key */
|
||||
struct node *match = NULL, *node;
|
||||
uint32_t key_len;
|
||||
if (key == NULL)
|
||||
return ~0;
|
||||
key_len = strlen(key) + 1;
|
||||
node = node_pointer(node_alloc(1));
|
||||
node->offset = string_alloc(key_len);
|
||||
strcpy(node_value(node), key);
|
||||
match = tree_search(&tree, node);
|
||||
if (!match) {
|
||||
tree_insert(&tree, node);
|
||||
} else {
|
||||
node_free(1);
|
||||
string_free(key_len);
|
||||
node = match;
|
||||
}
|
||||
return node_offset(node);
|
||||
}
|
||||
|
||||
uint32_t pool_tok_r(char *str, const char *delim, char **saveptr)
|
||||
{
|
||||
char *token = strtok_r(str, delim, saveptr);
|
||||
return token ? pool_intern(token) : ~0;
|
||||
}
|
||||
|
||||
void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream)
|
||||
{
|
||||
uint32_t i;
|
||||
for (i = 0; i < len && ~seq[i]; i++) {
|
||||
fputs(pool_fetch(seq[i]), stream);
|
||||
if (i < len - 1 && ~seq[i + 1])
|
||||
fputc(delim, stream);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str)
|
||||
{
|
||||
char *context = NULL;
|
||||
uint32_t token = ~0;
|
||||
uint32_t length;
|
||||
|
||||
if (sz == 0)
|
||||
return ~0;
|
||||
if (str)
|
||||
token = pool_tok_r(str, delim, &context);
|
||||
for (length = 0; length < sz; length++) {
|
||||
seq[length] = token;
|
||||
if (token == ~0)
|
||||
return length;
|
||||
token = pool_tok_r(NULL, delim, &context);
|
||||
}
|
||||
seq[sz - 1] = ~0;
|
||||
return sz;
|
||||
}
|
||||
|
||||
void pool_reset(void)
|
||||
{
|
||||
node_reset();
|
||||
string_reset();
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
#ifndef STRING_POOL_H_
|
||||
#define STRING_POOL_H_
|
||||
|
||||
uint32_t pool_intern(const char *key);
|
||||
const char *pool_fetch(uint32_t entry);
|
||||
uint32_t pool_tok_r(char *str, const char *delim, char **saveptr);
|
||||
void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream);
|
||||
uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str);
|
||||
void pool_reset(void);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,43 @@
|
|||
string_pool API
|
||||
===============
|
||||
|
||||
The string_pool API provides facilities for replacing strings
|
||||
with integer keys that can be more easily compared and stored.
|
||||
The facilities are designed so that one could teach Git without
|
||||
too much trouble to store the information needed for these keys to
|
||||
remain valid over multiple executions.
|
||||
|
||||
Functions
|
||||
---------
|
||||
|
||||
pool_intern::
|
||||
Include a string in the string pool and get its key.
|
||||
If that string is already in the pool, retrieves its
|
||||
existing key.
|
||||
|
||||
pool_fetch::
|
||||
Retrieve the string associated to a given key.
|
||||
|
||||
pool_tok_r::
|
||||
Extract the key of the next token from a string.
|
||||
Interface mimics strtok_r.
|
||||
|
||||
pool_print_seq::
|
||||
Print a sequence of strings named by key to a file, using the
|
||||
specified delimiter to separate them.
|
||||
|
||||
If NULL (key ~0) appears in the sequence, the sequence ends
|
||||
early.
|
||||
|
||||
pool_tok_seq::
|
||||
Split a string into tokens, storing the keys of segments
|
||||
into a caller-provided array.
|
||||
|
||||
Unless sz is 0, the array will always be ~0-terminated.
|
||||
If there is not enough room for all the tokens, the
|
||||
array holds as many tokens as fit in the entries before
|
||||
the terminating ~0. Return value is the index after the
|
||||
last token, or sz if the tokens did not fit.
|
||||
|
||||
pool_reset::
|
||||
Deallocate storage for the string pool.
|
Загрузка…
Ссылка в новой задаче