tmp-objdir: introduce API for temporary object directories

Once objects are added to the object database by a process,
they cannot easily be deleted, as we don't know what other
processes may have started referencing them. We have to
clean them up with git-gc, which will apply the usual
reachability and grace-period checks.

This patch provides an alternative: it helps callers create
a temporary directory inside the object directory, and a
temporary environment which can be passed to sub-programs to
ask them to write there (the original object directory
remains accessible as an alternate of the temporary one).

See tmp-objdir.h for details on the API.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Jeff King 2016-10-03 16:49:11 -04:00 коммит произвёл Junio C Hamano
Родитель 526f108a27
Коммит 2564d994c9
3 изменённых файлов: 328 добавлений и 0 удалений

Просмотреть файл

@ -831,6 +831,7 @@ LIB_OBJS += submodule-config.o
LIB_OBJS += symlinks.o
LIB_OBJS += tag.o
LIB_OBJS += tempfile.o
LIB_OBJS += tmp-objdir.o
LIB_OBJS += trace.o
LIB_OBJS += trailer.o
LIB_OBJS += transport.o

273
tmp-objdir.c Normal file
Просмотреть файл

@ -0,0 +1,273 @@
#include "cache.h"
#include "tmp-objdir.h"
#include "dir.h"
#include "sigchain.h"
#include "string-list.h"
#include "strbuf.h"
#include "argv-array.h"
/*
* State kept for one temporary object directory.
*/
struct tmp_objdir {
/* Path of the temporary directory; filled in by tmp_objdir_create(). */
struct strbuf path;
/* "KEY=value" strings that tmp_objdir_env() hands out for child processes. */
struct argv_array env;
};
/*
* Allow only one tmp_objdir at a time in a running process, which simplifies
* our signal/atexit cleanup routines. It's doubtful callers will ever need
* more than one, and we can expand later if so. You can have many such
* tmp_objdirs simultaneously in many processes, of course.
*
* This points at the active tmp_objdir once tmp_objdir_create() has made
* its directory, and is reset to NULL when that tmp_objdir is destroyed.
*/
static struct tmp_objdir *the_tmp_objdir;
/*
 * Release the memory owned by "t" and then "t" itself. Nothing on disk is
 * touched.
 */
static void tmp_objdir_free(struct tmp_objdir *t)
{
	argv_array_clear(&t->env);
	strbuf_release(&t->path);
	free(t);
}
/*
 * Remove the temporary directory belonging to "t". A NULL "t" is accepted
 * and yields 0; otherwise the return value is whatever
 * remove_dir_recursively() reported.
 *
 * Pass a non-zero "on_signal" when called from a signal handler: the
 * in-memory structure is then deliberately left allocated.
 */
static int tmp_objdir_destroy_1(struct tmp_objdir *t, int on_signal)
{
	int result;

	if (!t)
		return 0;

	/* Drop the process-wide registration before touching the filesystem. */
	if (the_tmp_objdir == t)
		the_tmp_objdir = NULL;

	/*
	 * Removal may end up in malloc via strbuf_grow(), but t->path
	 * should have been pre-grown far enough that this does not
	 * happen in practice.
	 */
	result = remove_dir_recursively(&t->path, 0);

	/*
	 * Freeing memory from a signal handler can deadlock if the signal
	 * arrived while libc's allocator lock was held, so only free on
	 * the regular path.
	 */
	if (on_signal)
		return result;

	tmp_objdir_free(t);
	return result;
}
/*
* Public entry point for discarding a temporary object directory outside of
* signal context (on_signal is 0). A NULL "t" is accepted and yields 0.
*/
int tmp_objdir_destroy(struct tmp_objdir *t)
{
return tmp_objdir_destroy_1(t, 0);
}
/*
* atexit() callback (registered in tmp_objdir_create()): discard whichever
* tmp_objdir is still active; does nothing if there is none.
*/
static void remove_tmp_objdir(void)
{
tmp_objdir_destroy(the_tmp_objdir);
}
/*
* Signal handler (registered via sigchain_push_common() in
* tmp_objdir_create()): remove the active tmp_objdir from disk without
* freeing its memory (on_signal is 1), then pop this handler and re-raise
* the signal.
*/
static void remove_tmp_objdir_on_signal(int signo)
{
tmp_objdir_destroy_1(the_tmp_objdir, 1);
/*
* NOTE(review): presumably popping restores the previously installed
* handler so the re-raised signal takes its usual effect -- confirm
* against sigchain.h.
*/
sigchain_pop(signo);
raise(signo);
}
/*
* These env_* functions are for setting up the child environment; the
* "replace" variant overrides the value of any existing variable with that
* "key". The "append" variant puts our new value at the end of a list,
* separated by PATH_SEP (which is what separates values in
* GIT_ALTERNATE_OBJECT_DIRECTORIES).
*/
/*
 * Queue "key=<current value><PATH_SEP><val>" in "env", or plain
 * "key=<val>" when "key" is not set in our own environment.
 */
static void env_append(struct argv_array *env, const char *key, const char *val)
{
	const char *current = getenv(key);

	if (current) {
		argv_array_pushf(env, "%s=%s%c%s", key, current, PATH_SEP, val);
		return;
	}
	argv_array_pushf(env, "%s=%s", key, val);
}
/*
* Queue "key=val" in "env" without consulting whatever value "key" may
* currently have in our own environment.
*/
static void env_replace(struct argv_array *env, const char *key, const char *val)
{
argv_array_pushf(env, "%s=%s", key, val);
}
/*
 * Create the "pack" subdirectory beneath "root". Returns the result of
 * mkdir(), i.e. 0 on success.
 */
static int setup_tmp_objdir(const char *root)
{
	char *pack_dir = xstrfmt("%s/pack", root);
	int status = mkdir(pack_dir, 0777);

	free(pack_dir);
	return status;
}
/*
 * Create a fresh "incoming-XXXXXX" directory inside the object directory,
 * register it as the process-wide tmp_objdir and prepare the environment
 * strings for child processes.
 *
 * Returns NULL if the directory or its "pack" subdirectory cannot be
 * created. Dies if another tmp_objdir is already active.
 */
struct tmp_objdir *tmp_objdir_create(void)
{
	static int handlers_ready;
	struct tmp_objdir *t;

	if (the_tmp_objdir != NULL)
		die("BUG: only one tmp_objdir can be used at a time");

	t = xmalloc(sizeof(*t));
	argv_array_init(&t->env);
	strbuf_init(&t->path, 0);
	strbuf_addf(&t->path, "%s/incoming-XXXXXX", get_object_directory());

	/*
	 * Reserve room well beyond any filename we expect to append, so
	 * that a tmp_objdir_destroy() triggered from a signal handler can
	 * walk and remove files through this strbuf without calling
	 * malloc.
	 */
	strbuf_grow(&t->path, 1024);

	if (mkdtemp(t->path.buf) == NULL) {
		/* nothing exists on disk yet, so free rather than destroy */
		tmp_objdir_free(t);
		return NULL;
	}

	the_tmp_objdir = t;

	/* The cleanup hooks only need to be installed once per process. */
	if (!handlers_ready) {
		atexit(remove_tmp_objdir);
		sigchain_push_common(remove_tmp_objdir_on_signal);
		handlers_ready = 1;
	}

	if (setup_tmp_objdir(t->path.buf) != 0) {
		tmp_objdir_destroy(t);
		return NULL;
	}

	/*
	 * Children write into the temporary directory and see the real
	 * object directory as an alternate.
	 */
	env_append(&t->env, ALTERNATE_DB_ENVIRONMENT,
		   absolute_path(get_object_directory()));
	env_replace(&t->env, DB_ENVIRONMENT, absolute_path(t->path.buf));

	return t;
}
/*
* Make sure we copy packfiles and their associated metafiles in the correct
* order. All of these ends_with checks are slightly expensive to do in
* the midst of a sorting routine, but in practice it shouldn't matter.
* We will have a relatively small number of packfiles to order, and loose
* objects exit early in the first line.
*/
/*
 * Rank "name" for migration ordering: 0 for anything not starting with
 * "pack", then 1 for ".keep", 2 for ".pack", 3 for ".idx", and 4 for any
 * other file starting with "pack".
 */
static int pack_copy_priority(const char *name)
{
	static const char *const ordered_suffixes[] = { ".keep", ".pack", ".idx" };
	const int nr_suffixes = sizeof(ordered_suffixes) / sizeof(ordered_suffixes[0]);
	int rank;

	if (!starts_with(name, "pack"))
		return 0;

	for (rank = 0; rank < nr_suffixes; rank++)
		if (ends_with(name, ordered_suffixes[rank]))
			return rank + 1;

	return nr_suffixes + 1;
}
/*
 * Comparison callback ordering names by pack_copy_priority(); names with
 * the same priority compare as equal.
 */
static int pack_copy_cmp(const char *a, const char *b)
{
	int rank_a = pack_copy_priority(a);
	int rank_b = pack_copy_priority(b);

	return rank_a - rank_b;
}
/*
 * Append the name of every entry in directory "path" (other than "." and
 * "..") to "out".
 *
 * Returns 0 on success. Returns -1 if the directory cannot be opened, or
 * if reading it fails part-way through; in the latter case "out" is
 * emptied so that callers never act on a truncated listing, and errno
 * holds the readdir() error.
 */
static int read_dir_paths(struct string_list *out, const char *path)
{
	DIR *dh;
	struct dirent *de;
	int saved_errno;

	dh = opendir(path);
	if (!dh)
		return -1;

	for (;;) {
		/*
		 * readdir() returns NULL both at end-of-directory and on
		 * error; only errno tells the two apart, so reset it
		 * before every call.
		 */
		errno = 0;
		de = readdir(dh);
		if (!de)
			break;
		if (!is_dot_or_dotdot(de->d_name))
			string_list_append(out, de->d_name);
	}

	/* closedir() may clobber errno, so remember readdir()'s verdict. */
	saved_errno = errno;
	closedir(dh);

	if (saved_errno) {
		string_list_clear(out, 0);
		errno = saved_errno;
		return -1;
	}
	return 0;
}
static int migrate_paths(struct strbuf *src, struct strbuf *dst);

/*
 * Migrate the single entry named by "src" to "dst". A directory is created
 * at the destination if it does not exist yet and is then descended into;
 * anything else is handed to finalize_object_file().
 *
 * Returns -1 if "src" cannot be stat()ed or the destination directory
 * cannot be set up; otherwise the result of the recursive or finalize call.
 */
static int migrate_one(struct strbuf *src, struct strbuf *dst)
{
	struct stat info;

	if (stat(src->buf, &info) < 0)
		return -1;

	if (!S_ISDIR(info.st_mode))
		return finalize_object_file(src->buf, dst->buf);

	if (mkdir(dst->buf, 0777)) {
		/* an already existing destination directory is fine */
		if (errno != EEXIST)
			return -1;
	} else if (adjust_shared_perm(dst->buf)) {
		return -1;
	}

	return migrate_paths(src, dst);
}
static int migrate_paths(struct strbuf *src, struct strbuf *dst)
{
size_t src_len = src->len, dst_len = dst->len;
struct string_list paths = STRING_LIST_INIT_DUP;
int i;
int ret = 0;
if (read_dir_paths(&paths, src->buf) < 0)
return -1;
paths.cmp = pack_copy_cmp;
string_list_sort(&paths);
for (i = 0; i < paths.nr; i++) {
const char *name = paths.items[i].string;
strbuf_addf(src, "/%s", name);
strbuf_addf(dst, "/%s", name);
ret |= migrate_one(src, dst);
strbuf_setlen(src, src_len);
strbuf_setlen(dst, dst_len);
}
string_list_clear(&paths, 0);
return ret;
}
int tmp_objdir_migrate(struct tmp_objdir *t)
{
struct strbuf src = STRBUF_INIT, dst = STRBUF_INIT;
int ret;
if (!t)
return 0;
strbuf_addbuf(&src, &t->path);
strbuf_addstr(&dst, get_object_directory());
ret = migrate_paths(&src, &dst);
strbuf_release(&src);
strbuf_release(&dst);
tmp_objdir_destroy(t);
return ret;
}
/*
 * Return the environment strings prepared for child processes, or NULL
 * when "t" is NULL. The array belongs to "t" and must not be freed by the
 * caller.
 */
const char **tmp_objdir_env(const struct tmp_objdir *t)
{
	return t ? t->env.argv : NULL;
}
/*
 * Register the temporary directory of "t" as an in-memory alternate of the
 * current process via add_to_alternates_memory().
 *
 * A NULL "t" is ignored, matching the other public tmp_objdir entry points
 * (destroy, migrate, env), which all tolerate a failed tmp_objdir_create().
 */
void tmp_objdir_add_as_alternate(const struct tmp_objdir *t)
{
	if (!t)
		return;
	add_to_alternates_memory(t->path.buf);
}

54
tmp-objdir.h Normal file
Просмотреть файл

@ -0,0 +1,54 @@
#ifndef TMP_OBJDIR_H
#define TMP_OBJDIR_H
/*
* This API allows you to create a temporary object directory, advertise it to
* sub-processes via GIT_OBJECT_DIRECTORY and GIT_ALTERNATE_OBJECT_DIRECTORIES,
* and then either migrate its objects into the main object directory, or
* remove it. The library handles unexpected signal/exit death by cleaning up
* the temporary directory.
*
* Example:
*
* struct tmp_objdir *t = tmp_objdir_create();
* if (!run_command_v_opt_cd_env(cmd, 0, NULL, tmp_objdir_env(t)) &&
* !tmp_objdir_migrate(t))
* printf("success!\n");
* else
* die("failed...tmp_objdir will clean up for us");
*
*/
struct tmp_objdir;
/*
* Create a new temporary object directory; returns NULL on failure.
*
* Only one tmp_objdir may be active in a process at a time; asking for a
* second one while the first is still active is treated as a BUG and dies.
*/
struct tmp_objdir *tmp_objdir_create(void);
/*
* Return a list of environment strings, suitable for use with
* child_process.env, that can be passed to child programs to make use of the
* temporary object directory.
*
* Returns NULL when passed NULL. The list belongs to the tmp_objdir: do not
* free it, and do not use it once the tmp_objdir has been migrated or
* destroyed.
*/
const char **tmp_objdir_env(const struct tmp_objdir *);
/*
* Finalize a temporary object directory by migrating its objects into the main
* object database, removing the temporary directory, and freeing any
* associated resources.
*
* Returns 0 on success (or when passed NULL) and non-zero if any part of the
* migration failed. The temporary directory is removed and the tmp_objdir is
* freed in either case, so the pointer must not be used afterwards.
*/
int tmp_objdir_migrate(struct tmp_objdir *);
/*
* Destroy a temporary object directory, discarding any objects it contains.
*
* Returns 0 when passed NULL; otherwise the result of removing the directory
* from disk. The tmp_objdir is freed and must not be used afterwards.
*/
int tmp_objdir_destroy(struct tmp_objdir *);
/*
* Add the temporary object directory as an alternate object store in the
* current process.
*/
void tmp_objdir_add_as_alternate(const struct tmp_objdir *);
#endif /* TMP_OBJDIR_H */