gvfs-helper: create tool to fetch objects using the GVFS Protocol

Create gvfs-helper.  This is a helper tool to use the GVFS Protocol
REST API to fetch objects and configuration data from a GVFS cache-server
or Git server.  This tool uses libcurl to send object requests to either
server.  This tool creates loose objects and/or packfiles.

Create gvfs-helper-client.  This code resides within git proper and
uses the sub-process API to manage gvfs-helper as a long-running background
process.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
This commit is contained in:
Jeff Hostetler 2019-08-13 12:12:08 -04:00 коммит произвёл Johannes Schindelin
Родитель d153cee578
Коммит 23ecaba759
15 изменённых файлов: 2842 добавлений и 3 удалений

1
.gitignore поставляемый
Просмотреть файл

@ -73,6 +73,7 @@
/git-gc
/git-get-tar-commit-id
/git-grep
/git-gvfs-helper
/git-hash-object
/git-help
/git-hook

Просмотреть файл

@ -441,6 +441,8 @@ include::config/gui.txt[]
include::config/guitool.txt[]
include::config/gvfs.txt[]
include::config/help.txt[]
include::config/http.txt[]

Просмотреть файл

@ -778,6 +778,9 @@ core.gvfs::
flag just blocks them from occurring at all.
--
core.useGvfsHelper::
TODO
core.sparseCheckout::
Enable "sparse checkout" feature. See linkgit:git-sparse-checkout[1]
for more information.

Просмотреть файл

@ -0,0 +1,5 @@
gvfs.cache-server::
TODO
gvfs.sharedcache::
TODO

Просмотреть файл

@ -1041,6 +1041,7 @@ LIB_OBJS += gpg-interface.o
LIB_OBJS += graph.o
LIB_OBJS += grep.o
LIB_OBJS += gvfs.o
LIB_OBJS += gvfs-helper-client.o
LIB_OBJS += hash-lookup.o
LIB_OBJS += hashmap.o
LIB_OBJS += help.o
@ -1617,6 +1618,8 @@ else
endif
BASIC_CFLAGS += $(CURL_CFLAGS)
PROGRAM_OBJS += gvfs-helper.o
REMOTE_CURL_PRIMARY = git-remote-http$X
REMOTE_CURL_ALIASES = git-remote-https$X git-remote-ftp$X git-remote-ftps$X
REMOTE_CURL_NAMES = $(REMOTE_CURL_PRIMARY) $(REMOTE_CURL_ALIASES)
@ -2858,6 +2861,10 @@ scalar$X: scalar.o GIT-LDFLAGS $(GITLIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) \
$(filter %.o,$^) $(LIBS)
git-gvfs-helper$X: gvfs-helper.o http.o GIT-LDFLAGS $(GITLIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
$(CURL_LIBCURL) $(EXPAT_LIBEXPAT) $(LIBS)
$(LIB_FILE): $(LIB_OBJS)
$(QUIET_AR)$(RM) $@ && $(AR) $(ARFLAGS) $@ $^

Просмотреть файл

@ -1059,6 +1059,9 @@ extern int core_gvfs;
extern int precomposed_unicode;
extern int protect_hfs;
extern int protect_ntfs;
extern int core_use_gvfs_helper;
extern const char *gvfs_cache_server_url;
extern const char *gvfs_shared_cache_pathname;
extern int core_apply_sparse_checkout;
extern int core_sparse_checkout_cone;

Просмотреть файл

@ -24,6 +24,7 @@
#include "color.h"
#include "refs.h"
#include "worktree.h"
#include "transport.h"
struct config_source {
struct config_source *prev;
@ -1749,6 +1750,11 @@ int git_default_core_config(const char *var, const char *value, void *cb)
return 0;
}
if (!strcmp(var, "core.usegvfshelper")) {
core_use_gvfs_helper = git_config_bool(var, value);
return 0;
}
if (!strcmp(var, "core.sparsecheckout")) {
/* virtual file system relies on the sparse checkout logic so force it on */
if (core_virtualfilesystem)
@ -1891,6 +1897,37 @@ static int git_default_mailmap_config(const char *var, const char *value)
return 0;
}
static int git_default_gvfs_config(const char *var, const char *value)
{
if (!strcmp(var, "gvfs.cache-server")) {
const char *v2 = NULL;
if (!git_config_string(&v2, var, value) && v2 && *v2)
gvfs_cache_server_url = transport_anonymize_url(v2);
free((char*)v2);
return 0;
}
if (!strcmp(var, "gvfs.sharedcache") && value && *value) {
struct strbuf buf = STRBUF_INIT;
strbuf_addstr(&buf, value);
if (strbuf_normalize_path(&buf) < 0) {
/*
* Pretend it wasn't set. This will cause us to
* fallback to ".git/objects" effectively.
*/
strbuf_release(&buf);
return 0;
}
strbuf_trim_trailing_dir_sep(&buf);
gvfs_shared_cache_pathname = strbuf_detach(&buf, NULL);
return 0;
}
return 0;
}
int git_default_config(const char *var, const char *value, void *cb)
{
if (starts_with(var, "core."))
@ -1940,6 +1977,9 @@ int git_default_config(const char *var, const char *value, void *cb)
if (starts_with(var, "sparse."))
return git_default_sparse_config(var, value);
if (starts_with(var, "gvfs."))
return git_default_gvfs_config(var, value);
/* Add other config variables here and to Documentation/config.txt. */
return 0;
}

Просмотреть файл

@ -647,7 +647,7 @@ if(NOT CURL_FOUND)
add_compile_definitions(NO_CURL)
message(WARNING "git-http-push and git-http-fetch will not be built")
else()
list(APPEND PROGRAMS_BUILT git-http-fetch git-http-push git-imap-send git-remote-http)
list(APPEND PROGRAMS_BUILT git-http-fetch git-http-push git-imap-send git-remote-http git-gvfs-helper)
if(CURL_VERSION_STRING VERSION_GREATER_EQUAL 7.34.0)
add_compile_definitions(USE_CURL_FOR_IMAP_SEND)
endif()
@ -816,6 +816,9 @@ if(CURL_FOUND)
add_executable(git-http-push ${CMAKE_SOURCE_DIR}/http-push.c)
target_link_libraries(git-http-push http_obj common-main ${CURL_LIBRARIES} ${EXPAT_LIBRARIES})
endif()
add_executable(git-gvfs-helper ${CMAKE_SOURCE_DIR}/gvfs-helper.c)
target_link_libraries(git-gvfs-helper http_obj common-main ${CURL_LIBRARIES} )
endif()
parse_makefile_for_executables(git_builtin_extra "BUILT_INS")

Просмотреть файл

@ -89,6 +89,9 @@ int protect_hfs = PROTECT_HFS_DEFAULT;
#define PROTECT_NTFS_DEFAULT 1
#endif
int protect_ntfs = PROTECT_NTFS_DEFAULT;
int core_use_gvfs_helper;
const char *gvfs_cache_server_url;
const char *gvfs_shared_cache_pathname;
/*
* The character that begins a commented line in user-editable file

369
gvfs-helper-client.c Normal file
Просмотреть файл

@ -0,0 +1,369 @@
#include "cache.h"
#include "strvec.h"
#include "trace2.h"
#include "oidset.h"
#include "object.h"
#include "object-store.h"
#include "gvfs-helper-client.h"
#include "sub-process.h"
#include "sigchain.h"
#include "pkt-line.h"
#include "quote.h"
#include "packfile.h"
/*
 * OIDs waiting to be requested from `gvfs-helper`.  Filled by
 * gh_client__queue_oid() / gh_client__get_immediate(), sent by
 * gh_client__get__send_command() and cleared at the end of
 * gh_client__get().
 */
static struct oidset gh_client__oidset_queued = OIDSET_INIT;
/* Number of successful insertions into the queued set since the last get. */
static unsigned long gh_client__oidset_count;
/*
 * Set to 1 by gh_client__get_immediate(); reported via trace2 and reset
 * to 0 at the end of gh_client__get().
 */
static int gh_client__includes_immediate;
/*
 * Per-subprocess bookkeeping.  The generic subprocess entry is the first
 * member so that a pointer to it can be cast to this type and back.
 */
struct gh_server__process {
struct subprocess_entry subprocess; /* must be first */
/* Capability bits (CAP_*) filled in during the handshake. */
unsigned int supported_capabilities;
};
/* The subprocess map is initialized lazily on the first gh_client__get(). */
static int gh_server__subprocess_map_initialized;
static struct hashmap gh_server__subprocess_map;
/* ODB selected by gh_client__choose_odb(); NULL until first chosen. */
static struct object_directory *gh_client__chosen_odb;
/* Capability bit for the "get" verb. */
#define CAP_GET (1u<<1)
/*
 * Start-up callback handed to the sub-process API: perform the
 * "gvfs-helper" handshake and record the capabilities that were
 * negotiated in the owning `struct gh_server__process`.
 *
 * Returns whatever subprocess_handshake() returns.
 */
static int gh_client__start_fn(struct subprocess_entry *subprocess)
{
	static int versions[] = {1, 0};
	static struct subprocess_capability capabilities[] = {
		{ "get", CAP_GET },
		{ NULL, 0 }
	};
	/* The subprocess entry is the first member of our wrapper struct. */
	struct gh_server__process *helper =
		(struct gh_server__process *)subprocess;
	unsigned int *negotiated = &helper->supported_capabilities;

	return subprocess_handshake(subprocess, "gvfs-helper", versions,
				    NULL, capabilities,
				    negotiated);
}
/*
* Send:
*
* get LF
* (<hex-oid> LF)*
* <flush>
*
*/
static int gh_client__get__send_command(struct child_process *process)
{
struct oidset_iter iter;
struct object_id *oid;
int err;
/*
* We assume that all of the packet_ routines call error()
* so that we don't have to.
*/
err = packet_write_fmt_gently(process->in, "get\n");
if (err)
return err;
oidset_iter_init(&gh_client__oidset_queued, &iter);
while ((oid = oidset_iter_next(&iter))) {
err = packet_write_fmt_gently(process->in, "%s\n",
oid_to_hex(oid));
if (err)
return err;
}
err = packet_flush_gently(process->in);
if (err)
return err;
return 0;
}
/*
 * Update the loose object cache to include the newly created
 * object.
 *
 * `line` must be a "loose SP <hex-oid>" response line received from
 * `gvfs-helper`.  The hex OID is parsed and added to the loose object
 * cache of the chosen ODB.  A line that lacks the prefix or carries an
 * unparsable OID is treated as a BUG().
 */
static void gh_client__update_loose_cache(const char *line)
{
	const char *v1_oid;
	struct object_id oid;

	if (!skip_prefix(line, "loose ", &v1_oid))
		BUG("update_loose_cache: invalid line '%s'", line);

	/*
	 * Convert the hex text into an object_id; without this the cache
	 * would be updated with an uninitialized OID.
	 */
	if (get_oid_hex(v1_oid, &oid))
		BUG("update_loose_cache: invalid line '%s'", line);

	odb_loose_cache_add_new_oid(gh_client__chosen_odb, &oid);
}
/*
 * Update the packed-git list to include the newly created packfile.
 *
 * `line` must be a "packfile SP <filename>" response line received from
 * `gvfs-helper`.  The matching ".idx" pathname inside the chosen ODB's
 * "pack" directory is computed and registered with the repository.
 * A line without the expected prefix is treated as a BUG().
 */
static void gh_client__update_packed_git(const char *line)
{
	struct strbuf path = STRBUF_INIT;
	const char *v1_filename;
	struct packed_git *p;
	int is_local;

	if (!skip_prefix(line, "packfile ", &v1_filename))
		BUG("update_packed_git: invalid line '%s'", line);

	/*
	 * ODB[0] is the local .git/objects.  All others are alternates.
	 */
	is_local = (gh_client__chosen_odb == the_repository->objects->odb);

	/* Build "<odb>/pack/<name>.idx" from the reported ".pack" name. */
	strbuf_addf(&path, "%s/pack/%s",
		    gh_client__chosen_odb->path, v1_filename);
	strbuf_strip_suffix(&path, ".pack");
	strbuf_addstr(&path, ".idx");

	p = add_packed_git(path.buf, path.len, is_local);
	if (p)
		install_packed_git_and_mru(the_repository, p);

	/* The pathname buffer was only needed for the lookup above. */
	strbuf_release(&path);
}
/*
* We expect:
*
* <odb>
* <data>*
* <status>
* <flush>
*
* Where:
*
* <odb> ::= odb SP <directory> LF
*
* <data> ::= <packfile> / <loose>
*
* <packfile> ::= packfile SP <filename> LF
*
* <loose> ::= loose SP <hex-oid> LF
*
* <status> ::= ok LF
* / partial LF
* / error SP <message> LF
*
* Note that `gvfs-helper` controls how/if it chunks the request when
* it talks to the cache-server and/or main Git server. So it is
* possible for us to receive many packfiles and/or loose objects *AND
* THEN* get a hard network error or a 404 on an individual object.
*
* If we get a partial result, we can let the caller try to continue
* -- for example, maybe an immediate request for a tree object was
* grouped with a queued request for a blob. The tree-walk *might* be
* able to continue and let the 404 blob be handled later.
*/
static int gh_client__get__receive_response(
struct child_process *process,
enum gh_client__created *p_ghc,
int *p_nr_loose, int *p_nr_packfile)
{
enum gh_client__created ghc = GHC__CREATED__NOTHING;
const char *v1;
char *line;
int len;
int err = 0;
while (1) {
/*
* Warning: packet_read_line_gently() calls die()
* despite the _gently moniker.
*/
len = packet_read_line_gently(process->out, NULL, &line);
if ((len < 0) || !line)
break;
if (starts_with(line, "odb")) {
/* trust that this matches what we expect */
}
else if (starts_with(line, "packfile")) {
gh_client__update_packed_git(line);
ghc |= GHC__CREATED__PACKFILE;
*p_nr_packfile += 1;
}
else if (starts_with(line, "loose")) {
gh_client__update_loose_cache(line);
ghc |= GHC__CREATED__LOOSE;
*p_nr_loose += 1;
}
else if (starts_with(line, "ok"))
;
else if (starts_with(line, "partial"))
;
else if (skip_prefix(line, "error ", &v1)) {
error("gvfs-helper error: '%s'", v1);
err = -1;
}
}
*p_ghc = ghc;
return err;
}
/*
 * Pick the object directory that `gvfs-helper` should write into.
 * The choice is made once and remembered; currently it is always the
 * repository's primary ODB.
 */
static void gh_client__choose_odb(void)
{
	if (!gh_client__chosen_odb) {
		prepare_alt_odb(the_repository);
		gh_client__chosen_odb = the_repository->objects->odb;
	}
}
/*
 * Send every queued OID to a `gvfs-helper` sub-process and read back
 * its response.
 *
 * The helper is looked up by its quoted command line and started on
 * demand.  If it does not advertise the "get" capability, or if the
 * exchange fails, the sub-process is stopped and its entry freed.
 *
 * On every path (including failures) the trace2 data is emitted and
 * the queue, the queued count and the "immediate" flag are reset.
 *
 * *p_ghc is only written when the response is actually read; callers
 * should initialize it beforehand.
 *
 * Returns 0 on success and non-zero on failure.
 */
static int gh_client__get(enum gh_client__created *p_ghc)
{
struct gh_server__process *entry;
struct child_process *process;
struct strvec argv = STRVEC_INIT;
struct strbuf quoted = STRBUF_INIT;
int nr_loose = 0;
int nr_packfile = 0;
int err = 0;
trace2_region_enter("gh-client", "get", the_repository);
gh_client__choose_odb();
/*
* TODO decide what defaults we want.
*/
strvec_push(&argv, "gvfs-helper");
strvec_push(&argv, "--fallback");
strvec_push(&argv, "--cache-server=trust");
strvec_pushf(&argv, "--shared-cache=%s",
gh_client__chosen_odb->path);
strvec_push(&argv, "server");
/* The quoted command line is the string used to look up the entry. */
sq_quote_argv_pretty(&quoted, argv.v);
if (!gh_server__subprocess_map_initialized) {
gh_server__subprocess_map_initialized = 1;
hashmap_init(&gh_server__subprocess_map,
(hashmap_cmp_fn)cmd2process_cmp, NULL, 0);
entry = NULL;
} else
entry = (struct gh_server__process *)subprocess_find_entry(
&gh_server__subprocess_map, quoted.buf);
/* No running helper for this command line yet: start one. */
if (!entry) {
entry = xmalloc(sizeof(*entry));
entry->supported_capabilities = 0;
err = subprocess_start_strvec(
&gh_server__subprocess_map, &entry->subprocess, 1,
&argv, gh_client__start_fn);
if (err) {
free(entry);
goto leave_region;
}
}
process = &entry->subprocess.process;
if (!(CAP_GET & entry->supported_capabilities)) {
error("gvfs-helper: does not support GET");
subprocess_stop(&gh_server__subprocess_map,
(struct subprocess_entry *)entry);
free(entry);
err = -1;
goto leave_region;
}
/* Ignore SIGPIPE for the duration of the request/response exchange. */
sigchain_push(SIGPIPE, SIG_IGN);
err = gh_client__get__send_command(process);
if (!err)
err = gh_client__get__receive_response(process, p_ghc,
&nr_loose, &nr_packfile);
sigchain_pop(SIGPIPE);
/* A failed exchange tears the helper down; a later call starts a new one. */
if (err) {
subprocess_stop(&gh_server__subprocess_map,
(struct subprocess_entry *)entry);
free(entry);
}
leave_region:
strvec_clear(&argv);
strbuf_release(&quoted);
trace2_data_intmax("gh-client", the_repository,
"get/immediate", gh_client__includes_immediate);
trace2_data_intmax("gh-client", the_repository,
"get/nr_objects", gh_client__oidset_count);
if (nr_loose)
trace2_data_intmax("gh-client", the_repository,
"get/nr_loose", nr_loose);
if (nr_packfile)
trace2_data_intmax("gh-client", the_repository,
"get/nr_packfile", nr_packfile);
if (err)
trace2_data_intmax("gh-client", the_repository,
"get/error", err);
trace2_region_leave("gh-client", "get", the_repository);
/* Reset the queue state regardless of the outcome. */
oidset_clear(&gh_client__oidset_queued);
gh_client__oidset_count = 0;
gh_client__includes_immediate = 0;
return err;
}
/*
 * Add `oid` to the set of objects to request on the next drain.
 * The queued count is only bumped when oidset_insert() reports zero.
 */
void gh_client__queue_oid(const struct object_id *oid)
{
	int insert_rc;

	// TODO consider removing this trace2.  it is useful for interactive
	// TODO debugging, but may generate way too much noise for a data
	// TODO event.
	trace2_printf("gh_client__queue_oid: %s", oid_to_hex(oid));

	insert_rc = oidset_insert(&gh_client__oidset_queued, oid);
	if (insert_rc)
		return;

	gh_client__oidset_count++;
}
/*
 * Queue each of the `oid_nr` object ids in `oids`.
 *
 * This routine should actually take a "const struct oid_array *"
 * rather than the component parts, but fetch_objects() uses
 * this model (because of the call in sha1-file.c).
 */
void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr)
{
	int idx = 0;

	while (idx < oid_nr) {
		gh_client__queue_oid(&oids[idx]);
		idx++;
	}
}
/*
 * Fetch everything currently queued.  *p_ghc is reset to
 * GHC__CREATED__NOTHING first; with an empty queue this is a no-op that
 * returns 0, otherwise the result of gh_client__get() is returned.
 */
int gh_client__drain_queue(enum gh_client__created *p_ghc)
{
	*p_ghc = GHC__CREATED__NOTHING;

	return gh_client__oidset_count ? gh_client__get(p_ghc) : 0;
}
/*
 * Queue `oid`, mark the pending request as containing an immediate
 * fetch, and drain the queue right away.  Anything that was already
 * queued is sent in the same request.
 *
 * Returns the result of gh_client__drain_queue().
 */
int gh_client__get_immediate(const struct object_id *oid,
			     enum gh_client__created *p_ghc)
{
	gh_client__includes_immediate = 1;

	// TODO consider removing this trace2.  it is useful for interactive
	// TODO debugging, but may generate way too much noise for a data
	// TODO event.
	trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid));

	if (oidset_insert(&gh_client__oidset_queued, oid) == 0)
		gh_client__oidset_count += 1;

	return gh_client__drain_queue(p_ghc);
}

68
gvfs-helper-client.h Normal file
Просмотреть файл

@ -0,0 +1,68 @@
#ifndef GVFS_HELPER_CLIENT_H
#define GVFS_HELPER_CLIENT_H
struct repository;
struct commit;
/*
 * Bit flags describing what a _get_ operation created on disk.
 */
enum gh_client__created {
/*
* The _get_ operation did not create anything. It doesn't
* matter if `gvfs-helper` had errors or not -- just that
* nothing was created.
*/
GHC__CREATED__NOTHING = 0,
/*
* The _get_ operation created one or more packfiles.
*/
GHC__CREATED__PACKFILE = 1<<1,
/*
* The _get_ operation created one or more loose objects.
* (Not necessarily for the individual OID you requested.)
*/
GHC__CREATED__LOOSE = 1<<2,
/*
* The _get_ operation created one or more packfiles *and*
* one or more loose objects.
*/
GHC__CREATED__PACKFILE_AND_LOOSE = (GHC__CREATED__PACKFILE |
GHC__CREATED__LOOSE),
};
/*
* Ask `gvfs-helper server` to immediately fetch a single object
* using "/gvfs/objects" GET semantics.
*
* A long-running background process is used to make subsequent
* requests more efficient.
*
* A loose object will be created in the shared-cache ODB and
* in-memory cache updated.
*/
int gh_client__get_immediate(const struct object_id *oid,
enum gh_client__created *p_ghc);
/*
* Queue this OID for a future fetch using `gvfs-helper server`.
* It does not wait.
*
* Callers should not rely on the queued object being on disk until
* the queue has been drained.
*/
void gh_client__queue_oid(const struct object_id *oid);
void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr);
/*
* Ask `gvfs-helper server` to fetch the set of queued OIDs using
* "/gvfs/objects" POST semantics.
*
* A long-running background process is used to make subsequent
* requests more efficient.
*
* One or more packfiles will be created in the shared-cache ODB.
*/
int gh_client__drain_queue(enum gh_client__created *p_ghc);
#endif /* GVFS_HELPER_CLIENT_H */

2295
gvfs-helper.c Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -38,6 +38,7 @@
#include "sigchain.h"
#include "sub-process.h"
#include "pkt-line.h"
#include "gvfs-helper-client.h"
/* The maximum size for an object header. */
#define MAX_HEADER_LEN 32
@ -1685,7 +1686,7 @@ static int do_oid_object_info_extended(struct repository *r,
const struct object_id *real = oid;
int already_retried = 0;
int tried_hook = 0;
int tried_gvfs_helper = 0;
if (flags & OBJECT_INFO_LOOKUP_REPLACE)
real = lookup_replace_object(r, oid);
@ -1723,13 +1724,41 @@ retry:
if (!loose_object_info(r, real, oi, flags))
return 0;
if (core_use_gvfs_helper && !tried_gvfs_helper) {
enum gh_client__created ghc;
if (flags & OBJECT_INFO_SKIP_FETCH_OBJECT)
return -1;
gh_client__get_immediate(real, &ghc);
tried_gvfs_helper = 1;
/*
* Retry the lookup IIF `gvfs-helper` created one
* or more new packfiles or loose objects.
*/
if (ghc != GHC__CREATED__NOTHING)
continue;
/*
* If `gvfs-helper` fails, we just want to return -1.
* But allow the other providers to have a shot at it.
* (At least until we have a chance to consolidate
* them.)
*/
}
/* Not a loose object; someone else may have just packed it. */
if (!(flags & OBJECT_INFO_QUICK)) {
reprepare_packed_git(r);
if (find_pack_entry(r, real, &e))
break;
if (core_virtualize_objects && !tried_hook) {
// TODO Assert or at least trace2 if gvfs-helper
// TODO was tried and failed and then read-object-hook
// TODO is successful at getting this object.
tried_hook = 1;
// TODO BUG? Should 'oid' be 'real' ?
if (!read_object_process(oid))
goto retry;
}

Просмотреть файл

@ -1,5 +1,6 @@
#include "cache.h"
#include "object-store.h"
#include "gvfs-helper-client.h"
#include "promisor-remote.h"
#include "config.h"
#include "transport.h"
@ -196,7 +197,7 @@ struct promisor_remote *repo_promisor_remote_find(struct repository *r,
int repo_has_promisor_remote(struct repository *r)
{
return !!repo_promisor_remote_find(r, NULL);
return core_use_gvfs_helper || !!repo_promisor_remote_find(r, NULL);
}
static int remove_fetched_oids(struct repository *repo,
@ -243,6 +244,15 @@ void promisor_remote_get_direct(struct repository *repo,
if (oid_nr == 0)
return;
if (core_use_gvfs_helper) {
enum gh_client__created ghc = GHC__CREATED__NOTHING;
trace2_data_intmax("bug", the_repository, "fetch_objects/gvfs-helper", oid_nr);
gh_client__queue_oid_array(oids, oid_nr);
if (!gh_client__drain_queue(&ghc))
return;
die(_("failed to fetch missing objects from the remote"));
}
promisor_remote_init(repo);

1
t/helper/.gitignore поставляемый
Просмотреть файл

@ -1,2 +1,3 @@
/test-gvfs-protocol
/test-tool
/test-fake-ssh