Merge branch 'scalar-with-gvfs'

Prepare `scalar` to use the GVFS protocol instead of partial clone
(required to support Azure Repos).

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Johannes Schindelin 2021-05-03 15:21:24 +02:00, committed by Victoria Dye
Parents 666d13bab8 01dbeaae79
Commit 5884b0a26a
15 changed files with 1105 additions and 53 deletions

View file

@@ -8,7 +8,9 @@ scalar - A tool for managing large Git repositories
SYNOPSIS
--------
[verse]
scalar clone [--single-branch] [--branch <main-branch>] [--full-clone] <url> [<enlistment>]
scalar clone [--single-branch] [--branch <main-branch>] [--full-clone]
[--local-cache-path <path>] [--cache-server-url <url>]
<url> [<enlistment>]
scalar list
scalar register [<enlistment>]
scalar unregister [<enlistment>]
@@ -16,6 +18,7 @@ scalar run ( all | config | commit-graph | fetch | loose-objects | pack-files )
scalar reconfigure [ --all | <enlistment> ]
scalar diagnose [<enlistment>]
scalar delete <enlistment>
scalar cache-server ( --get | --set <url> | --list [<remote>] ) [<enlistment>]
DESCRIPTION
-----------
@@ -84,6 +87,17 @@ cloning. If the HEAD at the remote did not point at any branch when
A sparse-checkout is initialized by default. This behavior can be
turned off via `--full-clone`.
--local-cache-path <path>::
Override the path to the local cache root directory; pre-fetched objects
are stored into a repository-dependent subdirectory of that path.
+
The default is `<drive>:\.scalarCache` on Windows (on the same drive as the
clone), and `~/.scalarCache` on macOS.
--cache-server-url <url>::
Retrieve missing objects from the specified remote, which is expected to
understand the GVFS protocol.
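For illustration, a clone that sets both of these options explicitly might look
like this (the URL, cache path, and enlistment name are invented for the
example):

```
scalar clone --single-branch \
	--local-cache-path ~/my-scalar-cache \
	--cache-server-url https://cache.example.com/ \
	https://dev.azure.com/contoso/project/_git/repo my-enlistment
```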
List
~~~~
@@ -157,6 +171,27 @@ delete <enlistment>::
This subcommand lets you delete an existing Scalar enlistment from your
local file system, unregistering the repository.
Cache-server
~~~~~~~~~~~~
cache-server ( --get | --set <url> | --list [<remote>] ) [<enlistment>]::
This command lets you query or set the GVFS-enabled cache server used
to fetch missing objects.
--get::
This is the default command mode: query the currently configured cache
server URL, if any.
--list::
Access the `gvfs/info` endpoint of the specified remote (default:
`origin`) to figure out which cache servers are available, if any.
+
In contrast to the `--get` command mode (which only accesses the local
repository), this command mode triggers a request via the network that
potentially requires authentication. If authentication is required, the
configured credential helper is employed (see linkgit:git-credential[1]
for details).
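A hypothetical session (the URLs are invented; the `#<index>: <url>` lines of
`--list` and the `Using cache server:` line of `--get` follow the output
formats introduced in `scalar.c` below):

```
$ scalar cache-server --list origin
#0: https://eastus.cache.example.com/
#1: https://westus.cache.example.com/
$ scalar cache-server --set https://eastus.cache.example.com/
$ scalar cache-server --get
Using cache server: https://eastus.cache.example.com/
```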
SEE ALSO
--------
linkgit:git-clone[1], linkgit:git-maintenance[1].

View file

@@ -2725,6 +2725,7 @@ GIT_OBJS += git.o
.PHONY: git-objs
git-objs: $(GIT_OBJS)
SCALAR_OBJS := json-parser.o
SCALAR_OBJS += scalar.o
.PHONY: scalar-objs
scalar-objs: $(SCALAR_OBJS)
@@ -2873,7 +2874,7 @@ $(REMOTE_CURL_PRIMARY): remote-curl.o http.o http-walker.o $(LAZYLOAD_LIBCURL_OB
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
$(CURL_LIBCURL) $(EXPAT_LIBEXPAT) $(LIBS)
scalar$X: scalar.o GIT-LDFLAGS $(GITLIBS)
scalar$X: $(SCALAR_OBJS) GIT-LDFLAGS $(GITLIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) \
$(filter %.o,$^) $(LIBS)

View file

@@ -797,7 +797,7 @@ target_link_libraries(git-sh-i18n--envsubst common-main)
add_executable(git-shell ${CMAKE_SOURCE_DIR}/shell.c)
target_link_libraries(git-shell common-main)
add_executable(scalar ${CMAKE_SOURCE_DIR}/scalar.c)
add_executable(scalar ${CMAKE_SOURCE_DIR}/scalar.c ${CMAKE_SOURCE_DIR}/json-parser.c)
target_link_libraries(scalar common-main)
if(CURL_FOUND)

View file

@@ -18,8 +18,9 @@ Creating a new Scalar clone
---------------------------------------------------
The `clone` verb creates a local enlistment of a remote repository using the
partial clone feature available e.g. on GitHub.
partial clone feature available e.g. on GitHub, or using the
[GVFS protocol](https://github.com/microsoft/VFSForGit/blob/HEAD/Protocol.md),
such as Azure Repos.
```
scalar clone [options] <url> [<dir>]
@@ -68,11 +69,26 @@ in `<path>`.
These options allow a user to customize their initial enlistment.
* `--full-clone`: If specified, do not initialize the sparse-checkout feature.
All files will be present in your `src` directory. This uses a Git partial
clone: blobs are downloaded on demand.
All files will be present in your `src` directory. This behaves very similarly
to a Git partial clone in that blobs are downloaded on demand. However, it
will use the GVFS protocol to download all Git objects.
* `--cache-server-url=<url>`: If specified, set the intended cache server to
the specified `<url>`. All object queries will be sent via the GVFS protocol
to this `<url>` instead of to the origin remote. If the remote supplies a list
of cache servers via the `<url>/gvfs/config` endpoint, then the `clone` command
will select a nearby cache server from that list.
* `--branch=<ref>`: Specify the branch to checkout after clone.
* `--local-cache-path=<path>`: Use this option to override the path for the
local Scalar cache. If not specified, then Scalar will select a default
path to share objects with your other enlistments. On Windows, this path
is a subdirectory of `<Volume>:\.scalarCache\`. On Mac, this path is a
subdirectory of `~/.scalarCache/`. The default cache path is recommended so
multiple enlistments of the same remote repository share objects on the
same device.
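As a sketch of how the default cache root ties enlistments together (the paths
and URL are illustrative; `gvfs.sharedCache` is the setting that `scalar clone`
writes, with a cache key derived from the repository ID or remote URL):

```
$ scalar clone https://dev.azure.com/contoso/project/_git/repo repo1
$ scalar clone https://dev.azure.com/contoso/project/_git/repo repo2
$ git -C repo1/src config gvfs.sharedCache
/Users/me/.scalarCache/id_<repository-id>
$ git -C repo2/src config gvfs.sharedCache
/Users/me/.scalarCache/id_<repository-id>
```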
### Advanced Options
The options below are not intended for use by a typical user. These are

View file

@@ -28,10 +28,14 @@ these features for that repo (except partial clone) and start running suggested
maintenance in the background using
[the `git maintenance` feature](https://git-scm.com/docs/git-maintenance).
Repos cloned with the `scalar clone` command use partial clone to significantly
reduce the amount of data required to get started using a repository. By
delaying all blob downloads until they are required, Scalar allows you to work
with very large repositories quickly.
Repos cloned with the `scalar clone` command use partial clone or the
[GVFS protocol](https://github.com/microsoft/VFSForGit/blob/HEAD/Protocol.md)
to significantly reduce the amount of data required to get started
using a repository. By delaying all blob downloads until they are required,
Scalar allows you to work with very large repositories quickly. The GVFS
protocol allows a network of _cache servers_ to serve objects with lower
latency and higher throughput. The cache servers also reduce load on the
central server.
Documentation
-------------
@@ -42,7 +46,7 @@ Documentation
* [Troubleshooting](troubleshooting.md):
Collect diagnostic information or update custom settings. Includes
`scalar diagnose`.
`scalar diagnose` and `scalar cache-server`.
* [The Philosophy of Scalar](philosophy.md): Why does Scalar work the way
it does, and how do we make decisions about its future?

View file

@@ -13,22 +13,27 @@ Scalar only to configure those new settings. In particular, we ported
features like background maintenance to Git to make Scalar simpler and
make Git more powerful.
Services such as GitHub support partial clone, a standard adopted by the Git
project to download only part of the Git objects when cloning, and to fetch
further objects on demand. If your hosting service supports partial clone, then
we absolutely recommend it as a way to greatly speed up your clone and fetch
times and to reduce how much disk space your Git repository requires. Scalar
will help with this!
Scalar ships inside [a custom version of Git][microsoft-git], but we are
working to make it available in other forks of Git. The only feature
that is not intended to ever reach the standard Git client is Scalar's use
of [the GVFS Protocol][gvfs-protocol], which is essentially an older
version of [Git's partial clone feature](https://github.blog/2020-12-21-get-up-to-speed-with-partial-clone-and-shallow-clone/)
that was available first in Azure Repos. Services such as GitHub support
only partial clone instead of the GVFS protocol because that is the
standard adopted by the Git project. If your hosting service supports
partial clone, then we absolutely recommend it as a way to greatly speed
up your clone and fetch times and to reduce how much disk space your Git
repository requires. Scalar will help with this!
Most of the value of Scalar can be found in the core Git client. However, most
of the advanced features that really optimize Git's performance are off by
default for compatibility reasons. To really take advantage of Git's latest and
greatest features, you either need to study the [`git config`
documentation](https://git-scm.com/docs/git-config) and regularly read [the Git
release notes](https://github.com/git/git/tree/master/Documentation/RelNotes).
If you don't use the GVFS Protocol, then most of the value of Scalar can
be found in the core Git client. However, most of the advanced features
that really optimize Git's performance are off by default for compatibility
reasons. To really take advantage of Git's latest and greatest features,
you either need to study the [`git config` documentation](https://git-scm.com/docs/git-config)
and regularly read [the Git release notes](https://github.com/git/git/tree/master/Documentation/RelNotes).
Even if you do all that work and customize your Git settings on your machines,
you likely will want to share those settings with other team members. Or, you
can just use Scalar!
you likely will want to share those settings with other team members.
Or, you can just use Scalar!
Using `scalar register` on an existing Git repository will give you these
benefits:

View file

@@ -18,3 +18,23 @@ files for that repository. This includes:
As the `diagnose` command completes, it provides the path of the resulting
zip file. This zip can be attached to bug reports to make the analysis easier.
Modifying Configuration Values
------------------------------
The Scalar-specific configuration is only available for repos using the
GVFS protocol.
### Cache Server URL
When using an enlistment cloned with `scalar clone` and the GVFS protocol,
you will have a value called the cache server URL. Cache servers are a feature
of the GVFS protocol that provide low-latency access to on-demand object
requests. The chosen cache server is recorded in the `gvfs.cache-server`
setting in your local Git config file.
Run `scalar cache-server --get` to see the current cache server.
Run `scalar cache-server --list` to see the available cache server URLs.
Run `scalar cache-server --set=<url>` to set your cache server to `<url>`.

View file

@@ -11,6 +11,7 @@
#include "packfile.h"
#include "parse-options.h"
#include "write-or-die.h"
#include "config.h"
struct archive_dir {
const char *path;
@@ -107,6 +108,39 @@ cleanup:
return dtype;
}
static void dir_stats(struct strbuf *buf, const char *path)
{
DIR *dir = opendir(path);
struct dirent *e;
struct stat e_stat;
struct strbuf file_path = STRBUF_INIT;
size_t base_path_len;
if (!dir)
return;
strbuf_addstr(buf, "Contents of ");
strbuf_add_absolute_path(buf, path);
strbuf_addstr(buf, ":\n");
strbuf_add_absolute_path(&file_path, path);
strbuf_addch(&file_path, '/');
base_path_len = file_path.len;
while ((e = readdir(dir)) != NULL)
if (!is_dot_or_dotdot(e->d_name) && e->d_type == DT_REG) {
strbuf_setlen(&file_path, base_path_len);
strbuf_addstr(&file_path, e->d_name);
if (!stat(file_path.buf, &e_stat))
strbuf_addf(buf, "%-70s %16"PRIuMAX"\n",
e->d_name,
(uintmax_t)e_stat.st_size);
}
strbuf_release(&file_path);
closedir(dir);
}
static int count_files(struct strbuf *path)
{
DIR *dir = opendir(path->buf);
@@ -219,7 +253,8 @@ int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode)
struct strvec archiver_args = STRVEC_INIT;
char **argv_copy = NULL;
int stdout_fd = -1, archiver_fd = -1;
struct strbuf buf = STRBUF_INIT;
char *cache_server_url = NULL, *shared_cache = NULL;
struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT;
int res, i;
struct archive_dir archive_dirs[] = {
{ ".git", 0 },
@@ -254,6 +289,13 @@ int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode)
get_version_info(&buf, 1);
strbuf_addf(&buf, "Repository root: %s\n", the_repository->worktree);
git_config_get_string("gvfs.cache-server", &cache_server_url);
git_config_get_string("gvfs.sharedCache", &shared_cache);
strbuf_addf(&buf, "Cache Server: %s\nLocal Cache: %s\n\n",
cache_server_url ? cache_server_url : "None",
shared_cache ? shared_cache : "None");
get_disk_info(&buf);
write_or_die(stdout_fd, buf.buf, buf.len);
strvec_pushf(&archiver_args,
@@ -284,6 +326,52 @@ int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode)
}
}
if (shared_cache) {
size_t path_len;
strbuf_reset(&buf);
strbuf_addf(&path, "%s/pack", shared_cache);
strbuf_reset(&buf);
strbuf_addstr(&buf, "--add-virtual-file=packs-cached.txt:");
dir_stats(&buf, path.buf);
strvec_push(&archiver_args, buf.buf);
strbuf_reset(&buf);
strbuf_addstr(&buf, "--add-virtual-file=objects-cached.txt:");
loose_objs_stats(&buf, shared_cache);
strvec_push(&archiver_args, buf.buf);
strbuf_reset(&path);
strbuf_addf(&path, "%s/info", shared_cache);
path_len = path.len;
if (is_directory(path.buf)) {
DIR *dir = opendir(path.buf);
struct dirent *e;
while ((e = readdir(dir))) {
if (!strcmp(".", e->d_name) || !strcmp("..", e->d_name))
continue;
if (e->d_type == DT_DIR)
continue;
strbuf_reset(&buf);
strbuf_addf(&buf, "--add-virtual-file=info/%s:", e->d_name);
strbuf_setlen(&path, path_len);
strbuf_addch(&path, '/');
strbuf_addstr(&path, e->d_name);
if (strbuf_read_file(&buf, path.buf, 0) < 0) {
res = error_errno(_("could not read '%s'"), path.buf);
goto diagnose_cleanup;
}
strvec_push(&archiver_args, buf.buf);
}
closedir(dir);
}
}
strvec_pushl(&archiver_args, "--prefix=",
oid_to_hex(the_hash_algo->empty_tree), "--", NULL);
@@ -311,6 +399,8 @@ diagnose_cleanup:
free(argv_copy);
strvec_clear(&archiver_args);
strbuf_release(&buf);
free(cache_server_url);
free(shared_cache);
return res;
}

dir.c (2 lines changed)
View file

@@ -3140,6 +3140,8 @@ static int cmp_icase(char a, char b)
{
if (a == b)
return 0;
if (is_dir_sep(a))
return is_dir_sep(b) ? 0 : -1;
if (ignore_case)
return toupper(a) - toupper(b);
return a - b;

View file

@@ -202,6 +202,12 @@
// [2] Documentation/technical/long-running-process-protocol.txt
// [3] See GIT_TRACE_PACKET
//
// endpoint
//
// Fetch the given endpoint from the main Git server (specifying
// `gvfs/config` as the endpoint is equivalent to the `config`
// command mentioned above).
//
//////////////////////////////////////////////////////////////////
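As a sketch, this new mode can be exercised directly (the remote URL is
invented for illustration; `vsts/info` is the endpoint that `scalar clone`
queries below to derive its cache key):

```
git gvfs-helper --remote https://example.visualstudio.com/repo \
	endpoint vsts/info
```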
#include "git-compat-util.h"
@@ -3121,18 +3127,20 @@ static void do_req__with_fallback(const char *url_component,
*
* Return server's response buffer. This is probably a raw JSON string.
*/
static void do__http_get__gvfs_config(struct gh__response_status *status,
struct strbuf *config_data)
static void do__http_get__simple_endpoint(struct gh__response_status *status,
struct strbuf *response,
const char *endpoint,
const char *tr2_label)
{
struct gh__request_params params = GH__REQUEST_PARAMS_INIT;
strbuf_addstr(&params.tr2_label, "GET/config");
strbuf_addstr(&params.tr2_label, tr2_label);
params.b_is_post = 0;
params.b_write_to_file = 0;
/* cache-servers do not handle gvfs/config REST calls */
params.b_permit_cache_server_if_defined = 0;
params.buffer = config_data;
params.buffer = response;
params.objects_mode = GH__OBJECTS_MODE__NONE;
params.object_count = 1; /* a bit of a lie */
@@ -3154,15 +3162,22 @@ static void do__http_get__gvfs_config(struct gh__response_status *status,
* see any need to report progress on the upload side of
* the GET. So just report progress on the download side.
*/
strbuf_addstr(&params.progress_base_phase3_msg,
"Receiving gvfs/config");
strbuf_addf(&params.progress_base_phase3_msg,
"Receiving %s", endpoint);
}
do_req__with_fallback("gvfs/config", &params, status);
do_req__with_fallback(endpoint, &params, status);
gh__request_params__release(&params);
}
static void do__http_get__gvfs_config(struct gh__response_status *status,
struct strbuf *config_data)
{
do__http_get__simple_endpoint(status, config_data, "gvfs/config",
"GET/config");
}
static void setup_gvfs_objects_progress(struct gh__request_params *params,
unsigned long num, unsigned long den)
{
@@ -3607,6 +3622,35 @@ static enum gh__error_code do_sub_cmd__config(int argc, const char **argv)
return ec;
}
static enum gh__error_code do_sub_cmd__endpoint(int argc, const char **argv)
{
struct gh__response_status status = GH__RESPONSE_STATUS_INIT;
struct strbuf data = STRBUF_INIT;
enum gh__error_code ec = GH__ERROR_CODE__OK;
const char *endpoint;
if (argc != 2)
return GH__ERROR_CODE__ERROR;
endpoint = argv[1];
trace2_cmd_mode(endpoint);
finish_init(0);
do__http_get__simple_endpoint(&status, &data, endpoint, endpoint);
ec = status.ec;
if (ec == GH__ERROR_CODE__OK)
printf("%s\n", data.buf);
else
error("config: %s", status.error_message.buf);
gh__response_status__release(&status);
strbuf_release(&data);
return ec;
}
/*
* Read a list of objects from stdin and fetch them as a series of
* single object HTTP GET requests.
@@ -4098,6 +4142,9 @@ static enum gh__error_code do_sub_cmd(int argc, const char **argv)
if (!strcmp(argv[0], "config"))
return do_sub_cmd__config(argc, argv);
if (!strcmp(argv[0], "endpoint"))
return do_sub_cmd__endpoint(argc, argv);
if (!strcmp(argv[0], "prefetch"))
return do_sub_cmd__prefetch(argc, argv);

json-parser.c (new file, 183 lines)
View file

@@ -0,0 +1,183 @@
#include "git-compat-util.h"
#include "hex.h"
#include "json-parser.h"
static int reset_iterator(struct json_iterator *it)
{
it->p = it->begin = it->json;
strbuf_release(&it->key);
strbuf_release(&it->string_value);
it->type = JSON_NULL;
return -1;
}
static int parse_json_string(struct json_iterator *it, struct strbuf *out)
{
const char *begin = it->p;
if (*(it->p)++ != '"')
return error("expected double quote: '%.*s'", 5, begin),
reset_iterator(it);
strbuf_reset(&it->string_value);
#define APPEND(c) strbuf_addch(out, c)
while (*it->p != '"') {
switch (*it->p) {
case '\0':
return error("incomplete string: '%s'", begin),
reset_iterator(it);
case '\\':
it->p++;
if (*it->p == '\\' || *it->p == '"')
APPEND(*it->p);
else if (*it->p == 'b')
APPEND(8);
else if (*it->p == 't')
APPEND(9);
else if (*it->p == 'n')
APPEND(10);
else if (*it->p == 'f')
APPEND(12);
else if (*it->p == 'r')
APPEND(13);
else if (*it->p == 'u') {
unsigned char binary[2];
int i;
if (hex_to_bytes(binary, it->p + 1, 2) < 0)
return error("invalid: '%.*s'",
6, it->p - 1),
reset_iterator(it);
it->p += 4;
i = (binary[0] << 8) | binary[1];
if (i < 0x80)
APPEND(i);
else if (i < 0x0800) {
APPEND(0xc0 | ((i >> 6) & 0x1f));
APPEND(0x80 | (i & 0x3f));
} else if (i < 0x10000) {
APPEND(0xe0 | ((i >> 12) & 0x0f));
APPEND(0x80 | ((i >> 6) & 0x3f));
APPEND(0x80 | (i & 0x3f));
} else {
APPEND(0xf0 | ((i >> 18) & 0x07));
APPEND(0x80 | ((i >> 12) & 0x3f));
APPEND(0x80 | ((i >> 6) & 0x3f));
APPEND(0x80 | (i & 0x3f));
}
}
break;
default:
APPEND(*it->p);
}
it->p++;
}
it->end = it->p++;
return 0;
}
static void skip_whitespace(struct json_iterator *it)
{
while (isspace(*it->p))
it->p++;
}
int iterate_json(struct json_iterator *it)
{
skip_whitespace(it);
it->begin = it->p;
switch (*it->p) {
case '\0':
return reset_iterator(it), 0;
case 'n':
if (!starts_with(it->p, "null"))
return error("unexpected value: %.*s", 4, it->p),
reset_iterator(it);
it->type = JSON_NULL;
it->end = it->p = it->begin + 4;
break;
case 't':
if (!starts_with(it->p, "true"))
return error("unexpected value: %.*s", 4, it->p),
reset_iterator(it);
it->type = JSON_TRUE;
it->end = it->p = it->begin + 4;
break;
case 'f':
if (!starts_with(it->p, "false"))
return error("unexpected value: %.*s", 5, it->p),
reset_iterator(it);
it->type = JSON_FALSE;
it->end = it->p = it->begin + 5;
break;
case '-': case '.':
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
it->type = JSON_NUMBER;
it->end = it->p = it->begin + strspn(it->p, "-.0123456789");
break;
case '"':
it->type = JSON_STRING;
if (parse_json_string(it, &it->string_value) < 0)
return -1;
break;
case '[': {
const char *save = it->begin;
size_t key_offset = it->key.len;
int i = 0, res;
for (it->p++, skip_whitespace(it); *it->p != ']'; i++) {
strbuf_addf(&it->key, "[%d]", i);
if ((res = iterate_json(it)))
return reset_iterator(it), res;
strbuf_setlen(&it->key, key_offset);
skip_whitespace(it);
if (*it->p == ',')
it->p++;
}
it->type = JSON_ARRAY;
it->begin = save;
it->end = it->p;
it->p++;
break;
}
case '{': {
const char *save = it->begin;
size_t key_offset = it->key.len;
int res;
strbuf_addch(&it->key, '.');
for (it->p++, skip_whitespace(it); *it->p != '}'; ) {
strbuf_setlen(&it->key, key_offset + 1);
if (parse_json_string(it, &it->key) < 0)
return -1;
skip_whitespace(it);
if (*(it->p)++ != ':')
return error("expected colon: %.*s", 5, it->p),
reset_iterator(it);
if ((res = iterate_json(it)))
return res;
skip_whitespace(it);
if (*it->p == ',')
it->p++;
}
strbuf_setlen(&it->key, key_offset);
it->type = JSON_OBJECT;
it->begin = save;
it->end = it->p;
it->p++;
break;
}
}
return it->fn(it);
}

json-parser.h (new file, 29 lines)
View file

@@ -0,0 +1,29 @@
#ifndef JSON_PARSER_H
#define JSON_PARSER_H
#include "strbuf.h"
struct json_iterator {
const char *json, *p, *begin, *end;
struct strbuf key, string_value;
enum {
JSON_NULL = 0,
JSON_FALSE,
JSON_TRUE,
JSON_NUMBER,
JSON_STRING,
JSON_ARRAY,
JSON_OBJECT
} type;
int (*fn)(struct json_iterator *it);
void *fn_data;
};
#define JSON_ITERATOR_INIT(json_, fn_, fn_data_) { \
.json = json_, .p = json_, \
.key = STRBUF_INIT, .string_value = STRBUF_INIT, \
.fn = fn_, .fn_data = fn_data_ \
}
int iterate_json(struct json_iterator *it);
#endif

scalar.c (448 lines changed)
View file

@@ -5,6 +5,7 @@
#include "git-compat-util.h"
#include "abspath.h"
#include "gettext.h"
#include "hex.h"
#include "parse-options.h"
#include "config.h"
#include "run-command.h"
@@ -13,10 +14,18 @@
#include "fsmonitor-settings.h"
#include "refs.h"
#include "dir.h"
#include "object-file.h"
#include "packfile.h"
#include "help.h"
#include "setup.h"
#include "wrapper.h"
#include "trace2.h"
#include "json-parser.h"
#include "remote.h"
static int is_unattended(void) {
return git_env_bool("Scalar_UNATTENDED", 0);
}
static void setup_enlistment_directory(int argc, const char **argv,
const char * const *usagestr,
@@ -102,6 +111,19 @@ static int run_git(const char *arg, ...)
return res;
}
static const char *ensure_absolute_path(const char *path, char **absolute)
{
struct strbuf buf = STRBUF_INIT;
if (is_absolute_path(path))
return path;
strbuf_realpath_forgiving(&buf, path, 1);
free(*absolute);
*absolute = strbuf_detach(&buf, NULL);
return *absolute;
}
struct scalar_config {
const char *key;
const char *value;
@@ -140,23 +162,7 @@ static int set_recommended_config(int reconfigure)
{ "core.FSCache", "true", 1 },
{ "core.multiPackIndex", "true", 1 },
{ "core.preloadIndex", "true", 1 },
#ifndef WIN32
{ "core.untrackedCache", "true", 1 },
#else
/*
* Unfortunately, Scalar's Functional Tests demonstrated
* that the untracked cache feature is unreliable on Windows
* (which is a bummer because that platform would benefit the
* most from it). For some reason, freshly created files seem
* not to update the directory's `lastModified` time
* immediately, but the untracked cache would need to rely on
* that.
*
* Therefore, with a sad heart, we disable this very useful
* feature on Windows.
*/
{ "core.untrackedCache", "false", 1 },
#endif
{ "core.logAllRefUpdates", "true", 1 },
{ "credential.https://dev.azure.com.useHttpPath", "true", 1 },
{ "credential.validate", "false", 1 }, /* GCM4W-only */
@@ -334,6 +340,210 @@ static int set_config(const char *fmt, ...)
return res;
}
static int list_cache_server_urls(struct json_iterator *it)
{
const char *p;
char *q;
long l;
if (it->type == JSON_STRING &&
skip_iprefix(it->key.buf, ".CacheServers[", &p) &&
(l = strtol(p, &q, 10)) >= 0 && p != q &&
!strcasecmp(q, "].Url"))
printf("#%ld: %s\n", l, it->string_value.buf);
return 0;
}
/* Find N for which .CacheServers[N].GlobalDefault == true */
static int get_cache_server_index(struct json_iterator *it)
{
const char *p;
char *q;
long l;
if (it->type == JSON_TRUE &&
skip_iprefix(it->key.buf, ".CacheServers[", &p) &&
(l = strtol(p, &q, 10)) >= 0 && p != q &&
!strcasecmp(q, "].GlobalDefault")) {
*(long *)it->fn_data = l;
return 1;
}
return 0;
}
struct cache_server_url_data {
char *key, *url;
};
/* Get .CacheServers[N].Url */
static int get_cache_server_url(struct json_iterator *it)
{
struct cache_server_url_data *data = it->fn_data;
if (it->type == JSON_STRING &&
!strcasecmp(data->key, it->key.buf)) {
data->url = strbuf_detach(&it->string_value, NULL);
return 1;
}
return 0;
}
static int can_url_support_gvfs(const char *url)
{
return starts_with(url, "https://") ||
(git_env_bool("GIT_TEST_ALLOW_GVFS_VIA_HTTP", 0) &&
starts_with(url, "http://"));
}
/*
* If `cache_server_url` is `NULL`, print the list to `stdout`.
*
* Since `gvfs-helper` requires a Git directory, this _must_ be run in
* a worktree.
*/
static int supports_gvfs_protocol(const char *url, char **cache_server_url)
{
struct child_process cp = CHILD_PROCESS_INIT;
struct strbuf out = STRBUF_INIT;
/*
* The GVFS protocol is only supported via https://; For testing, we
* also allow http://.
*/
if (!can_url_support_gvfs(url))
return 0;
cp.git_cmd = 1;
strvec_pushl(&cp.args, "gvfs-helper", "--remote", url, "config", NULL);
if (!pipe_command(&cp, NULL, 0, &out, 512, NULL, 0)) {
long l = 0;
struct json_iterator it =
JSON_ITERATOR_INIT(out.buf, get_cache_server_index, &l);
struct cache_server_url_data data = { .url = NULL };
if (!cache_server_url) {
it.fn = list_cache_server_urls;
if (iterate_json(&it) < 0) {
strbuf_release(&out);
return error("JSON parse error");
}
strbuf_release(&out);
return 0;
}
if (iterate_json(&it) < 0) {
strbuf_release(&out);
return error("JSON parse error");
}
data.key = xstrfmt(".CacheServers[%ld].Url", l);
it.fn = get_cache_server_url;
it.fn_data = &data;
if (iterate_json(&it) < 0) {
strbuf_release(&out);
return error("JSON parse error");
}
*cache_server_url = data.url;
free(data.key);
return 1;
}
strbuf_release(&out);
/* error out quietly, unless we wanted to list URLs */
return cache_server_url ?
0 : error(_("Could not access gvfs/config endpoint"));
}
static char *default_cache_root(const char *root)
{
const char *env;
if (is_unattended())
return xstrfmt("%s/.scalarCache", root);
#ifdef WIN32
(void)env;
return xstrfmt("%.*s.scalarCache", offset_1st_component(root), root);
#elif defined(__APPLE__)
if ((env = getenv("HOME")) && *env)
return xstrfmt("%s/.scalarCache", env);
return NULL;
#else
if ((env = getenv("XDG_CACHE_HOME")) && *env)
return xstrfmt("%s/scalar", env);
if ((env = getenv("HOME")) && *env)
return xstrfmt("%s/.cache/scalar", env);
return NULL;
#endif
}
static int get_repository_id(struct json_iterator *it)
{
if (it->type == JSON_STRING &&
!strcasecmp(".repository.id", it->key.buf)) {
*(char **)it->fn_data = strbuf_detach(&it->string_value, NULL);
return 1;
}
return 0;
}
/* This must run in a worktree; gvfs-helper requires a Git repository */
static char *get_cache_key(const char *url)
{
struct child_process cp = CHILD_PROCESS_INIT;
struct strbuf out = STRBUF_INIT;
char *cache_key = NULL;
/*
* The GVFS protocol is only supported via https://; For testing, we
* also allow http://.
*/
if (!git_env_bool("SCALAR_TEST_SKIP_VSTS_INFO", 0) &&
can_url_support_gvfs(url)) {
cp.git_cmd = 1;
strvec_pushl(&cp.args, "gvfs-helper", "--remote", url,
"endpoint", "vsts/info", NULL);
if (!pipe_command(&cp, NULL, 0, &out, 512, NULL, 0)) {
char *id = NULL;
struct json_iterator it =
JSON_ITERATOR_INIT(out.buf, get_repository_id,
&id);
if (iterate_json(&it) < 0)
warning("JSON parse error (%s)", out.buf);
else if (id)
cache_key = xstrfmt("id_%s", id);
free(id);
}
}
if (!cache_key) {
struct strbuf downcased = STRBUF_INIT;
int hash_algo_index = hash_algo_by_name("sha1");
const struct git_hash_algo *hash_algo = hash_algo_index < 0 ?
the_hash_algo : &hash_algos[hash_algo_index];
git_hash_ctx ctx;
unsigned char hash[GIT_MAX_RAWSZ];
strbuf_addstr(&downcased, url);
strbuf_tolower(&downcased);
hash_algo->init_fn(&ctx);
hash_algo->update_fn(&ctx, downcased.buf, downcased.len);
hash_algo->final_fn(hash, &ctx);
strbuf_release(&downcased);
cache_key = xstrfmt("url_%s",
hash_to_hex_algop(hash, hash_algo));
}
strbuf_release(&out);
return cache_key;
}
static char *remote_default_branch(const char *url)
{
struct child_process cp = CHILD_PROCESS_INIT;
@@ -431,10 +641,49 @@ void load_builtin_commands(const char *prefix, struct cmdnames *cmds)
die("not implemented");
}
static int init_shared_object_cache(const char *url,
const char *local_cache_root)
{
struct strbuf buf = STRBUF_INIT;
int res = 0;
char *cache_key = NULL, *shared_cache_path = NULL;
if (!(cache_key = get_cache_key(url))) {
res = error(_("could not determine cache key for '%s'"), url);
goto cleanup;
}
shared_cache_path = xstrfmt("%s/%s", local_cache_root, cache_key);
if (set_config("gvfs.sharedCache=%s", shared_cache_path)) {
res = error(_("could not configure shared cache"));
goto cleanup;
}
strbuf_addf(&buf, "%s/pack", shared_cache_path);
switch (safe_create_leading_directories(buf.buf)) {
case SCLD_OK: case SCLD_EXISTS:
break; /* okay */
default:
res = error_errno(_("could not initialize '%s'"), buf.buf);
goto cleanup;
}
write_file(git_path("objects/info/alternates"), "%s\n", shared_cache_path);
cleanup:
strbuf_release(&buf);
free(shared_cache_path);
free(cache_key);
return res;
}
static int cmd_clone(int argc, const char **argv)
{
int dummy = 0;
const char *branch = NULL;
int full_clone = 0, single_branch = 0, show_progress = isatty(2);
const char *cache_server_url = NULL, *local_cache_root = NULL;
char *default_cache_server_url = NULL, *local_cache_root_abs = NULL;
struct option clone_options[] = {
OPT_STRING('b', "branch", &branch, N_("<branch>"),
N_("branch to checkout after clone")),
@@ -443,6 +692,14 @@ static int cmd_clone(int argc, const char **argv)
OPT_BOOL(0, "single-branch", &single_branch,
N_("only download metadata for the branch that will "
"be checked out")),
OPT_STRING(0, "cache-server-url", &cache_server_url,
N_("<url>"),
N_("the url or friendly name of the cache server")),
OPT_STRING(0, "local-cache-path", &local_cache_root,
N_("<path>"),
N_("override the path for the local Scalar cache")),
OPT_HIDDEN_BOOL(0, "no-fetch-commits-and-trees",
&dummy, N_("no longer used")),
OPT_END(),
};
const char * const clone_usage[] = {
@@ -453,6 +710,7 @@ static int cmd_clone(int argc, const char **argv)
char *enlistment = NULL, *dir = NULL;
struct strbuf buf = STRBUF_INIT;
int res;
int gvfs_protocol;
argc = parse_options(argc, argv, NULL, clone_options, clone_usage, 0);
@@ -482,8 +740,20 @@ static int cmd_clone(int argc, const char **argv)
if (is_directory(enlistment))
die(_("directory '%s' exists already"), enlistment);
ensure_absolute_path(enlistment, &enlistment);
dir = xstrfmt("%s/src", enlistment);
if (!local_cache_root)
local_cache_root = local_cache_root_abs =
default_cache_root(enlistment);
else
local_cache_root = ensure_absolute_path(local_cache_root,
&local_cache_root_abs);
if (!local_cache_root)
die(_("could not determine local cache root"));
strbuf_reset(&buf);
if (branch)
strbuf_addf(&buf, "init.defaultBranch=%s", branch);
@@ -503,8 +773,28 @@ static int cmd_clone(int argc, const char **argv)
setup_git_directory();
git_config(git_default_config, NULL);
/*
* This `dir_inside_of()` call relies on git_config() having parsed the
* newly-initialized repository config's `core.ignoreCase` value.
*/
if (dir_inside_of(local_cache_root, dir) >= 0) {
struct strbuf path = STRBUF_INIT;
strbuf_addstr(&path, enlistment);
if (chdir("../..") < 0 ||
remove_dir_recursively(&path, 0) < 0)
die(_("'--local-cache-path' cannot be inside the src "
"folder;\nCould not remove '%s'"), enlistment);
die(_("'--local-cache-path' cannot be inside the src folder"));
}
/* common-main already logs `argv` */
trace2_def_repo(the_repository);
trace2_data_intmax("scalar", the_repository, "unattended",
is_unattended());
if (!branch && !(branch = remote_default_branch(url))) {
res = error(_("failed to get default branch for '%s'"), url);
@@ -515,13 +805,48 @@ static int cmd_clone(int argc, const char **argv)
set_config("remote.origin.fetch="
"+refs/heads/%s:refs/remotes/origin/%s",
single_branch ? branch : "*",
single_branch ? branch : "*") ||
set_config("remote.origin.promisor=true") ||
set_config("remote.origin.partialCloneFilter=blob:none")) {
single_branch ? branch : "*")) {
res = error(_("could not configure remote in '%s'"), dir);
goto cleanup;
}
if (set_config("credential.https://dev.azure.com.useHttpPath=true")) {
res = error(_("could not configure credential.useHttpPath"));
goto cleanup;
}
gvfs_protocol = cache_server_url ||
supports_gvfs_protocol(url, &default_cache_server_url);
if (gvfs_protocol) {
if ((res = init_shared_object_cache(url, local_cache_root)))
goto cleanup;
if (!cache_server_url)
cache_server_url = default_cache_server_url;
if (set_config("core.useGVFSHelper=true") ||
set_config("core.gvfs=150") ||
set_config("http.version=HTTP/1.1")) {
res = error(_("could not turn on GVFS helper"));
goto cleanup;
}
if (cache_server_url &&
set_config("gvfs.cache-server=%s", cache_server_url)) {
res = error(_("could not configure cache server"));
goto cleanup;
}
if (cache_server_url)
fprintf(stderr, "Cache server URL: %s\n",
cache_server_url);
} else {
if (set_config("core.useGVFSHelper=false") ||
set_config("remote.origin.promisor=true") ||
set_config("remote.origin.partialCloneFilter=blob:none")) {
res = error(_("could not configure partial clone in "
"'%s'"), dir);
goto cleanup;
}
}
if (!full_clone &&
(res = run_git("sparse-checkout", "init", "--cone", NULL)))
goto cleanup;
@@ -532,6 +857,11 @@ static int cmd_clone(int argc, const char **argv)
if ((res = run_git("fetch", "--quiet",
show_progress ? "--progress" : "--no-progress",
"origin", NULL))) {
if (gvfs_protocol) {
res = error(_("failed to prefetch commits and trees"));
goto cleanup;
}
warning(_("partial clone failed; attempting full clone"));
if (set_config("remote.origin.promisor") ||
@@ -564,6 +894,8 @@ cleanup:
free(enlistment);
free(dir);
strbuf_release(&buf);
free(default_cache_server_url);
free(local_cache_root_abs);
return res;
}
@@ -916,6 +1248,77 @@ static int cmd_version(int argc, const char **argv)
return 0;
}
static int cmd_cache_server(int argc, const char **argv)
{
int get = 0;
char *set = NULL, *list = NULL;
const char *default_remote = "(default)";
struct option options[] = {
OPT_BOOL(0, "get", &get,
N_("get the configured cache-server URL")),
OPT_STRING(0, "set", &set, N_("URL"),
N_("configure the cache-server to use")),
{ OPTION_STRING, 0, "list", &list, N_("remote"),
N_("list the possible cache-server URLs"),
PARSE_OPT_OPTARG, NULL, (intptr_t) default_remote },
OPT_END(),
};
const char * const usage[] = {
N_("scalar cache_server "
"[--get | --set <url> | --list [<remote>]] [<enlistment>]"),
NULL
};
int res = 0;
argc = parse_options(argc, argv, NULL, options,
usage, 0);
if (get + !!set + !!list > 1)
usage_msg_opt(_("--get/--set/--list are mutually exclusive"),
usage, options);
setup_enlistment_directory(argc, argv, usage, options, NULL);
if (list) {
const char *name = list, *url = list;
if (list == default_remote)
list = NULL;
if (!list || !strchr(list, '/')) {
struct remote *remote;
/* Look up remote */
remote = remote_get(list);
if (!remote) {
error("no such remote: '%s'", name);
free(list);
return 1;
}
if (!remote->url) {
free(list);
return error(_("remote '%s' has no URLs"),
name);
}
url = remote->url[0];
}
res = supports_gvfs_protocol(url, NULL);
free(list);
} else if (set) {
res = set_config("gvfs.cache-server=%s", set);
free(set);
} else {
char *url = NULL;
printf("Using cache server: %s\n",
git_config_get_string("gvfs.cache-server", &url) ?
"(undefined)" : url);
free(url);
}
return !!res;
}
static struct {
const char *name;
int (*fn)(int, const char **);
@@ -930,6 +1333,7 @@ static struct {
{ "help", cmd_help },
{ "version", cmd_version },
{ "diagnose", cmd_diagnose },
{ "cache-server", cmd_cache_server },
{ NULL, NULL},
};
@@ -938,6 +1342,12 @@ int cmd_main(int argc, const char **argv)
struct strbuf scalar_usage = STRBUF_INIT;
int i;
if (is_unattended()) {
setenv("GIT_ASKPASS", "", 0);
setenv("GIT_TERMINAL_PROMPT", "false", 0);
git_config_push_parameter("credential.interactive=never");
}
while (argc > 1 && *argv[1] == '-') {
if (!strcmp(argv[1], "-C")) {
if (argc < 3)

View file

@@ -1,5 +1,6 @@
#include "git-compat-util.h"
#include "environment.h"
#include "gettext.h"
#include "hex.h"
#include "alloc.h"
#include "setup.h"
@@ -1490,6 +1491,8 @@ done:
static enum worker_result dispatch(struct req *req)
{
static regex_t *smart_http_regex;
static int initialized;
const char *method;
enum worker_result wr;
@@ -1538,6 +1541,53 @@ static enum worker_result dispatch(struct req *req)
return do__gvfs_prefetch__get(req);
}
if (!initialized) {
smart_http_regex = xmalloc(sizeof(*smart_http_regex));
if (regcomp(smart_http_regex, "^/(HEAD|info/refs|"
"objects/info/[^/]+|git-(upload|receive)-pack)$",
REG_EXTENDED)) {
warning("could not compile smart HTTP regex");
smart_http_regex = NULL;
}
initialized = 1;
}
if (smart_http_regex &&
!regexec(smart_http_regex, req->uri_base.buf, 0, NULL, 0)) {
const char *ok = "HTTP/1.1 200 OK\r\n";
struct child_process cp = CHILD_PROCESS_INIT;
int i, res;
if (write(1, ok, strlen(ok)) < 0)
return error(_("could not send '%s'"), ok);
strvec_pushf(&cp.env, "REQUEST_METHOD=%s", method);
strvec_pushf(&cp.env, "PATH_TRANSLATED=%s",
req->uri_base.buf);
/* Prevent MSYS2 from "converting to a Windows path" */
strvec_pushf(&cp.env,
"MSYS2_ENV_CONV_EXCL=PATH_TRANSLATED");
strvec_push(&cp.env, "SERVER_PROTOCOL=HTTP/1.1");
if (req->quest_args.len)
strvec_pushf(&cp.env, "QUERY_STRING=%s",
req->quest_args.buf);
for (i = 0; i < req->header_list.nr; i++) {
const char *header = req->header_list.items[i].string;
if (!strncasecmp("Content-Type: ", header, 14))
strvec_pushf(&cp.env, "CONTENT_TYPE=%s",
header + 14);
else if (!strncasecmp("Content-Length: ", header, 16))
strvec_pushf(&cp.env, "CONTENT_LENGTH=%s",
header + 16);
}
cp.git_cmd = 1;
strvec_push(&cp.args, "http-backend");
res = run_command(&cp);
close(1);
close(0);
return !!res;
}
return send_http_error(1, 501, "Not Implemented", -1,
WR_OK | WR_HANGUP);
}

View file

@@ -7,6 +7,13 @@ test_description='test the `scalar` command'
GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab cron.txt,launchctl:true,schtasks:true"
export GIT_TEST_MAINT_SCHEDULER
# Do not write any files outside the trash directory
Scalar_UNATTENDED=1
export Scalar_UNATTENDED
GIT_ASKPASS=true
export GIT_ASKPASS
test_expect_success 'scalar shows a usage' '
test_expect_code 129 scalar -h
'
@@ -231,4 +238,157 @@ test_expect_success UNZIP 'scalar diagnose' '
grep "^Total: [1-9]" out
'
GIT_TEST_ALLOW_GVFS_VIA_HTTP=1
export GIT_TEST_ALLOW_GVFS_VIA_HTTP
test_set_port GIT_TEST_GVFS_PROTOCOL_PORT
HOST_PORT=127.0.0.1:$GIT_TEST_GVFS_PROTOCOL_PORT
PID_FILE="$(pwd)"/pid-file.pid
SERVER_LOG="$(pwd)"/OUT.server.log
test_atexit '
test -f "$PID_FILE" || return 0
# The server will shut down automatically when we delete the pid-file.
rm -f "$PID_FILE"
test -z "$verbose$verbose_log" || {
echo "server log:"
cat "$SERVER_LOG"
}
# Give it a few seconds to shut down (mainly to completely release the
# port before the next test starts another instance and attempts to
# bind to it).
for k in $(test_seq 5)
do
grep -q "Starting graceful shutdown" "$SERVER_LOG" &&
return 0 ||
sleep 1
done
echo "stop_gvfs_protocol_server: timeout waiting for server shutdown"
return 1
'
start_gvfs_enabled_http_server () {
GIT_HTTP_EXPORT_ALL=1 \
test-gvfs-protocol --verbose \
--listen=127.0.0.1 \
--port=$GIT_TEST_GVFS_PROTOCOL_PORT \
--reuseaddr \
--pid-file="$PID_FILE" \
2>"$SERVER_LOG" &
for k in 0 1 2 3 4
do
if test -f "$PID_FILE"
then
return 0
fi
sleep 1
done
return 1
}
test_expect_success 'start GVFS-enabled server' '
git config uploadPack.allowFilter false &&
git config uploadPack.allowAnySHA1InWant false &&
start_gvfs_enabled_http_server
'
test_expect_success '`scalar clone` with GVFS-enabled server' '
: the fake cache server requires fake authentication &&
git config --global core.askPass true &&
scalar clone --single-branch -- http://$HOST_PORT/ using-gvfs &&
: verify that the shared cache has been configured &&
cache_key="url_$(printf "%s" http://$HOST_PORT/ |
tr A-Z a-z |
test-tool sha1)" &&
echo "$(pwd)/using-gvfs/.scalarCache/$cache_key" >expect &&
git -C using-gvfs/src config gvfs.sharedCache >actual &&
test_cmp expect actual &&
second=$(git rev-parse --verify second:second.t) &&
(
cd using-gvfs/src &&
test_path_is_missing 1/2 &&
GIT_TRACE=$PWD/trace.txt git cat-file blob $second >actual &&
: verify that the gvfs-helper was invoked to fetch it &&
test_i18ngrep gvfs-helper trace.txt &&
echo "second" >expect &&
test_cmp expect actual
)
'
test_expect_success '`scalar register` parallel to worktree is unsupported' '
git init test-repo/src &&
mkdir -p test-repo/out &&
: parallel to worktree is unsupported &&
test_must_fail env GIT_CEILING_DIRECTORIES="$(pwd)" \
scalar register test-repo/out &&
test_must_fail git config --get --global --fixed-value \
maintenance.repo "$(pwd)/test-repo/src" &&
scalar list >scalar.repos &&
! grep -F "$(pwd)/test-repo/src" scalar.repos &&
: at enlistment root, i.e. parent of repository, is supported &&
GIT_CEILING_DIRECTORIES="$(pwd)" scalar register test-repo &&
git config --get --global --fixed-value \
maintenance.repo "$(pwd)/test-repo/src" &&
scalar list >scalar.repos &&
grep -F "$(pwd)/test-repo/src" scalar.repos &&
: scalar delete properly unregisters enlistment &&
scalar delete test-repo &&
test_must_fail git config --get --global --fixed-value \
maintenance.repo "$(pwd)/test-repo/src" &&
scalar list >scalar.repos &&
! grep -F "$(pwd)/test-repo/src" scalar.repos
'
test_expect_success '`scalar register` & `unregister` with existing repo' '
git init existing &&
scalar register existing &&
git config --get --global --fixed-value \
maintenance.repo "$(pwd)/existing" &&
scalar list >scalar.repos &&
grep -F "$(pwd)/existing" scalar.repos &&
scalar unregister existing &&
test_must_fail git config --get --global --fixed-value \
maintenance.repo "$(pwd)/existing" &&
scalar list >scalar.repos &&
! grep -F "$(pwd)/existing" scalar.repos
'
test_expect_success '`scalar unregister` with existing repo, deleted .git' '
scalar register existing &&
rm -rf existing/.git &&
scalar unregister existing &&
test_must_fail git config --get --global --fixed-value \
maintenance.repo "$(pwd)/existing" &&
scalar list >scalar.repos &&
! grep -F "$(pwd)/existing" scalar.repos
'
test_expect_success '`scalar register` existing repo with `src` folder' '
git init existing &&
mkdir -p existing/src &&
scalar register existing/src &&
scalar list >scalar.repos &&
grep -F "$(pwd)/existing" scalar.repos &&
scalar unregister existing &&
scalar list >scalar.repos &&
! grep -F "$(pwd)/existing" scalar.repos
'
test_expect_success '`scalar delete` with existing repo' '
git init existing &&
scalar register existing &&
scalar delete existing &&
test_path_is_missing existing
'
test_done