// TODO Write a man page. Here are some notes for dogfooding. // TODO // // Usage: git gvfs-helper [] [] // // : // // --remote= // defaults to "origin" // // --fallback // boolean. defaults to off // // When a fetch from the cache-server fails, automatically // fallback to the main Git server. This option has no effect // if no cache-server is defined. // // --cache-server= // defaults to "verify" // // verify := lookup the set of defined cache-servers using // "gvfs/config" and confirm that the selected // cache-server is well-known. Silently disable the // cache-server if not. (See security notes later.) // // error := verify cache-server and abort if not well-known. // // trust := do not verify cache-server. just use it, if set. // // disable := disable the cache-server and always use the main // Git server. // // --shared-cache= // // A relative or absolute pathname to the ODB directory to store // fetched objects. // // If this option is not specified, we default to the value // in the "gvfs.sharedcache" config setting and then to the // local ".git/objects" directory. // // : // // config // // Fetch the "gvfs/config" string from the main Git server. // (The cache-server setting is ignored because cache-servers // do not support this REST API.) // // get // // Fetch 1 or more objects one at a time using a "/gvfs/objects" // GET request. // // If a cache-server is configured, // try it first. Optionally fallback to the main Git server. // // The set of objects is given on stdin and is assumed to be // a list of , one per line. // // : // // --max-retries= // defaults to "6" // // Number of retries after transient network errors. // Set to zero to disable such retries. // // post // // Fetch 1 or more objects in bulk using a "/gvfs/objects" POST // request. // // If a cache-server is configured, // try it first. Optionally fallback to the main Git server. // // The set of objects is given on stdin and is assumed to be // a list of , one per line. 
// // : // // --block-size= // defaults to "4000" // // Request objects from server in batches of at // most n objects (not bytes). // // --depth= // defaults to "1" // // --max-retries= // defaults to "6" // // Number of retries after transient network errors. // Set to zero to disable such retries. // // prefetch // // Use "/gvfs/prefetch" REST API to fetch 1 or more commits-and-trees // prefetch packs from the server. // // : // // --since= // defaults to "0" // // Time in seconds since the epoch. If omitted or // zero, the timestamp from the newest prefetch // packfile found in the shared-cache ODB is used. // (This is based upon the packfile name, not the // mtime.) // // The GVFS Protocol defines this value as a way to // request cached packfiles NEWER THAN this timestamp. // // --max-retries= // defaults to "6" // // Number of retries after transient network errors. // Set to zero to disable such retries. // // server // // Interactive/sub-process mode. Listen for a series of commands // and data on stdin and return results on stdout. This command // uses pkt-line format [1] and implements the long-running process // protocol [2] to communicate with the foreground/parent process. // // : // // --block-size= // defaults to "4000" // // Request objects from server in batches of at // most n objects (not bytes) when using POST // requests. // // --depth= // defaults to "1" // // --max-retries= // defaults to "6" // // Number of retries after transient network errors. // Set to zero to disable such retries. // // Interactive verb: objects.get // // Fetch 1 or more objects, one at a time, using a // "/gvfs/objects" GET requests. // // Each object will be created as a loose object in the ODB. // // Create 1 or more loose objects in the shared-cache ODB. // (The pathname of the selected ODB is reported at the // beginning of the response; this should match the pathname // given on the command line). // // git> objects.get // git> // git> // git> ... 
//            git> <oid>
//            git> 0000
//
//            git< odb <directory>
//            git< loose <oid>
//            git< loose <oid>
//            git< ...
//            git< loose <oid>
//            git< ok | partial | error <message>
//            git< 0000
//
//     Interactive verb: objects.post
//
//            Fetch 1 or more objects, in bulk, using one or more
//            "/gvfs/objects" POST requests.
//
//            Create 1 or more loose objects and/or packfiles in the
//            shared-cache ODB.  A POST is allowed to respond with
//            either loose or packed objects.
//
//            git> objects.post
//            git> <oid>
//            git> <oid>
//            git> ...
//            git> <oid>
//            git> 0000
//
//            git< odb <directory>
//            git< loose <oid> | packfile <filename>
//            git< loose <oid> | packfile <filename>
//            git< ...
//            git< loose <oid> | packfile <filename>
//            git< ok | partial | error <message>
//            git< 0000
//
//     Interactive verb: objects.prefetch
//
//            Fetch 1 or more prefetch packs using a "/gvfs/prefetch"
//            request.
//
//            git> objects.prefetch
//            git> <timestamp> // optional
//            git> 0000
//
//            git< odb <directory>
//            git< packfile <filename>
//            git< packfile <filename>
//            git< ...
//            git< packfile <filename>
//            git< ok | error <message>
//            git< 0000
//
//            If a cache-server is configured, try it first.
//            Optionally fallback to the main Git server.
//
//            [1] Documentation/technical/protocol-common.txt
//            [2] Documentation/technical/long-running-process-protocol.txt
//            [3] See GIT_TRACE_PACKET
//
//     endpoint
//
//            Fetch the given endpoint from the main Git server (specifying
//            `gvfs/config` as endpoint is equivalent to the `config`
//            command mentioned above).
// ////////////////////////////////////////////////////////////////// #include "git-compat-util.h" #include "git-curl-compat.h" #include "environment.h" #include "hex.h" #include "setup.h" #include "config.h" #include "remote.h" #include "connect.h" #include "strbuf.h" #include "walker.h" #include "http.h" #include "exec-cmd.h" #include "run-command.h" #include "pkt-line.h" #include "string-list.h" #include "sideband.h" #include "strvec.h" #include "credential.h" #include "oid-array.h" #include "send-pack.h" #include "protocol.h" #include "quote.h" #include "transport.h" #include "parse-options.h" #include "object-file.h" #include "object-store.h" #include "json-writer.h" #include "tempfile.h" #include "oidset.h" #include "dir.h" #include "url.h" #include "abspath.h" #include "progress.h" #include "trace2.h" #include "wrapper.h" #include "packfile.h" #include "date.h" #define TR2_CAT "gvfs-helper" static const char * const main_usage[] = { N_("git gvfs-helper [] config []"), N_("git gvfs-helper [] get []"), N_("git gvfs-helper [] post []"), N_("git gvfs-helper [] prefetch []"), N_("git gvfs-helper [] server []"), NULL }; static const char *const objects_get_usage[] = { N_("git gvfs-helper [] get []"), NULL }; static const char *const objects_post_usage[] = { N_("git gvfs-helper [] post []"), NULL }; static const char *const prefetch_usage[] = { N_("git gvfs-helper [] prefetch []"), NULL }; static const char *const server_usage[] = { N_("git gvfs-helper [] server []"), NULL }; /* * "commitDepth" field in gvfs protocol */ #define GH__DEFAULT__OBJECTS_POST__COMMIT_DEPTH 1 /* * Chunk/block size in number of objects we request in each packfile */ #define GH__DEFAULT__OBJECTS_POST__BLOCK_SIZE 4000 /* * Retry attempts (after the initial request) for transient errors and 429s. */ #define GH__DEFAULT_MAX_RETRIES 6 /* * Maximum delay in seconds for transient (network) error retries. */ #define GH__DEFAULT_MAX_TRANSIENT_BACKOFF_SEC 300 /* * Our exit-codes. 
*/ enum gh__error_code { GH__ERROR_CODE__USAGE = -1, /* will be mapped to usage() */ GH__ERROR_CODE__OK = 0, GH__ERROR_CODE__ERROR = 1, /* unspecified */ GH__ERROR_CODE__CURL_ERROR = 2, GH__ERROR_CODE__HTTP_401 = 3, GH__ERROR_CODE__HTTP_404 = 4, GH__ERROR_CODE__HTTP_429 = 5, GH__ERROR_CODE__HTTP_503 = 6, GH__ERROR_CODE__HTTP_OTHER = 7, GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE = 8, GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE = 8, GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE = 10, GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE = 11, GH__ERROR_CODE__SUBPROCESS_SYNTAX = 12, GH__ERROR_CODE__INDEX_PACK_FAILED = 13, GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH = 14, }; enum gh__cache_server_mode { /* verify URL. disable if unknown. */ GH__CACHE_SERVER_MODE__VERIFY_DISABLE = 0, /* verify URL. error if unknown. */ GH__CACHE_SERVER_MODE__VERIFY_ERROR, /* disable the cache-server, if defined */ GH__CACHE_SERVER_MODE__DISABLE, /* trust any cache-server */ GH__CACHE_SERVER_MODE__TRUST_WITHOUT_VERIFY, }; /* * The set of command line, config, and environment variables * that we use as input to decide how we should operate. */ static struct gh__cmd_opts { const char *remote_name; int try_fallback; /* to git server if cache-server fails */ int show_progress; int depth; int block_size; int max_retries; int max_transient_backoff_sec; enum gh__cache_server_mode cache_server_mode; } gh__cmd_opts; /* * The chosen global state derrived from the inputs in gh__cmd_opts. 
 */
static struct gh__global {
	struct remote *remote;

	/* Credentials for the main server and for the cache-server. */
	struct credential main_creds;
	struct credential cache_creds;

	const char *main_url;
	const char *cache_server_url;

	/* Pathname of the ODB directory where we install fetched objects. */
	struct strbuf buf_odb_path;

	int http_is_initialized;
	int cache_server_is_initialized; /* did sub-command look for one */
	int main_creds_need_approval; /* try to only approve them once */

} gh__global;

enum gh__server_type {
	GH__SERVER_TYPE__MAIN = 0,
	GH__SERVER_TYPE__CACHE = 1,

	GH__SERVER_TYPE__NR,
};

/* Short labels appended to trace2 keys to distinguish the two servers. */
static const char *gh__server_type_label[GH__SERVER_TYPE__NR] = {
	"(main)",
	"(cs)"
};

enum gh__objects_mode {
	GH__OBJECTS_MODE__NONE = 0,

	/*
	 * Bulk fetch objects.
	 *
	 * But also, force the use of HTTP POST regardless of how many
	 * objects we are requesting.
	 *
	 * The GVFS Protocol treats requests for commit objects
	 * differently in GET and POST requests WRT whether it
	 * automatically also fetches the referenced trees.
	 */
	GH__OBJECTS_MODE__POST,

	/*
	 * Fetch objects one at a time using HTTP GET.
	 *
	 * Force the use of GET (primarily because of the commit
	 * object treatment).
	 */
	GH__OBJECTS_MODE__GET,

	/*
	 * Fetch one or more pre-computed "prefetch packs" containing
	 * commits and trees.
	 */
	GH__OBJECTS_MODE__PREFETCH,
};

/* Azure DevOps throttling data parsed from response headers. */
struct gh__azure_throttle {
	unsigned long tstu_limit;
	unsigned long tstu_remaining;

	unsigned long reset_sec;
	unsigned long retry_after_sec;
};

static void gh__azure_throttle__zero(struct gh__azure_throttle *azure)
{
	azure->tstu_limit = 0;
	azure->tstu_remaining = 0;
	azure->reset_sec = 0;
	azure->retry_after_sec = 0;
}

#define GH__AZURE_THROTTLE_INIT { \
	.tstu_limit = 0, \
	.tstu_remaining = 0, \
	.reset_sec = 0, \
	.retry_after_sec = 0, \
	}

/* One throttle bucket per server type (main and cache-server). */
static struct gh__azure_throttle gh__global_throttle[GH__SERVER_TYPE__NR] = {
	GH__AZURE_THROTTLE_INIT,
	GH__AZURE_THROTTLE_INIT,
};

/*
 * Stolen from http.c
 */
static CURLcode gh__curlinfo_strbuf(CURL *curl, CURLINFO info,
				    struct strbuf *buf)
{
	char *ptr;
	CURLcode ret;

	strbuf_reset(buf);
	ret = curl_easy_getinfo(curl, info, &ptr);
	if (!ret && ptr)
		strbuf_addstr(buf, ptr);
	return ret;
}

enum gh__progress_state {
	GH__PROGRESS_STATE__START = 0,
	GH__PROGRESS_STATE__PHASE1,
	GH__PROGRESS_STATE__PHASE2,
	GH__PROGRESS_STATE__PHASE3,
};

/*
 * Parameters to drive an HTTP request (with any necessary retries).
 */
struct gh__request_params {
	/*
	 * b_is_post indicates if the current HTTP request is a POST=1 or
	 * a GET=0.  This is a lower level field used to setup CURL and
	 * the tempfile used to receive the content.
	 *
	 * It is related to, but different from the GH__OBJECTS_MODE__
	 * field that we present to the gvfs-helper client or in the CLI
	 * (which only concerns the semantics of the /gvfs/objects protocol
	 * on the set of requested OIDs).
	 *
	 * For example, we use an HTTP GET to get the /gvfs/config data
	 * into a buffer.
	 */
	int b_is_post;

	int b_write_to_file; /* write to file=1 or strbuf=0 */
	int b_permit_cache_server_if_defined;

	enum gh__objects_mode objects_mode;
	enum gh__server_type server_type;

	int k_attempt; /* robust retry attempt */
	int k_transient_delay_sec; /* delay before transient error retries */

	unsigned long object_count; /* number of objects being fetched */

	const struct strbuf *post_payload; /* POST body to send */

	struct curl_slist *headers; /* additional http headers to send */
	struct tempfile *tempfile; /* for response content when file */
	struct strbuf *buffer; /* for response content when strbuf */
	struct strbuf tr2_label; /* for trace2 regions */

	/* OID of the single loose object received, when applicable. */
	struct object_id loose_oid;

	/*
	 * Note that I am putting all of the progress-related instance data
	 * inside the request-params in the hope that we can eventually
	 * do multi-threaded/concurrent HTTP requests when chunking
	 * large requests.  However, the underlying "struct progress" API
	 * is not thread safe (that is, it doesn't allow concurrent progress
	 * reports (since that might require multiple lines on the screen
	 * or something)).
	 */
	enum gh__progress_state progress_state;
	struct strbuf progress_base_phase2_msg;
	struct strbuf progress_base_phase3_msg;

	/*
	 * The buffer for the formatted progress message is shared by the
	 * "struct progress" API and must remain valid for the duration of
	 * the start_progress..stop_progress lifespan.
*/ struct strbuf progress_msg; struct progress *progress; struct strbuf e2eid; struct string_list *result_list; /* we do not own this */ }; #define GH__REQUEST_PARAMS_INIT { \ .b_is_post = 0, \ .b_write_to_file = 0, \ .b_permit_cache_server_if_defined = 1, \ .server_type = GH__SERVER_TYPE__MAIN, \ .k_attempt = 0, \ .k_transient_delay_sec = 0, \ .object_count = 0, \ .post_payload = NULL, \ .headers = NULL, \ .tempfile = NULL, \ .buffer = NULL, \ .tr2_label = STRBUF_INIT, \ .loose_oid = {{0}}, \ .progress_state = GH__PROGRESS_STATE__START, \ .progress_base_phase2_msg = STRBUF_INIT, \ .progress_base_phase3_msg = STRBUF_INIT, \ .progress_msg = STRBUF_INIT, \ .progress = NULL, \ .e2eid = STRBUF_INIT, \ .result_list = NULL, \ } static void gh__request_params__release(struct gh__request_params *params) { if (!params) return; params->post_payload = NULL; /* we do not own this */ curl_slist_free_all(params->headers); params->headers = NULL; delete_tempfile(¶ms->tempfile); params->buffer = NULL; /* we do not own this */ strbuf_release(¶ms->tr2_label); strbuf_release(¶ms->progress_base_phase2_msg); strbuf_release(¶ms->progress_base_phase3_msg); strbuf_release(¶ms->progress_msg); stop_progress(¶ms->progress); params->progress = NULL; strbuf_release(¶ms->e2eid); params->result_list = NULL; /* we do not own this */ } /* * How we handle retries for various unexpected network errors. */ enum gh__retry_mode { /* * The operation was successful, so no retry is needed. * Use this for HTTP 200, for example. */ GH__RETRY_MODE__SUCCESS = 0, /* * Retry using the normal 401 Auth mechanism. */ GH__RETRY_MODE__HTTP_401, /* * Fail because at least one of the requested OIDs does not exist. */ GH__RETRY_MODE__FAIL_404, /* * A transient network error, such as dropped connection * or network IO error. Our belief is that a retry MAY * succeed. (See Gremlins and Cosmic Rays....) */ GH__RETRY_MODE__TRANSIENT, /* * Request was blocked completely because of a 429. 
	 */
	GH__RETRY_MODE__HTTP_429,

	/*
	 * Request failed because the server was (temporarily?) offline.
	 */
	GH__RETRY_MODE__HTTP_503,

	/*
	 * The operation had a hard failure and we have no
	 * expectation that a second attempt will give a different
	 * answer, such as a bad hostname or a mal-formed URL.
	 */
	GH__RETRY_MODE__HARD_FAIL,
};

/*
 * Bucket to describe the results of an HTTP requests (may be
 * overwritten during retries so that it describes the final attempt).
 */
struct gh__response_status {
	struct strbuf error_message;
	struct strbuf content_type;
	enum gh__error_code ec;
	enum gh__retry_mode retry;
	intmax_t bytes_received;
	struct gh__azure_throttle *azure;
};

#define GH__RESPONSE_STATUS_INIT { \
	.error_message = STRBUF_INIT, \
	.content_type = STRBUF_INIT, \
	.ec = GH__ERROR_CODE__OK, \
	.retry = GH__RETRY_MODE__SUCCESS, \
	.bytes_received = 0, \
	.azure = NULL, \
	}

/* Reset a response-status bucket so it can describe the next attempt. */
static void gh__response_status__zero(struct gh__response_status *s)
{
	strbuf_setlen(&s->error_message, 0);
	strbuf_setlen(&s->content_type, 0);
	s->ec = GH__ERROR_CODE__OK;
	s->retry = GH__RETRY_MODE__SUCCESS;
	s->bytes_received = 0;
	s->azure = NULL;
}

static void install_result(struct gh__request_params *params,
			   struct gh__response_status *status);

/*
 * Log the E2EID for the current request.
 *
 * Since every HTTP request to the cache-server and to the main Git server
 * will send back a unique E2EID (probably a GUID), we don't want to overload
 * telemetry with each ID -- rather, only the ones for which there was a
 * problem and that may be helpful in a post mortem.
*/ static void log_e2eid(struct gh__request_params *params, struct gh__response_status *status) { if (!params->e2eid.len) return; switch (status->retry) { default: case GH__RETRY_MODE__SUCCESS: case GH__RETRY_MODE__HTTP_401: case GH__RETRY_MODE__FAIL_404: return; case GH__RETRY_MODE__HARD_FAIL: case GH__RETRY_MODE__TRANSIENT: case GH__RETRY_MODE__HTTP_429: case GH__RETRY_MODE__HTTP_503: break; } if (trace2_is_enabled()) { struct strbuf key = STRBUF_INIT; strbuf_addstr(&key, "e2eid"); strbuf_addstr(&key, gh__server_type_label[params->server_type]); trace2_data_string(TR2_CAT, NULL, key.buf, params->e2eid.buf); strbuf_release(&key); } } /* * Normalize a few HTTP response codes before we try to decide * how to dispatch on them. */ static long gh__normalize_odd_codes(struct gh__request_params *params, long http_response_code) { if (params->server_type == GH__SERVER_TYPE__CACHE && http_response_code == 400) { /* * The cache-server sends a somewhat bogus 400 instead of * the normal 401 when AUTH is required. Fixup the status * to hide that. * * TODO Technically, the cache-server could send a 400 * TODO for many reasons, not just for their bogus * TODO pseudo-401, but we're going to assume it is a * TODO 401 for now. We should confirm the expected * TODO error message in the response-body. */ return 401; } if (http_response_code == 203) { /* * A proxy server transformed a 200 from the origin server * into a 203. We don't care about the subtle distinction. */ return 200; } return http_response_code; } /* * Map HTTP response codes into a retry strategy. 
 * See https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
 *
 * https://docs.microsoft.com/en-us/azure/devops/integrate/concepts/rate-limits?view=azure-devops
 */
static void compute_retry_mode_from_http_response(
	struct gh__response_status *status,
	long http_response_code)
{
	switch (http_response_code) {

	case 200:
		status->retry = GH__RETRY_MODE__SUCCESS;
		status->ec = GH__ERROR_CODE__OK;
		return;

	case 301: /* all the various flavors of HTTP Redirect */
	case 302:
	case 303:
	case 304:
	case 305:
	case 306:
	case 307:
	case 308:
		/*
		 * TODO Consider a redirected-retry (with or without
		 * TODO a Retry-After header).
		 */
		goto hard_fail;

	case 401:
		strbuf_addstr(&status->error_message,
			      "(http:401) Not Authorized");
		status->retry = GH__RETRY_MODE__HTTP_401;
		status->ec = GH__ERROR_CODE__HTTP_401;
		return;

	case 404:
		/*
		 * TODO if params->object_count > 1, consider
		 * TODO splitting the request into 2 halves
		 * TODO and retrying each half in series.
		 */
		strbuf_addstr(&status->error_message,
			      "(http:404) Not Found");
		status->retry = GH__RETRY_MODE__FAIL_404;
		status->ec = GH__ERROR_CODE__HTTP_404;
		return;

	case 429:
		/*
		 * This is a hard block because we've been bad.
		 */
		strbuf_addstr(&status->error_message,
			      "(http:429) Too Many Requests [throttled]");
		status->retry = GH__RETRY_MODE__HTTP_429;
		status->ec = GH__ERROR_CODE__HTTP_429;

		trace2_data_string(TR2_CAT, NULL, "error/http",
				   status->error_message.buf);
		return;

	case 503:
		/*
		 * We assume that this comes with a "Retry-After" header
		 * like 429s.
		 */
		strbuf_addstr(&status->error_message,
			      "(http:503) Server Unavailable [throttled]");
		status->retry = GH__RETRY_MODE__HTTP_503;
		status->ec = GH__ERROR_CODE__HTTP_503;

		trace2_data_string(TR2_CAT, NULL, "error/http",
				   status->error_message.buf);
		return;

	default:
		goto hard_fail;
	}

hard_fail:
	strbuf_addf(&status->error_message, "(http:%d) Other [hard_fail]",
		    (int)http_response_code);
	status->retry = GH__RETRY_MODE__HARD_FAIL;
	status->ec = GH__ERROR_CODE__HTTP_OTHER;

	trace2_data_string(TR2_CAT, NULL, "error/http",
			   status->error_message.buf);
	return;
}

/*
 * Map CURLE error codes to a retry strategy.
 * See <curl/curl.h> and
 * https://curl.haxx.se/libcurl/c/libcurl-errors.html
 *
 * This could be a static table rather than a switch, but
 * that is harder to debug and we may want to selectively
 * log errors.
 *
 * I've commented out all of the hard-fail cases for now
 * and let the default handle them.  This is to indicate
 * that I considered them and found them to be not actionable.
 * Also, the spelling of some of the CURLE_ symbols seem
 * to change between curl releases on different platforms,
 * so I'm not going to fight that.
*/ static void compute_retry_mode_from_curl_error( struct gh__response_status *status, CURLcode curl_code) { switch (curl_code) { case CURLE_OK: status->retry = GH__RETRY_MODE__SUCCESS; status->ec = GH__ERROR_CODE__OK; return; //se CURLE_UNSUPPORTED_PROTOCOL: goto hard_fail; //se CURLE_FAILED_INIT: goto hard_fail; //se CURLE_URL_MALFORMAT: goto hard_fail; //se CURLE_NOT_BUILT_IN: goto hard_fail; //se CURLE_COULDNT_RESOLVE_PROXY: goto hard_fail; //se CURLE_COULDNT_RESOLVE_HOST: goto hard_fail; case CURLE_COULDNT_CONNECT: goto transient; //se CURLE_WEIRD_SERVER_REPLY: goto hard_fail; //se CURLE_REMOTE_ACCESS_DENIED: goto hard_fail; //se CURLE_FTP_ACCEPT_FAILED: goto hard_fail; //se CURLE_FTP_WEIRD_PASS_REPLY: goto hard_fail; //se CURLE_FTP_ACCEPT_TIMEOUT: goto hard_fail; //se CURLE_FTP_WEIRD_PASV_REPLY: goto hard_fail; //se CURLE_FTP_WEIRD_227_FORMAT: goto hard_fail; //se CURLE_FTP_CANT_GET_HOST: goto hard_fail; case CURLE_HTTP2: goto transient; //se CURLE_FTP_COULDNT_SET_TYPE: goto hard_fail; case CURLE_PARTIAL_FILE: goto transient; //se CURLE_FTP_COULDNT_RETR_FILE: goto hard_fail; //se CURLE_OBSOLETE20: goto hard_fail; //se CURLE_QUOTE_ERROR: goto hard_fail; //se CURLE_HTTP_RETURNED_ERROR: goto hard_fail; case CURLE_WRITE_ERROR: goto transient; //se CURLE_OBSOLETE24: goto hard_fail; case CURLE_UPLOAD_FAILED: goto transient; //se CURLE_READ_ERROR: goto hard_fail; //se CURLE_OUT_OF_MEMORY: goto hard_fail; case CURLE_OPERATION_TIMEDOUT: goto transient; //se CURLE_OBSOLETE29: goto hard_fail; //se CURLE_FTP_PORT_FAILED: goto hard_fail; //se CURLE_FTP_COULDNT_USE_REST: goto hard_fail; //se CURLE_OBSOLETE32: goto hard_fail; //se CURLE_RANGE_ERROR: goto hard_fail; case CURLE_HTTP_POST_ERROR: goto transient; //se CURLE_SSL_CONNECT_ERROR: goto hard_fail; //se CURLE_BAD_DOWNLOAD_RESUME: goto hard_fail; //se CURLE_FILE_COULDNT_READ_FILE: goto hard_fail; //se CURLE_LDAP_CANNOT_BIND: goto hard_fail; //se CURLE_LDAP_SEARCH_FAILED: goto hard_fail; //se CURLE_OBSOLETE40: goto 
hard_fail; //se CURLE_FUNCTION_NOT_FOUND: goto hard_fail; //se CURLE_ABORTED_BY_CALLBACK: goto hard_fail; //se CURLE_BAD_FUNCTION_ARGUMENT: goto hard_fail; //se CURLE_OBSOLETE44: goto hard_fail; //se CURLE_INTERFACE_FAILED: goto hard_fail; //se CURLE_OBSOLETE46: goto hard_fail; //se CURLE_TOO_MANY_REDIRECTS: goto hard_fail; //se CURLE_UNKNOWN_OPTION: goto hard_fail; //se CURLE_TELNET_OPTION_SYNTAX: goto hard_fail; //se CURLE_OBSOLETE50: goto hard_fail; //se CURLE_PEER_FAILED_VERIFICATION: goto hard_fail; //se CURLE_GOT_NOTHING: goto hard_fail; //se CURLE_SSL_ENGINE_NOTFOUND: goto hard_fail; //se CURLE_SSL_ENGINE_SETFAILED: goto hard_fail; case CURLE_SEND_ERROR: goto transient; case CURLE_RECV_ERROR: goto transient; //se CURLE_OBSOLETE57: goto hard_fail; //se CURLE_SSL_CERTPROBLEM: goto hard_fail; //se CURLE_SSL_CIPHER: goto hard_fail; //se CURLE_SSL_CACERT: goto hard_fail; //se CURLE_BAD_CONTENT_ENCODING: goto hard_fail; //se CURLE_LDAP_INVALID_URL: goto hard_fail; //se CURLE_FILESIZE_EXCEEDED: goto hard_fail; //se CURLE_USE_SSL_FAILED: goto hard_fail; //se CURLE_SEND_FAIL_REWIND: goto hard_fail; //se CURLE_SSL_ENGINE_INITFAILED: goto hard_fail; //se CURLE_LOGIN_DENIED: goto hard_fail; //se CURLE_TFTP_NOTFOUND: goto hard_fail; //se CURLE_TFTP_PERM: goto hard_fail; //se CURLE_REMOTE_DISK_FULL: goto hard_fail; //se CURLE_TFTP_ILLEGAL: goto hard_fail; //se CURLE_TFTP_UNKNOWNID: goto hard_fail; //se CURLE_REMOTE_FILE_EXISTS: goto hard_fail; //se CURLE_TFTP_NOSUCHUSER: goto hard_fail; //se CURLE_CONV_FAILED: goto hard_fail; //se CURLE_CONV_REQD: goto hard_fail; //se CURLE_SSL_CACERT_BADFILE: goto hard_fail; //se CURLE_REMOTE_FILE_NOT_FOUND: goto hard_fail; //se CURLE_SSH: goto hard_fail; //se CURLE_SSL_SHUTDOWN_FAILED: goto hard_fail; case CURLE_AGAIN: goto transient; //se CURLE_SSL_CRL_BADFILE: goto hard_fail; //se CURLE_SSL_ISSUER_ERROR: goto hard_fail; //se CURLE_FTP_PRET_FAILED: goto hard_fail; //se CURLE_RTSP_CSEQ_ERROR: goto hard_fail; //se 
CURLE_RTSP_SESSION_ERROR: goto hard_fail; //se CURLE_FTP_BAD_FILE_LIST: goto hard_fail; //se CURLE_CHUNK_FAILED: goto hard_fail; //se CURLE_NO_CONNECTION_AVAILABLE: goto hard_fail; //se CURLE_SSL_PINNEDPUBKEYNOTMATCH: goto hard_fail; //se CURLE_SSL_INVALIDCERTSTATUS: goto hard_fail; #ifdef CURLE_HTTP2_STREAM case CURLE_HTTP2_STREAM: goto transient; #endif default: goto hard_fail; } hard_fail: strbuf_addf(&status->error_message, "(curl:%d) %s [hard_fail]", curl_code, curl_easy_strerror(curl_code)); status->retry = GH__RETRY_MODE__HARD_FAIL; status->ec = GH__ERROR_CODE__CURL_ERROR; trace2_data_string(TR2_CAT, NULL, "error/curl", status->error_message.buf); return; transient: strbuf_addf(&status->error_message, "(curl:%d) %s [transient]", curl_code, curl_easy_strerror(curl_code)); status->retry = GH__RETRY_MODE__TRANSIENT; status->ec = GH__ERROR_CODE__CURL_ERROR; trace2_data_string(TR2_CAT, NULL, "error/curl", status->error_message.buf); return; } /* * Create a single normalized 'ec' error-code from the status we * received from the HTTP request. Map a few of the expected HTTP * status code to 'ec', but don't get too crazy here. */ static void gh__response_status__set_from_slot( struct gh__request_params *params, struct gh__response_status *status, const struct active_request_slot *slot) { long http_response_code; CURLcode curl_code; curl_code = slot->results->curl_result; gh__curlinfo_strbuf(slot->curl, CURLINFO_CONTENT_TYPE, &status->content_type); curl_easy_getinfo(slot->curl, CURLINFO_RESPONSE_CODE, &http_response_code); strbuf_setlen(&status->error_message, 0); http_response_code = gh__normalize_odd_codes(params, http_response_code); /* * Use normalized response/status codes form curl/http to decide * how to set the error-code we propagate *AND* to decide if we * we should retry because of transient network problems. 
*/ if (curl_code == CURLE_OK || curl_code == CURLE_HTTP_RETURNED_ERROR) compute_retry_mode_from_http_response(status, http_response_code); else compute_retry_mode_from_curl_error(status, curl_code); if (status->ec != GH__ERROR_CODE__OK) status->bytes_received = 0; else if (params->b_write_to_file) status->bytes_received = (intmax_t)ftell(params->tempfile->fp); else status->bytes_received = (intmax_t)params->buffer->len; } static void gh__response_status__release(struct gh__response_status *status) { if (!status) return; strbuf_release(&status->error_message); strbuf_release(&status->content_type); } static int gh__curl_progress_cb(void *clientp, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow) { struct gh__request_params *params = clientp; /* * From what I can tell, CURL progress arrives in 3 phases. * * [1] An initial connection setup phase where we get [0,0] [0,0]. * [2] An upload phase where we start sending the request headers * and body. ulnow will be > 0. ultotal may or may not be 0. * [3] A download phase where we start receiving the response * headers and payload body. dlnow will be > 0. dltotal may * or may not be 0. * * If we pass zero for the total to the "struct progress" API, we * get simple numbers rather than percentages. So our progress * output format may vary depending. * * It is unclear if CURL will give us a final callback after * everything is finished, so we leave the progress handle open * and let the caller issue the final stop_progress(). * * There is a bit of a mismatch between the CURL API and the * "struct progress" API. The latter requires us to set the * progress message when we call one of the start_progress * methods. We cannot change the progress message while we are * showing progress state. And we cannot change the denominator * (total) after we start. CURL may or may not give us the total * sizes for each phase. 
* * Also be advised that the "struct progress" API eats messages * so that the screen is only updated every second or so. And * may not print anything if the start..stop happen in less then * 2 seconds. Whereas CURL calls this callback very frequently. * The net-net is that we may not actually see this progress * message for small/fast HTTP requests. */ switch (params->progress_state) { case GH__PROGRESS_STATE__START: /* first callback */ if (dlnow == 0 && ulnow == 0) goto enter_phase_1; if (ulnow) goto enter_phase_2; else goto enter_phase_3; case GH__PROGRESS_STATE__PHASE1: if (dlnow == 0 && ulnow == 0) return 0; if (ulnow) goto enter_phase_2; else goto enter_phase_3; case GH__PROGRESS_STATE__PHASE2: display_progress(params->progress, ulnow); if (dlnow == 0) return 0; stop_progress(¶ms->progress); goto enter_phase_3; case GH__PROGRESS_STATE__PHASE3: display_progress(params->progress, dlnow); return 0; default: return 0; } enter_phase_1: /* * Don't bother to create a progress handle during phase [1]. * Because we get [0,0,0,0], we don't have any data to report * and would just have to synthesize some type of progress. * From my testing, phase [1] is fairly quick (probably just * the SSL handshake), so the "struct progress" API will most * likely completely eat any messages that we did produce. 
*/ params->progress_state = GH__PROGRESS_STATE__PHASE1; return 0; enter_phase_2: strbuf_setlen(¶ms->progress_msg, 0); if (params->progress_base_phase2_msg.len) { if (params->k_attempt > 0) strbuf_addf(¶ms->progress_msg, "%s [retry %d/%d] (bytes sent)", params->progress_base_phase2_msg.buf, params->k_attempt, gh__cmd_opts.max_retries); else strbuf_addf(¶ms->progress_msg, "%s (bytes sent)", params->progress_base_phase2_msg.buf); params->progress = start_progress(params->progress_msg.buf, ultotal); display_progress(params->progress, ulnow); } params->progress_state = GH__PROGRESS_STATE__PHASE2; return 0; enter_phase_3: strbuf_setlen(¶ms->progress_msg, 0); if (params->progress_base_phase3_msg.len) { if (params->k_attempt > 0) strbuf_addf(¶ms->progress_msg, "%s [retry %d/%d] (bytes received)", params->progress_base_phase3_msg.buf, params->k_attempt, gh__cmd_opts.max_retries); else strbuf_addf(¶ms->progress_msg, "%s (bytes received)", params->progress_base_phase3_msg.buf); params->progress = start_progress(params->progress_msg.buf, dltotal); display_progress(params->progress, dlnow); } params->progress_state = GH__PROGRESS_STATE__PHASE3; return 0; } /* * Run the request without using "run_one_slot()" because we * don't want the post-request normalization, error handling, * and auto-reauth handling in http.c. 
*/ static void gh__run_one_slot(struct active_request_slot *slot, struct gh__request_params *params, struct gh__response_status *status) { struct strbuf key = STRBUF_INIT; strbuf_addbuf(&key, ¶ms->tr2_label); strbuf_addstr(&key, gh__server_type_label[params->server_type]); params->progress_state = GH__PROGRESS_STATE__START; strbuf_setlen(¶ms->e2eid, 0); trace2_region_enter(TR2_CAT, key.buf, NULL); if (!start_active_slot(slot)) { compute_retry_mode_from_curl_error(status, CURLE_FAILED_INIT); } else { run_active_slot(slot); if (params->b_write_to_file) fflush(params->tempfile->fp); gh__response_status__set_from_slot(params, status, slot); log_e2eid(params, status); if (status->ec == GH__ERROR_CODE__OK) { int old_len = key.len; /* * We only log the number of bytes received. * We do not log the number of objects requested * because the server may give us more than that * (such as when we request a commit). */ strbuf_addstr(&key, "/nr_bytes"); trace2_data_intmax(TR2_CAT, NULL, key.buf, status->bytes_received); strbuf_setlen(&key, old_len); } } if (params->progress) stop_progress(¶ms->progress); if (status->ec == GH__ERROR_CODE__OK && params->b_write_to_file) install_result(params, status); trace2_region_leave(TR2_CAT, key.buf, NULL); strbuf_release(&key); } static int option_parse_cache_server_mode(const struct option *opt, const char *arg, int unset) { if (unset) /* should not happen */ return error(_("missing value for switch '%s'"), opt->long_name); else if (!strcmp(arg, "verify")) gh__cmd_opts.cache_server_mode = GH__CACHE_SERVER_MODE__VERIFY_DISABLE; else if (!strcmp(arg, "error")) gh__cmd_opts.cache_server_mode = GH__CACHE_SERVER_MODE__VERIFY_ERROR; else if (!strcmp(arg, "disable")) gh__cmd_opts.cache_server_mode = GH__CACHE_SERVER_MODE__DISABLE; else if (!strcmp(arg, "trust")) gh__cmd_opts.cache_server_mode = GH__CACHE_SERVER_MODE__TRUST_WITHOUT_VERIFY; else return error(_("invalid value for switch '%s'"), opt->long_name); return 0; } /* * Let command line args 
override "gvfs.sharedcache" config setting * and override the value set by git_default_config(). * * The command line is parsed *AFTER* the config is loaded, so * prepared_alt_odb() has already been called any default or inherited * shared-cache has already been set. * * We have a chance to override it here. */ static int option_parse_shared_cache_directory(const struct option *opt, const char *arg, int unset) { struct strbuf buf_arg = STRBUF_INIT; if (unset) /* should not happen */ return error(_("missing value for switch '%s'"), opt->long_name); strbuf_addstr(&buf_arg, arg); if (strbuf_normalize_path(&buf_arg) < 0) { /* * Pretend command line wasn't given. Use whatever * settings we already have from the config. */ strbuf_release(&buf_arg); return 0; } strbuf_trim_trailing_dir_sep(&buf_arg); if (!strbuf_cmp(&buf_arg, &gvfs_shared_cache_pathname)) { /* * The command line argument matches what we got from * the config, so we're already setup correctly. (And * we have already verified that the directory exists * on disk.) */ strbuf_release(&buf_arg); return 0; } else if (!gvfs_shared_cache_pathname.len) { /* * A shared-cache was requested and we did not inherit one. * Try it, but let alt_odb_usable() secretly disable it if * it cannot create the directory on disk. */ strbuf_addbuf(&gvfs_shared_cache_pathname, &buf_arg); add_to_alternates_memory(buf_arg.buf); strbuf_release(&buf_arg); return 0; } else { /* * The requested shared-cache is different from the one * we inherited. Replace the inherited value with this * one, but smartly fallback if necessary. */ struct strbuf buf_prev = STRBUF_INIT; strbuf_addbuf(&buf_prev, &gvfs_shared_cache_pathname); strbuf_setlen(&gvfs_shared_cache_pathname, 0); strbuf_addbuf(&gvfs_shared_cache_pathname, &buf_arg); add_to_alternates_memory(buf_arg.buf); /* * alt_odb_usable() releases gvfs_shared_cache_pathname * if it cannot create the directory on disk, so fallback * to the previous choice when it fails. 
*/ if (!gvfs_shared_cache_pathname.len) strbuf_addbuf(&gvfs_shared_cache_pathname, &buf_prev); strbuf_release(&buf_arg); strbuf_release(&buf_prev); return 0; } } /* * Lookup the URL for this remote (defaults to 'origin'). */ static void lookup_main_url(void) { /* * Both VFS and Scalar only work with 'origin', so we expect this. * The command line arg is mainly for debugging. */ if (!gh__cmd_opts.remote_name || !*gh__cmd_opts.remote_name) gh__cmd_opts.remote_name = "origin"; gh__global.remote = remote_get(gh__cmd_opts.remote_name); if (!gh__global.remote->url[0] || !*gh__global.remote->url[0]) die("unknown remote '%s'", gh__cmd_opts.remote_name); /* * Strip out any in-line auth in the origin server URL so that * we can control which creds we fetch. * * Azure DevOps has been known to suggest https URLS of the * form "https://@dev.azure.com//". * * Break that so that we can force the use of a PAT. */ gh__global.main_url = transport_anonymize_url(gh__global.remote->url[0]); trace2_data_string(TR2_CAT, NULL, "remote/url", gh__global.main_url); } static void do__http_get__gvfs_config(struct gh__response_status *status, struct strbuf *config_data); /* * Find the URL of the cache-server, if we have one. * * This routine is called by the initialization code and is allowed * to call die() rather than returning an 'ec'. */ static void select_cache_server(void) { struct gh__response_status status = GH__RESPONSE_STATUS_INIT; struct strbuf config_data = STRBUF_INIT; const char *match = NULL; /* * This only indicates that the sub-command actually called * this routine. We rely on gh__global.cache_server_url to tell * us if we actually have a cache-server configured. 
*/ gh__global.cache_server_is_initialized = 1; gh__global.cache_server_url = NULL; if (gh__cmd_opts.cache_server_mode == GH__CACHE_SERVER_MODE__DISABLE) { trace2_data_string(TR2_CAT, NULL, "cache/url", "disabled"); return; } if (!gvfs_cache_server_url || !*gvfs_cache_server_url) { switch (gh__cmd_opts.cache_server_mode) { default: case GH__CACHE_SERVER_MODE__TRUST_WITHOUT_VERIFY: case GH__CACHE_SERVER_MODE__VERIFY_DISABLE: trace2_data_string(TR2_CAT, NULL, "cache/url", "unset"); return; case GH__CACHE_SERVER_MODE__VERIFY_ERROR: die("cache-server not set"); } } /* * If the cache-server and main Git server have the same URL, we * can silently disable the cache-server (by NOT setting the field * in gh__global and explicitly disable the fallback logic.) */ if (!strcmp(gvfs_cache_server_url, gh__global.main_url)) { gh__cmd_opts.try_fallback = 0; trace2_data_string(TR2_CAT, NULL, "cache/url", "same"); return; } if (gh__cmd_opts.cache_server_mode == GH__CACHE_SERVER_MODE__TRUST_WITHOUT_VERIFY) { gh__global.cache_server_url = gvfs_cache_server_url; trace2_data_string(TR2_CAT, NULL, "cache/url", gvfs_cache_server_url); return; } /* * GVFS cache-servers use the main Git server's creds rather * than having their own creds. This feels like a security * hole. For example, if the cache-server URL is pointed to a * bad site, we'll happily send them our creds to the main Git * server with each request to the cache-server. This would * allow an attacker to later use our creds to impersonate us * on the main Git server. * * So we optionally verify that the URL to the cache-server is * well-known by the main Git server. */ do__http_get__gvfs_config(&status, &config_data); if (status.ec == GH__ERROR_CODE__OK) { /* * The gvfs/config response is in JSON, but I don't think * we need to parse it and all that. Lets just do a simple * strstr() and assume it is sufficient. * * We do add some context to the pattern to guard against * some attacks. 
*/ struct strbuf pattern = STRBUF_INIT; strbuf_addf(&pattern, "\"Url\":\"%s\"", gvfs_cache_server_url); match = strstr(config_data.buf, pattern.buf); strbuf_release(&pattern); } strbuf_release(&config_data); if (match) { gh__global.cache_server_url = gvfs_cache_server_url; trace2_data_string(TR2_CAT, NULL, "cache/url", gvfs_cache_server_url); } else if (gh__cmd_opts.cache_server_mode == GH__CACHE_SERVER_MODE__VERIFY_ERROR) { if (status.ec != GH__ERROR_CODE__OK) die("could not verify cache-server '%s': %s", gvfs_cache_server_url, status.error_message.buf); else die("could not verify cache-server '%s'", gvfs_cache_server_url); } else if (gh__cmd_opts.cache_server_mode == GH__CACHE_SERVER_MODE__VERIFY_DISABLE) { if (status.ec != GH__ERROR_CODE__OK) warning("could not verify cache-server '%s': %s", gvfs_cache_server_url, status.error_message.buf); else warning("could not verify cache-server '%s'", gvfs_cache_server_url); trace2_data_string(TR2_CAT, NULL, "cache/url", "disabled"); } gh__response_status__release(&status); } /* * Read stdin until EOF (or a blank line) and add the desired OIDs * to the oidset. * * Stdin should contain a list of OIDs. Lines may have additional * text following the OID that we ignore. */ static unsigned long read_stdin_for_oids(struct oidset *oids) { struct object_id oid; struct strbuf buf_stdin = STRBUF_INIT; unsigned long count = 0; do { if (strbuf_getline(&buf_stdin, stdin) == EOF || !buf_stdin.len) break; if (get_oid_hex(buf_stdin.buf, &oid)) continue; /* just silently eat it */ if (!oidset_insert(oids, &oid)) count++; } while (1); return count; } /* * Build a complete JSON payload for a gvfs/objects POST request * containing the first `nr_in_block` OIDs found in the OIDSET * indexed by the given iterator. * * https://github.com/microsoft/VFSForGit/blob/master/Protocol.md * * Return the number of OIDs we actually put into the payload. * If only 1 OID was found, also return it. 
 */
static unsigned long build_json_payload__gvfs_objects(
	struct json_writer *jw_req,
	struct oidset_iter *iter,
	unsigned long nr_in_block,
	struct object_id *oid_out)
{
	unsigned long k;
	const struct object_id *oid;
	const struct object_id *oid_prev = NULL;

	k = 0;

	jw_init(jw_req);
	jw_object_begin(jw_req, 0);
	jw_object_intmax(jw_req, "commitDepth", gh__cmd_opts.depth);
	jw_object_inline_begin_array(jw_req, "objectIds");
	while (k < nr_in_block && (oid = oidset_iter_next(iter))) {
		jw_array_string(jw_req, oid_to_hex(oid));
		k++;
		oid_prev = oid;
	}
	jw_end(jw_req); /* close "objectIds" array */
	jw_end(jw_req); /* close top-level object */

	if (oid_out) {
		/* Report the singleton OID, or clear it for 0 or >1 OIDs. */
		if (k == 1)
			oidcpy(oid_out, oid_prev);
		else
			oidclr(oid_out);
	}

	return k;
}

/*
 * Lookup the creds for the main/origin Git server.
 *
 * No-op if we already have a username (creds previously filled).
 */
static void lookup_main_creds(void)
{
	if (gh__global.main_creds.username && *gh__global.main_creds.username)
		return;

	credential_from_url(&gh__global.main_creds, gh__global.main_url);
	credential_fill(&gh__global.main_creds);
	gh__global.main_creds_need_approval = 1;
}

/*
 * If we have a set of creds for the main Git server, tell the credential
 * manager to throw them away and ask it to reacquire them.
 */
static void refresh_main_creds(void)
{
	if (gh__global.main_creds.username && *gh__global.main_creds.username)
		credential_reject(&gh__global.main_creds);

	lookup_main_creds();

	// TODO should we compare before and after values of u/p and
	// TODO shortcut reauth if we already know it will fail?
	// TODO if so, return a bool if same/different.
}

/*
 * Tell the credential manager the main creds worked, but only once
 * per fill (main_creds_need_approval gates repeated approvals).
 */
static void approve_main_creds(void)
{
	if (!gh__global.main_creds_need_approval)
		return;

	credential_approve(&gh__global.main_creds);
	gh__global.main_creds_need_approval = 0;
}

/*
 * Build a set of creds for the cache-server based upon the main Git
 * server (assuming we have a cache-server configured).
 *
 * That is, we NEVER fill them directly for the cache-server -- we
 * only synthesize them from the filled main creds.
 */
static void synthesize_cache_server_creds(void)
{
	if (!gh__global.cache_server_is_initialized)
		BUG("sub-command did not initialize cache-server vars");

	if (!gh__global.cache_server_url)
		return;

	/* Already synthesized on a previous call. */
	if (gh__global.cache_creds.username && *gh__global.cache_creds.username)
		return;

	/*
	 * Get the main Git server creds so we can borrow the username
	 * and password when we talk to the cache-server.
	 */
	lookup_main_creds();
	gh__global.cache_creds.username = xstrdup(gh__global.main_creds.username);
	gh__global.cache_creds.password = xstrdup(gh__global.main_creds.password);
}

/*
 * Flush and refresh the cache-server creds.  Because the cache-server
 * does not do 401s (or manage creds), we have to reload the main Git
 * server creds first.
 *
 * That is, we NEVER reject them directly because we never filled them.
 */
static void refresh_cache_server_creds(void)
{
	credential_clear(&gh__global.cache_creds);

	refresh_main_creds();
	synthesize_cache_server_creds();
}

/*
 * We NEVER approve cache-server creds directly because we never directly
 * filled them.  However, we should be able to infer that the main ones
 * are valid and can approve them if necessary.
 */
static void approve_cache_server_creds(void)
{
	approve_main_creds();
}

/*
 * Get the pathname to the ODB where we write objects that we download.
 *
 * Prefer the shared-cache ODB (from config or command line) and fall
 * back to the local ".git/objects" directory.
 */
static void select_odb(void)
{
	prepare_alt_odb(the_repository);

	strbuf_init(&gh__global.buf_odb_path, 0);

	if (gvfs_shared_cache_pathname.len)
		strbuf_addbuf(&gh__global.buf_odb_path,
			      &gvfs_shared_cache_pathname);
	else
		strbuf_addstr(&gh__global.buf_odb_path,
			      the_repository->objects->odb->path);
}

/*
 * Create a unique tempfile or tempfile-pair inside the
 * tempPacks directory.
 */
static void my_create_tempfile(
	struct gh__response_status *status,
	int b_fdopen,
	const char *suffix1, struct tempfile **t1,
	const char *suffix2, struct tempfile **t2)
{
	/* Per-process state used to build unique tempfile basenames. */
	static unsigned int nth = 0;
	static struct timeval tv = {0};
	static struct tm tm = {0};
	static time_t secs = 0;
	static char date[32] = {0};

	struct strbuf basename = STRBUF_INIT;
	struct strbuf buf = STRBUF_INIT;
	int len_tp;
	enum scld_error scld;
	int retries;

	gh__response_status__zero(status);

	if (!nth) {
		/*
		 * Create a unique string to use in the name of all
		 * tempfiles created by this process.
		 */
		gettimeofday(&tv, NULL);
		secs = tv.tv_sec;
		gmtime_r(&secs, &tm);

		xsnprintf(date, sizeof(date), "%4d%02d%02d-%02d%02d%02d-%06ld",
			  tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
			  tm.tm_hour, tm.tm_min, tm.tm_sec,
			  (long)tv.tv_usec);
	}

	/*
	 * Create a <basename> for this instance/pair using a series
	 * number <nth>.
	 */
	strbuf_addf(&basename, "t-%s-%04d", date, nth++);

	if (!suffix1 || !*suffix1)
		suffix1 = "temp";

	/*
	 * Create full pathname as:
	 *
	 *     "<odb>/pack/tempPacks/<basename>.<suffix1>"
	 */
	strbuf_setlen(&buf, 0);
	strbuf_addbuf(&buf, &gh__global.buf_odb_path);
	strbuf_complete(&buf, '/');
	strbuf_addstr(&buf, "pack/tempPacks/");
	len_tp = buf.len;
	strbuf_addf(&buf, "%s.%s", basename.buf, suffix1);

	scld = safe_create_leading_directories(buf.buf);
	if (scld != SCLD_OK && scld != SCLD_EXISTS) {
		strbuf_addf(&status->error_message,
			    "could not create directory for tempfile: '%s'",
			    buf.buf);
		status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE;
		goto cleanup;
	}

	/* Retry with a "-<retries>" suffix if the name is already taken. */
	retries = 0;
	*t1 = create_tempfile(buf.buf);
	while (!*t1 && retries < 5) {
		retries++;
		strbuf_setlen(&buf, len_tp);
		strbuf_addf(&buf, "%s-%d.%s", basename.buf, retries, suffix1);
		*t1 = create_tempfile(buf.buf);
	}

	if (!*t1) {
		strbuf_addf(&status->error_message,
			    "could not create tempfile: '%s'",
			    buf.buf);
		status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE;
		goto cleanup;
	}
	if (b_fdopen)
		fdopen_tempfile(*t1, "w");

	/*
	 * Optionally create a peer tempfile with the same basename.
	 * (This is useful for prefetching .pack and .idx pairs.)
	 *
	 *     "<odb>/pack/tempPacks/<basename>.<suffix2>"
	 *
	 * NOTE(review): `retries` carries over from t1, so if t1 needed a
	 * retry suffix and t2 does not (or vice versa), the two basenames
	 * may differ -- confirm pairs never rely on matching temp names.
	 */
	if (suffix2 && *suffix2 && t2) {
		strbuf_setlen(&buf, len_tp);
		strbuf_addf(&buf, "%s.%s", basename.buf, suffix2);

		*t2 = create_tempfile(buf.buf);
		while (!*t2 && retries < 5) {
			retries++;
			strbuf_setlen(&buf, len_tp);
			strbuf_addf(&buf, "%s-%d.%s",
				    basename.buf, retries, suffix2);
			*t2 = create_tempfile(buf.buf);
		}

		if (!*t2) {
			strbuf_addf(&status->error_message,
				    "could not create tempfile: '%s'",
				    buf.buf);
			status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE;
			goto cleanup;
		}
		if (b_fdopen)
			fdopen_tempfile(*t2, "w");
	}

cleanup:
	strbuf_release(&buf);
	strbuf_release(&basename);
}

/*
 * Create pathnames to the final location of the .pack and .idx
 * files in the ODB.  These are of the form:
 *
 *     "<odb>/pack/<term_1>-<term_2>[-<term_3>].<suffix>"
 *
 * For example, for prefetch packs, <term_2> will be the epoch
 * timestamp and <term_3> will be the packfile hash.
 */
static void create_final_packfile_pathnames(
	const char *term_1, const char *term_2, const char *term_3,
	struct strbuf *pack_path, struct strbuf *idx_path,
	struct strbuf *pack_filename)
{
	struct strbuf base = STRBUF_INIT;
	struct strbuf path = STRBUF_INIT;

	if (term_3 && *term_3)
		strbuf_addf(&base, "%s-%s-%s", term_1, term_2, term_3);
	else
		strbuf_addf(&base, "%s-%s", term_1, term_2);

	strbuf_setlen(pack_filename, 0);
	strbuf_addf(pack_filename, "%s.pack", base.buf);

	strbuf_addbuf(&path, &gh__global.buf_odb_path);
	strbuf_complete(&path, '/');
	strbuf_addstr(&path, "pack/");

	strbuf_setlen(pack_path, 0);
	strbuf_addbuf(pack_path, &path);
	strbuf_addf(pack_path, "%s.pack", base.buf);

	strbuf_setlen(idx_path, 0);
	strbuf_addbuf(idx_path, &path);
	strbuf_addf(idx_path, "%s.idx", base.buf);

	strbuf_release(&base);
	strbuf_release(&path);
}

/*
 * Create a pathname to the loose object in the shared-cache ODB
 * with the given OID.  Try to "mkdir -p" to ensure the parent
 * directories exist.
 */
static int create_loose_pathname_in_odb(struct strbuf *buf_path,
					const struct object_id *oid)
{
	enum scld_error scld;
	const char *hex;

	hex = oid_to_hex(oid);

	/* Standard loose layout: "<odb>/<xx>/<remaining-38-or-62-chars>". */
	strbuf_setlen(buf_path, 0);
	strbuf_addbuf(buf_path, &gh__global.buf_odb_path);
	strbuf_complete(buf_path, '/');
	strbuf_add(buf_path, hex, 2);
	strbuf_addch(buf_path, '/');
	strbuf_addstr(buf_path, hex+2);

	scld = safe_create_leading_directories(buf_path->buf);
	if (scld != SCLD_OK && scld != SCLD_EXISTS)
		return -1;

	return 0;
}

/*
 * Run "git index-pack" on the given temporary .pack to produce the
 * temporary .idx.  On success, optionally capture the packfile
 * checksum that index-pack prints on stdout.
 */
static void my_run_index_pack(struct gh__request_params *params,
			      struct gh__response_status *status,
			      const struct strbuf *temp_path_pack,
			      const struct strbuf *temp_path_idx,
			      struct strbuf *packfile_checksum)
{
	struct child_process ip = CHILD_PROCESS_INIT;
	struct strbuf ip_stdout = STRBUF_INIT;

	strvec_push(&ip.args, "git");
	strvec_push(&ip.args, "index-pack");

	ip.err = -1;
	ip.no_stderr = 1;

	/* Skip generating the rev index, we don't need it. */
	strvec_push(&ip.args, "--no-rev-index");

	strvec_pushl(&ip.args, "-o", temp_path_idx->buf, NULL);
	strvec_push(&ip.args, temp_path_pack->buf);
	ip.no_stdin = 1;
	ip.out = -1;

	if (pipe_command(&ip, NULL, 0, &ip_stdout, 0, NULL, 0)) {
		unlink(temp_path_pack->buf);
		unlink(temp_path_idx->buf);
		strbuf_addf(&status->error_message,
			    "index-pack failed on '%s'",
			    temp_path_pack->buf);
		/*
		 * Lets assume that index-pack failed because the
		 * downloaded file is corrupt (truncated).
		 *
		 * Retry it as if the network had dropped.
		 */
		status->retry = GH__RETRY_MODE__TRANSIENT;
		status->ec = GH__ERROR_CODE__INDEX_PACK_FAILED;
		goto cleanup;
	}

	if (packfile_checksum) {
		/*
		 * stdout from index-pack should have the packfile hash.
		 * Extract it and use it in the final packfile name.
		 *
		 * TODO What kind of validation should we do on the
		 * TODO string and is there ever any other output besides
		 * TODO just the checksum ?
		 */
		strbuf_trim_trailing_newline(&ip_stdout);

		strbuf_addbuf(packfile_checksum, &ip_stdout);
	}

cleanup:
	strbuf_release(&ip_stdout);
	child_process_clear(&ip);
}

/*
 * Move a temporary .pack/.idx pair into their final pathnames in the
 * ODB pack directory and (optionally) create a matching .keep file.
 */
static void my_finalize_packfile(struct gh__request_params *params,
				 struct gh__response_status *status,
				 int b_keep,
				 const struct strbuf *temp_path_pack,
				 const struct strbuf *temp_path_idx,
				 struct strbuf *final_path_pack,
				 struct strbuf *final_path_idx,
				 struct strbuf *final_filename)
{
	/*
	 * Install the .pack and .idx into the ODB pack directory.
	 *
	 * We might be racing with other instances of gvfs-helper if
	 * we, in parallel, both downloaded the exact same packfile
	 * (with the same checksum SHA) and try to install it at the
	 * same time.  This might happen on Windows where the loser
	 * can get an EBUSY or EPERM trying to move/rename the
	 * tempfile into the pack dir, for example.
	 *
	 * So, we always install the .pack before the .idx for
	 * consistency.  And only if *WE* created the .pack and .idx
	 * files, do we create the matching .keep (when requested).
	 *
	 * If we get an error and the target files already exist, we
	 * silently eat the error.  Note that finalize_object_file()
	 * has already munged errno (and it has various creation
	 * strategies), so we don't bother looking at it.
	 */
	if (finalize_object_file(temp_path_pack->buf, final_path_pack->buf) ||
	    finalize_object_file(temp_path_idx->buf, final_path_idx->buf)) {
		unlink(temp_path_pack->buf);
		unlink(temp_path_idx->buf);

		if (file_exists(final_path_pack->buf) &&
		    file_exists(final_path_idx->buf)) {
			/* Lost the race to a parallel install; that's OK. */
			trace2_printf("%s: assuming ok for %s", TR2_CAT,
				      final_path_pack->buf);
			goto assume_ok;
		}

		strbuf_addf(&status->error_message,
			    "could not install packfile '%s'",
			    final_path_pack->buf);
		status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE;
		return;
	}

	if (b_keep) {
		struct strbuf keep = STRBUF_INIT;
		int fd_keep;

		strbuf_addbuf(&keep, final_path_pack);
		strbuf_strip_suffix(&keep, ".pack");
		strbuf_addstr(&keep, ".keep");

		/* NOTE: xopen() dies on error, so this check is belt-and-suspenders. */
		fd_keep = xopen(keep.buf, O_WRONLY | O_CREAT | O_TRUNC, 0666);
		if (fd_keep >= 0)
			close(fd_keep);

		strbuf_release(&keep);
	}

assume_ok:
	if (params->result_list) {
		struct strbuf result_msg = STRBUF_INIT;

		strbuf_addf(&result_msg, "packfile %s", final_filename->buf);
		string_list_append(params->result_list, result_msg.buf);
		strbuf_release(&result_msg);
	}
}

/*
 * Convert the tempfile into a temporary .pack, index it into a temporary .idx
 * file, and then install the pair into ODB.
 */
static void install_packfile(struct gh__request_params *params,
			     struct gh__response_status *status)
{
	struct strbuf temp_path_pack = STRBUF_INIT;
	struct strbuf temp_path_idx = STRBUF_INIT;
	struct strbuf packfile_checksum = STRBUF_INIT;
	struct strbuf final_path_pack = STRBUF_INIT;
	struct strbuf final_path_idx = STRBUF_INIT;
	struct strbuf final_filename = STRBUF_INIT;

	gh__response_status__zero(status);

	/*
	 * After the download is complete, we will need to steal the file
	 * from the tempfile() class (so that it doesn't magically delete
	 * it when we close the file handle) and then index it.
	 */
	strbuf_addf(&temp_path_pack, "%s.pack",
		    get_tempfile_path(params->tempfile));
	strbuf_addf(&temp_path_idx, "%s.idx",
		    get_tempfile_path(params->tempfile));

	if (rename_tempfile(&params->tempfile,
			    temp_path_pack.buf) == -1) {
		strbuf_addf(&status->error_message,
			    "could not rename packfile to '%s'",
			    temp_path_pack.buf);
		status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE;
		goto cleanup;
	}

	my_run_index_pack(params, status, &temp_path_pack, &temp_path_idx,
			  &packfile_checksum);
	if (status->ec != GH__ERROR_CODE__OK)
		goto cleanup;

	create_final_packfile_pathnames("vfs", packfile_checksum.buf, NULL,
					&final_path_pack, &final_path_idx,
					&final_filename);
	my_finalize_packfile(params, status, 0,
			     &temp_path_pack, &temp_path_idx,
			     &final_path_pack, &final_path_idx,
			     &final_filename);

cleanup:
	strbuf_release(&temp_path_pack);
	strbuf_release(&temp_path_idx);
	strbuf_release(&packfile_checksum);
	strbuf_release(&final_path_pack);
	strbuf_release(&final_path_idx);
	strbuf_release(&final_filename);
}

/*
 * bswap.h only defines big endian functions.
 * The GVFS Protocol defines fields in little endian.
 */
static inline uint64_t my_get_le64(uint64_t le_val)
{
#if GIT_BYTE_ORDER == GIT_LITTLE_ENDIAN
	return le_val;
#else
	return default_bswap64(le_val);
#endif
}

#define MY_MIN(x,y) (((x) < (y)) ? (x) : (y))
#define MY_MAX(x,y) (((x) > (y)) ? (x) : (y))

/*
 * Copy the `nr_bytes_total` from `fd_in` to `fd_out`.
 *
 * This could be used to extract a single packfile from
 * a multipart file, for example.
 *
 * NOTE(review): a premature EOF (xread() returning 0 before the full
 * count is copied) breaks the loop and still returns 0 -- callers must
 * detect short copies themselves; confirm this is intended.
 */
static int my_copy_fd_len(int fd_in, int fd_out, ssize_t nr_bytes_total)
{
	char buffer[8192];

	while (nr_bytes_total > 0) {
		ssize_t len_to_read = MY_MIN(nr_bytes_total, sizeof(buffer));
		ssize_t nr_read = xread(fd_in, buffer, len_to_read);

		if (!nr_read)
			break;
		if (nr_read < 0)
			return -1;

		if (write_in_full(fd_out, buffer, nr_read) < 0)
			return -1;

		nr_bytes_total -= nr_read;
	}

	return 0;
}

/*
 * Copy the `nr_bytes_total` from `fd_in` to `fd_out` AND save the
 * final `tail_len` bytes in the given buffer.
* * This could be used to extract a single packfile from * a multipart file and read the final SHA into the buffer. */ static int my_copy_fd_len_tail(int fd_in, int fd_out, ssize_t nr_bytes_total, unsigned char *buf_tail, ssize_t tail_len) { memset(buf_tail, 0, tail_len); if (my_copy_fd_len(fd_in, fd_out, nr_bytes_total) < 0) return -1; if (nr_bytes_total < tail_len) return 0; /* Reset the position to read the tail */ lseek(fd_in, -tail_len, SEEK_CUR); if (xread(fd_in, (char *)buf_tail, tail_len) != tail_len) return -1; return 0; } /* * See the protocol document for the per-packfile header. */ struct ph { uint64_t timestamp; uint64_t pack_len; uint64_t idx_len; }; /* * Extract the next packfile from the multipack. * Install {.pack, .idx, .keep} set. * * Mark each successfully installed prefetch pack as .keep it as installed * in case we have errors decoding/indexing later packs within the received * multipart file. (A later pass can delete the unnecessary .keep files * from this and any previous invocations.) 
*/ static void extract_packfile_from_multipack( struct gh__request_params *params, struct gh__response_status *status, int fd_multipack, unsigned short k) { struct ph ph; struct tempfile *tempfile_pack = NULL; int result = -1; int b_no_idx_in_multipack; struct object_id packfile_checksum; char hex_checksum[GIT_MAX_HEXSZ + 1]; struct strbuf buf_timestamp = STRBUF_INIT; struct strbuf temp_path_pack = STRBUF_INIT; struct strbuf temp_path_idx = STRBUF_INIT; struct strbuf final_path_pack = STRBUF_INIT; struct strbuf final_path_idx = STRBUF_INIT; struct strbuf final_filename = STRBUF_INIT; if (xread(fd_multipack, &ph, sizeof(ph)) != sizeof(ph)) { strbuf_addf(&status->error_message, "could not read header for packfile[%d] in multipack", k); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; goto done; } ph.timestamp = my_get_le64(ph.timestamp); ph.pack_len = my_get_le64(ph.pack_len); ph.idx_len = my_get_le64(ph.idx_len); if (!ph.pack_len) { strbuf_addf(&status->error_message, "packfile[%d]: zero length packfile?", k); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; goto done; } b_no_idx_in_multipack = (ph.idx_len == maximum_unsigned_value_of_type(uint64_t) || ph.idx_len == 0); /* * We are going to harden `gvfs-helper` here and ignore the .idx file * if it is provided and always compute it locally so that we get the * added verification that `git index-pack` provides. */ my_create_tempfile(status, 0, "pack", &tempfile_pack, NULL, NULL); if (!tempfile_pack) goto done; /* * Copy the current packfile from the open stream and capture * the checksum. * * TODO This assumes that the checksum is SHA1. Fix this if/when * TODO Git converts to SHA256. 
*/ result = my_copy_fd_len_tail(fd_multipack, get_tempfile_fd(tempfile_pack), ph.pack_len, packfile_checksum.hash, GIT_SHA1_RAWSZ); packfile_checksum.algo = GIT_HASH_SHA1; if (result < 0){ strbuf_addf(&status->error_message, "could not extract packfile[%d] from multipack", k); goto done; } strbuf_addstr(&temp_path_pack, get_tempfile_path(tempfile_pack)); close_tempfile_gently(tempfile_pack); oid_to_hex_r(hex_checksum, &packfile_checksum); /* * Always compute the .idx file from the .pack file. */ strbuf_addbuf(&temp_path_idx, &temp_path_pack); strbuf_strip_suffix(&temp_path_idx, ".pack"); strbuf_addstr(&temp_path_idx, ".idx"); my_run_index_pack(params, status, &temp_path_pack, &temp_path_idx, NULL); if (status->ec != GH__ERROR_CODE__OK) goto done; if (!b_no_idx_in_multipack) { /* * Server sent the .idx immediately after the .pack in the * data stream. Skip over it. */ if (lseek(fd_multipack, ph.idx_len, SEEK_CUR) < 0) { strbuf_addf(&status->error_message, "could not skip index[%d] in multipack", k); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; goto done; } } strbuf_addf(&buf_timestamp, "%u", (unsigned int)ph.timestamp); create_final_packfile_pathnames("prefetch", buf_timestamp.buf, hex_checksum, &final_path_pack, &final_path_idx, &final_filename); my_finalize_packfile(params, status, 1, &temp_path_pack, &temp_path_idx, &final_path_pack, &final_path_idx, &final_filename); done: delete_tempfile(&tempfile_pack); strbuf_release(&temp_path_pack); strbuf_release(&temp_path_idx); strbuf_release(&final_path_pack); strbuf_release(&final_path_idx); strbuf_release(&final_filename); } struct keep_files_data { timestamp_t max_timestamp; int pos_of_max; struct string_list *keep_files; }; static void cb_keep_files(const char *full_path, size_t full_path_len, const char *file_path, void *void_data) { struct keep_files_data *data = void_data; const char *val; timestamp_t t; /* * We expect prefetch packfiles named like: * * prefetch--.keep */ if (!skip_prefix(file_path, 
"prefetch-", &val)) return; if (!ends_with(val, ".keep")) return; t = strtol(val, NULL, 10); if (t > data->max_timestamp) { data->pos_of_max = data->keep_files->nr; data->max_timestamp = t; } string_list_append(data->keep_files, full_path); } static void delete_stale_keep_files( struct gh__request_params *params, struct gh__response_status *status) { struct string_list keep_files = STRING_LIST_INIT_DUP; struct keep_files_data data = { 0, 0, &keep_files }; int k; for_each_file_in_pack_dir(gh__global.buf_odb_path.buf, cb_keep_files, &data); for (k = 0; k < keep_files.nr; k++) { if (k != data.pos_of_max) unlink(keep_files.items[k].string); } string_list_clear(&keep_files, 0); } /* * Cut apart the received multipart response into individual packfiles * and install each one. */ static void install_prefetch(struct gh__request_params *params, struct gh__response_status *status) { static unsigned char v1_h[6] = { 'G', 'P', 'R', 'E', ' ', 0x01 }; struct mh { unsigned char h[6]; unsigned char np[2]; }; struct mh mh; unsigned short np; unsigned short k; int fd = -1; int nr_installed = 0; struct strbuf temp_path_mp = STRBUF_INIT; /* * Steal the multi-part file from the tempfile class. 
*/ strbuf_addf(&temp_path_mp, "%s.mp", get_tempfile_path(params->tempfile)); if (rename_tempfile(¶ms->tempfile, temp_path_mp.buf) == -1) { strbuf_addf(&status->error_message, "could not rename prefetch tempfile to '%s'", temp_path_mp.buf); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; goto cleanup; } fd = git_open_cloexec(temp_path_mp.buf, O_RDONLY); if (fd == -1) { strbuf_addf(&status->error_message, "could not reopen prefetch tempfile '%s'", temp_path_mp.buf); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; goto cleanup; } if ((xread(fd, &mh, sizeof(mh)) != sizeof(mh)) || (memcmp(mh.h, &v1_h, sizeof(mh.h)))) { strbuf_addstr(&status->error_message, "invalid prefetch multipart header"); goto cleanup; } np = (unsigned short)mh.np[0] + ((unsigned short)mh.np[1] << 8); if (np) trace2_data_intmax(TR2_CAT, NULL, "prefetch/packfile_count", np); if (gh__cmd_opts.show_progress) params->progress = start_progress("Installing prefetch packfiles", np); for (k = 0; k < np; k++) { extract_packfile_from_multipack(params, status, fd, k); display_progress(params->progress, k + 1); if (status->ec != GH__ERROR_CODE__OK) break; nr_installed++; } stop_progress(¶ms->progress); if (nr_installed) delete_stale_keep_files(params, status); cleanup: if (fd != -1) close(fd); unlink(temp_path_mp.buf); strbuf_release(&temp_path_mp); } /* * Wrapper for read_loose_object() to read and verify the hash of a * loose object, and discard the contents buffer. * * Returns 0 on success, negative on error (details may be written to stderr). 
*/ static int verify_loose_object(const char *path, const struct object_id *expected_oid) { enum object_type type; void *contents = NULL; unsigned long size; struct strbuf type_name = STRBUF_INIT; int ret; struct object_info oi = OBJECT_INFO_INIT; struct object_id real_oid = *null_oid(); oi.typep = &type; oi.sizep = &size; oi.type_name = &type_name; ret = read_loose_object(path, expected_oid, &real_oid, &contents, &oi); if (!ret) free(contents); return ret; } /* * Convert the tempfile into a permanent loose object in the ODB. */ static void install_loose(struct gh__request_params *params, struct gh__response_status *status) { struct strbuf tmp_path = STRBUF_INIT; struct strbuf loose_path = STRBUF_INIT; gh__response_status__zero(status); /* * close tempfile to steal ownership away from tempfile class. */ strbuf_addstr(&tmp_path, get_tempfile_path(params->tempfile)); close_tempfile_gently(params->tempfile); /* * Compute the hash of the received content (while it is still * in a temp file) and verify that it matches the OID that we * requested and was not corrupted. */ if (verify_loose_object(tmp_path.buf, ¶ms->loose_oid)) { strbuf_addf(&status->error_message, "hash failed for received loose object '%s'", oid_to_hex(¶ms->loose_oid)); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE; goto cleanup; } /* * Try to install the tempfile as the actual loose object. * * If the loose object already exists, finalize_object_file() * will NOT overwrite/replace it. It will silently eat the * EEXIST error and unlink the tempfile as it if was * successful. We just let it lie to us. * * Since our job is to back-fill missing objects needed by a * foreground git process -- git should have called * oid_object_info_extended() and loose_object_info() BEFORE * asking us to download the missing object. So if we get a * collision we have to assume something else is happening in * parallel and we lost the race. And that's OK. 
*/ if (create_loose_pathname_in_odb(&loose_path, ¶ms->loose_oid)) { strbuf_addf(&status->error_message, "cannot create directory for loose object '%s'", loose_path.buf); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE; goto cleanup; } if (finalize_object_file(tmp_path.buf, loose_path.buf)) { unlink(tmp_path.buf); strbuf_addf(&status->error_message, "could not install loose object '%s'", loose_path.buf); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE; goto cleanup; } if (params->result_list) { struct strbuf result_msg = STRBUF_INIT; strbuf_addf(&result_msg, "loose %s", oid_to_hex(¶ms->loose_oid)); string_list_append(params->result_list, result_msg.buf); strbuf_release(&result_msg); } cleanup: strbuf_release(&tmp_path); strbuf_release(&loose_path); } static void install_result(struct gh__request_params *params, struct gh__response_status *status) { if (params->objects_mode == GH__OBJECTS_MODE__PREFETCH) { /* * The "gvfs/prefetch" API is the only thing that sends * these multi-part packfiles. According to the protocol * documentation, they will have this x- content type. * * However, it appears that there is a BUG in the origin * server causing it to sometimes send "text/html" instead. * So, we silently handle both. */ if (!strcmp(status->content_type.buf, "application/x-gvfs-timestamped-packfiles-indexes")) { install_prefetch(params, status); return; } if (!strcmp(status->content_type.buf, "text/html")) { install_prefetch(params, status); return; } } else { if (!strcmp(status->content_type.buf, "application/x-git-packfile")) { assert(params->b_is_post); assert(params->objects_mode == GH__OBJECTS_MODE__POST); install_packfile(params, status); return; } if (!strcmp(status->content_type.buf, "application/x-git-loose-object")) { /* * We get these for "gvfs/objects" GET and POST requests. * * Note that this content type is singular, not plural. 
*/ install_loose(params, status); return; } } strbuf_addf(&status->error_message, "install_result: received unknown content-type '%s'", status->content_type.buf); status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; } /* * Our wrapper to initialize the HTTP layer. * * We always use the real origin server, not the cache-server, when * initializing the http/curl layer. */ static void gh_http_init(void) { if (gh__global.http_is_initialized) return; http_init(gh__global.remote, gh__global.main_url, 0); gh__global.http_is_initialized = 1; } static void gh_http_cleanup(void) { if (!gh__global.http_is_initialized) return; http_cleanup(); gh__global.http_is_initialized = 0; } /* * buffer has ": [\r]\n" */ static void parse_resp_hdr_1(const char *buffer, size_t size, size_t nitems, struct strbuf *key, struct strbuf *value) { const char *end = buffer + (size * nitems); const char *p; p = strchr(buffer, ':'); strbuf_setlen(key, 0); strbuf_add(key, buffer, (p - buffer)); p++; /* skip ':' */ p++; /* skip ' ' */ strbuf_setlen(value, 0); strbuf_add(value, p, (end - p)); strbuf_trim_trailing_newline(value); } static size_t parse_resp_hdr(char *buffer, size_t size, size_t nitems, void *void_params) { struct gh__request_params *params = void_params; struct gh__azure_throttle *azure = &gh__global_throttle[params->server_type]; if (starts_with(buffer, "X-RateLimit-")) { struct strbuf key = STRBUF_INIT; struct strbuf val = STRBUF_INIT; parse_resp_hdr_1(buffer, size, nitems, &key, &val); /* * The following X- headers are specific to AzureDevOps. * Other servers have similar sets of values, but I haven't * compared them in depth. */ // trace2_printf("%s: Throttle: %s %s", TR2_CAT, key.buf, val.buf); if (!strcmp(key.buf, "X-RateLimit-Resource")) { /* * The name of the resource that is complaining. * Just log it because we can't do anything with it. 
*/ strbuf_setlen(&key, 0); strbuf_addstr(&key, "ratelimit/resource"); strbuf_addstr(&key, gh__server_type_label[params->server_type]); trace2_data_string(TR2_CAT, NULL, key.buf, val.buf); } else if (!strcmp(key.buf, "X-RateLimit-Delay")) { /* * The amount of delay added to our response. * Just log it because we can't do anything with it. */ unsigned long tarpit_delay_ms; strbuf_setlen(&key, 0); strbuf_addstr(&key, "ratelimit/delay_ms"); strbuf_addstr(&key, gh__server_type_label[params->server_type]); git_parse_ulong(val.buf, &tarpit_delay_ms); trace2_data_intmax(TR2_CAT, NULL, key.buf, tarpit_delay_ms); } else if (!strcmp(key.buf, "X-RateLimit-Limit")) { /* * The resource limit/quota before we get a 429. */ git_parse_ulong(val.buf, &azure->tstu_limit); } else if (!strcmp(key.buf, "X-RateLimit-Remaining")) { /* * The amount of our quota remaining. When zero, we * should get 429s on futher requests until the reset * time. */ git_parse_ulong(val.buf, &azure->tstu_remaining); } else if (!strcmp(key.buf, "X-RateLimit-Reset")) { /* * The server gave us a time-in-seconds-since-the-epoch * for when our quota will be reset (if we stop all * activity right now). * * Checkpoint the local system clock so we can do some * sanity checks on any clock skew. Also, since we get * the headers before we get the content, we can adjust * our delay to compensate for the full download time. */ unsigned long now = time(NULL); unsigned long reset_time; git_parse_ulong(val.buf, &reset_time); if (reset_time > now) azure->reset_sec = reset_time - now; } strbuf_release(&key); strbuf_release(&val); } else if (starts_with(buffer, "Retry-After")) { struct strbuf key = STRBUF_INIT; struct strbuf val = STRBUF_INIT; parse_resp_hdr_1(buffer, size, nitems, &key, &val); /* * We get this header with a 429 and 503 and possibly a 30x. 
* * Curl does have CURLINFO_RETRY_AFTER that nicely parses and * normalizes the value (and supports HTTP/1.1 usage), but it * is not present yet in the version shipped with the Mac, so * we do it directly here. */ git_parse_ulong(val.buf, &azure->retry_after_sec); strbuf_release(&key); strbuf_release(&val); } else if (starts_with(buffer, "X-VSS-E2EID")) { struct strbuf key = STRBUF_INIT; /* * Capture the E2EID as it goes by, but don't log it until we * know the request result. */ parse_resp_hdr_1(buffer, size, nitems, &key, ¶ms->e2eid); strbuf_release(&key); } return nitems * size; } /* * Wait "duration" seconds and drive the progress mechanism. * * We spin slightly faster than we need to to keep the progress bar * drawn (especially if the user presses return while waiting) and to * compensate for delay factors built into the progress class (which * might wait for 2 seconds before drawing the first message). */ static void do_throttle_spin(struct gh__request_params *params, const char *tr2_label, const char *progress_msg, int duration) { struct strbuf region = STRBUF_INIT; struct progress *progress = NULL; unsigned long begin = time(NULL); unsigned long now = begin; unsigned long end = begin + duration; strbuf_addstr(®ion, tr2_label); strbuf_addstr(®ion, gh__server_type_label[params->server_type]); trace2_region_enter(TR2_CAT, region.buf, NULL); if (gh__cmd_opts.show_progress) progress = start_progress(progress_msg, duration); while (now < end) { display_progress(progress, (now - begin)); sleep_millisec(100); now = time(NULL); } display_progress(progress, duration); stop_progress(&progress); trace2_region_leave(TR2_CAT, region.buf, NULL); strbuf_release(®ion); } /* * Delay the outbound request if necessary in response to previous throttle * blockages or hints. Throttle data is somewhat orthogonal to the status * results from any previous request and/or the request params of the next * request. 
* * Note that the throttle info also is cross-process information, such as * 2 concurrent fetches in 2 different terminal windows to the same server * will be sharing the same server quota. These could be coordinated too, * so that a blockage received in one process would prevent the other * process from starting another request (and also blocked or extending * the delay interval). We're NOT going to do that level of integration. * We will let both processes independently attempt the next request. * This may cause us to miss the end-of-quota boundary if the server * extends it because of the second request. * * TODO Should we have a max-wait option and then return a hard-error * TODO of some type? */ static void do_throttle_wait(struct gh__request_params *params, struct gh__response_status *status) { struct gh__azure_throttle *azure = &gh__global_throttle[params->server_type]; if (azure->retry_after_sec) { /* * We were given a hard delay (such as after a 429). * Spin until the requested time. */ do_throttle_spin(params, "throttle/hard", "Waiting on hard throttle (sec)", azure->retry_after_sec); return; } if (azure->reset_sec > 0) { /* * We were given a hint that we are overloading * the server. Voluntarily backoff (before we * get tarpitted or blocked). */ do_throttle_spin(params, "throttle/soft", "Waiting on soft throttle (sec)", azure->reset_sec); return; } if (params->k_transient_delay_sec) { /* * Insert an arbitrary delay before retrying after a * transient (network) failure. */ do_throttle_spin(params, "throttle/transient", "Waiting to retry after network error (sec)", params->k_transient_delay_sec); return; } } static void set_main_creds_on_slot(struct active_request_slot *slot, const struct credential *creds) { assert(creds == &gh__global.main_creds); /* * When talking to the main/origin server, we have 3 modes * of operation: * * [1] The initial request is sent without loading creds * and with ANY-AUTH set. (And the `":"` is a magic * value.) 
* * This allows libcurl to negotiate for us if it can. * For example, this allows NTLM to work by magic and * we get 200s without ever seeing a 401. If libcurl * cannot negotiate for us, it gives us a 401 (and all * of the 401 code in this file responds to that). * * [2] A 401 retry will load the main creds and try again. * This causes `creds->username`to be non-NULL (even * if refers to a zero-length string). And we assume * BASIC Authentication. (And a zero-length username * is a convention for PATs, but then sometimes users * put the PAT in their `username` field and leave the * `password` field blank. And that works too.) * * [3] Subsequent requests on the same connection use * whatever worked before. */ if (creds && creds->username) { curl_easy_setopt(slot->curl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC); curl_easy_setopt(slot->curl, CURLOPT_USERNAME, creds->username); curl_easy_setopt(slot->curl, CURLOPT_PASSWORD, creds->password); } else { curl_easy_setopt(slot->curl, CURLOPT_HTTPAUTH, CURLAUTH_ANY); curl_easy_setopt(slot->curl, CURLOPT_USERPWD, ":"); } } static void set_cache_server_creds_on_slot(struct active_request_slot *slot, const struct credential *creds) { assert(creds == &gh__global.cache_creds); assert(creds->username); /* * Things are weird when talking to a cache-server: * * [1] They don't send 401s on an auth error, rather they send * a 400 (with a nice human-readable string in the html body). * This prevents libcurl from doing any negotiation for us. * * [2] Cache-servers don't manage their own passwords, but * rather require us to send the Basic Authentication * username & password that we would send to the main * server. (So yes, we have to get creds validated * against the main server creds and substitute them when * talking to the cache-server.) * * This means that: * * [a] We cannot support cache-servers that want to use NTLM. * * [b] If we want to talk to a cache-server, we have get the * Basic Auth creds for the main server. 
And this may be * problematic if the libcurl and/or the credential manager * insists on using NTLM and prevents us from getting them. * * So we never try AUTH-ANY and force Basic Auth (if possible). */ if (creds && creds->username) { curl_easy_setopt(slot->curl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC); curl_easy_setopt(slot->curl, CURLOPT_USERNAME, creds->username); curl_easy_setopt(slot->curl, CURLOPT_PASSWORD, creds->password); } } /* * Do a single HTTP request WITHOUT robust-retry, auth-retry or fallback. */ static void do_req(const char *url_base, const char *url_component, const struct credential *creds, struct gh__request_params *params, struct gh__response_status *status) { struct active_request_slot *slot; struct slot_results results; struct strbuf rest_url = STRBUF_INIT; gh__response_status__zero(status); if (params->b_write_to_file) { /* Delete dirty tempfile from a previous attempt. */ if (params->tempfile) delete_tempfile(¶ms->tempfile); my_create_tempfile(status, 1, NULL, ¶ms->tempfile, NULL, NULL); if (!params->tempfile || status->ec != GH__ERROR_CODE__OK) return; } else { /* Guard against caller using dirty buffer */ strbuf_setlen(params->buffer, 0); } end_url_with_slash(&rest_url, url_base); strbuf_addstr(&rest_url, url_component); do_throttle_wait(params, status); gh__azure_throttle__zero(&gh__global_throttle[params->server_type]); slot = get_active_slot(); slot->results = &results; curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 0); /* not a HEAD request */ curl_easy_setopt(slot->curl, CURLOPT_URL, rest_url.buf); curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, params->headers); if (params->b_is_post) { curl_easy_setopt(slot->curl, CURLOPT_POST, 1); curl_easy_setopt(slot->curl, CURLOPT_ENCODING, NULL); curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDS, params->post_payload->buf); curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDSIZE, (long)params->post_payload->len); } else { curl_easy_setopt(slot->curl, CURLOPT_POST, 0); } if (params->b_write_to_file) { 
curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite); curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, (void*)params->tempfile->fp); } else { curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer); curl_easy_setopt(slot->curl, CURLOPT_FILE, params->buffer); } curl_easy_setopt(slot->curl, CURLOPT_HEADERFUNCTION, parse_resp_hdr); curl_easy_setopt(slot->curl, CURLOPT_HEADERDATA, params); if (params->server_type == GH__SERVER_TYPE__MAIN) set_main_creds_on_slot(slot, creds); else set_cache_server_creds_on_slot(slot, creds); if (params->progress_base_phase2_msg.len || params->progress_base_phase3_msg.len) { curl_easy_setopt(slot->curl, CURLOPT_XFERINFOFUNCTION, gh__curl_progress_cb); curl_easy_setopt(slot->curl, CURLOPT_XFERINFODATA, params); curl_easy_setopt(slot->curl, CURLOPT_NOPROGRESS, 0); } else { curl_easy_setopt(slot->curl, CURLOPT_NOPROGRESS, 1); } gh__run_one_slot(slot, params, status); } /* * Compute the delay for the nth attempt. * * No delay for the first attempt. Then use a normal exponential backoff * starting from 8. */ static int compute_transient_delay(int attempt) { int v; if (attempt < 1) return 0; /* * Let 8K be our hard limit (for integer overflow protection). * That's over 2 hours. This is 8<<10. */ if (attempt > 10) attempt = 10; v = 8 << (attempt - 1); if (v > gh__cmd_opts.max_transient_backoff_sec) v = gh__cmd_opts.max_transient_backoff_sec; return v; } /* * Robustly make an HTTP request. Retry if necessary to hide common * transient network errors and/or 429 blockages. * * For a transient (network) failure (where we do not have a throttle * delay factor), we should insert a small delay to let the network * recover. The outage might be because the VPN dropped, or the * machine went to sleep or something and we want to give the network * time to come back up. 
 * Insert AI here :-)
 */
static void do_req__with_robust_retry(const char *url_base,
				      const char *url_component,
				      const struct credential *creds,
				      struct gh__request_params *params,
				      struct gh__response_status *status)
{
	/* Attempt 0 is the initial try; max_retries more may follow. */
	for (params->k_attempt = 0;
	     params->k_attempt < gh__cmd_opts.max_retries + 1;
	     params->k_attempt++) {

		do_req(url_base, url_component, creds, params, status);

		switch (status->retry) {
		default:
		case GH__RETRY_MODE__SUCCESS:
		case GH__RETRY_MODE__HTTP_401: /* caller does auth-retry */
		case GH__RETRY_MODE__HARD_FAIL:
		case GH__RETRY_MODE__FAIL_404:
			return;

		case GH__RETRY_MODE__HTTP_429:
		case GH__RETRY_MODE__HTTP_503:
			/*
			 * We should have gotten a "Retry-After" header with
			 * these and that gives us the wait time.  If not,
			 * fallthru and use the backoff delay.
			 */
			if (gh__global_throttle[params->server_type].retry_after_sec)
				continue;
			/*fallthru*/

		case GH__RETRY_MODE__TRANSIENT:
			/* do_throttle_wait() consumes this on the next pass */
			params->k_transient_delay_sec =
				compute_transient_delay(params->k_attempt);
			continue;
		}
	}
}

/*
 * Send a request to the main Git server, with one auth-retry on 401.
 */
static void do_req__to_main(const char *url_component,
			    struct gh__request_params *params,
			    struct gh__response_status *status)
{
	params->server_type = GH__SERVER_TYPE__MAIN;

	/*
	 * When talking to the main Git server, we DO NOT preload the
	 * creds before the first request.
	 */
	do_req__with_robust_retry(gh__global.main_url, url_component,
				  &gh__global.main_creds,
				  params, status);

	if (status->retry == GH__RETRY_MODE__HTTP_401) {
		refresh_main_creds();

		do_req__with_robust_retry(gh__global.main_url, url_component,
					  &gh__global.main_creds,
					  params, status);
	}

	if (status->retry == GH__RETRY_MODE__SUCCESS)
		approve_main_creds();
}

/*
 * Send a request to the cache-server, with one auth-retry on 401.
 */
static void do_req__to_cache_server(const char *url_component,
				    struct gh__request_params *params,
				    struct gh__response_status *status)
{
	params->server_type = GH__SERVER_TYPE__CACHE;

	/*
	 * When talking to a cache-server, DO force load the creds.
	 * This implicitly preloads the creds to the main server.
	 */
	synthesize_cache_server_creds();

	do_req__with_robust_retry(gh__global.cache_server_url, url_component,
				  &gh__global.cache_creds,
				  params, status);

	if (status->retry == GH__RETRY_MODE__HTTP_401) {
		refresh_cache_server_creds();

		do_req__with_robust_retry(gh__global.cache_server_url,
					  url_component,
					  &gh__global.cache_creds,
					  params, status);
	}

	if (status->retry == GH__RETRY_MODE__SUCCESS)
		approve_cache_server_creds();
}

/*
 * Try the cache-server (if configured) then fall-back to the main Git server.
 */
static void do_req__with_fallback(const char *url_component,
				  struct gh__request_params *params,
				  struct gh__response_status *status)
{
	if (gh__global.cache_server_url &&
	    params->b_permit_cache_server_if_defined) {
		do_req__to_cache_server(url_component, params, status);

		if (status->retry == GH__RETRY_MODE__SUCCESS)
			return;

		if (!gh__cmd_opts.try_fallback)
			return;

		/*
		 * The cache-server shares creds with the main Git server,
		 * so if our creds failed against the cache-server, they
		 * will also fail against the main Git server.  We just let
		 * this fail.
		 *
		 * Falling-back would likely just cause the 3rd (or maybe
		 * 4th) cred prompt.
		 */
		if (status->retry == GH__RETRY_MODE__HTTP_401)
			return;
	}

	do_req__to_main(url_component, params, status);
}

/*
 * Call "gvfs/config" REST API.
 *
 * Return server's response buffer.  This is probably a raw JSON string.
*/ static void do__http_get__simple_endpoint(struct gh__response_status *status, struct strbuf *response, const char *endpoint, const char *tr2_label) { struct gh__request_params params = GH__REQUEST_PARAMS_INIT; strbuf_addstr(¶ms.tr2_label, tr2_label); params.b_is_post = 0; params.b_write_to_file = 0; /* cache-servers do not handle gvfs/config REST calls */ params.b_permit_cache_server_if_defined = 0; params.buffer = response; params.objects_mode = GH__OBJECTS_MODE__NONE; params.object_count = 1; /* a bit of a lie */ /* * "X-TFS-FedAuthRedirect: Suppress" disables the 302 + 203 redirect * sequence to a login page and forces the main Git server to send a * normal 401. */ params.headers = http_copy_default_headers(); params.headers = curl_slist_append(params.headers, "X-TFS-FedAuthRedirect: Suppress"); params.headers = curl_slist_append(params.headers, "Pragma: no-cache"); if (gh__cmd_opts.show_progress) { /* * gvfs/config has a very small reqest payload, so I don't * see any need to report progress on the upload side of * the GET. So just report progress on the download side. */ strbuf_addf(¶ms.progress_base_phase3_msg, "Receiving %s", endpoint); } do_req__with_fallback(endpoint, ¶ms, status); gh__request_params__release(¶ms); } static void do__http_get__gvfs_config(struct gh__response_status *status, struct strbuf *config_data) { do__http_get__simple_endpoint(status, config_data, "gvfs/config", "GET/config"); } static void setup_gvfs_objects_progress(struct gh__request_params *params, unsigned long num, unsigned long den) { if (!gh__cmd_opts.show_progress) return; if (params->b_is_post) { strbuf_addf(¶ms->progress_base_phase3_msg, "Receiving packfile %ld/%ld with %ld objects", num, den, params->object_count); } /* If requesting only one object, then do not show progress */ } /* * Call "gvfs/objects/" REST API to fetch a loose object * and write it to the ODB. 
*/ static void do__http_get__gvfs_object(struct gh__response_status *status, const struct object_id *oid, unsigned long l_num, unsigned long l_den, struct string_list *result_list) { struct gh__request_params params = GH__REQUEST_PARAMS_INIT; struct strbuf component_url = STRBUF_INIT; gh__response_status__zero(status); strbuf_addf(&component_url, "gvfs/objects/%s", oid_to_hex(oid)); strbuf_addstr(¶ms.tr2_label, "GET/objects"); params.b_is_post = 0; params.b_write_to_file = 1; params.b_permit_cache_server_if_defined = 1; params.objects_mode = GH__OBJECTS_MODE__GET; params.object_count = 1; params.result_list = result_list; params.headers = http_copy_default_headers(); params.headers = curl_slist_append(params.headers, "X-TFS-FedAuthRedirect: Suppress"); params.headers = curl_slist_append(params.headers, "Pragma: no-cache"); oidcpy(¶ms.loose_oid, oid); setup_gvfs_objects_progress(¶ms, l_num, l_den); do_req__with_fallback(component_url.buf, ¶ms, status); gh__request_params__release(¶ms); strbuf_release(&component_url); } /* * Call "gvfs/objects" POST REST API to fetch a batch of objects * from the OIDSET. Normal, this is results in a packfile containing * `nr_wanted_in_block` objects. And we return the number actually * consumed (along with the filename of the resulting packfile). * * However, if we only have 1 oid (remaining) in the OIDSET, the * server *MAY* respond to our POST with a loose object rather than * a packfile with 1 object. * * Append a message to the result_list describing the result. * * Return the number of OIDs consumed from the OIDSET. 
 */
static void do__http_post__gvfs_objects(struct gh__response_status *status,
					struct oidset_iter *iter,
					unsigned long nr_wanted_in_block,
					int j_pack_num, int j_pack_den,
					struct string_list *result_list,
					unsigned long *nr_oid_taken)
{
	struct json_writer jw_req = JSON_WRITER_INIT;
	struct gh__request_params params = GH__REQUEST_PARAMS_INIT;

	gh__response_status__zero(status);

	/*
	 * Build the JSON request body from (up to) nr_wanted_in_block
	 * OIDs pulled from the iterator; the helper reports how many
	 * it actually consumed.
	 */
	params.object_count = build_json_payload__gvfs_objects(
		&jw_req, iter, nr_wanted_in_block, &params.loose_oid);
	*nr_oid_taken = params.object_count;

	strbuf_addstr(&params.tr2_label, "POST/objects");

	params.b_is_post = 1;
	params.b_write_to_file = 1;
	params.b_permit_cache_server_if_defined = 1;
	params.objects_mode = GH__OBJECTS_MODE__POST;

	params.post_payload = &jw_req.json;
	params.result_list = result_list;

	params.headers = http_copy_default_headers();
	params.headers = curl_slist_append(params.headers,
					   "X-TFS-FedAuthRedirect: Suppress");
	params.headers = curl_slist_append(params.headers,
					   "Pragma: no-cache");
	params.headers = curl_slist_append(params.headers,
					   "Content-Type: application/json");
	/*
	 * If our POST contains more than one object, we want the
	 * server to send us a packfile.  We DO NOT want the non-standard
	 * concatenated loose object format, so we DO NOT send:
	 *     "Accept: application/x-git-loose-objects" (plural)
	 *
	 * However, if the payload only requests 1 OID, the server
	 * will send us a single loose object instead of a packfile,
	 * so we ACK that and send:
	 *     "Accept: application/x-git-loose-object" (singular)
	 */
	params.headers = curl_slist_append(params.headers,
					   "Accept: application/x-git-packfile");
	params.headers = curl_slist_append(params.headers,
					   "Accept: application/x-git-loose-object");

	setup_gvfs_objects_progress(&params, j_pack_num, j_pack_den);

	do_req__with_fallback("gvfs/objects", &params, status);

	gh__request_params__release(&params);
	jw_release(&jw_req);
}

/* Accumulator for scanning the pack directory for prefetch packs. */
struct find_last_data {
	timestamp_t timestamp;	/* largest timestamp seen so far */
	int nr_files;		/* number of prefetch packs seen */
};

/*
 * for_each_file_in_pack_dir() callback: track the newest prefetch
 * packfile by the timestamp embedded in its filename.
 */
static void cb_find_last(const char *full_path, size_t full_path_len,
			 const char *file_path, void *void_data)
{
	struct find_last_data *data = void_data;
	const char *val;
	timestamp_t t;

	if (!skip_prefix(file_path, "prefetch-", &val))
		return;
	if (!ends_with(val, ".pack"))
		return;

	data->nr_files++;

	/*
	 * We expect prefetch packfiles named like:
	 *
	 *     prefetch-<seconds-since-epoch>-<checksum>.pack
	 *
	 * so the leading digits of `val` are the server timestamp.
	 */
	t = strtol(val, NULL, 10);

	data->timestamp = MY_MAX(t, data->timestamp);
}

/*
 * Find the server timestamp on the last prefetch packfile that
 * we have in the ODB.
 *
 * TODO I'm going to assume that all prefetch packs are created
 * TODO equal and take the one with the largest t value.
 * TODO
 * TODO Or should we look for one marked with .keep ?
 *
 * TODO Alternatively, should we maybe get the 2nd largest?
 * TODO (Or maybe subtract an hour delta from the largest?)
 * TODO
 * TODO Since each cache-server maintains its own set of prefetch
 * TODO packs (such that 2 requests may hit 2 different
 * TODO load-balanced servers and get different answers (with or
 * TODO without clock-skew issues)), is it possible for us to miss
 * TODO the absolute fringe of new commits and trees?
 * TODO
 * TODO That is, since the cache-server generates hourly prefetch
 * TODO packs, we could do a prefetch and be up-to-date, but then
 * TODO do the main fetch and hit a different cache/main server
 * TODO and be behind by as much as an hour and have to demand-
 * TODO load the commits/trees.
 *
 * TODO Alternatively, should we compare the last timestamp found
 * TODO with "now" and silently do nothing if within an epsilon?
 */
static void find_last_prefetch_timestamp(timestamp_t *last)
{
	struct find_last_data data;

	memset(&data, 0, sizeof(data));

	/* Scan the shared-cache pack directory for prefetch packs. */
	for_each_file_in_pack_dir(gh__global.buf_odb_path.buf, cb_find_last, &data);

	*last = data.timestamp;
}

/*
 * Call "gvfs/prefetch[?lastPackTimestamp=<secs>]" REST API to
 * fetch a series of packfiles and write them to the ODB.
 *
 * Return a list of packfile names.
 */
static void do__http_get__gvfs_prefetch(struct gh__response_status *status,
					timestamp_t seconds_since_epoch,
					struct string_list *result_list)
{
	struct gh__request_params params = GH__REQUEST_PARAMS_INIT;
	struct strbuf component_url = STRBUF_INIT;

	gh__response_status__zero(status);

	strbuf_addstr(&component_url, "gvfs/prefetch");

	/*
	 * If the caller did not supply a timestamp, derive one from the
	 * newest prefetch pack already on disk; omit the query parameter
	 * entirely if we still have nothing (first-ever prefetch).
	 */
	if (!seconds_since_epoch)
		find_last_prefetch_timestamp(&seconds_since_epoch);
	if (seconds_since_epoch)
		strbuf_addf(&component_url, "?lastPackTimestamp=%"PRItime,
			    seconds_since_epoch);

	params.b_is_post = 0;
	params.b_write_to_file = 1;
	params.b_permit_cache_server_if_defined = 1;
	params.objects_mode = GH__OBJECTS_MODE__PREFETCH;

	/* object count is unknown for a prefetch */
	params.object_count = -1;

	params.result_list = result_list;

	params.headers = http_copy_default_headers();
	params.headers = curl_slist_append(params.headers,
					   "X-TFS-FedAuthRedirect: Suppress");
	params.headers = curl_slist_append(params.headers,
					   "Pragma: no-cache");
	params.headers = curl_slist_append(params.headers,
					   "Accept: application/x-gvfs-timestamped-packfiles-indexes");

	if (gh__cmd_opts.show_progress)
		strbuf_addf(&params.progress_base_phase3_msg,
			    "Prefetch %"PRItime" (%s)",
			    seconds_since_epoch,
			    show_date(seconds_since_epoch, 0,
				      DATE_MODE(ISO8601)));

	do_req__with_fallback(component_url.buf, &params, status);

	gh__request_params__release(&params);
	strbuf_release(&component_url);
}

/*
 * Drive one or more HTTP GET requests to fetch the objects
 * in the given OIDSET.  These are received into loose objects.
 *
 * Accumulate results for each request in `result_list` until we get a
 * hard error and have to stop.
 */
static void do__http_get__fetch_oidset(struct gh__response_status *status,
				       struct oidset *oids,
				       unsigned long nr_oid_total,
				       struct string_list *result_list)
{
	struct oidset_iter iter;
	struct strbuf err404 = STRBUF_INIT;
	const struct object_id *oid;
	unsigned long k;
	int had_404 = 0;

	gh__response_status__zero(status);
	if (!nr_oid_total)
		return;

	oidset_iter_init(oids, &iter);

	for (k = 0; k < nr_oid_total; k++) {
		oid = oidset_iter_next(&iter);

		do__http_get__gvfs_object(status, oid, k+1, nr_oid_total,
					  result_list);

		/*
		 * If we get a 404 for an individual object, ignore
		 * it and get the rest.  We'll fixup the 'ec' later.
		 */
		if (status->ec == GH__ERROR_CODE__HTTP_404) {
			/* only remember the first 404 for the summary */
			if (!err404.len)
				strbuf_addf(&err404, "%s: from GET %s",
					    status->error_message.buf,
					    oid_to_hex(oid));

			/*
			 * Mark the fetch as "incomplete", but don't
			 * stop trying to get other chunks.
			 */
			had_404 = 1;
			continue;
		}

		if (status->ec != GH__ERROR_CODE__OK) {
			/* Stop at the first hard error. */
			strbuf_addf(&status->error_message, ": from GET %s",
				    oid_to_hex(oid));
			goto cleanup;
		}
	}

cleanup:
	/* If only 404s happened, surface the first one as the result. */
	if (had_404 && status->ec == GH__ERROR_CODE__OK) {
		strbuf_setlen(&status->error_message, 0);
		strbuf_addbuf(&status->error_message, &err404);
		status->ec = GH__ERROR_CODE__HTTP_404;
	}

	strbuf_release(&err404);
}

/*
 * Drive one or more HTTP POST requests to bulk fetch the objects in
 * the given OIDSET.  Create one or more packfiles and/or loose objects.
 *
 * Accumulate results for each request in `result_list` until we get a
 * hard error and have to stop.
 */
static void do__http_post__fetch_oidset(struct gh__response_status *status,
					struct oidset *oids,
					unsigned long nr_oid_total,
					struct string_list *result_list)
{
	struct oidset_iter iter;
	struct strbuf err404 = STRBUF_INIT;
	unsigned long k;
	unsigned long nr_oid_taken;
	int j_pack_den = 0;
	int j_pack_num = 0;
	int had_404 = 0;

	gh__response_status__zero(status);
	if (!nr_oid_total)
		return;

	oidset_iter_init(oids, &iter);

	/* total number of block-size batches (rounded up) */
	j_pack_den = ((nr_oid_total + gh__cmd_opts.block_size - 1)
		      / gh__cmd_opts.block_size);

	/* advance by however many OIDs each POST actually consumed */
	for (k = 0; k < nr_oid_total; k += nr_oid_taken) {
		j_pack_num++;

		do__http_post__gvfs_objects(status, &iter,
					    gh__cmd_opts.block_size,
					    j_pack_num, j_pack_den,
					    result_list,
					    &nr_oid_taken);

		/*
		 * Because the oidset iterator has random
		 * order, it does no good to say the k-th or
		 * n-th chunk was incomplete; the client
		 * cannot use that index for anything.
		 *
		 * We get a 404 when at least one object in
		 * the chunk was not found.
		 *
		 * For now, ignore the 404 and go on to the
		 * next chunk and then fixup the 'ec' later.
		 */
		if (status->ec == GH__ERROR_CODE__HTTP_404) {
			/* only remember the first 404 for the summary */
			if (!err404.len)
				strbuf_addf(&err404, "%s: from POST",
					    status->error_message.buf);

			/*
			 * Mark the fetch as "incomplete", but don't
			 * stop trying to get other chunks.
			 */
			had_404 = 1;
			continue;
		}

		if (status->ec != GH__ERROR_CODE__OK) {
			/* Stop at the first hard error. */
			strbuf_addstr(&status->error_message, ": from POST");
			goto cleanup;
		}
	}

cleanup:
	/* If only 404s happened, surface the first one as the result. */
	if (had_404 && status->ec == GH__ERROR_CODE__OK) {
		strbuf_setlen(&status->error_message, 0);
		strbuf_addbuf(&status->error_message, &err404);
		status->ec = GH__ERROR_CODE__HTTP_404;
	}

	strbuf_release(&err404);
}

/*
 * Finish with initialization.  This happens after the main option
 * parsing, dispatch to sub-command, and sub-command option parsing
 * and before actually doing anything.
 *
 * Optionally configure the cache-server if the sub-command will
 * use it.
*/ static void finish_init(int setup_cache_server) { select_odb(); lookup_main_url(); gh_http_init(); if (setup_cache_server) select_cache_server(); } /* * Request gvfs/config from main Git server. (Config data is not * available from a GVFS cache-server.) * * Print the received server configuration (as the raw JSON string). */ static enum gh__error_code do_sub_cmd__config(int argc, const char **argv) { struct gh__response_status status = GH__RESPONSE_STATUS_INIT; struct strbuf config_data = STRBUF_INIT; enum gh__error_code ec = GH__ERROR_CODE__OK; trace2_cmd_mode("config"); finish_init(0); do__http_get__gvfs_config(&status, &config_data); ec = status.ec; if (ec == GH__ERROR_CODE__OK) printf("%s\n", config_data.buf); else error("config: %s", status.error_message.buf); gh__response_status__release(&status); strbuf_release(&config_data); return ec; } static enum gh__error_code do_sub_cmd__endpoint(int argc, const char **argv) { struct gh__response_status status = GH__RESPONSE_STATUS_INIT; struct strbuf data = STRBUF_INIT; enum gh__error_code ec = GH__ERROR_CODE__OK; const char *endpoint; if (argc != 2) return GH__ERROR_CODE__ERROR; endpoint = argv[1]; trace2_cmd_mode(endpoint); finish_init(0); do__http_get__simple_endpoint(&status, &data, endpoint, endpoint); ec = status.ec; if (ec == GH__ERROR_CODE__OK) printf("%s\n", data.buf); else error("config: %s", status.error_message.buf); gh__response_status__release(&status); strbuf_release(&data); return ec; } /* * Read a list of objects from stdin and fetch them as a series of * single object HTTP GET requests. 
*/ static enum gh__error_code do_sub_cmd__get(int argc, const char **argv) { static struct option get_options[] = { OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries, N_("retries for transient network errors")), OPT_END(), }; struct gh__response_status status = GH__RESPONSE_STATUS_INIT; struct oidset oids = OIDSET_INIT; struct string_list result_list = STRING_LIST_INIT_DUP; enum gh__error_code ec = GH__ERROR_CODE__OK; unsigned long nr_oid_total; int k; trace2_cmd_mode("get"); if (argc > 1 && !strcmp(argv[1], "-h")) usage_with_options(objects_get_usage, get_options); argc = parse_options(argc, argv, NULL, get_options, objects_get_usage, 0); if (gh__cmd_opts.max_retries < 0) gh__cmd_opts.max_retries = 0; finish_init(1); nr_oid_total = read_stdin_for_oids(&oids); do__http_get__fetch_oidset(&status, &oids, nr_oid_total, &result_list); ec = status.ec; for (k = 0; k < result_list.nr; k++) printf("%s\n", result_list.items[k].string); if (ec != GH__ERROR_CODE__OK) error("get: %s", status.error_message.buf); gh__response_status__release(&status); oidset_clear(&oids); string_list_clear(&result_list, 0); return ec; } /* * Read a list of objects from stdin and fetch them in a single request (or * multiple block-size requests) using one or more HTTP POST requests. 
*/ static enum gh__error_code do_sub_cmd__post(int argc, const char **argv) { static struct option post_options[] = { OPT_MAGNITUDE('b', "block-size", &gh__cmd_opts.block_size, N_("number of objects to request at a time")), OPT_INTEGER('d', "depth", &gh__cmd_opts.depth, N_("Commit depth")), OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries, N_("retries for transient network errors")), OPT_END(), }; struct gh__response_status status = GH__RESPONSE_STATUS_INIT; struct oidset oids = OIDSET_INIT; struct string_list result_list = STRING_LIST_INIT_DUP; enum gh__error_code ec = GH__ERROR_CODE__OK; unsigned long nr_oid_total; int k; trace2_cmd_mode("post"); if (argc > 1 && !strcmp(argv[1], "-h")) usage_with_options(objects_post_usage, post_options); argc = parse_options(argc, argv, NULL, post_options, objects_post_usage, 0); if (gh__cmd_opts.depth < 1) gh__cmd_opts.depth = 1; if (gh__cmd_opts.max_retries < 0) gh__cmd_opts.max_retries = 0; finish_init(1); nr_oid_total = read_stdin_for_oids(&oids); do__http_post__fetch_oidset(&status, &oids, nr_oid_total, &result_list); ec = status.ec; for (k = 0; k < result_list.nr; k++) printf("%s\n", result_list.items[k].string); if (ec != GH__ERROR_CODE__OK) error("post: %s", status.error_message.buf); gh__response_status__release(&status); oidset_clear(&oids); string_list_clear(&result_list, 0); return ec; } /* * Interpret the given string as a timestamp and compute an absolute * UTC-seconds-since-epoch value (and without TZ). * * Note that the gvfs/prefetch API only accepts seconds since epoch, * so that is all we really need here. But there is a tradition of * various Git commands allowing a variety of formats for args like * this. For example, see the `--date` arg in `git commit`. We allow * these other forms mainly for testing purposes. 
*/ static int my_parse_since(const char *since, timestamp_t *p_timestamp) { int offset = 0; int errors = 0; unsigned long t; if (!parse_date_basic(since, p_timestamp, &offset)) return 0; t = approxidate_careful(since, &errors); if (!errors) { *p_timestamp = t; return 0; } return -1; } /* * Ask the server for all available packfiles -or- all available since * the given timestamp. */ static enum gh__error_code do_sub_cmd__prefetch(int argc, const char **argv) { static const char *since_str; static struct option prefetch_options[] = { OPT_STRING(0, "since", &since_str, N_("since"), N_("seconds since epoch")), OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries, N_("retries for transient network errors")), OPT_END(), }; struct gh__response_status status = GH__RESPONSE_STATUS_INIT; struct string_list result_list = STRING_LIST_INIT_DUP; enum gh__error_code ec = GH__ERROR_CODE__OK; timestamp_t seconds_since_epoch = 0; int k; trace2_cmd_mode("prefetch"); if (argc > 1 && !strcmp(argv[1], "-h")) usage_with_options(prefetch_usage, prefetch_options); argc = parse_options(argc, argv, NULL, prefetch_options, prefetch_usage, 0); if (since_str && *since_str) { if (my_parse_since(since_str, &seconds_since_epoch)) die("could not parse 'since' field"); } if (gh__cmd_opts.max_retries < 0) gh__cmd_opts.max_retries = 0; finish_init(1); do__http_get__gvfs_prefetch(&status, seconds_since_epoch, &result_list); ec = status.ec; for (k = 0; k < result_list.nr; k++) printf("%s\n", result_list.items[k].string); if (ec != GH__ERROR_CODE__OK) error("prefetch: %s", status.error_message.buf); gh__response_status__release(&status); string_list_clear(&result_list, 0); return ec; } /* * Handle the 'objects.get' and 'objects.post' and 'objects.prefetch' * verbs in "server mode". * * Only call error() and set ec for hard errors where we cannot * communicate correctly with the foreground client process. 
Pass any
 * actual data errors (such as 404's or 401's from the fetch) back to
 * the client process.
 */
static enum gh__error_code do_server_subprocess__objects(const char *verb_line)
{
	struct gh__response_status status = GH__RESPONSE_STATUS_INIT;
	struct oidset oids = OIDSET_INIT;
	struct object_id oid;
	struct string_list result_list = STRING_LIST_INIT_DUP;
	enum gh__error_code ec = GH__ERROR_CODE__OK;
	char *line;
	int len;
	int err;
	int k;
	enum gh__objects_mode objects_mode;
	unsigned long nr_oid_total = 0;
	timestamp_t seconds_since_epoch = 0;

	/* Map the verb line from the client onto an objects-mode. */
	if (!strcmp(verb_line, "objects.get"))
		objects_mode = GH__OBJECTS_MODE__GET;
	else if (!strcmp(verb_line, "objects.post"))
		objects_mode = GH__OBJECTS_MODE__POST;
	else if (!strcmp(verb_line, "objects.prefetch"))
		objects_mode = GH__OBJECTS_MODE__PREFETCH;
	else {
		error("server: unexpected objects-mode verb '%s'", verb_line);
		ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX;
		goto cleanup;
	}

	switch (objects_mode) {
	case GH__OBJECTS_MODE__GET:
	case GH__OBJECTS_MODE__POST:
		/*
		 * Read OID lines from the client until FLUSH (or EOF/error,
		 * which packet_read_line_gently() reports as len < 0).
		 */
		while (1) {
			len = packet_read_line_gently(0, NULL, &line);
			if (len < 0 || !line)
				break;

			if (get_oid_hex(line, &oid)) {
				error("server: invalid oid syntax '%s'", line);
				ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX;
				goto cleanup;
			}

			/*
			 * oidset_insert() returns non-zero when the OID was
			 * already present, so nr_oid_total counts unique OIDs.
			 */
			if (!oidset_insert(&oids, &oid))
				nr_oid_total++;
		}

		if (!nr_oid_total) {
			/* if zero objects requested, trivial OK. */
			if (packet_write_fmt_gently(1, "ok\n")) {
				error("server: cannot write 'get' result to client");
				ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX;
			} else
				ec = GH__ERROR_CODE__OK;
			goto cleanup;
		}

		if (objects_mode == GH__OBJECTS_MODE__GET)
			do__http_get__fetch_oidset(&status, &oids,
						   nr_oid_total, &result_list);
		else
			do__http_post__fetch_oidset(&status, &oids,
						    nr_oid_total, &result_list);
		break;

	case GH__OBJECTS_MODE__PREFETCH:
		/* get optional timestamp line */
		/*
		 * If the client sends multiple timestamp lines, the last one
		 * wins.  NOTE(review): strtoul() here is unchecked and is
		 * 32-bit on LLP64 platforms even though seconds_since_epoch
		 * is a timestamp_t — malformed input silently becomes 0.
		 */
		while (1) {
			len = packet_read_line_gently(0, NULL, &line);
			if (len < 0 || !line)
				break;

			seconds_since_epoch = strtoul(line, NULL, 10);
		}

		do__http_get__gvfs_prefetch(&status, seconds_since_epoch,
					    &result_list);
		break;

	default:
		BUG("unexpected object_mode in switch '%d'", objects_mode);
	}

	/*
	 * Write pathname of the ODB where we wrote all of the objects
	 * we fetched.
	 */
	if (packet_write_fmt_gently(1, "odb %s\n",
				    gh__global.buf_odb_path.buf)) {
		error("server: cannot write 'odb' to client");
		ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX;
		goto cleanup;
	}

	/* Relay each per-object/per-packfile result line to the client. */
	for (k = 0; k < result_list.nr; k++)
		if (packet_write_fmt_gently(1, "%s\n",
					    result_list.items[k].string))
		{
			error("server: cannot write result to client: '%s'",
			      result_list.items[k].string);
			ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX;
			goto cleanup;
		}

	/*
	 * We only use status.ec to tell the client whether the request
	 * was complete, incomplete, or had IO errors.  We DO NOT return
	 * this value to our caller.
	 */
	err = 0;
	if (status.ec == GH__ERROR_CODE__OK)
		err = packet_write_fmt_gently(1, "ok\n");
	else if (status.ec == GH__ERROR_CODE__HTTP_404)
		err = packet_write_fmt_gently(1, "partial\n");
	else
		err = packet_write_fmt_gently(1, "error %s\n",
					      status.error_message.buf);

	if (err) {
		error("server: cannot write result to client");
		ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX;
		goto cleanup;
	}

	if (packet_flush_gently(1)) {
		error("server: cannot flush result to client");
		ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX;
		goto cleanup;
	}

cleanup:
	oidset_clear(&oids);
	string_list_clear(&result_list, 0);

	return ec;
}

/* Signature of a "server mode" verb handler. */
typedef enum gh__error_code (fn_subprocess_cmd)(const char *verb_line);

/* One capability advertised during the subprocess handshake. */
struct subprocess_capability {
	const char *name;       /* capability/verb-prefix name */
	int client_has;         /* set during handshake if client supports it */
	fn_subprocess_cmd *pfn; /* handler to dispatch matching verbs to */
};

/* Table of capabilities we support; NULL-name entry terminates. */
static struct subprocess_capability caps[] = {
	{ "objects", 0, do_server_subprocess__objects },
	{ NULL, 0, NULL },
};

/*
 * Handle the subprocess protocol handshake as described in:
 * [] Documentation/technical/protocol-common.txt
 * [] Documentation/technical/long-running-process-protocol.txt
 */
static int do_protocol_handshake(void)
{
#define OUR_SUBPROCESS_VERSION "1"

	char *line;
	int len;
	int k;
	int b_support_our_version = 0;

	/*
	 * Expect the client's welcome line first.
	 * NOTE(review): if packet_read_line_gently() fails, `line` may be
	 * NULL here and "%s" of NULL is technically undefined behavior
	 * (glibc prints "(null)" but that is not guaranteed) — confirm.
	 */
	len = packet_read_line_gently(0, NULL, &line);
	if (len < 0 || !line || strcmp(line, "gvfs-helper-client")) {
		error("server: subprocess welcome handshake failed: %s", line);
		return -1;
	}

	/* Collect "version=<n>" lines until FLUSH; any match is enough. */
	while (1) {
		const char *v;
		len = packet_read_line_gently(0, NULL, &line);
		if (len < 0 || !line)
			break;
		if (!skip_prefix(line, "version=", &v)) {
			error("server: subprocess version handshake failed: %s",
			      line);
			return -1;
		}
		b_support_our_version |= (!strcmp(v, OUR_SUBPROCESS_VERSION));
	}
	if (!b_support_our_version) {
		error("server: client does not support our version: %s",
		      OUR_SUBPROCESS_VERSION);
		return -1;
	}

	/* Announce ourselves and the (single) version we speak. */
	if (packet_write_fmt_gently(1, "gvfs-helper-server\n") ||
	    packet_write_fmt_gently(1, "version=%s\n",
				    OUR_SUBPROCESS_VERSION) ||
	    packet_flush_gently(1)) {
		error("server: cannot write version handshake");
		return -1;
	}

	/* Collect "capability=<name>" lines until FLUSH; mark known ones. */
	while (1) {
		const char *v;
		int k;

		len = packet_read_line_gently(0, NULL, &line);
		if (len < 0 || !line)
			break;
		if (!skip_prefix(line, "capability=", &v)) {
			error("server: subprocess capability handshake failed: %s",
			      line);
			return -1;
		}
		for (k = 0; caps[k].name; k++)
			if (!strcmp(v, caps[k].name))
				caps[k].client_has = 1;
	}

	/* Echo back only the capabilities both sides understand. */
	for (k = 0; caps[k].name; k++)
		if (caps[k].client_has)
			if (packet_write_fmt_gently(1, "capability=%s\n",
						    caps[k].name)) {
				error("server: cannot write capabilities handshake: %s",
				      caps[k].name);
				return -1;
			}
	if (packet_flush_gently(1)) {
		error("server: cannot write capabilities handshake");
		return -1;
	}

	return 0;
}

/*
 * Interactively listen to stdin for a series of commands and execute them.
 */
static enum gh__error_code do_sub_cmd__server(int argc, const char **argv)
{
	static struct option server_options[] = {
		OPT_MAGNITUDE('b', "block-size", &gh__cmd_opts.block_size,
			      N_("number of objects to request at a time")),
		OPT_INTEGER('d', "depth", &gh__cmd_opts.depth,
			    N_("Commit depth")),
		OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries,
			    N_("retries for transient network errors")),
		OPT_END(),
	};

	enum gh__error_code ec = GH__ERROR_CODE__OK;
	char *line;
	int len;
	int k;

	trace2_cmd_mode("server");

	if (argc > 1 && !strcmp(argv[1], "-h"))
		usage_with_options(server_usage, server_options);

	argc = parse_options(argc, argv, NULL, server_options, server_usage, 0);

	/* Clamp out-of-range option values rather than erroring out. */
	if (gh__cmd_opts.depth < 1)
		gh__cmd_opts.depth = 1;
	if (gh__cmd_opts.max_retries < 0)
		gh__cmd_opts.max_retries = 0;

	finish_init(1);

	if (do_protocol_handshake()) {
		ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX;
		goto cleanup;
	}

	/*
	 * Main command loop: read one verb line per iteration and dispatch
	 * it to the matching capability handler.  The label works as a
	 * "labeled continue" from inside the dispatch loop below.
	 */
top_of_loop:
	while (1) {
		len = packet_read_line_gently(0, NULL, &line);
		if (len < 0 || !line) {
			/* use extra FLUSH as a QUIT */
			ec = GH__ERROR_CODE__OK;
			goto cleanup;
		}

		/* Dispatch on verb prefix, e.g. "objects.get" -> "objects". */
		for (k = 0; caps[k].name; k++) {
			if (caps[k].client_has &&
			    starts_with(line, caps[k].name)) {
				ec = (caps[k].pfn)(line);
				if (ec != GH__ERROR_CODE__OK)
					goto cleanup;
				goto top_of_loop;
			}
		}

		error("server: unknown command '%s'", line);
		ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX;
		goto cleanup;
	}

cleanup:
	return ec;
}

/* Dispatch the first non-option command-line argument to a sub-command. */
static enum gh__error_code do_sub_cmd(int argc, const char **argv)
{
	if (!strcmp(argv[0], "get"))
		return do_sub_cmd__get(argc, argv);

	if (!strcmp(argv[0], "post"))
		return do_sub_cmd__post(argc, argv);

	if (!strcmp(argv[0], "config"))
		return do_sub_cmd__config(argc, argv);

	if (!strcmp(argv[0], "endpoint"))
		return do_sub_cmd__endpoint(argc, argv);

	if (!strcmp(argv[0], "prefetch"))
		return do_sub_cmd__prefetch(argc, argv);

	/*
	 * server mode is for talking with git.exe via the "gh_client_" API
	 * using packet-line format.
	 */
	if (!strcmp(argv[0], "server"))
		return do_sub_cmd__server(argc, argv);

	return GH__ERROR_CODE__USAGE;
}

/*
 * Communicate with the primary Git server or a GVFS cache-server using the
 * GVFS Protocol.
 *
 * https://github.com/microsoft/VFSForGit/blob/master/Protocol.md
 */
int cmd_main(int argc, const char **argv)
{
	static struct option main_options[] = {
		OPT_STRING('r', "remote", &gh__cmd_opts.remote_name,
			   N_("remote"),
			   N_("Remote name")),
		OPT_BOOL('f', "fallback", &gh__cmd_opts.try_fallback,
			 N_("Fallback to Git server if cache-server fails")),
		OPT_CALLBACK(0, "cache-server", NULL,
			     N_("cache-server"),
			     N_("cache-server=disable|trust|verify|error"),
			     option_parse_cache_server_mode),
		OPT_CALLBACK(0, "shared-cache", NULL,
			     N_("pathname"),
			     N_("Pathname to shared objects directory"),
			     option_parse_shared_cache_directory),
		OPT_BOOL('p', "progress", &gh__cmd_opts.show_progress,
			 N_("Show progress")),
		OPT_END(),
	};

	enum gh__error_code ec = GH__ERROR_CODE__OK;

	if (argc > 1 && !strcmp(argv[1], "-h"))
		usage_with_options(main_usage, main_options);

	trace2_cmd_name("gvfs-helper");
	packet_trace_identity("gvfs-helper");

	setup_git_directory_gently(NULL);

	/* Set any non-zero initial values in gh__cmd_opts.
	 */
	gh__cmd_opts.depth = GH__DEFAULT__OBJECTS_POST__COMMIT_DEPTH;
	gh__cmd_opts.block_size = GH__DEFAULT__OBJECTS_POST__BLOCK_SIZE;
	gh__cmd_opts.max_retries = GH__DEFAULT_MAX_RETRIES;
	gh__cmd_opts.max_transient_backoff_sec =
		GH__DEFAULT_MAX_TRANSIENT_BACKOFF_SEC;

	/* Default progress display to "on" only when stderr is a tty. */
	gh__cmd_opts.show_progress = !!isatty(2);

	// TODO use existing gvfs config settings to override our GH__DEFAULT_
	// TODO values in gh__cmd_opts.  (And maybe add/remove our command line
	// TODO options for them.)
	// TODO
	// TODO See "scalar.max-retries" (and maybe "gvfs.max-retries")

	git_config(git_default_config, NULL);

	/*
	 * Stop at the first non-option so the sub-command sees (and parses)
	 * its own options; argv[0] is then the sub-command name.
	 */
	argc = parse_options(argc, argv, NULL, main_options, main_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (argc == 0)
		usage_with_options(main_usage, main_options);

	ec = do_sub_cmd(argc, argv);

	gh_http_cleanup();

	if (ec == GH__ERROR_CODE__USAGE)
		usage_with_options(main_usage, main_options);

	return ec;
}