From 665b35eccd39fefd714cb5c332277a6b94fd9386 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Thu, 9 Jun 2016 17:35:36 -0700 Subject: [PATCH 1/2] submodule--helper: initial clone learns retry logic Each submodule whose clone fails will be retried once, after all other submodules have been cloned. This helps to mitigate ephemeral server failures and greatly increases the chances of reliably cloning a repo with hundreds of submodules. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 66 +++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 7 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index ca33408f55..8d01fdd1f0 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -592,10 +592,14 @@ struct submodule_update_clone { /* If we want to stop as fast as possible and return an error */ unsigned quickstop : 1; + + /* failed clones to be retried again */ + const struct cache_entry **failed_clones; + int failed_clones_nr, failed_clones_alloc; }; #define SUBMODULE_UPDATE_CLONE_INIT {0, MODULE_LIST_INIT, 0, \ SUBMODULE_UPDATE_STRATEGY_INIT, 0, -1, NULL, NULL, NULL, NULL, \ - STRING_LIST_INIT_DUP, 0} + STRING_LIST_INIT_DUP, 0, NULL, 0, 0} static void next_submodule_warn_missing(struct submodule_update_clone *suc, @@ -720,23 +724,47 @@ cleanup: static int update_clone_get_next_task(struct child_process *child, struct strbuf *err, void *suc_cb, - void **void_task_cb) + void **idx_task_cb) { struct submodule_update_clone *suc = suc_cb; + const struct cache_entry *ce; + int index; for (; suc->current < suc->list.nr; suc->current++) { - const struct cache_entry *ce = suc->list.entries[suc->current]; + ce = suc->list.entries[suc->current]; if (prepare_to_clone_next_submodule(ce, child, suc, err)) { + int *p = xmalloc(sizeof(*p)); + *p = suc->current; + *idx_task_cb = p; suc->current++; return 1; } } + + /* + * The loop above tried
cloning each submodule once, now try the + * stragglers again, which we can imagine as an extension of the + * entry list. + */ + index = suc->current - suc->list.nr; + if (index < suc->failed_clones_nr) { + int *p; + ce = suc->failed_clones[index]; + if (!prepare_to_clone_next_submodule(ce, child, suc, err)) + die("BUG: ce was a submodule before?"); + p = xmalloc(sizeof(*p)); + *p = suc->current; + *idx_task_cb = p; + suc->current ++; + return 1; + } + return 0; } static int update_clone_start_failure(struct strbuf *err, void *suc_cb, - void *void_task_cb) + void *idx_task_cb) { struct submodule_update_clone *suc = suc_cb; suc->quickstop = 1; @@ -746,15 +774,39 @@ static int update_clone_start_failure(struct strbuf *err, static int update_clone_task_finished(int result, struct strbuf *err, void *suc_cb, - void *void_task_cb) + void *idx_task_cb) { + const struct cache_entry *ce; struct submodule_update_clone *suc = suc_cb; + int *idxP = *(int**)idx_task_cb; + int idx = *idxP; + free(idxP); + if (!result) return 0; - suc->quickstop = 1; - return 1; + if (idx < suc->list.nr) { + ce = suc->list.entries[idx]; + strbuf_addf(err, _("Failed to clone '%s'. 
Retry scheduled"), + ce->name); + strbuf_addch(err, '\n'); + ALLOC_GROW(suc->failed_clones, + suc->failed_clones_nr + 1, + suc->failed_clones_alloc); + suc->failed_clones[suc->failed_clones_nr++] = ce; + return 0; + } else { + idx = suc->current - suc->list.nr; + ce = suc->failed_clones[idx]; + strbuf_addf(err, _("Failed to clone '%s' a second time, aborting"), + ce->name); + strbuf_addch(err, '\n'); + suc->quickstop = 1; + return 1; + } + + return 0; } static int update_clone(int argc, const char **argv, const char *prefix) From bb9d91b4ed54df7bd970c82971ba2851e6735d72 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Thu, 9 Jun 2016 12:06:37 -0700 Subject: [PATCH 2/2] submodule update: continue when a clone fails In 15ffb7cde48 (2011-06-13, submodule update: continue when a checkout fails), we reasoned that it is OK to continue when the failure does not put much of a mental burden on the user. If a recursive submodule fails to clone because a .gitmodules file is broken (e.g.: fatal: No url found for submodule path 'foo/bar' in .gitmodules Failed to recurse into submodule path 'foo', signaled by exit code 128), this is one of the cases where the user is not expected to have much of a burden afterwards, so we can also continue in that case. This means we only want to stop updating submodules in case of rebase, merge or custom update command failures, which are all signaled with exit code 2. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- git-submodule.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git-submodule.sh b/git-submodule.sh index 42e0e9f63d..f1919ca16f 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -734,7 +734,7 @@ cmd_update() if test $res -gt 0 then die_msg="$(eval_gettext "Failed to recurse into submodule path '\$displaypath'")" - if test $res -eq 1 + if test $res -ne 2 then err="${err};$die_msg" continue