From 5322b837afb3e6dacf73b464bcc975f93a30667d Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Sun, 10 May 2015 04:45:30 +0200 Subject: [PATCH 01/16] update-ref: test handling large transactions properly Signed-off-by: Stefan Beller Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- t/t1400-update-ref.sh | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/t/t1400-update-ref.sh b/t/t1400-update-ref.sh index 7b4707b776..47d2fe9cce 100755 --- a/t/t1400-update-ref.sh +++ b/t/t1400-update-ref.sh @@ -973,4 +973,32 @@ test_expect_success 'stdin -z delete refs works with packed and loose refs' ' test_must_fail git rev-parse --verify -q $c ' +run_with_limited_open_files () { + (ulimit -n 32 && "$@") +} + +test_lazy_prereq ULIMIT_FILE_DESCRIPTORS 'run_with_limited_open_files true' + +test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' ' +( + for i in $(test_seq 33) + do + echo "create refs/heads/$i HEAD" + done >large_input && + run_with_limited_open_files git update-ref --stdin large_input && + run_with_limited_open_files git update-ref --stdin Date: Sun, 10 May 2015 04:45:31 +0200 Subject: [PATCH 02/16] t7004: rename ULIMIT test prerequisite to ULIMIT_STACK_SIZE During creation of the patch series, our discussion revealed that we could have a more descriptive name for the prerequisite for the test so it stays unique when other limits of ulimit are introduced. Let's rename the existing ulimit about setting the stack size to a more explicit ULIMIT_STACK_SIZE. Signed-off-by: Stefan Beller Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- t/t7004-tag.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh index 796e9f79ea..06b8e0def1 100755 --- a/t/t7004-tag.sh +++ b/t/t7004-tag.sh @@ -1463,10 +1463,10 @@ run_with_limited_stack () { (ulimit -s 128 && "$@") } -test_lazy_prereq ULIMIT 'run_with_limited_stack true' +test_lazy_prereq ULIMIT_STACK_SIZE 'run_with_limited_stack true' # we require ulimit, this excludes Windows -test_expect_success ULIMIT '--contains works in a deep repo' ' +test_expect_success ULIMIT_STACK_SIZE '--contains works in a deep repo' ' >expect && i=1 && while test $i -lt 8000 From 1d455231a0f823dc75cd5c7e32b818a4dc3ec020 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 10 May 2015 04:45:32 +0200 Subject: [PATCH 03/16] write_ref_to_lockfile(): new function, extracted from write_ref_sha1() This is the first step towards separating the checking and writing of the new reference value to committing the change. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/refs.c b/refs.c index 6664423f8d..9e40c35cf5 100644 --- a/refs.c +++ b/refs.c @@ -3048,23 +3048,15 @@ int is_branch(const char *refname) } /* - * Write sha1 into the ref specified by the lock. Make sure that errno - * is sane on error. + * Write sha1 into the open lockfile, then close the lockfile. On + * errors, rollback the lockfile and set errno to reflect the problem. */ -static int write_ref_sha1(struct ref_lock *lock, - const unsigned char *sha1, const char *logmsg) +static int write_ref_to_lockfile(struct ref_lock *lock, + const unsigned char *sha1) { static char term = '\n'; struct object *o; - if (!lock) { - errno = EINVAL; - return -1; - } - if (!lock->force_write && !hashcmp(lock->old_sha1, sha1)) { - unlock_ref(lock); - return 0; - } o = parse_object(sha1); if (!o) { error("Trying to write ref %s with nonexistent object %s", @@ -3089,6 +3081,28 @@ static int write_ref_sha1(struct ref_lock *lock, errno = save_errno; return -1; } + return 0; +} + +/* + * Write sha1 into the ref specified by the lock. Make sure that errno + * is sane on error. + */ +static int write_ref_sha1(struct ref_lock *lock, + const unsigned char *sha1, const char *logmsg) +{ + if (!lock) { + errno = EINVAL; + return -1; + } + if (!lock->force_write && !hashcmp(lock->old_sha1, sha1)) { + unlock_ref(lock); + return 0; + } + + if (write_ref_to_lockfile(lock, sha1)) + return -1; + clear_loose_ref_cache(&ref_cache); if (log_ref_write(lock->ref_name, lock->old_sha1, sha1, logmsg) < 0 || (strcmp(lock->ref_name, lock->orig_ref_name) && From 38e50e81e3ee1d0e329245a3c63512cbd8a1ab44 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 10 May 2015 04:45:33 +0200 Subject: [PATCH 04/16] commit_ref_update(): new function, extracted from write_ref_sha1() Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/refs.c b/refs.c index 9e40c35cf5..7661db9563 100644 --- a/refs.c +++ b/refs.c @@ -3085,24 +3085,13 @@ static int write_ref_to_lockfile(struct ref_lock *lock, } /* - * Write sha1 into the ref specified by the lock. Make sure that errno - * is sane on error. + * Commit a change to a loose reference that has already been written + * to the loose reference lockfile. Also update the reflogs if + * necessary, using the specified lockmsg (which can be NULL). */ -static int write_ref_sha1(struct ref_lock *lock, - const unsigned char *sha1, const char *logmsg) +static int commit_ref_update(struct ref_lock *lock, + const unsigned char *sha1, const char *logmsg) { - if (!lock) { - errno = EINVAL; - return -1; - } - if (!lock->force_write && !hashcmp(lock->old_sha1, sha1)) { - unlock_ref(lock); - return 0; - } - - if (write_ref_to_lockfile(lock, sha1)) - return -1; - clear_loose_ref_cache(&ref_cache); if (log_ref_write(lock->ref_name, lock->old_sha1, sha1, logmsg) < 0 || (strcmp(lock->ref_name, lock->orig_ref_name) && @@ -3141,6 +3130,30 @@ static int write_ref_sha1(struct ref_lock *lock, return 0; } +/* + * Write sha1 as the new value of the reference specified by the + * (open) lock. On error, roll back the lockfile and set errno + * appropriately. + */ +static int write_ref_sha1(struct ref_lock *lock, + const unsigned char *sha1, const char *logmsg) +{ + if (!lock) { + errno = EINVAL; + return -1; + } + if (!lock->force_write && !hashcmp(lock->old_sha1, sha1)) { + unlock_ref(lock); + return 0; + } + + if (write_ref_to_lockfile(lock, sha1) || + commit_ref_update(lock, sha1, logmsg)) + return -1; + + return 0; +} + int create_symref(const char *ref_target, const char *refs_heads_master, const char *logmsg) { From 29957fda0b6a061c49792875f303570b4b451d62 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 10 May 2015 04:45:34 +0200 Subject: [PATCH 05/16] rename_ref(): inline calls to write_ref_sha1() from this function Most of what it does is unneeded from these call sites. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/refs.c b/refs.c index 7661db9563..18ce8e6e89 100644 --- a/refs.c +++ b/refs.c @@ -2799,8 +2799,9 @@ static int rename_ref_available(const char *oldname, const char *newname) return ret; } -static int write_ref_sha1(struct ref_lock *lock, const unsigned char *sha1, - const char *logmsg); +static int write_ref_to_lockfile(struct ref_lock *lock, const unsigned char *sha1); +static int commit_ref_update(struct ref_lock *lock, + const unsigned char *sha1, const char *logmsg); int rename_ref(const char *oldrefname, const char *newrefname, const char *logmsg) { @@ -2859,7 +2860,9 @@ int rename_ref(const char *oldrefname, const char *newrefname, const char *logms } lock->force_write = 1; hashcpy(lock->old_sha1, orig_sha1); - if (write_ref_sha1(lock, orig_sha1, logmsg)) { + + if (write_ref_to_lockfile(lock, orig_sha1) || + commit_ref_update(lock, orig_sha1, logmsg)) { error("unable to write current sha1 into %s", newrefname); goto rollback; } @@ -2876,7 +2879,8 @@ int rename_ref(const char *oldrefname, const char *newrefname, const char *logms lock->force_write = 1; flag = log_all_ref_updates; log_all_ref_updates = 0; - if (write_ref_sha1(lock, orig_sha1, NULL)) + if (write_ref_to_lockfile(lock, orig_sha1) || + commit_ref_update(lock, orig_sha1, NULL)) error("unable to write current sha1 into %s", oldrefname); log_all_ref_updates = flag; From 4da50def5b4dcb0753334e3f44fffaccb3728a6c Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 10 May 2015 04:45:35 +0200 Subject: [PATCH 06/16] ref_transaction_commit(): inline call to write_ref_sha1() And remove the function write_ref_sha1(), as it is no longer used. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 38 ++++++++++---------------------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/refs.c b/refs.c index 18ce8e6e89..76609bff43 100644 --- a/refs.c +++ b/refs.c @@ -3134,30 +3134,6 @@ static int commit_ref_update(struct ref_lock *lock, return 0; } -/* - * Write sha1 as the new value of the reference specified by the - * (open) lock. On error, roll back the lockfile and set errno - * appropriately. - */ -static int write_ref_sha1(struct ref_lock *lock, - const unsigned char *sha1, const char *logmsg) -{ - if (!lock) { - errno = EINVAL; - return -1; - } - if (!lock->force_write && !hashcmp(lock->old_sha1, sha1)) { - unlock_ref(lock); - return 0; - } - - if (write_ref_to_lockfile(lock, sha1) || - commit_ref_update(lock, sha1, logmsg)) - return -1; - - return 0; -} - int create_symref(const char *ref_target, const char *refs_heads_master, const char *logmsg) { @@ -3852,15 +3828,21 @@ int ref_transaction_commit(struct ref_transaction *transaction, struct ref_update *update = updates[i]; if (!is_null_sha1(update->new_sha1)) { - if (write_ref_sha1(update->lock, update->new_sha1, - update->msg)) { - update->lock = NULL; /* freed by write_ref_sha1 */ + if (!update->lock->force_write && + !hashcmp(update->lock->old_sha1, update->new_sha1)) { + unlock_ref(update->lock); + update->lock = NULL; + } else if (write_ref_to_lockfile(update->lock, update->new_sha1) || + commit_ref_update(update->lock, update->new_sha1, update->msg)) { + update->lock = NULL; /* freed by the above calls */ strbuf_addf(err, "Cannot update the ref '%s'.", update->refname); ret = TRANSACTION_GENERIC_ERROR; goto cleanup; + } else { + /* freed by the above calls: */ + update->lock = NULL; } - update->lock = NULL; /* freed by write_ref_sha1 */ } } From 805cf6e938fa0fe421e85f2274e67194df559cb5 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 10 May 2015 04:45:36 +0200 Subject: [PATCH 07/16] ref_transaction_commit(): remove the local flags variable Instead, work directly with update->flags. This has the advantage that the REF_DELETING bit, set in the first loop, can be read in the second loop instead of having to be recomputed. Plus, it was potentially confusing having both update->flags and flags, which sometimes had different values. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/refs.c b/refs.c index 76609bff43..58b118243c 100644 --- a/refs.c +++ b/refs.c @@ -3802,16 +3802,15 @@ int ref_transaction_commit(struct ref_transaction *transaction, /* Acquire all locks while verifying old values */ for (i = 0; i < n; i++) { struct ref_update *update = updates[i]; - int flags = update->flags; if (is_null_sha1(update->new_sha1)) - flags |= REF_DELETING; + update->flags |= REF_DELETING; update->lock = lock_ref_sha1_basic(update->refname, (update->have_old ? update->old_sha1 : NULL), NULL, - flags, + update->flags, &update->type); if (!update->lock) { ret = (errno == ENOTDIR) @@ -3827,7 +3826,7 @@ int ref_transaction_commit(struct ref_transaction *transaction, for (i = 0; i < n; i++) { struct ref_update *update = updates[i]; - if (!is_null_sha1(update->new_sha1)) { + if (!(update->flags & REF_DELETING)) { if (!update->lock->force_write && !hashcmp(update->lock->old_sha1, update->new_sha1)) { unlock_ref(update->lock); From 6c34492ab4f260a4f32968e235da9badb22b56b4 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 10 May 2015 04:45:37 +0200 Subject: [PATCH 08/16] ref_transaction_commit(): fix atomicity and avoid fd exhaustion The old code was roughly for update in updates: acquire locks and check old_sha for update in updates: if changing value: write_ref_to_lockfile() commit_ref_update() for update in updates: if deleting value: unlink() rewrite packed-refs file for update in updates: if reference still locked: unlock_ref() This has two problems. Non-atomic updates ================== The atomicity of the reference transaction depends on all pre-checks being done in the first loop, before any changes have started being committed in the second loop. The problem is that write_ref_to_lockfile() (previously part of write_ref_sha1()), which is called from the second loop, contains two more checks: * It verifies that new_sha1 is a valid object * If the reference being updated is a branch, it verifies that new_sha1 points at a commit object (as opposed to a tag, tree, or blob). If either of these checks fails, the "transaction" is aborted during the second loop. But this might happen after some reference updates have already been permanently committed. In other words, the all-or-nothing promise of "git update-ref --stdin" could be violated. So these checks have to be moved to the first loop. File descriptor exhaustion ========================== The old code locked all of the references in the first loop, leaving all of the lockfiles open until later loops. Since we might be updating a lot of references, this could result in file descriptor exhaustion. The solution ============ After this patch, the code looks like for update in updates: acquire locks and check old_sha if changing value: write_ref_to_lockfile() else: close_ref() for update in updates: if changing value: commit_ref_update() for update in updates: if deleting value: unlink() rewrite packed-refs file for update in updates: if reference still locked: unlock_ref() This fixes both problems: 1. The pre-checks in write_ref_to_lockfile() are now done in the first loop, before any changes have been committed. If any of the checks fails, the whole transaction can now be rolled back correctly. 2. All lockfiles are closed in the first loop immediately after they are created (either by write_ref_to_lockfile() or by close_ref()). This means that there is never more than one open lockfile at a time, preventing file descriptor exhaustion. To simplify the bookkeeping across loops, add a new REF_NEEDS_COMMIT bit to update->flags, which keeps track of whether the corresponding lockfile needs to be committed, as opposed to just unlocked. (Since "struct ref_update" is internal to the refs module, this change is not visible to external callers.) This change fixes two tests in t1400. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 55 +++++++++++++++++++++++++++++++++++-------- t/t1400-update-ref.sh | 4 ++-- 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/refs.c b/refs.c index 58b118243c..85c1dcb017 100644 --- a/refs.c +++ b/refs.c @@ -30,6 +30,13 @@ static unsigned char refname_disposition[256] = { * pruned. */ #define REF_ISPRUNING 0x0100 + +/* + * Used as a flag in ref_update::flags when the lockfile needs to be + * committed. + */ +#define REF_NEEDS_COMMIT 0x0200 + /* * Try to read one refname component from the front of refname. * Return the length of the component found, or -1 if the component is @@ -3799,7 +3806,12 @@ int ref_transaction_commit(struct ref_transaction *transaction, goto cleanup; } - /* Acquire all locks while verifying old values */ + /* + * Acquire all locks, verify old values if provided, check + * that new values are valid, and write new values to the + * lockfiles, ready to be activated. Only keep one lockfile + * open at a time to avoid running out of file descriptors. + */ for (i = 0; i < n; i++) { struct ref_update *update = updates[i]; @@ -3820,26 +3832,49 @@ int ref_transaction_commit(struct ref_transaction *transaction, update->refname); goto cleanup; } + if (!(update->flags & REF_DELETING) && + (update->lock->force_write || + hashcmp(update->lock->old_sha1, update->new_sha1))) { + if (write_ref_to_lockfile(update->lock, update->new_sha1)) { + /* + * The lock was freed upon failure of + * write_ref_to_lockfile(): + */ + update->lock = NULL; + strbuf_addf(err, "Cannot update the ref '%s'.", + update->refname); + ret = TRANSACTION_GENERIC_ERROR; + goto cleanup; + } + update->flags |= REF_NEEDS_COMMIT; + } else { + /* + * We didn't have to write anything to the lockfile. + * Close it to free up the file descriptor: + */ + if (close_ref(update->lock)) { + strbuf_addf(err, "Couldn't close %s.lock", + update->refname); + goto cleanup; + } + } } /* Perform updates first so live commits remain referenced */ for (i = 0; i < n; i++) { struct ref_update *update = updates[i]; - if (!(update->flags & REF_DELETING)) { - if (!update->lock->force_write && - !hashcmp(update->lock->old_sha1, update->new_sha1)) { - unlock_ref(update->lock); + if (update->flags & REF_NEEDS_COMMIT) { + if (commit_ref_update(update->lock, + update->new_sha1, update->msg)) { + /* The lock was freed by commit_ref_update(): */ update->lock = NULL; - } else if (write_ref_to_lockfile(update->lock, update->new_sha1) || - commit_ref_update(update->lock, update->new_sha1, update->msg)) { - update->lock = NULL; /* freed by the above calls */ strbuf_addf(err, "Cannot update the ref '%s'.", update->refname); ret = TRANSACTION_GENERIC_ERROR; goto cleanup; } else { - /* freed by the above calls: */ + /* freed by the above call: */ update->lock = NULL; } } @@ -3849,7 +3884,7 @@ int ref_transaction_commit(struct ref_transaction *transaction, for (i = 0; i < n; i++) { struct ref_update *update = updates[i]; - if (update->lock) { + if (update->flags & REF_DELETING) { if (delete_ref_loose(update->lock, update->type, err)) { ret = TRANSACTION_GENERIC_ERROR; goto cleanup; diff --git a/t/t1400-update-ref.sh b/t/t1400-update-ref.sh index 47d2fe9cce..c593a1df20 100755 --- a/t/t1400-update-ref.sh +++ b/t/t1400-update-ref.sh @@ -979,7 +979,7 @@ run_with_limited_open_files () { test_lazy_prereq ULIMIT_FILE_DESCRIPTORS 'run_with_limited_open_files true' -test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' ' +test_expect_success ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' ' ( for i in $(test_seq 33) do @@ -990,7 +990,7 @@ test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches ) ' -test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction deleting branches does not burst open file limit' ' +test_expect_success ULIMIT_FILE_DESCRIPTORS 'large transaction deleting branches does not burst open file limit' ' ( for i in $(test_seq 33) do From d415ad022d8b94adaf67bac2b86180847cc259ec Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 14 Apr 2015 15:25:06 -0700 Subject: [PATCH 09/16] update-ref: test handling large transactions properly Signed-off-by: Stefan Beller Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- t/t1400-update-ref.sh | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/t/t1400-update-ref.sh b/t/t1400-update-ref.sh index 6805b9e6bb..7a69f1a083 100755 --- a/t/t1400-update-ref.sh +++ b/t/t1400-update-ref.sh @@ -1065,4 +1065,32 @@ test_expect_success 'stdin -z delete refs works with packed and loose refs' ' test_must_fail git rev-parse --verify -q $c ' +run_with_limited_open_files () { + (ulimit -n 32 && "$@") +} + +test_lazy_prereq ULIMIT_FILE_DESCRIPTORS 'run_with_limited_open_files true' + +test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' ' +( + for i in $(test_seq 33) + do + echo "create refs/heads/$i HEAD" + done >large_input && + run_with_limited_open_files git update-ref --stdin large_input && + run_with_limited_open_files git update-ref --stdin Date: Tue, 14 Apr 2015 15:25:07 -0700 Subject: [PATCH 10/16] t7004: rename ULIMIT test prerequisite to ULIMIT_STACK_SIZE During creation of the patch series our discussion we could have a more descriptive name for the prerequisite for the test so it stays unique when other limits of ulimit are introduced. Signed-off-by: Stefan Beller Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- t/t7004-tag.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh index fa207f3b8c..d1ff5c94f2 100755 --- a/t/t7004-tag.sh +++ b/t/t7004-tag.sh @@ -1491,10 +1491,10 @@ run_with_limited_stack () { (ulimit -s 128 && "$@") } -test_lazy_prereq ULIMIT 'run_with_limited_stack true' +test_lazy_prereq ULIMIT_STACK_SIZE 'run_with_limited_stack true' # we require ulimit, this excludes Windows -test_expect_success ULIMIT '--contains works in a deep repo' ' +test_expect_success ULIMIT_STACK_SIZE '--contains works in a deep repo' ' >expect && i=1 && while test $i -lt 8000 From e6fd3c67308cb388effba646b52b7ba461ce79a7 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 24 Apr 2015 13:35:45 +0200 Subject: [PATCH 11/16] write_ref_to_lockfile(): new function, extracted from write_ref_sha1() This is the first step towards separating the checking and writing of the new reference value to committing the change. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/refs.c b/refs.c index 47e4e5380a..f94682a0b6 100644 --- a/refs.c +++ b/refs.c @@ -3022,11 +3022,11 @@ int is_branch(const char *refname) } /* - * Write sha1 into the ref specified by the lock. Make sure that errno - * is sane on error. + * Write sha1 into the open lockfile, then close the lockfile. On + * errors, rollback the lockfile and set errno to reflect the problem. */ -static int write_ref_sha1(struct ref_lock *lock, - const unsigned char *sha1, const char *logmsg) +static int write_ref_to_lockfile(struct ref_lock *lock, + const unsigned char *sha1) { static char term = '\n'; struct object *o; @@ -3055,6 +3055,19 @@ static int write_ref_sha1(struct ref_lock *lock, errno = save_errno; return -1; } + return 0; +} + +/* + * Write sha1 into the ref specified by the lock. Make sure that errno + * is sane on error. + */ +static int write_ref_sha1(struct ref_lock *lock, + const unsigned char *sha1, const char *logmsg) +{ + if (write_ref_to_lockfile(lock, sha1)) + return -1; + clear_loose_ref_cache(&ref_cache); if (log_ref_write(lock->ref_name, lock->old_sha1, sha1, logmsg) < 0 || (strcmp(lock->ref_name, lock->orig_ref_name) && From ad4cd6c29743274001cce323b670f7fb0c035ff1 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 9 May 2015 17:18:36 +0200 Subject: [PATCH 12/16] commit_ref_update(): new function, extracted from write_ref_sha1() Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/refs.c b/refs.c index f94682a0b6..3db88e0b2a 100644 --- a/refs.c +++ b/refs.c @@ -3059,15 +3059,13 @@ static int write_ref_to_lockfile(struct ref_lock *lock, } /* - * Write sha1 into the ref specified by the lock. Make sure that errno - * is sane on error. + * Commit a change to a loose reference that has already been written + * to the loose reference lockfile. Also update the reflogs if + * necessary, using the specified lockmsg (which can be NULL). */ -static int write_ref_sha1(struct ref_lock *lock, - const unsigned char *sha1, const char *logmsg) +static int commit_ref_update(struct ref_lock *lock, + const unsigned char *sha1, const char *logmsg) { - if (write_ref_to_lockfile(lock, sha1)) - return -1; - clear_loose_ref_cache(&ref_cache); if (log_ref_write(lock->ref_name, lock->old_sha1, sha1, logmsg) < 0 || (strcmp(lock->ref_name, lock->orig_ref_name) && @@ -3106,6 +3104,21 @@ static int write_ref_sha1(struct ref_lock *lock, return 0; } +/* + * Write sha1 as the new value of the reference specified by the + * (open) lock. On error, roll back the lockfile and set errno + * appropriately. + */ +static int write_ref_sha1(struct ref_lock *lock, + const unsigned char *sha1, const char *logmsg) +{ + if (write_ref_to_lockfile(lock, sha1) || + commit_ref_update(lock, sha1, logmsg)) + return -1; + + return 0; +} + int create_symref(const char *ref_target, const char *refs_heads_master, const char *logmsg) { From ba43b7f29c59f75cf5f28af3a02d16c08937e439 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 9 May 2015 17:20:39 +0200 Subject: [PATCH 13/16] rename_ref(): inline calls to write_ref_sha1() from this function Most of what it does is unneeded from these call sites. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/refs.c b/refs.c index 3db88e0b2a..ad4b6d5202 100644 --- a/refs.c +++ b/refs.c @@ -2773,8 +2773,9 @@ static int rename_ref_available(const char *oldname, const char *newname) return ret; } -static int write_ref_sha1(struct ref_lock *lock, const unsigned char *sha1, - const char *logmsg); +static int write_ref_to_lockfile(struct ref_lock *lock, const unsigned char *sha1); +static int commit_ref_update(struct ref_lock *lock, + const unsigned char *sha1, const char *logmsg); int rename_ref(const char *oldrefname, const char *newrefname, const char *logmsg) { @@ -2832,7 +2833,9 @@ int rename_ref(const char *oldrefname, const char *newrefname, const char *logms goto rollback; } hashcpy(lock->old_sha1, orig_sha1); - if (write_ref_sha1(lock, orig_sha1, logmsg)) { + + if (write_ref_to_lockfile(lock, orig_sha1) || + commit_ref_update(lock, orig_sha1, logmsg)) { error("unable to write current sha1 into %s", newrefname); goto rollback; } @@ -2848,7 +2851,8 @@ int rename_ref(const char *oldrefname, const char *newrefname, const char *logms flag = log_all_ref_updates; log_all_ref_updates = 0; - if (write_ref_sha1(lock, orig_sha1, NULL)) + if (write_ref_to_lockfile(lock, orig_sha1) || + commit_ref_update(lock, orig_sha1, NULL)) error("unable to write current sha1 into %s", oldrefname); log_all_ref_updates = flag; From 61e51e0000073b684eaf5393ae2229f4f62f35c9 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 9 May 2015 17:29:20 +0200 Subject: [PATCH 14/16] ref_transaction_commit(): inline call to write_ref_sha1() That was the last caller, so delete function write_ref_sha1(). Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/refs.c b/refs.c index ad4b6d5202..6cb1476c33 100644 --- a/refs.c +++ b/refs.c @@ -3108,21 +3108,6 @@ static int commit_ref_update(struct ref_lock *lock, return 0; } -/* - * Write sha1 as the new value of the reference specified by the - * (open) lock. On error, roll back the lockfile and set errno - * appropriately. - */ -static int write_ref_sha1(struct ref_lock *lock, - const unsigned char *sha1, const char *logmsg) -{ - if (write_ref_to_lockfile(lock, sha1) || - commit_ref_update(lock, sha1, logmsg)) - return -1; - - return 0; -} - int create_symref(const char *ref_target, const char *refs_heads_master, const char *logmsg) { @@ -3816,9 +3801,13 @@ int ref_transaction_commit(struct ref_transaction *transaction, */ unlock_ref(update->lock); update->lock = NULL; - } else if (write_ref_sha1(update->lock, update->new_sha1, - update->msg)) { - update->lock = NULL; /* freed by write_ref_sha1 */ + } else if (write_ref_to_lockfile(update->lock, + update->new_sha1) || + commit_ref_update(update->lock, + update->new_sha1, + update->msg)) { + /* freed by one of the above calls: */ + update->lock = NULL; strbuf_addf(err, "Cannot update the ref '%s'.", update->refname); ret = TRANSACTION_GENERIC_ERROR; From cbf50f9e3d8e54f09ebbe6159a1bdd622c6c2f26 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 24 Apr 2015 13:35:48 +0200 Subject: [PATCH 15/16] ref_transaction_commit(): remove the local flags variable Instead, work directly with update->flags. This has the advantage that the REF_DELETING bit, set in the first loop, can be read in the second loop instead of having to be recomputed. Plus, it was potentially confusing having both update->flags and flags, which sometimes had different values. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/refs.c b/refs.c index 6cb1476c33..8dad0dfa01 100644 --- a/refs.c +++ b/refs.c @@ -3763,16 +3763,16 @@ int ref_transaction_commit(struct ref_transaction *transaction, /* Acquire all locks while verifying old values */ for (i = 0; i < n; i++) { struct ref_update *update = updates[i]; - unsigned int flags = update->flags; - if ((flags & REF_HAVE_NEW) && is_null_sha1(update->new_sha1)) - flags |= REF_DELETING; + if ((update->flags & REF_HAVE_NEW) && + is_null_sha1(update->new_sha1)) + update->flags |= REF_DELETING; update->lock = lock_ref_sha1_basic( update->refname, ((update->flags & REF_HAVE_OLD) ? update->old_sha1 : NULL), NULL, - flags, + update->flags, &update->type); if (!update->lock) { ret = (errno == ENOTDIR) @@ -3787,9 +3787,9 @@ int ref_transaction_commit(struct ref_transaction *transaction, /* Perform updates first so live commits remain referenced */ for (i = 0; i < n; i++) { struct ref_update *update = updates[i]; - int flags = update->flags; - if ((flags & REF_HAVE_NEW) && !is_null_sha1(update->new_sha1)) { + if ((update->flags & REF_HAVE_NEW) + && !is_null_sha1(update->new_sha1)) { int overwriting_symref = ((update->type & REF_ISSYMREF) && (update->flags & REF_NODEREF)); @@ -3822,15 +3822,15 @@ int ref_transaction_commit(struct ref_transaction *transaction, /* Perform deletes now that updates are safely completed */ for (i = 0; i < n; i++) { struct ref_update *update = updates[i]; - int flags = update->flags; - if ((flags & REF_HAVE_NEW) && is_null_sha1(update->new_sha1)) { + if ((update->flags & REF_HAVE_NEW) + && is_null_sha1(update->new_sha1)) { if (delete_ref_loose(update->lock, update->type, err)) { ret = TRANSACTION_GENERIC_ERROR; goto cleanup; } - if (!(flags & REF_ISPRUNING)) + if (!(update->flags & REF_ISPRUNING)) string_list_append(&refs_to_delete, update->lock->ref_name); } From cf018ee0cd897150300a1d6431c07c840ab5b54e Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 24 Apr 2015 13:35:49 +0200 Subject: [PATCH 16/16] ref_transaction_commit(): fix atomicity and avoid fd exhaustion The old code was roughly for update in updates: acquire locks and check old_sha for update in updates: if changing value: write_ref_to_lockfile() commit_ref_update() for update in updates: if deleting value: unlink() rewrite packed-refs file for update in updates: if reference still locked: unlock_ref() This has two problems. Non-atomic updates ================== The atomicity of the reference transaction depends on all pre-checks being done in the first loop, before any changes have started being committed in the second loop. The problem is that write_ref_to_lockfile() (previously part of write_ref_sha1()), which is called from the second loop, contains two more checks: * It verifies that new_sha1 is a valid object * If the reference being updated is a branch, it verifies that new_sha1 points at a commit object (as opposed to a tag, tree, or blob). If either of these checks fails, the "transaction" is aborted during the second loop. But this might happen after some reference updates have already been permanently committed. In other words, the all-or-nothing promise of "git update-ref --stdin" could be violated. So these checks have to be moved to the first loop. File descriptor exhaustion ========================== The old code locked all of the references in the first loop, leaving all of the lockfiles open until later loops. Since we might be updating a lot of references, this could result in file descriptor exhaustion. The solution ============ After this patch, the code looks like for update in updates: acquire locks and check old_sha if changing value: write_ref_to_lockfile() else: close_ref() for update in updates: if changing value: commit_ref_update() for update in updates: if deleting value: unlink() rewrite packed-refs file for update in updates: if reference still locked: unlock_ref() This fixes both problems: 1. The pre-checks in write_ref_to_lockfile() are now done in the first loop, before any changes have been committed. If any of the checks fails, the whole transaction can now be rolled back correctly. 2. All lockfiles are closed in the first loop immediately after they are created (either by write_ref_to_lockfile() or by close_ref()). This means that there is never more than one open lockfile at a time, preventing file descriptor exhaustion. To simplify the bookkeeping across loops, add a new REF_NEEDS_COMMIT bit to update->flags, which keeps track of whether the corresponding lockfile needs to be committed, as opposed to just unlocked. (Since "struct ref_update" is internal to the refs module, this change is not visible to external callers.) This change fixes two tests in t1400. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 74 +++++++++++++++++++++++++++++++------------ t/t1400-update-ref.sh | 4 +-- 2 files changed, 55 insertions(+), 23 deletions(-) diff --git a/refs.c b/refs.c index 8dad0dfa01..96f5046712 100644 --- a/refs.c +++ b/refs.c @@ -57,6 +57,12 @@ static unsigned char refname_disposition[256] = { */ #define REF_HAVE_OLD 0x10 +/* + * Used as a flag in ref_update::flags when the lockfile needs to be + * committed. + */ +#define REF_NEEDS_COMMIT 0x20 + /* * Try to read one refname component from the front of refname. * Return the length of the component found, or -1 if the component is @@ -3760,7 +3766,12 @@ int ref_transaction_commit(struct ref_transaction *transaction, goto cleanup; } - /* Acquire all locks while verifying old values */ + /* + * Acquire all locks, verify old values if provided, check + * that new values are valid, and write new values to the + * lockfiles, ready to be activated. Only keep one lockfile + * open at a time to avoid running out of file descriptors. + */ for (i = 0; i < n; i++) { struct ref_update *update = updates[i]; @@ -3782,38 +3793,60 @@ int ref_transaction_commit(struct ref_transaction *transaction, update->refname); goto cleanup; } - } - - /* Perform updates first so live commits remain referenced */ - for (i = 0; i < n; i++) { - struct ref_update *update = updates[i]; - - if ((update->flags & REF_HAVE_NEW) - && !is_null_sha1(update->new_sha1)) { + if ((update->flags & REF_HAVE_NEW) && + !(update->flags & REF_DELETING)) { int overwriting_symref = ((update->type & REF_ISSYMREF) && (update->flags & REF_NODEREF)); - if (!overwriting_symref - && !hashcmp(update->lock->old_sha1, update->new_sha1)) { + if (!overwriting_symref && + !hashcmp(update->lock->old_sha1, update->new_sha1)) { /* * The reference already has the desired * value, so we don't need to write it. */ - unlock_ref(update->lock); - update->lock = NULL; } else if (write_ref_to_lockfile(update->lock, - update->new_sha1) || - commit_ref_update(update->lock, - update->new_sha1, - update->msg)) { - /* freed by one of the above calls: */ + update->new_sha1)) { + /* + * The lock was freed upon failure of + * write_ref_to_lockfile(): + */ update->lock = NULL; strbuf_addf(err, "Cannot update the ref '%s'.", update->refname); ret = TRANSACTION_GENERIC_ERROR; goto cleanup; } else { - /* freed by write_ref_sha1(): */ + update->flags |= REF_NEEDS_COMMIT; + } + } + if (!(update->flags & REF_NEEDS_COMMIT)) { + /* + * We didn't have to write anything to the lockfile. + * Close it to free up the file descriptor: + */ + if (close_ref(update->lock)) { + strbuf_addf(err, "Couldn't close %s.lock", + update->refname); + goto cleanup; + } + } + } + + /* Perform updates first so live commits remain referenced */ + for (i = 0; i < n; i++) { + struct ref_update *update = updates[i]; + + if (update->flags & REF_NEEDS_COMMIT) { + if (commit_ref_update(update->lock, + update->new_sha1, update->msg)) { + /* freed by commit_ref_update(): */ + update->lock = NULL; + strbuf_addf(err, "Cannot update the ref '%s'.", + update->refname); + ret = TRANSACTION_GENERIC_ERROR; + goto cleanup; + } else { + /* freed by commit_ref_update(): */ update->lock = NULL; } } @@ -3823,8 +3856,7 @@ int ref_transaction_commit(struct ref_transaction *transaction, for (i = 0; i < n; i++) { struct ref_update *update = updates[i]; - if ((update->flags & REF_HAVE_NEW) - && is_null_sha1(update->new_sha1)) { + if (update->flags & REF_DELETING) { if (delete_ref_loose(update->lock, update->type, err)) { ret = TRANSACTION_GENERIC_ERROR; goto cleanup; diff --git a/t/t1400-update-ref.sh b/t/t1400-update-ref.sh index 7a69f1a083..636d3a167c 100755 --- a/t/t1400-update-ref.sh +++ b/t/t1400-update-ref.sh @@ -1071,7 +1071,7 @@ run_with_limited_open_files () { test_lazy_prereq ULIMIT_FILE_DESCRIPTORS 'run_with_limited_open_files true' -test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' ' +test_expect_success ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' ' ( for i in $(test_seq 33) do @@ -1082,7 +1082,7 @@ test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches ) ' -test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction deleting branches does not burst open file limit' ' +test_expect_success ULIMIT_FILE_DESCRIPTORS 'large transaction deleting branches does not burst open file limit' ' ( for i in $(test_seq 33) do