From 6d63baa47883315033474fc06196330e3a5ca4e0 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 14 Jul 2014 01:42:50 -0400 Subject: [PATCH 1/4] prio-queue: factor out compare and swap operations When manipulating the priority queue's heap, we frequently have to compare and swap heap entries. As we are storing only void pointers right now, this is quite easy to do inline in a few lines. However, when we start using a more complicated heap entry in a future patch, that will get longer. Factoring out these operations lets us make future changes in one place. It also makes the code a little shorter and more readable. Note that we actually accept indices into the queue array instead of pointers. This is slightly less flexible than passing pointers-to-pointers (we could not swap items from unrelated arrays, but we would not want to), but will make further refactoring simpler (and lets us avoid repeating "queue->array" at each callsite, which led to some long lines). And finally, note that we are cleaning up an accidental use of a "struct commit" pointer to hold a temporary entry during swap. Even though we currently only use this code for commits, it is supposed to be type-agnostic. In practice this didn't matter anyway because we never dereferenced the commit pointer (and on most systems, the pointer values themselves are interchangeable between types). 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- prio-queue.c | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/prio-queue.c b/prio-queue.c index c9f8c6d253..0f4fcf2755 100644 --- a/prio-queue.c +++ b/prio-queue.c @@ -1,18 +1,28 @@ #include "cache.h" -#include "commit.h" #include "prio-queue.h" +static inline int compare(struct prio_queue *queue, int i, int j) +{ + int cmp = queue->compare(queue->array[i], queue->array[j], + queue->cb_data); + return cmp; +} + +static inline void swap(struct prio_queue *queue, int i, int j) +{ + void *tmp = queue->array[i]; + queue->array[i] = queue->array[j]; + queue->array[j] = tmp; +} + void prio_queue_reverse(struct prio_queue *queue) { int i, j; if (queue->compare != NULL) die("BUG: prio_queue_reverse() on non-LIFO queue"); - for (i = 0; i <= (j = (queue->nr - 1) - i); i++) { - struct commit *swap = queue->array[i]; - queue->array[i] = queue->array[j]; - queue->array[j] = swap; - } + for (i = 0; i <= (j = (queue->nr - 1) - i); i++) + swap(queue, i, j); } void clear_prio_queue(struct prio_queue *queue) @@ -25,37 +35,32 @@ void clear_prio_queue(struct prio_queue *queue) void prio_queue_put(struct prio_queue *queue, void *thing) { - prio_queue_compare_fn compare = queue->compare; int ix, parent; /* Append at the end */ ALLOC_GROW(queue->array, queue->nr + 1, queue->alloc); queue->array[queue->nr++] = thing; - if (!compare) + if (!queue->compare) return; /* LIFO */ /* Bubble up the new one */ for (ix = queue->nr - 1; ix; ix = parent) { parent = (ix - 1) / 2; - if (compare(queue->array[parent], queue->array[ix], - queue->cb_data) <= 0) + if (compare(queue, parent, ix) <= 0) break; - thing = queue->array[parent]; - queue->array[parent] = queue->array[ix]; - queue->array[ix] = thing; + swap(queue, parent, ix); } } void *prio_queue_get(struct prio_queue *queue) { - void *result, *swap; + void *result; int ix, child; - prio_queue_compare_fn compare = 
queue->compare; if (!queue->nr) return NULL; - if (!compare) + if (!queue->compare) return queue->array[--queue->nr]; /* LIFO */ result = queue->array[0]; @@ -67,18 +72,14 @@ void *prio_queue_get(struct prio_queue *queue) /* Push down the one at the root */ for (ix = 0; ix * 2 + 1 < queue->nr; ix = child) { child = ix * 2 + 1; /* left */ - if ((child + 1 < queue->nr) && - (compare(queue->array[child], queue->array[child + 1], - queue->cb_data) >= 0)) + if (child + 1 < queue->nr && + compare(queue, child, child + 1) >= 0) child++; /* use right child */ - if (compare(queue->array[ix], queue->array[child], - queue->cb_data) <= 0) + if (compare(queue, ix, child) <= 0) break; - swap = queue->array[child]; - queue->array[child] = queue->array[ix]; - queue->array[ix] = swap; + swap(queue, child, ix); } return result; } From e8f91e3df82a33b7e0b59c935cc4af892068baa2 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 14 Jul 2014 01:51:59 -0400 Subject: [PATCH 2/4] prio-queue: make output stable with respect to insertion If two items are added to a prio_queue and compare equal, they currently come out in an apparently random order (this order is deterministic for a particular sequence of insertions and removals, but does not necessarily match the insertion order). This makes it unlike using a date-ordered commit_list, which is one of the main types we would like to replace with it (because prio_queue does not suffer from O(n) insertions). We can make the priority queue stable by keeping an insertion counter for each element, and using it to break ties. This does increase the memory usage of the structure (one int per element), but in practice it does not seem to affect runtime. A best-of-five "git rev-list --topo-order" on linux.git showed less than 1% difference (well within the run-to-run noise). In an ideal world, we would offer both stable and unstable priority queues (the latter to try to maximize performance). 
However, given the lack of a measurable performance difference, it is not worth the extra code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- prio-queue.c | 15 ++++++++++----- prio-queue.h | 8 +++++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/prio-queue.c b/prio-queue.c index 0f4fcf2755..e4365b00d6 100644 --- a/prio-queue.c +++ b/prio-queue.c @@ -3,14 +3,16 @@ static inline int compare(struct prio_queue *queue, int i, int j) { - int cmp = queue->compare(queue->array[i], queue->array[j], + int cmp = queue->compare(queue->array[i].data, queue->array[j].data, queue->cb_data); + if (!cmp) + cmp = queue->array[i].ctr - queue->array[j].ctr; return cmp; } static inline void swap(struct prio_queue *queue, int i, int j) { - void *tmp = queue->array[i]; + struct prio_queue_entry tmp = queue->array[i]; queue->array[i] = queue->array[j]; queue->array[j] = tmp; } @@ -31,6 +33,7 @@ void clear_prio_queue(struct prio_queue *queue) queue->nr = 0; queue->alloc = 0; queue->array = NULL; + queue->insertion_ctr = 0; } void prio_queue_put(struct prio_queue *queue, void *thing) @@ -39,7 +42,9 @@ void prio_queue_put(struct prio_queue *queue, void *thing) /* Append at the end */ ALLOC_GROW(queue->array, queue->nr + 1, queue->alloc); - queue->array[queue->nr++] = thing; + queue->array[queue->nr].ctr = queue->insertion_ctr++; + queue->array[queue->nr].data = thing; + queue->nr++; if (!queue->compare) return; /* LIFO */ @@ -61,9 +66,9 @@ void *prio_queue_get(struct prio_queue *queue) if (!queue->nr) return NULL; if (!queue->compare) - return queue->array[--queue->nr]; /* LIFO */ + return queue->array[--queue->nr].data; /* LIFO */ - result = queue->array[0]; + result = queue->array[0].data; if (!--queue->nr) return result; diff --git a/prio-queue.h b/prio-queue.h index 9c3cd1f875..d030ec9dd6 100644 --- a/prio-queue.h +++ b/prio-queue.h @@ -21,11 +21,17 @@ */ typedef int (*prio_queue_compare_fn)(const void *one, const void *two, void *cb_data); +struct 
prio_queue_entry { + unsigned ctr; + void *data; +}; + struct prio_queue { prio_queue_compare_fn compare; + unsigned insertion_ctr; void *cb_data; int alloc, nr; - void **array; + struct prio_queue_entry *array; }; /* From 73f43f220f0276012de50c84413fd61bf6aa307b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 14 Jul 2014 01:53:54 -0400 Subject: [PATCH 3/4] paint_down_to_common: use prio_queue When we are traversing to find merge bases, we keep our usual commit_list of commits to process, sorted by their commit timestamp. As we add each parent to the list, we have to spend "O(width of history)" to do the insertion, where the width of history is the number of simultaneous lines of development. If we instead use a heap-based priority queue, we can do these insertions in "O(log width)" time. This provides minor speedups to merge-base calculations (timings in linux.git, warm cache, best-of-five): [before] $ git merge-base HEAD v2.6.12 real 0m3.251s user 0m3.148s sys 0m0.104s [after] $ git merge-base HEAD v2.6.12 real 0m3.234s user 0m3.108s sys 0m0.128s That's only a 0.5% speedup, but it does help protect us against pathological cases. While we are munging the "interesting" function, we also take the opportunity to give it a more descriptive name, and convert the return value to an int (we returned the first interesting commit, but nobody ever looked at it). 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit.c | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/commit.c b/commit.c index acb74b55d4..1fc60c0109 100644 --- a/commit.c +++ b/commit.c @@ -786,45 +786,41 @@ void sort_in_topological_order(struct commit_list **list, enum rev_sort_order so static const unsigned all_flags = (PARENT1 | PARENT2 | STALE | RESULT); -static struct commit *interesting(struct commit_list *list) +static int queue_has_nonstale(struct prio_queue *queue) { - while (list) { - struct commit *commit = list->item; - list = list->next; - if (commit->object.flags & STALE) - continue; - return commit; + int i; + for (i = 0; i < queue->nr; i++) { + struct commit *commit = queue->array[i].data; + if (!(commit->object.flags & STALE)) + return 1; } - return NULL; + return 0; } /* all input commits in one and twos[] must have been parsed! */ static struct commit_list *paint_down_to_common(struct commit *one, int n, struct commit **twos) { - struct commit_list *list = NULL; + struct prio_queue queue = { compare_commits_by_commit_date }; struct commit_list *result = NULL; int i; one->object.flags |= PARENT1; - commit_list_insert_by_date(one, &list); - if (!n) - return list; + if (!n) { + commit_list_append(one, &result); + return result; + } + prio_queue_put(&queue, one); + for (i = 0; i < n; i++) { twos[i]->object.flags |= PARENT2; - commit_list_insert_by_date(twos[i], &list); + prio_queue_put(&queue, twos[i]); } - while (interesting(list)) { - struct commit *commit; + while (queue_has_nonstale(&queue)) { + struct commit *commit = prio_queue_get(&queue); struct commit_list *parents; - struct commit_list *next; int flags; - commit = list->item; - next = list->next; - free(list); - list = next; - flags = commit->object.flags & (PARENT1 | PARENT2 | STALE); if (flags == (PARENT1 | PARENT2)) { if (!(commit->object.flags & RESULT)) { @@ -843,11 +839,11 @@ static struct commit_list 
*paint_down_to_common(struct commit *one, int n, struc if (parse_commit(p)) return NULL; p->object.flags |= flags; - commit_list_insert_by_date(p, &list); + prio_queue_put(&queue, p); } } - free_commit_list(list); + clear_prio_queue(&queue); return result; } From f0e802ca200b1296495d2ee5c55cd8f8083486bc Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 14 Jul 2014 01:40:22 -0400 Subject: [PATCH 4/4] t5539: update a flaky test The test creates some unrelated commits in two separate repositories, and then fetches from one to the other. Since the commit creation happens in a subshell, the first commit in each ends up with the same test_tick value. When fetch-pack looks at the two root commits "unrelated1" and "new-too", the exact sequence of ACKs is different depending on which one it pulls out of the queue first. With the current code, it happens to be "unrelated1" (though this is not at all guaranteed by the prio_queue data structure, it is deterministic for this particular sequence of input). We see the ready-ACK, and the test succeeds. With the stable queue, we reliably get "new-too" out (since it is our local tip, it is added to the queue before we even talk to the remote). We never see a ready-ACK, and the test fails due to the grep on the TRACE_PACKET output at the end (the fetch itself succeeds as expected). I'm really not quite clear on what's supposed to be going on in the test. I can make it pass with this change. --- t/t5539-fetch-http-shallow.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/t/t5539-fetch-http-shallow.sh b/t/t5539-fetch-http-shallow.sh index 94553e1039..b46118846c 100755 --- a/t/t5539-fetch-http-shallow.sh +++ b/t/t5539-fetch-http-shallow.sh @@ -54,6 +54,7 @@ EOF test_expect_success 'no shallow lines after receiving ACK ready' ' ( cd shallow && + test_tick && for i in $(test_seq 15) do git checkout --orphan unrelated$i &&