Add chunked_qs_reverse function

This is kept as a separate function from chunked_qs so that both functions stay simple and easy to read. Both contain some inherent complexity since they are windowing functions, and it's important not to hide that behind multiple conditional branches.
2018-05-01 08:07:21 +01:00 · 2018-05-01 08:07:21 +01:00 · 46d4602d3f
--- a/tests/utils/__init__.py
+++ b/tests/utils/__init__.py
--- a/tests/utils/test_queryset.py
+++ b/tests/utils/test_queryset.py
@ -0,0 +1,70 @@
+from tests.autoclassify.utils import (create_failure_lines,
+                                      test_line)
+from treeherder.model.models import FailureLine
+from treeherder.utils.queryset import (chunked_qs,
+                                       chunked_qs_reverse)
+
+
+def test_chunked_qs(test_job):
+    """
+    Test `chunked_qs` function
+
+    Checks the length of the inspected chunks and the ids at their boundaries.
+    """
+    # create 25 failure lines
+    create_failure_lines(test_job, [(test_line, {}) for _ in range(25)])
+
+    chunks = list(chunked_qs(FailureLine.objects.all(), chunk_size=5))
+
+    # (chunk, id of its first row, id of its last row)
+    selected = [
+        (chunks[0], 1, 5),
+        (chunks[1], 6, 10),
+        (chunks[4], 21, 25),
+    ]
+
+    for chunk, first_id, last_id in selected:
+        assert len(chunk) == 5
+        assert chunk[0].id == first_id
+        assert chunk[4].id == last_id
+
+
+def test_chunked_qs_with_empty_qs():
+    """An empty QuerySet should not produce any chunks."""
+    empty_qs = FailureLine.objects.none()
+    assert len(list(chunked_qs(empty_qs))) == 0
+
+
+def test_chunked_qs_reverse(test_job):
+    """
+    Test `chunked_qs_reverse` function
+
+    Verifies each inspected chunk holds 5 rows and that the ids at the chunk
+    boundaries descend without overlapping the neighbouring chunk.
+    """
+    # create 25 failure lines
+    create_failure_lines(test_job, [(test_line, {}) for _ in range(25)])
+
+    rows = FailureLine.objects.all()
+    chunks = list(chunked_qs_reverse(rows, chunk_size=5))
+
+    # (chunk, id of its first row, id of its last row); the chunks are looked
+    # up here so a missing chunk fails before any assertion runs
+    selected = [
+        (chunks[0], 25, 21),
+        (chunks[1], 20, 16),
+        (chunks[4], 5, 1),
+    ]
+
+    # each inspected chunk must be full and start/end on the expected ids
+    for chunk, first_id, last_id in selected:
+        assert len(chunk) == 5
+        assert chunk[0].id == first_id
+        assert chunk[4].id == last_id
+
+
+
+def test_chunked_qs_reverse_with_empty_qs():
+    """An empty QuerySet should not produce any chunks."""
+    empty_qs = FailureLine.objects.none()
+    assert len(list(chunked_qs_reverse(empty_qs))) == 0
--- a/treeherder/utils/queryset.py
+++ b/treeherder/utils/queryset.py
@ -5,7 +5,7 @@ def chunked_qs(qs, chunk_size=10000, fields=None):
    Usage:

        >>> qs = FailureLine.objects.filter(action='test_result')
-        >>> for qs in batch_qs(qs, chunk_size=10000, fields=['id', 'message']):
+        >>> for qs in chunked_qs(qs, chunk_size=10000, fields=['id', 'message']):
        ...     for line in qs:
        ...         print(line.message)

@ -37,3 +37,38 @@ def chunked_qs(qs, chunk_size=10000, fields=None):

        # update the minimum ID for next iteration
        min_id = rows[-1].id
+
+
+def chunked_qs_reverse(qs, chunk_size=10000):
+    """
+    Generator to iterate over the given QuerySet in reverse chunk_size rows at a time
+
+    Chunks are ordered by descending id and never overlap, even when the ids
+    in the QuerySet are not contiguous.
+
+    Usage:
+
+        >>> qs = FailureLine.objects.filter(action='test_result')
+        >>> for qs in chunked_qs_reverse(qs, chunk_size=100):
+        ...     for line in qs:
+        ...         print(line.message)
+
+    Note: This method is just different enough that it seemed easier to keep
+    this function separate from chunked_qs.
+    """
+    qs = qs.order_by('-id')
+
+    # Fetch just the newest row instead of evaluating the whole QuerySet to
+    # find out whether it is empty.
+    # Can't use .only() here in case the query used select_related
+    newest = qs.first()
+    if newest is None:
+        return
+
+    max_id = newest.id
+    while True:
+        rows = qs.filter(id__lte=max_id)[:chunk_size]  # upper bound of chunk
+
+        # len() executes the query; stop once nothing is left below max_id
+        count = len(rows)
+        if count < 1:
+            break
+
+        # Resume just below the lowest id in this chunk: stepping max_id down
+        # by chunk_size would repeat rows whenever the ids have gaps.  The id
+        # is read before yielding because a consumer that deletes the rows
+        # resets their ids, and QuerySets don't support negative indexing.
+        max_id = rows[count - 1].id - 1
+
+        yield rows