Add chunked_qs_reverse function

This is a separate function from chunked_qs to keep both functions simple
and easy to read. Both functions contain some inherent complexity, since
they are windowing functions, and it's important not to hide that behind
multiple conditional branches.
George Hickman 2018-05-01 08:07:21 +01:00 committed by George Hickman
Parent bdaf0b5ab3
Commit 46d4602d3f
3 changed files with 106 additions and 1 deletion
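
For context before the file diffs: the commit message above positions chunked_qs_reverse as a sibling of the existing forward generator chunked_qs. Only that function's signature, docstring and final two lines are visible in the hunks below, so the loop body in this sketch is an assumption reconstructed around those visible pieces, not necessarily the exact upstream code.

def chunked_qs(qs, chunk_size=10000, fields=None):
    """Sketch of the forward windowing generator (loop body partly assumed)."""
    min_id = 0

    while True:
        # lower bound of this window; ordering by id keeps successive chunks disjoint
        chunk = qs.filter(id__gt=min_id).order_by('id')

        if fields is not None:
            chunk = chunk.only(*fields)

        rows = list(chunk[:chunk_size])

        if len(rows) < 1:
            break

        yield rows

        # update the minimum ID for next iteration (these two lines appear in the diff below)
        min_id = rows[-1].id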

tests/utils/__init__.py (new empty file, 0 lines)

@@ -0,0 +1,70 @@
from tests.autoclassify.utils import (create_failure_lines,
                                      test_line)
from treeherder.model.models import FailureLine
from treeherder.utils.queryset import (chunked_qs,
                                       chunked_qs_reverse)


def test_chunked_qs(test_job):
    # create 25 failure lines
    create_failure_lines(test_job, [(test_line, {}) for i in range(25)])

    qs = FailureLine.objects.all()
    chunks = list(chunked_qs(qs, chunk_size=5))

    one = chunks[0]
    two = chunks[1]
    five = chunks[4]

    assert len(one) == 5
    assert one[0].id == 1
    assert one[4].id == 5

    assert len(two) == 5
    assert two[0].id == 6
    assert two[4].id == 10

    assert len(five) == 5
    assert five[0].id == 21
    assert five[4].id == 25


def test_chunked_qs_with_empty_qs():
    chunks = list(chunked_qs(FailureLine.objects.none()))

    assert len(chunks) == 0


def test_chunked_qs_reverse(test_job):
    """
    Test the `chunked_qs_reverse` function.

    Specifically checks the length of chunks and that their items don't overlap.
    """
    # create 25 failure lines
    create_failure_lines(test_job, [(test_line, {}) for i in range(25)])

    qs = FailureLine.objects.all()
    chunks = list(chunked_qs_reverse(qs, chunk_size=5))

    one = chunks[0]
    two = chunks[1]
    five = chunks[4]

    assert len(one) == 5
    assert one[0].id == 25
    assert one[4].id == 21

    assert len(two) == 5
    assert two[0].id == 20
    assert two[4].id == 16

    assert len(five) == 5
    assert five[0].id == 5
    assert five[4].id == 1


def test_chunked_qs_reverse_with_empty_qs():
    chunks = list(chunked_qs_reverse(FailureLine.objects.none()))

    assert len(chunks) == 0

@@ -5,7 +5,7 @@ def chunked_qs(qs, chunk_size=10000, fields=None):
     Usage:

         >>> qs = FailureLine.objects.filter(action='test_result')
-        >>> for qs in batch_qs(qs, chunk_size=10000, fields=['id', 'message']):
+        >>> for qs in chunked_qs(qs, chunk_size=10000, fields=['id', 'message']):
         ...     for line in qs:
         ...         print(line.message)
@@ -37,3 +37,38 @@ def chunked_qs(qs, chunk_size=10000, fields=None):
         # update the minimum ID for next iteration
         min_id = rows[-1].id


def chunked_qs_reverse(qs, chunk_size=10000):
    """
    Generator to iterate over the given QuerySet in reverse, chunk_size rows at a time

    Usage:

        >>> qs = FailureLine.objects.filter(action='test_result')
        >>> for qs in chunked_qs_reverse(qs, chunk_size=100):
        ...     for line in qs:
        ...         print(line.message)

    Note: This method is just different enough that it seemed easier to keep
    this function separate to chunked_qs.
    """
    if not qs:
        return

    qs = qs.order_by('-id')

    # Can't use .only() here in case the query used select_related
    max_id = qs.first().id
    while True:
        chunk = qs.filter(id__lte=max_id)  # upper bound of this chunk
        rows = chunk[:chunk_size]

        if len(rows) < 1:
            break

        yield rows

        # update the maximum ID for next iteration
        max_id = max_id - chunk_size
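
A note on the two windowing strategies shown above: chunked_qs advances its window from the last row it actually fetched (min_id = rows[-1].id), while chunked_qs_reverse steps its upper bound down by a fixed chunk_size each pass. The short sketch below is a hypothetical usage example, not part of this commit; it only assumes the FailureLine model and filter value already used in the docstrings.

# Hypothetical usage sketch: walk the newest matching rows first,
# chunk_size rows at a time.
from treeherder.model.models import FailureLine
from treeherder.utils.queryset import chunked_qs_reverse

qs = FailureLine.objects.filter(action='test_result')

for rows in chunked_qs_reverse(qs, chunk_size=1000):
    # each `rows` slice is ordered by -id, so rows[0] is the newest
    # row in this window
    print(rows[0].id, len(rows))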