add before/after arguments to es7_reindex command (#4556)

This commit is contained in:
Leo McArdle 2020-11-30 17:13:32 +00:00 коммит произвёл GitHub
Родитель 0609352eb7
Коммит 3370b8ef85
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
9 изменённых файлов: 61 добавлений и 5 удалений

Просмотреть файл

@ -300,6 +300,8 @@ class Post(ModelBase):
)
flags = GenericRelation(FlaggedObject)
updated_column_name = "updated"
class Meta:
ordering = ["created"]

Просмотреть файл

@ -106,6 +106,7 @@ class Question(ModelBase, BigVocabTaggableMixin, SearchMixin):
contributors_cache_key = "question:contributors:%s"
objects = QuestionManager()
updated_column_name = "updated"
class Meta:
ordering = ["-updated"]
@ -941,6 +942,7 @@ class Answer(ModelBase, SearchMixin):
images_cache_key = "answer:images:%s"
objects = AnswerManager()
updated_column_name = "updated"
class Meta:
ordering = ["created"]

Просмотреть файл

@ -2,6 +2,7 @@ from math import ceil
from django.core.management.base import BaseCommand
from django.db import connection, reset_queries
from dateutil.parser import parse as dateutil_parse
from kitsune.search.v2.es7_utils import get_doc_types, index_objects_bulk
@ -36,6 +37,18 @@ class Command(BaseCommand):
default=100,
help="Index this number of documents at once",
)
parser.add_argument(
"--updated-before",
type=dateutil_parse,
default=None,
help="Only index model instances updated before this date",
)
parser.add_argument(
"--updated-after",
type=dateutil_parse,
default=None,
help="Only index model instances updated after this date",
)
parser.add_argument(
"--print-sql-count",
action="store_true",
@ -55,7 +68,21 @@ class Command(BaseCommand):
self.stdout.write("Reindexing: {}".format(dt.__name__))
model = dt.get_model()
qs = model.objects.all()
before = kwargs["updated_before"]
after = kwargs["updated_after"]
if before or after:
try:
qs = model.objects_range(before=before, after=after)
except NotImplementedError:
print(
f"{model} hasn't implemeneted an `updated_column_name` property."
"No documents will be indexed of this type."
)
continue
else:
qs = model._default_manager.all()
total = qs.count()
count = kwargs["count"]

Просмотреть файл

@ -5,12 +5,33 @@ from django.db import models
class ModelBase(models.Model):
"""Base class for SUMO models.
* Adds objects_range class method.
* Adds update method.
"""
class Meta:
abstract = True
@classmethod
def objects_range(cls, before=None, after=None):
    """
    Return a QuerySet of rows updated before, after or between the supplied datetimes.

    The `updated_column_name` attribute must be defined on a model using this,
    as it names the column (or related lookup path) that is filtered on.

    :arg before: if given, only rows whose column value is strictly less than
        this are kept.
    :arg after: if given, only rows whose column value is strictly greater than
        this are kept.
    :returns: a QuerySet built from the model's default manager; unfiltered
        when neither bound is supplied.
    :raises NotImplementedError: if the model defines no `updated_column_name`.
    """
    column_name = getattr(cls, "updated_column_name", None)
    if not column_name:
        raise NotImplementedError(
            f"{cls.__name__} must define `updated_column_name` to use objects_range()"
        )

    # Start from .all() so that a QuerySet (not the Manager itself) is returned
    # even when no bound is supplied; a Manager cannot be iterated or sliced.
    queryset = cls._default_manager.all()
    if before:
        queryset = queryset.filter(**{f"{column_name}__lt": before})
    if after:
        queryset = queryset.filter(**{f"{column_name}__gt": after})

    return queryset
# TODO: Remove this in django 1.6, which comes with a smarter save().
def update(self, **kw):
"""

Просмотреть файл

@ -111,6 +111,8 @@ class Profile(ModelBase, SearchMixin):
products = models.ManyToManyField(Product, related_name="subscribed_users")
fxa_password_change = models.DateTimeField(blank=True, null=True)
updated_column_name = "user__date_joined"
class Meta(object):
permissions = (
("view_karma_points", "Can view karma points"),

Просмотреть файл

@ -154,6 +154,8 @@ class Document(
# List of related documents
related_documents = models.ManyToManyField("self", blank=True)
updated_column_name = "current_revision__created"
# firefox_versions,
# operating_systems:
# defined in the respective classes below. Use them as in

Просмотреть файл

@ -59,6 +59,7 @@ puente~=0.5.0
py-wikimarkup~=2.0.1
pyOpenSSL~=19.1.0
pyquery~=1.2.9
python-dateutil~=2.8.1
python-decouple~=3.3
python-memcached~=1.59
pytz~=2020.1

Просмотреть файл

@ -1,4 +1,4 @@
# SHA1:8372704396375f50213ce7c14dc7d520635c9b29
# SHA1:f53060b9cfd6055255c4a96f7f908da6e9ee966c
#
# This file is autogenerated by pip-compile-multi
# To update, run:
@ -544,7 +544,7 @@ pyquery==1.2.17 \
python-dateutil==2.8.1 \
--hash=sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c \
--hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a \
# via botocore, faker
# via -r requirements/default.in, botocore, faker
python-decouple==3.3 \
--hash=sha256:55c546b85b0c47a15a47a4312d451a437f7344a9be3e001660bccd93b637de95 \
# via -r requirements/default.in

Просмотреть файл

@ -92,8 +92,7 @@ def job_process_exit_surveys():
@scheduled_job("cron", month="*", day="*", hour="*", minute="45", max_instances=1, coalesce=True)
@babis.decorator(ping_after=settings.DMS_REINDEX)
def job_reindex():
# Look back 90 minutes for new items to avoid racing conditions between
# cron execution and db updates.
# Index items newer than 90 minutes old in ES2
call_command("esreindex --minutes-ago 90")