From 3370b8ef85676df11b1dd04b961cb53fe60c1b2f Mon Sep 17 00:00:00 2001 From: Leo McArdle Date: Mon, 30 Nov 2020 17:13:32 +0000 Subject: [PATCH] add before/after arguments to es7_reindex command (#4556) --- kitsune/forums/models.py | 2 ++ kitsune/questions/models.py | 2 ++ .../v2/management/commands/es7_reindex.py | 29 ++++++++++++++++++- kitsune/sumo/models.py | 21 ++++++++++++++ kitsune/users/models.py | 2 ++ kitsune/wiki/models.py | 2 ++ requirements/default.in | 1 + requirements/default.txt | 4 +-- scripts/cron.py | 3 +- 9 files changed, 61 insertions(+), 5 deletions(-) diff --git a/kitsune/forums/models.py b/kitsune/forums/models.py index 39ae88829..e59b0fc2a 100644 --- a/kitsune/forums/models.py +++ b/kitsune/forums/models.py @@ -300,6 +300,8 @@ class Post(ModelBase): ) flags = GenericRelation(FlaggedObject) + updated_column_name = "updated" + class Meta: ordering = ["created"] diff --git a/kitsune/questions/models.py b/kitsune/questions/models.py index 493b944d4..024bd437c 100755 --- a/kitsune/questions/models.py +++ b/kitsune/questions/models.py @@ -106,6 +106,7 @@ class Question(ModelBase, BigVocabTaggableMixin, SearchMixin): contributors_cache_key = "question:contributors:%s" objects = QuestionManager() + updated_column_name = "updated" class Meta: ordering = ["-updated"] @@ -941,6 +942,7 @@ class Answer(ModelBase, SearchMixin): images_cache_key = "answer:images:%s" objects = AnswerManager() + updated_column_name = "updated" class Meta: ordering = ["created"] diff --git a/kitsune/search/v2/management/commands/es7_reindex.py b/kitsune/search/v2/management/commands/es7_reindex.py index 79fca123a..fee7ffe0c 100644 --- a/kitsune/search/v2/management/commands/es7_reindex.py +++ b/kitsune/search/v2/management/commands/es7_reindex.py @@ -2,6 +2,7 @@ from math import ceil from django.core.management.base import BaseCommand from django.db import connection, reset_queries +from dateutil.parser import parse as dateutil_parse from kitsune.search.v2.es7_utils import 
get_doc_types, index_objects_bulk @@ -36,6 +37,18 @@ class Command(BaseCommand): default=100, help="Index this number of documents at once", ) + parser.add_argument( + "--updated-before", + type=dateutil_parse, + default=None, + help="Only index model instances updated before this date", + ) + parser.add_argument( + "--updated-after", + type=dateutil_parse, + default=None, + help="Only index model instances updated after this date", + ) parser.add_argument( "--print-sql-count", action="store_true", @@ -55,7 +68,21 @@ class Command(BaseCommand): self.stdout.write("Reindexing: {}".format(dt.__name__)) model = dt.get_model() - qs = model.objects.all() + + before = kwargs["updated_before"] + after = kwargs["updated_after"] + if before or after: + try: + qs = model.objects_range(before=before, after=after) + except NotImplementedError: + print( + f"{model} hasn't implemented an `updated_column_name` property. " + "No documents will be indexed of this type." + ) + continue + else: + qs = model._default_manager.all() + total = qs.count() count = kwargs["count"] diff --git a/kitsune/sumo/models.py b/kitsune/sumo/models.py index 33a7b56e4..9ada4858f 100644 --- a/kitsune/sumo/models.py +++ b/kitsune/sumo/models.py @@ -5,12 +5,33 @@ from django.db import models class ModelBase(models.Model): """Base class for SUMO models. + * Adds objects_range class method. * Adds update method. """ class Meta: abstract = True + @classmethod + def objects_range(cls, before=None, after=None): + """ + Returns a QuerySet of rows updated before, after or between the supplied datetimes. + + The `updated_column_name` property must be defined on a model using this, + as that will be used as the column to filter on. 
+ """ + column_name = getattr(cls, "updated_column_name", None) + if not column_name: + raise NotImplementedError + + queryset = cls._default_manager + if before: + queryset = queryset.filter(**{f"{column_name}__lt": before}) + if after: + queryset = queryset.filter(**{f"{column_name}__gt": after}) + + return queryset + # TODO: Remove this in django 1.6, which comes with a smarter save(). def update(self, **kw): """ diff --git a/kitsune/users/models.py b/kitsune/users/models.py index 12c393304..5ce7a76f3 100644 --- a/kitsune/users/models.py +++ b/kitsune/users/models.py @@ -111,6 +111,8 @@ class Profile(ModelBase, SearchMixin): products = models.ManyToManyField(Product, related_name="subscribed_users") fxa_password_change = models.DateTimeField(blank=True, null=True) + updated_column_name = "user__date_joined" + class Meta(object): permissions = ( ("view_karma_points", "Can view karma points"), diff --git a/kitsune/wiki/models.py b/kitsune/wiki/models.py index 105b52585..05a5c37c8 100644 --- a/kitsune/wiki/models.py +++ b/kitsune/wiki/models.py @@ -154,6 +154,8 @@ class Document( # List of related documents related_documents = models.ManyToManyField("self", blank=True) + updated_column_name = "current_revision__created" + # firefox_versions, # operating_systems: # defined in the respective classes below. 
Use them as in diff --git a/requirements/default.in b/requirements/default.in index ec85896ac..b8bb8fca8 100644 --- a/requirements/default.in +++ b/requirements/default.in @@ -59,6 +59,7 @@ puente~=0.5.0 py-wikimarkup~=2.0.1 pyOpenSSL~=19.1.0 pyquery~=1.2.9 +python-dateutil~=2.8.1 python-decouple~=3.3 python-memcached~=1.59 pytz~=2020.1 diff --git a/requirements/default.txt b/requirements/default.txt index da6a6055c..a3e89a601 100644 --- a/requirements/default.txt +++ b/requirements/default.txt @@ -1,4 +1,4 @@ -# SHA1:8372704396375f50213ce7c14dc7d520635c9b29 +# SHA1:f53060b9cfd6055255c4a96f7f908da6e9ee966c # # This file is autogenerated by pip-compile-multi # To update, run: @@ -544,7 +544,7 @@ pyquery==1.2.17 \ python-dateutil==2.8.1 \ --hash=sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c \ --hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a \ - # via botocore, faker + # via -r requirements/default.in, botocore, faker python-decouple==3.3 \ --hash=sha256:55c546b85b0c47a15a47a4312d451a437f7344a9be3e001660bccd93b637de95 \ # via -r requirements/default.in diff --git a/scripts/cron.py b/scripts/cron.py index 155167dc0..022bb7fdd 100644 --- a/scripts/cron.py +++ b/scripts/cron.py @@ -92,8 +92,7 @@ def job_process_exit_surveys(): @scheduled_job("cron", month="*", day="*", hour="*", minute="45", max_instances=1, coalesce=True) @babis.decorator(ping_after=settings.DMS_REINDEX) def job_reindex(): - # Look back 90 minutes for new items to avoid racing conditions between - # cron execution and db updates. + # Index items newer than 90 minutes old in ES2 call_command("esreindex --minutes-ago 90")