зеркало из https://github.com/mozilla/kitsune.git
use bulk operations
This commit is contained in:
Родитель
6231c465e5
Коммит
ee79ab8d09
|
@ -1,13 +1,14 @@
|
|||
import logging
|
||||
|
||||
from django.db import IntegrityError, models, transaction
|
||||
from django.db.models import Subquery
|
||||
from django.db.models import Q, Subquery
|
||||
from django.utils.translation import gettext_lazy as _lazy
|
||||
|
||||
from kitsune.dashboards import PERIODS
|
||||
from kitsune.products.models import Product
|
||||
from kitsune.sumo import googleanalytics
|
||||
from kitsune.sumo.models import LocaleField, ModelBase
|
||||
from kitsune.sumo.utils import chunked
|
||||
from kitsune.wiki.models import Document
|
||||
|
||||
log = logging.getLogger("k.dashboards")
|
||||
|
@ -35,26 +36,70 @@ class WikiDocumentVisits(ModelBase):
|
|||
|
||||
cls.objects.filter(period=period).delete()
|
||||
|
||||
# Then we can create the fresh results for this period.
|
||||
# Next, let's gather some data we need. We're sacrificing memory
|
||||
# here in order to reduce the number of database queries later on.
|
||||
if verbose:
|
||||
log.info(f"Creating fresh instances of {cls.__name__} with period = {period}...")
|
||||
log.info("Gathering pageviews per article from GA4 data API...")
|
||||
|
||||
instance_by_locale_and_slug = {}
|
||||
for (locale, slug), visits in googleanalytics.pageviews_by_document(
|
||||
period, verbose=verbose
|
||||
):
|
||||
instance_by_locale_and_slug[(locale, slug)] = cls(
|
||||
document_id=Subquery(
|
||||
Document.objects.filter(locale=locale, slug=slug).values("id")
|
||||
),
|
||||
period=period,
|
||||
visits=visits,
|
||||
)
|
||||
|
||||
# Then we can create the fresh results for this period.
|
||||
if verbose:
|
||||
log.info(
|
||||
f"Creating {len(instance_by_locale_and_slug)} fresh instances of "
|
||||
f"{cls.__name__} with period = {period}..."
|
||||
)
|
||||
|
||||
def create_batch(batch_of_locale_and_slug_queries):
|
||||
"""
|
||||
Create a batch of instances in one shot, but only include instances that
|
||||
refer to an existing Document, so we avoid triggering an integrity error.
|
||||
A call to this function makes only two database queries no matter how
|
||||
many instances we need to validate and create.
|
||||
"""
|
||||
cls.objects.bulk_create(
|
||||
[
|
||||
instance_by_locale_and_slug[locale_and_slug]
|
||||
for locale_and_slug in Document.objects.filter(
|
||||
batch_of_locale_and_slug_queries
|
||||
).values_list("locale", "slug")
|
||||
]
|
||||
)
|
||||
|
||||
# Let's create the fresh instances in batches, so we avoid exposing ourselves to
|
||||
# the possibility of transgressing some query size limit.
|
||||
batch_size = 1000
|
||||
for batch_of_pairs in chunked(list(instance_by_locale_and_slug), batch_size):
|
||||
locale_and_slug_queries = Q()
|
||||
for locale, slug in batch_of_pairs:
|
||||
locale_and_slug_queries |= Q(locale=locale, slug=slug)
|
||||
|
||||
if verbose:
|
||||
log.info(f"Creating a batch of {len(batch_of_pairs)} instances...")
|
||||
|
||||
try:
|
||||
with transaction.atomic():
|
||||
cls.objects.create(
|
||||
document_id=Subquery(
|
||||
Document.objects.filter(locale=locale, slug=slug).values("id")
|
||||
),
|
||||
period=period,
|
||||
visits=visits,
|
||||
)
|
||||
create_batch(locale_and_slug_queries)
|
||||
except IntegrityError:
|
||||
# We've already rolled back the bad insertion, which was due to the
|
||||
# fact that the document no longer exists, so let's move on.
|
||||
pass
|
||||
# There is a very slim chance that one or more Documents have been deleted in
|
||||
# the moment of time between the formation of the list of valid instances and
|
||||
# actually creating them, so let's give it one more try, assuming there's an
|
||||
# even slimmer chance that lightning will strike twice. If this one fails,
|
||||
# we'll roll-back everything and give up on the entire effort.
|
||||
create_batch(locale_and_slug_queries)
|
||||
|
||||
if verbose:
|
||||
log.info("Done.")
|
||||
|
||||
|
||||
L10N_TOP20_CODE = "percent_localized_top20"
|
||||
|
|
|
@ -10,7 +10,7 @@ from django.contrib.auth.models import User
|
|||
from django.contrib.contenttypes.fields import GenericForeignKey, GenericRelation
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
from django.core.cache import cache
|
||||
from django.db import IntegrityError, models
|
||||
from django.db import IntegrityError, models, transaction
|
||||
from django.db.models import Count, Subquery
|
||||
from django.db.models.functions import Now
|
||||
from django.db.models.signals import post_save
|
||||
|
@ -31,6 +31,7 @@ from kitsune.sumo.i18n import split_into_language_and_path
|
|||
from kitsune.sumo.models import LocaleField, ModelBase
|
||||
from kitsune.sumo.templatetags.jinja_helpers import urlparams, wiki_to_html
|
||||
from kitsune.sumo.urlresolvers import reverse
|
||||
from kitsune.sumo.utils import chunked
|
||||
from kitsune.tags.models import BigVocabTaggableMixin
|
||||
from kitsune.tags.utils import add_existing_tag
|
||||
from kitsune.upload.models import ImageAttachment
|
||||
|
@ -732,15 +733,67 @@ class QuestionVisits(ModelBase):
|
|||
"""Update the stats from Google Analytics."""
|
||||
from kitsune.sumo import googleanalytics
|
||||
|
||||
for question_id, visits in googleanalytics.pageviews_by_question(verbose=verbose):
|
||||
try:
|
||||
cls.objects.update_or_create(
|
||||
with transaction.atomic():
|
||||
# First, let's gather some data we need. We're sacrificing memory
|
||||
# here in order to reduce the number of database queries later on.
|
||||
if verbose:
|
||||
log.info("Gathering pageviews per question from GA4 data API...")
|
||||
|
||||
instance_by_question_id = {}
|
||||
for question_id, visits in googleanalytics.pageviews_by_question(verbose=verbose):
|
||||
instance_by_question_id[question_id] = cls(
|
||||
question_id=Subquery(Question.objects.filter(id=question_id).values("id")),
|
||||
defaults=dict(visits=visits),
|
||||
visits=visits,
|
||||
)
|
||||
except IntegrityError:
|
||||
# Skip the update-or-create if the question no longer exists.
|
||||
pass
|
||||
|
||||
question_ids = list(instance_by_question_id)
|
||||
|
||||
# Next, let's clear out the stale instances that have new results.
|
||||
if verbose:
|
||||
log.info(f"Deleting all stale instances of {cls.__name__}...")
|
||||
|
||||
cls.objects.filter(question_id__in=question_ids).delete()
|
||||
|
||||
# Then we can create fresh instances for the questions that have results.
|
||||
if verbose:
|
||||
log.info(f"Creating {len(question_ids)} fresh instances of {cls.__name__}...")
|
||||
|
||||
def create_batch(batch_of_question_ids):
|
||||
"""
|
||||
Create a batch of instances in one shot, but only include instances that
|
||||
refer to an existing Question, so we avoid triggering an integrity error.
|
||||
A call to this function makes only two database queries no matter how
|
||||
many instances we need to validate and create.
|
||||
"""
|
||||
cls.objects.bulk_create(
|
||||
[
|
||||
instance_by_question_id[id]
|
||||
for id in Question.objects.filter(
|
||||
id__in=batch_of_question_ids
|
||||
).values_list("id", flat=True)
|
||||
]
|
||||
)
|
||||
|
||||
# Let's create the fresh instances in batches, so we avoid exposing ourselves to
|
||||
# the possibility of transgressing some query size limit.
|
||||
batch_size = 1000
|
||||
for batch_of_question_ids in chunked(question_ids, batch_size):
|
||||
if verbose:
|
||||
log.info(f"Creating a batch of {len(batch_of_question_ids)} instances...")
|
||||
|
||||
try:
|
||||
with transaction.atomic():
|
||||
create_batch(batch_of_question_ids)
|
||||
except IntegrityError:
|
||||
# There is a very slim chance that one or more Questions have been deleted in
|
||||
# the moment of time between the formation of the list of valid instances and
|
||||
# actually creating them, so let's give it one more try, assuming there's an
|
||||
# even slimmer chance that lightning will strike twice. If this one fails,
|
||||
# we'll roll-back everything and give up on the entire effort.
|
||||
create_batch(batch_of_question_ids)
|
||||
|
||||
if verbose:
|
||||
log.info("Done.")
|
||||
|
||||
|
||||
class QuestionLocale(ModelBase):
|
||||
|
|
Загрузка…
Ссылка в новой задаче