From 4451fac1cff794241d2869c5a6dd9f236f2cae19 Mon Sep 17 00:00:00 2001 From: Wil Clouser Date: Mon, 3 May 2010 23:44:31 -0700 Subject: [PATCH 1/2] Migrate collection_stats cron job --- apps/bandwagon/cron.py | 80 +++++++++++++++++++++++++++++++++++++++++ apps/bandwagon/tasks.py | 1 + apps/stats/models.py | 13 +++++++ 3 files changed, 94 insertions(+) create mode 100644 apps/bandwagon/cron.py create mode 100644 apps/bandwagon/tasks.py diff --git a/apps/bandwagon/cron.py b/apps/bandwagon/cron.py new file mode 100644 index 0000000000..61805b8405 --- /dev/null +++ b/apps/bandwagon/cron.py @@ -0,0 +1,80 @@ +import datetime +import logging + +from django.db import connection, transaction +from django.db.models import Count +from celery.decorators import task +from celery.messaging import establish_connection + +from amo.utils import chunked +from bandwagon.models import (CollectionSubscription, + CollectionVote) +import cronjobs + +task_log = logging.getLogger('z.task') + + +@cronjobs.register +def update_collections_subscribers(): + """Update collections subscribers totals.""" + + d = (CollectionSubscription.objects.values('collection_id') + .annotate(count=Count('collection')) + .extra(where=['DATE(created)=%s'], params=[datetime.date.today()])) + + with establish_connection() as conn: + for chunk in chunked(d, 1000): + _update_collections_subscribers.apply_async(args=[chunk], + connection=conn) + + +@task(rate_limit='15/m') +def _update_collections_subscribers(data, **kw): + task_log.debug("[%s@%s] Updating collections' subscribers totals." % + (len(data), '15/m')) + cursor = connection.cursor() + for var in data: + q = ('REPLACE INTO stats_collections(`date`, `name`, ' + '`collection_id`, `count`) VALUES (%s, %s, %s, %s)') + p = [datetime.date.today(), 'new_subscribers', + var['collection_id'], var['count']] + cursor.execute(q, p) + transaction.commit_unless_managed() + + +@cronjobs.register +def update_collections_votes(): + """Update collection's votes.""" + + up = (CollectionVote.objects.values('collection_id') + .annotate(count=Count('collection')) + .filter(vote=1) + .extra(where=['DATE(created)=%s'], params=[datetime.date.today()])) + + down = (CollectionVote.objects.values('collection_id') + .annotate(count=Count('collection')) + .filter(vote=-1) + .extra(where=['DATE(created)=%s'], params=[datetime.date.today()])) + + with establish_connection() as conn: + for chunk in chunked(up, 1000): + _update_collections_votes.apply_async(args=[chunk, "new_votes_up"], + connection=conn) + for chunk in chunked(down, 1000): + _update_collections_votes.apply_async(args=[chunk, + "new_votes_down"], + connection=conn) + + +@task(rate_limit='15/m') +def _update_collections_votes(data, stat, **kw): + task_log.debug("[%s@%s] Updating collections' subscribers totals." % + (len(data), '15/m')) + cursor = connection.cursor() + for var in data: + q = ('REPLACE INTO stats_collections(`date`, `name`, ' + '`collection_id`, `count`) VALUES (%s, %s, %s, %s)') + p = [datetime.date.today(), stat, + var['collection_id'], var['count']] + cursor.execute(q, p) + transaction.commit_unless_managed() diff --git a/apps/bandwagon/tasks.py b/apps/bandwagon/tasks.py new file mode 100644 index 0000000000..4917b9612f --- /dev/null +++ b/apps/bandwagon/tasks.py @@ -0,0 +1 @@ +from . import cron diff --git a/apps/stats/models.py b/apps/stats/models.py index 952439c366..dfa387dc65 100644 --- a/apps/stats/models.py +++ b/apps/stats/models.py @@ -26,6 +26,19 @@ class CollectionCount(caching.base.CachingMixin, models.Model): db_table = 'stats_collections_counts' +class CollectionStats(caching.base.CachingMixin, models.Model): + """In the running for worst-named model ever.""" + collection = models.ForeignKey('bandwagon.Collection') + name = models.CharField(max_length=255, null=True) + count = models.PositiveIntegerField() + date = models.DateField() + + objects = StatsManager('date') + + class Meta: + db_table = 'stats_collections' + + class DownloadCount(caching.base.CachingMixin, models.Model): addon = models.ForeignKey('addons.Addon') count = models.PositiveIntegerField() From e69cf15fd6a47b34568f5d44dab1a2d91dc78310 Mon Sep 17 00:00:00 2001 From: Wil Clouser Date: Tue, 4 May 2010 00:26:10 -0700 Subject: [PATCH 2/2] Migrate addons_collections_total --- apps/bandwagon/cron.py | 16 +++++++++------- apps/stats/cron.py | 29 ++++++++++++++++++++++++++--- apps/stats/models.py | 12 ++++++++++-- 3 files changed, 45 insertions(+), 12 deletions(-) diff --git a/apps/bandwagon/cron.py b/apps/bandwagon/cron.py index 61805b8405..d1dda1c48d 100644 --- a/apps/bandwagon/cron.py +++ b/apps/bandwagon/cron.py @@ -31,13 +31,15 @@ def update_collections_subscribers(): @task(rate_limit='15/m') def _update_collections_subscribers(data, **kw): task_log.debug("[%s@%s] Updating collections' subscribers totals." % - (len(data), '15/m')) + (len(data), _update_collections_subscribers.rate_limit)) cursor = connection.cursor() + today = datetime.date.today() for var in data: - q = ('REPLACE INTO stats_collections(`date`, `name`, ' - '`collection_id`, `count`) VALUES (%s, %s, %s, %s)') - p = [datetime.date.today(), 'new_subscribers', - var['collection_id'], var['count']] + q = """REPLACE INTO + stats_collections(`date`, `name`, `collection_id`, `count`) + VALUES + (%s, %s, %s, %s)""" + p = [today, 'new_subscribers', var['collection_id'], var['count']] cursor.execute(q, p) transaction.commit_unless_managed() @@ -68,8 +70,8 @@ def update_collections_votes(): @task(rate_limit='15/m') def _update_collections_votes(data, stat, **kw): - task_log.debug("[%s@%s] Updating collections' subscribers totals." % - (len(data), '15/m')) + task_log.debug("[%s@%s] Updating collections' votes totals." % + (len(data), _update_collections_votes.rate_limit)) cursor = connection.cursor() for var in data: q = ('REPLACE INTO stats_collections(`date`, `name`, ' diff --git a/apps/stats/cron.py b/apps/stats/cron.py index 8177018c71..fe154aff4d 100644 --- a/apps/stats/cron.py +++ b/apps/stats/cron.py @@ -1,18 +1,41 @@ import logging -from django.db import transaction from django.db.models import Sum from celery.decorators import task from celery.messaging import establish_connection -from .models import CollectionCount +from .models import (AddonCollectionCount, + CollectionCount) from amo.utils import chunked -from bandwagon.models import Collection +from bandwagon.models import Collection, CollectionAddon import cronjobs task_log = logging.getLogger('z.task') +@cronjobs.register +def update_addons_collections_downloads(): + """Update addons+collections download totals.""" + + d = (AddonCollectionCount.objects.values('addon', 'collection') + .annotate(sum=Sum('count'))) + + with establish_connection() as conn: + for chunk in chunked(d, 1000): + _update_addons_collections_downloads.apply_async(args=[chunk], + connection=conn) + + +@task(rate_limit='15/m') +def _update_addons_collections_downloads(data, **kw): + task_log.debug("[%s@%s] Updating addons+collections download totals." % + (len(data), '15/m')) + for var in data: + (CollectionAddon.objects.filter(addon=var['addon'], + collection=var['collection']) + .update(downloads=var['sum'])) + + @cronjobs.register def update_collections_total(): """Update collections downloads totals.""" diff --git a/apps/stats/models.py b/apps/stats/models.py index dfa387dc65..7a913ce511 100644 --- a/apps/stats/models.py +++ b/apps/stats/models.py @@ -15,6 +15,16 @@ from amo.utils import send_mail as amo_send_mail from .db import StatsDictField, StatsManager +class AddonCollectionCount(caching.base.CachingMixin, models.Model): + addon = models.ForeignKey('addons.Addon') + collection = models.ForeignKey('bandwagon.Collection') + count = models.PositiveIntegerField() + date = models.DateField() + + class Meta: + db_table = 'stats_addons_collections_counts' + + class CollectionCount(caching.base.CachingMixin, models.Model): collection = models.ForeignKey('bandwagon.Collection') count = models.PositiveIntegerField() @@ -33,8 +43,6 @@ class CollectionStats(caching.base.CachingMixin, models.Model): count = models.PositiveIntegerField() date = models.DateField() - objects = StatsManager('date') - class Meta: db_table = 'stats_collections'