From 1a98c6f3635938ffd59d76cc3a787447878b0587 Mon Sep 17 00:00:00 2001
From: Chris Beaven
Date: Tue, 23 Apr 2019 05:30:32 +1200
Subject: [PATCH] Django 1.11 cronjob changes (#3532)

* Remove django-cronjobs, replacing with proper management commands

Fixes #3515

* Fix some strptime imports
* Fix some call_command references
* Fix the output of a management command to be text
* Fix the date argument of two new management commands
* Lint fixes
* Fix test syntax error
---
 docs/armyofawesome.rst | 2 +-
 docs/badges.rst | 2 +-
 kitsune/community/cron.py | 78 --
 kitsune/community/management/__init__.py | 0
 .../community/management/commands/__init__.py | 0
 .../commands/send_welcome_emails.py | 84 +++
 kitsune/community/tests/test_cron.py | 13 +-
 kitsune/customercare/cron.py | 276 -------
 kitsune/customercare/management/__init__.py | 0
 .../management/commands/__init__.py | 0
 .../management/commands/collect_tweets.py | 164 +++++
 .../commands/get_customercare_stats.py | 92 +++
 .../management/commands/purge_tweets.py | 42 ++
 kitsune/dashboards/cron.py | 277 -------
 kitsune/dashboards/management/__init__.py | 0
 .../management/commands/__init__.py | 0
 .../cache_most_unhelpful_kb_articles.py | 158 ++++
 .../commands/reload_wiki_traffic_stats.py | 12 +
 .../update_l10n_contributor_metrics.py | 56 ++
 .../commands/update_l10n_coverage_metrics.py | 78 ++
 kitsune/dashboards/tests/test_cron.py | 52 +-
 kitsune/karma/cron.py | 12 -
 kitsune/karma/management/__init__.py | 0
 kitsune/karma/management/commands/__init__.py | 0
 .../commands/update_top_contributors.py | 13 +
 kitsune/kpi/cron.py | 694 ------------------
 kitsune/kpi/management/__init__.py | 0
 kitsune/kpi/management/commands/__init__.py | 0
 .../commands/calculate_csat_metrics.py | 111 +++
 .../management/commands/cohort_analysis.py | 91 +++
 .../management/commands/csat_survey_emails.py | 50 ++
 .../commands/process_exit_surveys.py | 51 ++
 .../commands/survey_recent_askers.py | 29 +
 .../commands/update_contributor_metrics.py | 189 +++++
 .../management/commands/update_l10n_metric.py | 69 ++
 .../commands/update_search_ctr_metric.py | 57 ++
 .../commands/update_visitors_metric.py | 39 +
 kitsune/kpi/management/utils.py | 159 ++++
 kitsune/kpi/tests/test_api.py | 25 +-
 kitsune/kpi/tests/test_cron.py | 50 +-
 kitsune/questions/cron.py | 210 ------
 .../commands/auto_archive_old_questions.py | 78 ++
 .../management/commands/escalate_questions.py | 45 ++
 .../commands/reload_question_traffic_stats.py | 11 +
 .../commands/report_employee_answers.py | 70 ++
 .../commands/update_weekly_votes.py | 31 +
 kitsune/questions/tests/test_cron.py | 9 +-
 kitsune/questions/tests/test_models.py | 23 +-
 kitsune/questions/tests/test_votes.py | 8 +-
 kitsune/settings.py | 1 -
 kitsune/sumo/cron.py | 11 -
 .../commands/enqueue_lag_monitor_task.py | 12 +
 kitsune/users/cron.py | 53 --
 .../commands/clear_expired_auth_tokens.py | 11 +
 ...eindex_users_that_contributed_yesterday.py | 39 +
 .../remove_expired_registration_profiles.py | 12 +
 kitsune/users/tests/test_es.py | 8 +-
 kitsune/wiki/cron.py | 150 ----
 .../commands/fix_current_revisions.py | 31 +
 .../commands/generate_missing_share_links.py | 27 +
 .../wiki/management/commands/rebuild_kb.py | 14 +
 .../wiki/management/commands/reindex_kb.py | 11 +
 .../send_weekly_ready_for_review_digest.py | 89 +++
 requirements/default.txt | 2 -
 scripts/cron.py | 54 +-
 65 files changed, 2146 insertions(+), 1889 deletions(-)
 delete mode 100644 kitsune/community/cron.py
 create mode 100644 kitsune/community/management/__init__.py
 create mode 100644 kitsune/community/management/commands/__init__.py
 create mode 100644 kitsune/community/management/commands/send_welcome_emails.py
 delete mode 100644 kitsune/customercare/cron.py
 create mode 100644 kitsune/customercare/management/__init__.py
 create mode 100644 kitsune/customercare/management/commands/__init__.py
 create mode 100644 kitsune/customercare/management/commands/collect_tweets.py
 create mode 100644 kitsune/customercare/management/commands/get_customercare_stats.py
 create mode 100644 kitsune/customercare/management/commands/purge_tweets.py
 delete mode 100644 kitsune/dashboards/cron.py
 create mode 100644 kitsune/dashboards/management/__init__.py
 create mode 100644 kitsune/dashboards/management/commands/__init__.py
 create mode 100644 kitsune/dashboards/management/commands/cache_most_unhelpful_kb_articles.py
 create mode 100644 kitsune/dashboards/management/commands/reload_wiki_traffic_stats.py
 create mode 100644 kitsune/dashboards/management/commands/update_l10n_contributor_metrics.py
 create mode 100644 kitsune/dashboards/management/commands/update_l10n_coverage_metrics.py
 delete mode 100644 kitsune/karma/cron.py
 create mode 100644 kitsune/karma/management/__init__.py
 create mode 100644 kitsune/karma/management/commands/__init__.py
 create mode 100644 kitsune/karma/management/commands/update_top_contributors.py
 delete mode 100644 kitsune/kpi/cron.py
 create mode 100644 kitsune/kpi/management/__init__.py
 create mode 100644 kitsune/kpi/management/commands/__init__.py
 create mode 100644 kitsune/kpi/management/commands/calculate_csat_metrics.py
 create mode 100644 kitsune/kpi/management/commands/cohort_analysis.py
 create mode 100644 kitsune/kpi/management/commands/csat_survey_emails.py
 create mode 100644 kitsune/kpi/management/commands/process_exit_surveys.py
 create mode 100644 kitsune/kpi/management/commands/survey_recent_askers.py
 create mode 100644 kitsune/kpi/management/commands/update_contributor_metrics.py
 create mode 100644 kitsune/kpi/management/commands/update_l10n_metric.py
 create mode 100644 kitsune/kpi/management/commands/update_search_ctr_metric.py
 create mode 100644 kitsune/kpi/management/commands/update_visitors_metric.py
 create mode 100644 kitsune/kpi/management/utils.py
 delete mode 100644 kitsune/questions/cron.py
 create mode 100644 kitsune/questions/management/commands/auto_archive_old_questions.py
 create mode 100644 kitsune/questions/management/commands/escalate_questions.py
 create mode 100644 kitsune/questions/management/commands/reload_question_traffic_stats.py
 create mode 100644 kitsune/questions/management/commands/report_employee_answers.py
 create mode 100644 kitsune/questions/management/commands/update_weekly_votes.py
 delete mode 100644 kitsune/sumo/cron.py
 create mode 100644 kitsune/sumo/management/commands/enqueue_lag_monitor_task.py
 delete mode 100644 kitsune/users/cron.py
 create mode 100644 kitsune/users/management/commands/clear_expired_auth_tokens.py
 create mode 100644 kitsune/users/management/commands/reindex_users_that_contributed_yesterday.py
 create mode 100644 kitsune/users/management/commands/remove_expired_registration_profiles.py
 delete mode 100644 kitsune/wiki/cron.py
 create mode 100644 kitsune/wiki/management/commands/fix_current_revisions.py
 create mode 100644 kitsune/wiki/management/commands/generate_missing_share_links.py
 create mode 100644 kitsune/wiki/management/commands/rebuild_kb.py
 create mode 100644 kitsune/wiki/management/commands/reindex_kb.py
 create mode 100644 kitsune/wiki/management/commands/send_weekly_ready_for_review_digest.py
diff 
--git a/docs/armyofawesome.rst b/docs/armyofawesome.rst index d1b2c8078..c0b94f43e 100644 --- a/docs/armyofawesome.rst +++ b/docs/armyofawesome.rst @@ -36,7 +36,7 @@ Fetch tweets To fetch tweets, run:: - $ ./manage.py cron collect_tweets + $ ./manage.py collect_tweets You should now see tweets at /army-of-awesome. diff --git a/docs/badges.rst b/docs/badges.rst index 7c4f713fd..fd46da076 100644 --- a/docs/badges.rst +++ b/docs/badges.rst @@ -40,7 +40,7 @@ The Army of Awesome Badge is awarded when a user has tweeted 50 Army of Awesome Logic for awarding this badge can be found in ``kitsune.customercare.badges``. -Logic for tweet collection (via the Twitter API) can be found in ``kitsune.customercare.cron``. +Logic for tweet collection (via the Twitter API) can be found in ``kitsune.customercare`` management commands. The number of replies needed is configurable in ``settings.BADGE_LIMIT_ARMY_OF_AWESOME``. diff --git a/kitsune/community/cron.py b/kitsune/community/cron.py deleted file mode 100644 index 69b5c59e8..000000000 --- a/kitsune/community/cron.py +++ /dev/null @@ -1,78 +0,0 @@ -import cronjobs -from datetime import datetime, timedelta - -from django.conf import settings -from django.contrib.auth.models import User -from django.contrib.sites.models import Site -from django.db.models import F, Q -from django.utils.translation import ugettext as _ - -from kitsune.questions.models import Answer -from kitsune.sumo.email_utils import make_mail, safe_translation, send_messages -from kitsune.users.models import Profile -from kitsune.wiki.models import Revision - - -@cronjobs.register -def send_welcome_emails(): - """Send a welcome email to first time contributors. - - Anyone who has made a contribution more than 24 hours ago and has not - already gotten a welcome email should get a welcome email. - """ - - wait_period = datetime.now() - timedelta(hours=24) - messages = [] - context = { - 'host': Site.objects.get_current().domain, - } - - # Answers - - answer_filter = Q(created__lte=wait_period) - answer_filter &= ~Q(question__creator=F('creator')) - answer_filter &= Q(creator__profile__first_answer_email_sent=False) - - answer_recipient_ids = set( - Answer.objects - .filter(answer_filter) - .values_list('creator', flat=True)) - - @safe_translation - def _make_answer_email(locale, to): - return make_mail(subject=_('Thank you for your contribution to Mozilla Support!'), - text_template='community/email/first_answer.ltxt', - html_template='community/email/first_answer.html', - context_vars=context, - from_email=settings.TIDINGS_FROM_ADDRESS, - to_email=to.email) - - for user in User.objects.filter(id__in=answer_recipient_ids): - messages.append(_make_answer_email(user.profile.locale, user)) - - # Localization - - l10n_filter = Q(created__lte=wait_period) - l10n_filter &= ~Q(document__locale=settings.WIKI_DEFAULT_LANGUAGE) - l10n_filter &= Q(creator__profile__first_l10n_email_sent=False) - - l10n_recipient_ids = set( - Revision.objects - .filter(l10n_filter) - .values_list('creator', flat=True)) - - # This doesn't need localized, and so don't need the `safe_translation` helper. - for user in User.objects.filter(id__in=l10n_recipient_ids): - messages.append(make_mail( - subject='Thank you for your contribution to Mozilla Support!', - text_template='community/email/first_l10n.ltxt', - html_template='community/email/first_l10n.html', - context_vars=context, - from_email=settings.TIDINGS_FROM_ADDRESS, - to_email=user.email)) - - # Release the Kraken! 
- send_messages(messages) - - Profile.objects.filter(user__id__in=answer_recipient_ids).update(first_answer_email_sent=True) - Profile.objects.filter(user__id__in=l10n_recipient_ids).update(first_l10n_email_sent=True) diff --git a/kitsune/community/management/__init__.py b/kitsune/community/management/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/kitsune/community/management/commands/__init__.py b/kitsune/community/management/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/kitsune/community/management/commands/send_welcome_emails.py b/kitsune/community/management/commands/send_welcome_emails.py new file mode 100644 index 000000000..bffc275bb --- /dev/null +++ b/kitsune/community/management/commands/send_welcome_emails.py @@ -0,0 +1,84 @@ +from datetime import datetime, timedelta + +from django.conf import settings +from django.contrib.auth.models import User +from django.contrib.sites.models import Site +from django.core.management.base import BaseCommand +from django.db.models import F, Q +from django.utils.translation import ugettext as _ + +from kitsune.questions.models import Answer +from kitsune.sumo.email_utils import make_mail, safe_translation, send_messages +from kitsune.users.models import Profile +from kitsune.wiki.models import Revision + + +class Command(BaseCommand): + help = "Send a welcome email to first time contributors." + + def handle(self, **options): + """ + Anyone who has made a contribution more than 24 hours ago and has not + already gotten a welcome email should get a welcome email. + """ + + wait_period = datetime.now() - timedelta(hours=24) + messages = [] + context = {"host": Site.objects.get_current().domain} + + # Answers + + answer_filter = Q(created__lte=wait_period) + answer_filter &= ~Q(question__creator=F("creator")) + answer_filter &= Q(creator__profile__first_answer_email_sent=False) + + answer_recipient_ids = set( + Answer.objects.filter(answer_filter).values_list("creator", flat=True) + ) + + @safe_translation + def _make_answer_email(locale, to): + return make_mail( + subject=_("Thank you for your contribution to Mozilla Support!"), + text_template="community/email/first_answer.ltxt", + html_template="community/email/first_answer.html", + context_vars=context, + from_email=settings.TIDINGS_FROM_ADDRESS, + to_email=to.email, + ) + + for user in User.objects.filter(id__in=answer_recipient_ids): + messages.append(_make_answer_email(user.profile.locale, user)) + + # Localization + + l10n_filter = Q(created__lte=wait_period) + l10n_filter &= ~Q(document__locale=settings.WIKI_DEFAULT_LANGUAGE) + l10n_filter &= Q(creator__profile__first_l10n_email_sent=False) + + l10n_recipient_ids = set( + Revision.objects.filter(l10n_filter).values_list("creator", flat=True) + ) + + # This doesn't need localized, and so don't need the `safe_translation` helper. + for user in User.objects.filter(id__in=l10n_recipient_ids): + messages.append( + make_mail( + subject="Thank you for your contribution to Mozilla Support!", + text_template="community/email/first_l10n.ltxt", + html_template="community/email/first_l10n.html", + context_vars=context, + from_email=settings.TIDINGS_FROM_ADDRESS, + to_email=user.email, + ) + ) + + # Release the Kraken! 
+ send_messages(messages) + + Profile.objects.filter(user__id__in=answer_recipient_ids).update( + first_answer_email_sent=True + ) + Profile.objects.filter(user__id__in=l10n_recipient_ids).update( + first_l10n_email_sent=True + ) diff --git a/kitsune/community/tests/test_cron.py b/kitsune/community/tests/test_cron.py index b49679184..278c0abd9 100644 --- a/kitsune/community/tests/test_cron.py +++ b/kitsune/community/tests/test_cron.py @@ -1,16 +1,15 @@ from datetime import datetime, timedelta -from django.core import mail +import mock from django.contrib.auth.models import User from django.contrib.sites.models import Site +from django.core import mail +from django.core.management import call_command from django.test.utils import override_settings - -import mock from nose.tools import eq_ -from kitsune.community import cron from kitsune.questions.tests import AnswerFactory, QuestionFactory -from kitsune.sumo.tests import attrs_eq, TestCase +from kitsune.sumo.tests import TestCase, attrs_eq from kitsune.users.tests import UserFactory from kitsune.wiki.tests import DocumentFactory, RevisionFactory @@ -35,7 +34,7 @@ class WelcomeEmailsTests(TestCase): # Clear out the notifications that were sent mail.outbox = [] # Send email(s) for welcome messages - cron.send_welcome_emails() + call_command('send_welcome_emails') # There should be an email for u3 only. # u1 was the asker, and so did not make a contribution. @@ -76,7 +75,7 @@ class WelcomeEmailsTests(TestCase): # Clear out the notifications that were sent mail.outbox = [] # Send email(s) for welcome messages - cron.send_welcome_emails() + call_command('send_welcome_emails') # There should be an email for u1 only. # u2 has already recieved the email diff --git a/kitsune/customercare/cron.py b/kitsune/customercare/cron.py deleted file mode 100644 index 18b204a38..000000000 --- a/kitsune/customercare/cron.py +++ /dev/null @@ -1,276 +0,0 @@ -import calendar -from datetime import datetime, timedelta -import json -import logging -import re -import rfc822 - -from django.conf import settings -from django.db.utils import IntegrityError - -import cronjobs -from multidb.pinning import pin_this_thread -from django_statsd.clients import statsd - -from kitsune.customercare.models import Tweet, TwitterAccount, Reply -from kitsune.sumo.redis_utils import redis_client, RedisError -from kitsune.sumo.utils import chunked -from kitsune.twitter import get_twitter_api - - -LINK_REGEX = re.compile('https?\:', re.IGNORECASE) -RT_REGEX = re.compile('^rt\W', re.IGNORECASE) - -ALLOWED_USERS = [ - {'id': 2142731, 'username': 'Firefox'}, - {'id': 150793437, 'username': 'FirefoxBrasil'}, - {'id': 107272435, 'username': 'firefox_es'}, -] - -log = logging.getLogger('k.twitter') - - -def get_word_blacklist_regex(): - """ - Make a regex that looks kind of like r'\b(foo|bar|baz)\b'. - - This is a function so that it isn't calculated at import time, - and so can be tested more easily. - - This doesn't use raw strings (r'') because the "mismatched" parens - were confusing my syntax highlighter, which was confusing me. - """ - return re.compile( - '\\b(' + - '|'.join(map(re.escape, settings.CC_WORD_BLACKLIST)) + - ')\\b') - - -@cronjobs.register -def collect_tweets(): - # Don't (ab)use the twitter API from dev and stage. 
- if settings.STAGE: - return - - """Collect new tweets about Firefox.""" - with statsd.timer('customercare.tweets.time_elapsed'): - t = get_twitter_api() - - search_options = { - 'q': ('firefox OR #fxinput OR @firefoxbrasil OR #firefoxos ' - 'OR @firefox_es'), - 'count': settings.CC_TWEETS_PERPAGE, # Items per page. - 'result_type': 'recent', # Retrieve tweets by date. - } - - # If we already have some tweets, collect nothing older than what we - # have. - try: - latest_tweet = Tweet.latest() - except Tweet.DoesNotExist: - log.debug('No existing tweets. Retrieving %d tweets from search.' % - settings.CC_TWEETS_PERPAGE) - else: - search_options['since_id'] = latest_tweet.tweet_id - log.info('Retrieving tweets with id >= %s' % latest_tweet.tweet_id) - - # Retrieve Tweets - results = t.search(**search_options) - - if len(results['statuses']) == 0: - # Twitter returned 0 results. - return - - # Drop tweets into DB - for item in results['statuses']: - # Apply filters to tweet before saving - # Allow links in #fxinput tweets - statsd.incr('customercare.tweet.collected') - item = _filter_tweet(item, - allow_links='#fxinput' in item['text']) - if not item: - continue - - created_date = datetime.utcfromtimestamp(calendar.timegm( - rfc822.parsedate(item['created_at']))) - - item_lang = item['metadata'].get('iso_language_code', 'en') - - tweet = Tweet(tweet_id=item['id'], raw_json=json.dumps(item), - locale=item_lang, created=created_date) - try: - tweet.save() - statsd.incr('customercare.tweet.saved') - except IntegrityError: - pass - - -@cronjobs.register -def purge_tweets(): - """Periodically purge old tweets for each locale. - - This does a lot of DELETEs on master, so it shouldn't run too frequently. - Probably once every hour or more. - - """ - # Pin to master - pin_this_thread() - - # Build list of tweets to delete, by id. - for locale in settings.SUMO_LANGUAGES: - locale = settings.LOCALES[locale].iso639_1 - # Some locales don't have an iso639_1 code, too bad for them. - if not locale: - continue - oldest = _get_oldest_tweet(locale, settings.CC_MAX_TWEETS) - if oldest: - log.debug('Truncating tweet list: Removing tweets older than %s, ' - 'for [%s].' % (oldest.created, locale)) - Tweet.objects.filter(locale=locale, - created__lte=oldest.created).delete() - - -def _get_oldest_tweet(locale, n=0): - """Returns the nth oldest tweet per locale, defaults to newest.""" - try: - return Tweet.objects.filter(locale=locale).order_by( - '-created')[n] - except IndexError: - return None - - -def _filter_tweet(item, allow_links=False): - """ - Apply some filters to an incoming tweet. - - May modify tweet. If None is returned, tweet will be discarded. - Used to exclude replies and such from incoming tweets. 
- """ - text = item['text'].lower() - # No replies, except to ALLOWED_USERS - allowed_user_ids = [u['id'] for u in ALLOWED_USERS] - to_user_id = item.get('to_user_id') - if to_user_id and to_user_id not in allowed_user_ids: - statsd.incr('customercare.tweet.rejected.reply_or_mention') - return None - - # No mentions, except of ALLOWED_USERS - for user in item['entities']['user_mentions']: - if user['id'] not in allowed_user_ids: - statsd.incr('customercare.tweet.rejected.reply_or_mention') - return None - - # No retweets - if RT_REGEX.search(text) or text.find('(via ') > -1: - statsd.incr('customercare.tweet.rejected.retweet') - return None - - # No links - if not allow_links and LINK_REGEX.search(text): - statsd.incr('customercare.tweet.rejected.link') - return None - - screen_name = item['user']['screen_name'] - - # Django's caching system will save us here. - IGNORED_USERS = set( - TwitterAccount.objects - .filter(ignored=True) - .values_list('username', flat=True) - ) - - # Exclude filtered users - if screen_name in IGNORED_USERS: - statsd.incr('customercare.tweet.rejected.user') - return None - - # Exlude users with firefox in the handle - if 'firefox' in screen_name.lower(): - statsd.incr('customercare.tweet.rejected.firefox_in_handle') - return None - - # Exclude problem words - match = get_word_blacklist_regex().search(text) - if match: - bad_word = match.group(1) - statsd.incr('customercare.tweet.rejected.blacklist_word.' + bad_word) - return None - - return item - - -@cronjobs.register -def get_customercare_stats(): - """ - Generate customer care stats from the Replies table. - - This gets cached in Redis as a sorted list of contributors, stored as JSON. - - Example Top Contributor data: - - [ - { - 'twitter_username': 'username1', - 'avatar': 'http://twitter.com/path/to/the/avatar.png', - 'avatar_https': 'https://twitter.com/path/to/the/avatar.png', - 'all': 5211, - '1m': 230, - '1w': 33, - '1d': 3, - }, - { ... }, - { ... 
}, - ] - """ - if settings.STAGE: - return - - contributor_stats = {} - - now = datetime.now() - one_month_ago = now - timedelta(days=30) - one_week_ago = now - timedelta(days=7) - yesterday = now - timedelta(days=1) - - for chunk in chunked(Reply.objects.all(), 2500, Reply.objects.count()): - for reply in chunk: - user = reply.twitter_username - if user not in contributor_stats: - raw = json.loads(reply.raw_json) - if 'from_user' in raw: # For tweets collected using v1 API - user_data = raw - else: - user_data = raw['user'] - - contributor_stats[user] = { - 'twitter_username': user, - 'avatar': user_data['profile_image_url'], - 'avatar_https': user_data['profile_image_url_https'], - 'all': 0, '1m': 0, '1w': 0, '1d': 0, - } - contributor = contributor_stats[reply.twitter_username] - - contributor['all'] += 1 - if reply.created > one_month_ago: - contributor['1m'] += 1 - if reply.created > one_week_ago: - contributor['1w'] += 1 - if reply.created > yesterday: - contributor['1d'] += 1 - - sort_key = settings.CC_TOP_CONTRIB_SORT - limit = settings.CC_TOP_CONTRIB_LIMIT - # Sort by whatever is in settings, break ties with 'all' - contributor_stats = sorted(contributor_stats.values(), - key=lambda c: (c[sort_key], c['all']), - reverse=True)[:limit] - - try: - redis = redis_client(name='default') - key = settings.CC_TOP_CONTRIB_CACHE_KEY - redis.set(key, json.dumps(contributor_stats)) - except RedisError as e: - statsd.incr('redis.error') - log.error('Redis error: %s' % e) - - return contributor_stats diff --git a/kitsune/customercare/management/__init__.py b/kitsune/customercare/management/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/kitsune/customercare/management/commands/__init__.py b/kitsune/customercare/management/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/kitsune/customercare/management/commands/collect_tweets.py b/kitsune/customercare/management/commands/collect_tweets.py new file mode 100644 index 000000000..edfcb6b7e --- /dev/null +++ b/kitsune/customercare/management/commands/collect_tweets.py @@ -0,0 +1,164 @@ +import calendar +import json +import logging +import re +import rfc822 +from datetime import datetime + +from django.conf import settings +from django.core.management.base import BaseCommand +from django.db.utils import IntegrityError +from django_statsd.clients import statsd + +from kitsune.customercare.models import Tweet, TwitterAccount +from kitsune.twitter import get_twitter_api + +LINK_REGEX = re.compile('https?\:', re.IGNORECASE) +RT_REGEX = re.compile('^rt\W', re.IGNORECASE) + +ALLOWED_USERS = [ + {'id': 2142731, 'username': 'Firefox'}, + {'id': 150793437, 'username': 'FirefoxBrasil'}, + {'id': 107272435, 'username': 'firefox_es'}, +] + +log = logging.getLogger('k.twitter') + + +def get_word_blacklist_regex(): + """ + Make a regex that looks kind of like r'\b(foo|bar|baz)\b'. + + This is a function so that it isn't calculated at import time, + and so can be tested more easily. + + This doesn't use raw strings (r'') because the "mismatched" parens + were confusing my syntax highlighter, which was confusing me. + """ + return re.compile( + '\\b(' + + '|'.join(map(re.escape, settings.CC_WORD_BLACKLIST)) + + ')\\b') + + +class Command(BaseCommand): + + def handle(self, **options): + # Don't (ab)use the twitter API from dev and stage. 
+ if settings.STAGE: + return + + """Collect new tweets about Firefox.""" + with statsd.timer('customercare.tweets.time_elapsed'): + t = get_twitter_api() + + search_options = { + 'q': 'firefox OR #fxinput OR @firefoxbrasil OR #firefoxos OR @firefox_es', + 'count': settings.CC_TWEETS_PERPAGE, # Items per page. + 'result_type': 'recent', # Retrieve tweets by date. + } + + # If we already have some tweets, collect nothing older than what we + # have. + try: + latest_tweet = Tweet.latest() + except Tweet.DoesNotExist: + log.debug( + 'No existing tweets. Retrieving %d tweets from search.' % + settings.CC_TWEETS_PERPAGE) + else: + search_options['since_id'] = latest_tweet.tweet_id + log.info('Retrieving tweets with id >= %s' % latest_tweet.tweet_id) + + # Retrieve Tweets + results = t.search(**search_options) + + if len(results['statuses']) == 0: + # Twitter returned 0 results. + return + + # Drop tweets into DB + for item in results['statuses']: + # Apply filters to tweet before saving + # Allow links in #fxinput tweets + statsd.incr('customercare.tweet.collected') + item = _filter_tweet(item, allow_links='#fxinput' in item['text']) + if not item: + continue + + created_date = datetime.utcfromtimestamp(calendar.timegm( + rfc822.parsedate(item['created_at']))) + + item_lang = item['metadata'].get('iso_language_code', 'en') + + tweet = Tweet( + tweet_id=item['id'], + raw_json=json.dumps(item), + locale=item_lang, + created=created_date, + ) + try: + tweet.save() + statsd.incr('customercare.tweet.saved') + except IntegrityError: + pass + + +def _filter_tweet(item, allow_links=False): + """ + Apply some filters to an incoming tweet. + + May modify tweet. If None is returned, tweet will be discarded. + Used to exclude replies and such from incoming tweets. + """ + text = item['text'].lower() + # No replies, except to ALLOWED_USERS + allowed_user_ids = [u['id'] for u in ALLOWED_USERS] + to_user_id = item.get('to_user_id') + if to_user_id and to_user_id not in allowed_user_ids: + statsd.incr('customercare.tweet.rejected.reply_or_mention') + return None + + # No mentions, except of ALLOWED_USERS + for user in item['entities']['user_mentions']: + if user['id'] not in allowed_user_ids: + statsd.incr('customercare.tweet.rejected.reply_or_mention') + return None + + # No retweets + if RT_REGEX.search(text) or text.find('(via ') > -1: + statsd.incr('customercare.tweet.rejected.retweet') + return None + + # No links + if not allow_links and LINK_REGEX.search(text): + statsd.incr('customercare.tweet.rejected.link') + return None + + screen_name = item['user']['screen_name'] + + # Django's caching system will save us here. + IGNORED_USERS = set( + TwitterAccount.objects + .filter(ignored=True) + .values_list('username', flat=True) + ) + + # Exclude filtered users + if screen_name in IGNORED_USERS: + statsd.incr('customercare.tweet.rejected.user') + return None + + # Exlude users with firefox in the handle + if 'firefox' in screen_name.lower(): + statsd.incr('customercare.tweet.rejected.firefox_in_handle') + return None + + # Exclude problem words + match = get_word_blacklist_regex().search(text) + if match: + bad_word = match.group(1) + statsd.incr('customercare.tweet.rejected.blacklist_word.' 
+ bad_word) + return None + + return item diff --git a/kitsune/customercare/management/commands/get_customercare_stats.py b/kitsune/customercare/management/commands/get_customercare_stats.py new file mode 100644 index 000000000..51c59a791 --- /dev/null +++ b/kitsune/customercare/management/commands/get_customercare_stats.py @@ -0,0 +1,92 @@ +import json +import logging +from datetime import datetime, timedelta + +from django.conf import settings +from django.core.management.base import BaseCommand +from django_statsd.clients import statsd + +from kitsune.customercare.models import Reply +from kitsune.sumo.redis_utils import RedisError, redis_client +from kitsune.sumo.utils import chunked + +log = logging.getLogger('k.twitter') + + +class Command(BaseCommand): + help = "Generate customer care stats from the Replies table." + + def handle(self, **options): + """ + This gets cached in Redis as a sorted list of contributors, stored as JSON. + + Example Top Contributor data: + + [ + { + 'twitter_username': 'username1', + 'avatar': 'http://twitter.com/path/to/the/avatar.png', + 'avatar_https': 'https://twitter.com/path/to/the/avatar.png', + 'all': 5211, + '1m': 230, + '1w': 33, + '1d': 3, + }, + { ... }, + { ... }, + ] + """ + if settings.STAGE: + return + + contributor_stats = {} + + now = datetime.now() + one_month_ago = now - timedelta(days=30) + one_week_ago = now - timedelta(days=7) + yesterday = now - timedelta(days=1) + + for chunk in chunked(Reply.objects.all(), 2500, Reply.objects.count()): + for reply in chunk: + user = reply.twitter_username + if user not in contributor_stats: + raw = json.loads(reply.raw_json) + if 'from_user' in raw: # For tweets collected using v1 API + user_data = raw + else: + user_data = raw['user'] + + contributor_stats[user] = { + 'twitter_username': user, + 'avatar': user_data['profile_image_url'], + 'avatar_https': user_data['profile_image_url_https'], + 'all': 0, '1m': 0, '1w': 0, '1d': 0, + } + contributor = contributor_stats[reply.twitter_username] + + contributor['all'] += 1 + if reply.created > one_month_ago: + contributor['1m'] += 1 + if reply.created > one_week_ago: + contributor['1w'] += 1 + if reply.created > yesterday: + contributor['1d'] += 1 + + sort_key = settings.CC_TOP_CONTRIB_SORT + limit = settings.CC_TOP_CONTRIB_LIMIT + # Sort by whatever is in settings, break ties with 'all' + contributor_stats = sorted( + contributor_stats.values(), + key=lambda c: (c[sort_key], c['all']), + reverse=True, + )[:limit] + + try: + redis = redis_client(name='default') + key = settings.CC_TOP_CONTRIB_CACHE_KEY + redis.set(key, json.dumps(contributor_stats)) + except RedisError as e: + statsd.incr('redis.error') + log.error('Redis error: %s' % e) + + return contributor_stats diff --git a/kitsune/customercare/management/commands/purge_tweets.py b/kitsune/customercare/management/commands/purge_tweets.py new file mode 100644 index 000000000..340b5d860 --- /dev/null +++ b/kitsune/customercare/management/commands/purge_tweets.py @@ -0,0 +1,42 @@ +import logging + +from django.conf import settings +from django.core.management.base import BaseCommand +from multidb.pinning import pin_this_thread + +from kitsune.customercare.models import Tweet + +log = logging.getLogger('k.twitter') + + +class Command(BaseCommand): + help = "Periodically purge old tweets for each locale." + + def handle(self, **options): + """ + This does a lot of DELETEs on master, so it shouldn't run too frequently. + Probably once every hour or more. 
+ """ + # Pin to master + pin_this_thread() + + # Build list of tweets to delete, by id. + for locale in settings.SUMO_LANGUAGES: + locale = settings.LOCALES[locale].iso639_1 + # Some locales don't have an iso639_1 code, too bad for them. + if not locale: + continue + oldest = _get_oldest_tweet(locale, settings.CC_MAX_TWEETS) + if oldest: + log.debug( + 'Truncating tweet list: Removing tweets older than %s, for [%s].' % + (oldest.created, locale)) + Tweet.objects.filter(locale=locale, created__lte=oldest.created).delete() + + +def _get_oldest_tweet(locale, n=0): + """Returns the nth oldest tweet per locale, defaults to newest.""" + try: + return Tweet.objects.filter(locale=locale).order_by('-created')[n] + except IndexError: + return None diff --git a/kitsune/dashboards/cron.py b/kitsune/dashboards/cron.py deleted file mode 100644 index 38a6a99f9..000000000 --- a/kitsune/dashboards/cron.py +++ /dev/null @@ -1,277 +0,0 @@ -from datetime import date - -from django.conf import settings -from django.db import connection - -import cronjobs - -from kitsune.dashboards.models import ( - PERIODS, WikiDocumentVisits, WikiMetric, L10N_TOP20_CODE, L10N_TOP100_CODE, L10N_ALL_CODE, - L10N_ACTIVE_CONTRIBUTORS_CODE) -from kitsune.dashboards.readouts import l10n_overview_rows -from kitsune.products.models import Product -from kitsune.sumo.redis_utils import redis_client -from kitsune.wiki.models import Document -from kitsune.wiki.utils import num_active_contributors - - -@cronjobs.register -def reload_wiki_traffic_stats(): - for period, _ in PERIODS: - WikiDocumentVisits.reload_period_from_analytics( - period, verbose=settings.DEBUG) - - -@cronjobs.register -def update_l10n_coverage_metrics(): - """Calculate and store the l10n metrics for each locale/product. - - The metrics are: - * Percent localized of top 20 articles - * Percent localized of all articles - """ - today = date.today() - - # Loop through all locales. - for locale in settings.SUMO_LANGUAGES: - - # Skip en-US, it is always 100% localized. - if locale == settings.WIKI_DEFAULT_LANGUAGE: - continue - - # Loop through all enabled products, including None (really All). - for product in [None] + list(Product.objects.filter(visible=True)): - - # (Ab)use the l10n_overview_rows helper from the readouts. - rows = l10n_overview_rows(locale=locale, product=product) - - # % of top 20 articles - top20 = rows['top-20'] - - try: - percent = 100.0 * float(top20['numerator']) / top20['denominator'] - except ZeroDivisionError: - percent = 0.0 - - WikiMetric.objects.create( - code=L10N_TOP20_CODE, - locale=locale, - product=product, - date=today, - value=percent) - - # % of top 100 articles - top100 = rows['top-100'] - - try: - percent = 100.0 * float(top100['numerator']) / top100['denominator'] - except ZeroDivisionError: - percent = 0.0 - - WikiMetric.objects.create( - code=L10N_TOP100_CODE, - locale=locale, - product=product, - date=today, - value=percent) - - # % of all articles - all_ = rows['all'] - try: - percent = 100 * float(all_['numerator']) / all_['denominator'] - except ZeroDivisionError: - percent = 0.0 - - WikiMetric.objects.create( - code=L10N_ALL_CODE, - locale=locale, - product=product, - date=today, - value=percent) - - -@cronjobs.register -def update_l10n_contributor_metrics(day=None): - """Update the number of active contributors for each locale/product. - - An active contributor is defined as a user that created or reviewed a - revision in the previous calendar month. 
- """ - if day is None: - day = date.today() - first_of_month = date(day.year, day.month, 1) - if day.month == 1: - previous_first_of_month = date(day.year - 1, 12, 1) - else: - previous_first_of_month = date(day.year, day.month - 1, 1) - - # Loop through all locales. - for locale in settings.SUMO_LANGUAGES: - - # Loop through all enabled products, including None (really All). - for product in [None] + list(Product.objects.filter(visible=True)): - - num = num_active_contributors( - from_date=previous_first_of_month, - to_date=first_of_month, - locale=locale, - product=product) - - WikiMetric.objects.create( - code=L10N_ACTIVE_CONTRIBUTORS_CODE, - locale=locale, - product=product, - date=previous_first_of_month, - value=num) - - -def _get_old_unhelpful(): - """ - Gets the data from 2 weeks ago and formats it as output so that we can - get a percent change. - """ - - old_formatted = {} - cursor = connection.cursor() - - cursor.execute( - """SELECT doc_id, yes, no - FROM - (SELECT wiki_revision.document_id as doc_id, - SUM(limitedvotes.helpful) as yes, - SUM(NOT(limitedvotes.helpful)) as no - FROM - (SELECT * FROM wiki_helpfulvote - WHERE created <= DATE_SUB(CURDATE(), INTERVAL 1 WEEK) - AND created >= DATE_SUB(DATE_SUB(CURDATE(), - INTERVAL 1 WEEK), INTERVAL 1 WEEK) - ) as limitedvotes - INNER JOIN wiki_revision ON - limitedvotes.revision_id=wiki_revision.id - INNER JOIN wiki_document ON - wiki_document.id=wiki_revision.document_id - WHERE wiki_document.locale="en-US" - GROUP BY doc_id - HAVING no > yes - ) as calculated""") - - old_data = cursor.fetchall() - - for data in old_data: - doc_id = data[0] - yes = float(data[1]) - no = float(data[2]) - total = yes + no - if total == 0: - continue - old_formatted[doc_id] = {'total': total, - 'percentage': yes / total} - - return old_formatted - - -def _get_current_unhelpful(old_formatted): - """Gets the data for the past week and formats it as return value.""" - - final = {} - cursor = connection.cursor() - - cursor.execute( - """SELECT doc_id, yes, no - FROM - (SELECT wiki_revision.document_id as doc_id, - SUM(limitedvotes.helpful) as yes, - SUM(NOT(limitedvotes.helpful)) as no - FROM - (SELECT * FROM wiki_helpfulvote - WHERE created >= DATE_SUB(CURDATE(), INTERVAL 1 WEEK) - ) as limitedvotes - INNER JOIN wiki_revision ON - limitedvotes.revision_id=wiki_revision.id - INNER JOIN wiki_document ON - wiki_document.id=wiki_revision.document_id - WHERE wiki_document.locale="en-US" - GROUP BY doc_id - HAVING no > yes - ) as calculated""") - - current_data = cursor.fetchall() - - for data in current_data: - doc_id = data[0] - yes = float(data[1]) - no = float(data[2]) - total = yes + no - if total == 0: - continue - percentage = yes / total - if doc_id in old_formatted: - final[doc_id] = { - 'total': total, - 'currperc': percentage, - 'diffperc': percentage - old_formatted[doc_id]['percentage'] - } - else: - final[doc_id] = { - 'total': total, - 'currperc': percentage, - 'diffperc': 0.0 - } - - return final - - -@cronjobs.register -def cache_most_unhelpful_kb_articles(): - """Calculate and save the most unhelpful KB articles in the past month.""" - - REDIS_KEY = settings.HELPFULVOTES_UNHELPFUL_KEY - - old_formatted = _get_old_unhelpful() - final = _get_current_unhelpful(old_formatted) - - if final == {}: - return - - def _mean(vals): - """Argument: List of floats""" - if len(vals) == 0: - return None - return sum(vals) / len(vals) - - def _bayes_avg(C, m, R, v): - # Bayesian Average - # C = mean vote, v = number of votes, - # R = mean rating, m = minimum 
votes to list in topranked - return (C * m + R * v) / (m + v) - - mean_perc = _mean([float(final[key]['currperc']) for key in final.keys()]) - mean_total = _mean([float(final[key]['total']) for key in final.keys()]) - - # TODO: Make this into namedtuples - sorted_final = [(key, - final[key]['total'], - final[key]['currperc'], - final[key]['diffperc'], - _bayes_avg(mean_perc, mean_total, - final[key]['currperc'], - final[key]['total'])) - for key in final.keys()] - sorted_final.sort(key=lambda entry: entry[4]) # Sort by Bayesian Avg - - redis = redis_client('helpfulvotes') - - redis.delete(REDIS_KEY) - - max_total = max([b[1] for b in sorted_final]) - - for entry in sorted_final: - doc = Document.objects.get(pk=entry[0]) - redis.rpush(REDIS_KEY, (u'%s::%s::%s::%s::%s::%s::%s' % - (entry[0], # Document ID - entry[1], # Total Votes - entry[2], # Current Percentage - entry[3], # Difference in Percentage - 1 - (entry[1] / max_total), # Graph Color - doc.slug, # Document slug - doc.title))) # Document title diff --git a/kitsune/dashboards/management/__init__.py b/kitsune/dashboards/management/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/kitsune/dashboards/management/commands/__init__.py b/kitsune/dashboards/management/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/kitsune/dashboards/management/commands/cache_most_unhelpful_kb_articles.py b/kitsune/dashboards/management/commands/cache_most_unhelpful_kb_articles.py new file mode 100644 index 000000000..46532c472 --- /dev/null +++ b/kitsune/dashboards/management/commands/cache_most_unhelpful_kb_articles.py @@ -0,0 +1,158 @@ +from django.conf import settings +from django.core.management.base import BaseCommand +from django.db import connection + +from kitsune.sumo.redis_utils import redis_client +from kitsune.wiki.models import Document + + +def _get_old_unhelpful(): + """ + Gets the data from 2 weeks ago and formats it as output so that we can + get a percent change. 
+ """ + + old_formatted = {} + cursor = connection.cursor() + + cursor.execute( + """SELECT doc_id, yes, no + FROM + (SELECT wiki_revision.document_id as doc_id, + SUM(limitedvotes.helpful) as yes, + SUM(NOT(limitedvotes.helpful)) as no + FROM + (SELECT * FROM wiki_helpfulvote + WHERE created <= DATE_SUB(CURDATE(), INTERVAL 1 WEEK) + AND created >= DATE_SUB(DATE_SUB(CURDATE(), + INTERVAL 1 WEEK), INTERVAL 1 WEEK) + ) as limitedvotes + INNER JOIN wiki_revision ON + limitedvotes.revision_id=wiki_revision.id + INNER JOIN wiki_document ON + wiki_document.id=wiki_revision.document_id + WHERE wiki_document.locale="en-US" + GROUP BY doc_id + HAVING no > yes + ) as calculated""") + + old_data = cursor.fetchall() + + for data in old_data: + doc_id = data[0] + yes = float(data[1]) + no = float(data[2]) + total = yes + no + if total == 0: + continue + old_formatted[doc_id] = {'total': total, + 'percentage': yes / total} + + return old_formatted + + +def _get_current_unhelpful(old_formatted): + """Gets the data for the past week and formats it as return value.""" + + final = {} + cursor = connection.cursor() + + cursor.execute( + """SELECT doc_id, yes, no + FROM + (SELECT wiki_revision.document_id as doc_id, + SUM(limitedvotes.helpful) as yes, + SUM(NOT(limitedvotes.helpful)) as no + FROM + (SELECT * FROM wiki_helpfulvote + WHERE created >= DATE_SUB(CURDATE(), INTERVAL 1 WEEK) + ) as limitedvotes + INNER JOIN wiki_revision ON + limitedvotes.revision_id=wiki_revision.id + INNER JOIN wiki_document ON + wiki_document.id=wiki_revision.document_id + WHERE wiki_document.locale="en-US" + GROUP BY doc_id + HAVING no > yes + ) as calculated""") + + current_data = cursor.fetchall() + + for data in current_data: + doc_id = data[0] + yes = float(data[1]) + no = float(data[2]) + total = yes + no + if total == 0: + continue + percentage = yes / total + if doc_id in old_formatted: + final[doc_id] = { + 'total': total, + 'currperc': percentage, + 'diffperc': percentage - old_formatted[doc_id]['percentage'] + } + else: + final[doc_id] = { + 'total': total, + 'currperc': percentage, + 'diffperc': 0.0 + } + + return final + + +class Command(BaseCommand): + help = "Calculate and save the most unhelpful KB articles in the past month." 
+ + def handle(self, **options): + REDIS_KEY = settings.HELPFULVOTES_UNHELPFUL_KEY + + old_formatted = _get_old_unhelpful() + final = _get_current_unhelpful(old_formatted) + + if final == {}: + return + + def _mean(vals): + """Argument: List of floats""" + if len(vals) == 0: + return None + return sum(vals) / len(vals) + + def _bayes_avg(C, m, R, v): + # Bayesian Average + # C = mean vote, v = number of votes, + # R = mean rating, m = minimum votes to list in topranked + return (C * m + R * v) / (m + v) + + mean_perc = _mean([float(final[key]['currperc']) for key in final.keys()]) + mean_total = _mean([float(final[key]['total']) for key in final.keys()]) + + # TODO: Make this into namedtuples + sorted_final = [(key, + final[key]['total'], + final[key]['currperc'], + final[key]['diffperc'], + _bayes_avg( + mean_perc, mean_total, final[key]['currperc'], final[key]['total'])) + for key in final.keys()] + sorted_final.sort(key=lambda entry: entry[4]) # Sort by Bayesian Avg + + redis = redis_client('helpfulvotes') + + redis.delete(REDIS_KEY) + + max_total = max([b[1] for b in sorted_final]) + + for entry in sorted_final: + doc = Document.objects.get(pk=entry[0]) + redis.rpush(REDIS_KEY, (u'%s::%s::%s::%s::%s::%s::%s' % ( + entry[0], # Document ID + entry[1], # Total Votes + entry[2], # Current Percentage + entry[3], # Difference in Percentage + 1 - (entry[1] / max_total), # Graph Color + doc.slug, # Document slug + doc.title, # Document title + ))) diff --git a/kitsune/dashboards/management/commands/reload_wiki_traffic_stats.py b/kitsune/dashboards/management/commands/reload_wiki_traffic_stats.py new file mode 100644 index 000000000..bf1e41c69 --- /dev/null +++ b/kitsune/dashboards/management/commands/reload_wiki_traffic_stats.py @@ -0,0 +1,12 @@ +from django.conf import settings +from django.core.management.base import BaseCommand + +from kitsune.dashboards.models import PERIODS, WikiDocumentVisits + + +class Command(BaseCommand): + + def handle(self, **options): + for period, _ in PERIODS: + WikiDocumentVisits.reload_period_from_analytics( + period, verbose=settings.DEBUG) diff --git a/kitsune/dashboards/management/commands/update_l10n_contributor_metrics.py b/kitsune/dashboards/management/commands/update_l10n_contributor_metrics.py new file mode 100644 index 000000000..6cf9206a5 --- /dev/null +++ b/kitsune/dashboards/management/commands/update_l10n_contributor_metrics.py @@ -0,0 +1,56 @@ +import argparse +from datetime import date, datetime + +from django.conf import settings +from django.core.management.base import BaseCommand + +from kitsune.dashboards.models import L10N_ACTIVE_CONTRIBUTORS_CODE, WikiMetric +from kitsune.products.models import Product +from kitsune.wiki.utils import num_active_contributors + + +def valid_date(s): + try: + return datetime.strptime(s, "%Y-%m-%d") + except ValueError: + msg = "Not a valid date: '{0}'.".format(s) + raise argparse.ArgumentTypeError(msg) + + +class Command(BaseCommand): + help = "Update the number of active contributors for each locale/product." + + def add_arguments(self, parser): + parser.add_argument('day', type=valid_date) + + def handle(self, day=None, **options): + """ + An active contributor is defined as a user that created or reviewed a + revision in the previous calendar month. 
+ """ + if day is None: + day = date.today() + first_of_month = date(day.year, day.month, 1) + if day.month == 1: + previous_first_of_month = date(day.year - 1, 12, 1) + else: + previous_first_of_month = date(day.year, day.month - 1, 1) + + # Loop through all locales. + for locale in settings.SUMO_LANGUAGES: + + # Loop through all enabled products, including None (really All). + for product in [None] + list(Product.objects.filter(visible=True)): + + num = num_active_contributors( + from_date=previous_first_of_month, + to_date=first_of_month, + locale=locale, + product=product) + + WikiMetric.objects.create( + code=L10N_ACTIVE_CONTRIBUTORS_CODE, + locale=locale, + product=product, + date=previous_first_of_month, + value=num) diff --git a/kitsune/dashboards/management/commands/update_l10n_coverage_metrics.py b/kitsune/dashboards/management/commands/update_l10n_coverage_metrics.py new file mode 100644 index 000000000..c4728901e --- /dev/null +++ b/kitsune/dashboards/management/commands/update_l10n_coverage_metrics.py @@ -0,0 +1,78 @@ +from datetime import date + +from django.conf import settings +from django.core.management.base import BaseCommand + +from kitsune.dashboards.models import (L10N_ALL_CODE, L10N_TOP20_CODE, + L10N_TOP100_CODE, WikiMetric) +from kitsune.dashboards.readouts import l10n_overview_rows +from kitsune.products.models import Product + + +class Command(BaseCommand): + help = "Calculate and store the l10n metrics for each locale/product." + + def handle(self, **options): + """ + The metrics are: + * Percent localized of top 20 articles + * Percent localized of all articles + """ + today = date.today() + + # Loop through all locales. + for locale in settings.SUMO_LANGUAGES: + + # Skip en-US, it is always 100% localized. + if locale == settings.WIKI_DEFAULT_LANGUAGE: + continue + + # Loop through all enabled products, including None (really All). + for product in [None] + list(Product.objects.filter(visible=True)): + + # (Ab)use the l10n_overview_rows helper from the readouts. 
+ rows = l10n_overview_rows(locale=locale, product=product) + + # % of top 20 articles + top20 = rows['top-20'] + + try: + percent = 100.0 * float(top20['numerator']) / top20['denominator'] + except ZeroDivisionError: + percent = 0.0 + + WikiMetric.objects.create( + code=L10N_TOP20_CODE, + locale=locale, + product=product, + date=today, + value=percent) + + # % of top 100 articles + top100 = rows['top-100'] + + try: + percent = 100.0 * float(top100['numerator']) / top100['denominator'] + except ZeroDivisionError: + percent = 0.0 + + WikiMetric.objects.create( + code=L10N_TOP100_CODE, + locale=locale, + product=product, + date=today, + value=percent) + + # % of all articles + all_ = rows['all'] + try: + percent = 100 * float(all_['numerator']) / all_['denominator'] + except ZeroDivisionError: + percent = 0.0 + + WikiMetric.objects.create( + code=L10N_ALL_CODE, + locale=locale, + product=product, + date=today, + value=percent) diff --git a/kitsune/dashboards/tests/test_cron.py b/kitsune/dashboards/tests/test_cron.py index e4703321f..23b3824a7 100644 --- a/kitsune/dashboards/tests/test_cron.py +++ b/kitsune/dashboards/tests/test_cron.py @@ -2,21 +2,19 @@ from datetime import date, timedelta from django.conf import settings - +from django.core.management import call_command from nose.tools import eq_ -from kitsune.dashboards.cron import ( - cache_most_unhelpful_kb_articles, _get_old_unhelpful, - _get_current_unhelpful, update_l10n_coverage_metrics, - update_l10n_contributor_metrics) -from kitsune.dashboards.models import ( - WikiMetric, L10N_TOP20_CODE, L10N_TOP100_CODE, L10N_ALL_CODE) +from kitsune.dashboards.management.commands.cache_most_unhelpful_kb_articles import ( + _get_current_unhelpful, _get_old_unhelpful) +from kitsune.dashboards.models import (L10N_ALL_CODE, L10N_TOP20_CODE, + L10N_TOP100_CODE, WikiMetric) from kitsune.products.tests import ProductFactory -from kitsune.sumo.redis_utils import redis_client, RedisError +from kitsune.sumo.redis_utils import RedisError, redis_client from kitsune.sumo.tests import SkipTest, TestCase from kitsune.users.tests import UserFactory -from kitsune.wiki.tests import ( - RevisionFactory, ApprovedRevisionFactory, DocumentFactory, HelpfulVoteFactory) +from kitsune.wiki.tests import (ApprovedRevisionFactory, DocumentFactory, + HelpfulVoteFactory, RevisionFactory) def _add_vote_in_past(rev, vote, days_back): @@ -124,9 +122,9 @@ class TopUnhelpfulArticlesTests(TestCase): eq_(5, result[r.document.id]['total']) -class TopUnhelpfulArticlesCronTests(TestCase): +class TopUnhelpfulArticlesCommandTests(TestCase): def setUp(self): - super(TopUnhelpfulArticlesCronTests, self).setUp() + super(TopUnhelpfulArticlesCommandTests, self).setUp() self.REDIS_KEY = settings.HELPFULVOTES_UNHELPFUL_KEY try: self.redis = redis_client('helpfulvotes') @@ -139,15 +137,15 @@ class TopUnhelpfulArticlesCronTests(TestCase): self.redis.flushdb() except (KeyError, AttributeError): raise SkipTest - super(TopUnhelpfulArticlesCronTests, self).tearDown() + super(TopUnhelpfulArticlesCommandTests, self).tearDown() def test_no_articles(self): - """Full cron with no articles returns no unhelpful articles.""" - cache_most_unhelpful_kb_articles() + """No articles returns no unhelpful articles.""" + call_command('cache_most_unhelpful_kb_articles') eq_(0, self.redis.llen(self.REDIS_KEY)) def test_caching_unhelpful(self): - """Cron should get the unhelpful articles.""" + """Command should get the unhelpful articles.""" r = _make_backdated_revision(90) for x in range(0, 3): @@ -156,7 +154,7 @@ 
class TopUnhelpfulArticlesCronTests(TestCase): for x in range(0, 2): _add_vote_in_past(r, 1, 3) - cache_most_unhelpful_kb_articles() + call_command('cache_most_unhelpful_kb_articles') eq_(1, self.redis.llen(self.REDIS_KEY)) result = self.redis.lrange(self.REDIS_KEY, 0, 1) @@ -166,7 +164,7 @@ class TopUnhelpfulArticlesCronTests(TestCase): result[0].decode('utf-8')) def test_caching_helpful(self): - """Cron should ignore the helpful articles.""" + """Command should ignore the helpful articles.""" r = _make_backdated_revision(90) for x in range(0, 3): @@ -175,7 +173,7 @@ class TopUnhelpfulArticlesCronTests(TestCase): for x in range(0, 2): _add_vote_in_past(r, 0, 3) - cache_most_unhelpful_kb_articles() + call_command('cache_most_unhelpful_kb_articles') eq_(0, self.redis.llen(self.REDIS_KEY)) @@ -195,7 +193,7 @@ class TopUnhelpfulArticlesCronTests(TestCase): for x in range(0, 2): _add_vote_in_past(r, 1, 3) - cache_most_unhelpful_kb_articles() + call_command('cache_most_unhelpful_kb_articles') eq_(1, self.redis.llen(self.REDIS_KEY)) result = self.redis.lrange(self.REDIS_KEY, 0, 1) @@ -233,7 +231,7 @@ class TopUnhelpfulArticlesCronTests(TestCase): for x in range(0, 91): _add_vote_in_past(r3, 0, 3) - cache_most_unhelpful_kb_articles() + call_command('cache_most_unhelpful_kb_articles') eq_(3, self.redis.llen(self.REDIS_KEY)) result = self.redis.lrange(self.REDIS_KEY, 0, 3) @@ -245,7 +243,7 @@ class TopUnhelpfulArticlesCronTests(TestCase): class L10nMetricsTests(TestCase): def test_update_l10n_coverage_metrics(self): - """Test the cron job that updates l10n coverage metrics.""" + """Test the command that updates l10n coverage metrics.""" p = ProductFactory(visible=True) # Create en-US documents. @@ -274,8 +272,8 @@ class L10nMetricsTests(TestCase): d = DocumentFactory(parent=r.document, locale='ru') RevisionFactory(document=d, based_on=r, is_approved=True) - # Call the cronjob - update_l10n_coverage_metrics() + # Call the management command + call_command('update_l10n_coverage_metrics') # Verify es metrics. eq_(6, WikiMetric.objects.filter(locale='es').count()) @@ -314,7 +312,7 @@ class L10nMetricsTests(TestCase): eq_(0.0, WikiMetric.objects.get(locale='it', product=None, code=L10N_ALL_CODE).value) def test_update_active_contributor_metrics(self): - """Test the cron job that updates active contributor metrics.""" + """Test the command that updates active contributor metrics.""" day = date(2013, 7, 31) last_month = date(2013, 6, 15) start_date = date(2013, 6, 1) @@ -345,8 +343,8 @@ class L10nMetricsTests(TestCase): RevisionFactory(document=d, created=before_start) RevisionFactory(document=d, created=day) - # Call the cron job. - update_l10n_contributor_metrics(day) + # Call the command. 
+ call_command('update_l10n_contributor_metrics', str(day)) eq_(3.0, WikiMetric.objects.get(locale='en-US', product=None, date=start_date).value) eq_(1.0, WikiMetric.objects.get(locale='en-US', product=p, date=start_date).value) diff --git a/kitsune/karma/cron.py b/kitsune/karma/cron.py deleted file mode 100644 index eee6572e4..000000000 --- a/kitsune/karma/cron.py +++ /dev/null @@ -1,12 +0,0 @@ -import cronjobs - -from kitsune.community.utils import top_contributors_questions -from kitsune.karma.models import Title - - -@cronjobs.register -def update_top_contributors(): - """"Update the top contributor lists and titles.""" - top25_ids = [x['user']['id'] for x in top_contributors_questions(count=25)[0]] - Title.objects.set_top10_contributors(top25_ids[:10]) - Title.objects.set_top25_contributors(top25_ids[10:25]) diff --git a/kitsune/karma/management/__init__.py b/kitsune/karma/management/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/kitsune/karma/management/commands/__init__.py b/kitsune/karma/management/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/kitsune/karma/management/commands/update_top_contributors.py b/kitsune/karma/management/commands/update_top_contributors.py new file mode 100644 index 000000000..5c48f4072 --- /dev/null +++ b/kitsune/karma/management/commands/update_top_contributors.py @@ -0,0 +1,13 @@ +from django.core.management.base import BaseCommand + +from kitsune.community.utils import top_contributors_questions +from kitsune.karma.models import Title + + +class Command(BaseCommand): + help = "Update the top contributor lists and titles." + + def handle(self, **options): + top25_ids = [x['user']['id'] for x in top_contributors_questions(count=25)[0]] + Title.objects.set_top10_contributors(top25_ids[:10]) + Title.objects.set_top25_contributors(top25_ids[10:25]) diff --git a/kitsune/kpi/cron.py b/kitsune/kpi/cron.py deleted file mode 100644 index 95fe1562f..000000000 --- a/kitsune/kpi/cron.py +++ /dev/null @@ -1,694 +0,0 @@ -import json -import operator -from datetime import datetime, date, timedelta -from functools import reduce - -from django.conf import settings -from django.db.models import Count, F, Q - -import cronjobs -import requests -from django_statsd.clients import statsd - -from kitsune.customercare.models import Reply -from kitsune.dashboards import LAST_90_DAYS -from kitsune.dashboards.models import WikiDocumentVisits -from kitsune.kpi.models import ( - Metric, MetricKind, CohortKind, Cohort, RetentionMetric, AOA_CONTRIBUTORS_METRIC_CODE, - KB_ENUS_CONTRIBUTORS_METRIC_CODE, KB_L10N_CONTRIBUTORS_METRIC_CODE, L10N_METRIC_CODE, - SUPPORT_FORUM_CONTRIBUTORS_METRIC_CODE, VISITORS_METRIC_CODE, SEARCH_SEARCHES_METRIC_CODE, - SEARCH_CLICKS_METRIC_CODE, EXIT_SURVEY_YES_CODE, EXIT_SURVEY_NO_CODE, - EXIT_SURVEY_DONT_KNOW_CODE, CONTRIBUTOR_COHORT_CODE, KB_ENUS_CONTRIBUTOR_COHORT_CODE, - KB_L10N_CONTRIBUTOR_COHORT_CODE, SUPPORT_FORUM_HELPER_COHORT_CODE, AOA_CONTRIBUTOR_COHORT_CODE, - CONTRIBUTORS_CSAT_METRIC_CODE, AOA_CONTRIBUTORS_CSAT_METRIC_CODE, - KB_ENUS_CONTRIBUTORS_CSAT_METRIC_CODE, KB_L10N_CONTRIBUTORS_CSAT_METRIC_CODE, - SUPPORT_FORUM_CONTRIBUTORS_CSAT_METRIC_CODE) -from kitsune.kpi.surveygizmo_utils import ( - get_email_addresses, add_email_to_campaign, get_exit_survey_results, - SURVEYS) -from kitsune.questions.models import Answer, Question -from kitsune.sumo import googleanalytics -from kitsune.wiki.config import TYPO_SIGNIFICANCE, MEDIUM_SIGNIFICANCE -from kitsune.wiki.models import Revision - - 
-@cronjobs.register -def update_visitors_metric(): - """Get new visitor data from Google Analytics and save.""" - if settings.STAGE: - # Let's be nice to GA and skip on stage. - return - - # Start updating the day after the last updated. - latest_metric = _get_latest_metric(VISITORS_METRIC_CODE) - if latest_metric is not None: - latest_metric_date = latest_metric.start - else: - latest_metric_date = date(2011, 01, 01) - start = latest_metric_date + timedelta(days=1) - - # Collect up until yesterday - end = date.today() - timedelta(days=1) - - # Get the visitor data from Google Analytics. - visitors = googleanalytics.visitors(start, end) - - # Create the metrics. - metric_kind = MetricKind.objects.get(code=VISITORS_METRIC_CODE) - for date_str, visits in visitors.items(): - day = datetime.strptime(date_str, '%Y-%m-%d').date() - Metric.objects.create( - kind=metric_kind, - start=day, - end=day + timedelta(days=1), - value=visits) - - -MAX_DOCS_UP_TO_DATE = 50 - - -@cronjobs.register -def update_l10n_metric(): - """Calculate new l10n coverage numbers and save. - - L10n coverage is a measure of the amount of translations that are - up to date, weighted by the number of visits for each locale. - - The "algorithm" (see Bug 727084): - SUMO visits = Total SUMO visits for the last 30 days; - Total translated = 0; - - For each locale { - Total up to date = Total up to date + - ((Number of up to date articles in the en-US top 50 visited)/50 ) * - (Visitors for that locale / SUMO visits)); - } - - An up to date article is any of the following: - * An en-US article (by definition it is always up to date) - * The latest en-US revision has been translated - * There are only new revisions with TYPO_SIGNIFICANCE not translated - * There is only one revision of MEDIUM_SIGNIFICANCE not translated - """ - # Get the top 60 visited articles. We will only use the top 50 - # but a handful aren't localizable so we get some extras. - top_60_docs = _get_top_docs(60) - - # Get the visits to each locale in the last 30 days. - end = date.today() - timedelta(days=1) # yesterday - start = end - timedelta(days=30) - locale_visits = googleanalytics.visitors_by_locale(start, end) - - # Total visits. - total_visits = sum(locale_visits.itervalues()) - - # Calculate the coverage. - coverage = 0 - for locale, visits in locale_visits.iteritems(): - if locale == settings.WIKI_DEFAULT_LANGUAGE: - num_docs = MAX_DOCS_UP_TO_DATE - up_to_date_docs = MAX_DOCS_UP_TO_DATE - else: - up_to_date_docs, num_docs = _get_up_to_date_count( - top_60_docs, locale) - - if num_docs and total_visits: - coverage += ((float(up_to_date_docs) / num_docs) * - (float(visits) / total_visits)) - - # Save the value to Metric table. - metric_kind = MetricKind.objects.get(code=L10N_METRIC_CODE) - day = date.today() - Metric.objects.create( - kind=metric_kind, - start=day, - end=day + timedelta(days=1), - value=int(coverage * 100)) # Store as a % int. - - -@cronjobs.register -def update_contributor_metrics(day=None): - """Calculate and save contributor metrics.""" - update_support_forum_contributors_metric(day) - update_kb_contributors_metric(day) - update_aoa_contributors_metric(day) - - -def update_support_forum_contributors_metric(day=None): - """Calculate and save the support forum contributor counts. - - An support forum contributor is a user that has replied 10 times - in the past 30 days to questions that aren't his/her own. 
- """ - if day: - start = end = day - else: - latest_metric = _get_latest_metric( - SUPPORT_FORUM_CONTRIBUTORS_METRIC_CODE) - if latest_metric is not None: - # Start updating the day after the last updated. - start = latest_metric.end + timedelta(days=1) - else: - start = date(2011, 01, 01) - - # Update until yesterday. - end = date.today() - timedelta(days=1) - - # Loop through all the days from start to end, calculating and saving. - day = start - while day <= end: - # Figure out the number of contributors from the last 30 days. - thirty_days_back = day - timedelta(days=30) - contributors = ( - Answer.objects.exclude(creator=F('question__creator')) - .filter(created__gte=thirty_days_back, - created__lt=day) - .values('creator') - .annotate(count=Count('creator')) - .filter(count__gte=10)) - count = contributors.count() - - # Save the value to Metric table. - metric_kind = MetricKind.objects.get( - code=SUPPORT_FORUM_CONTRIBUTORS_METRIC_CODE) - Metric.objects.create( - kind=metric_kind, - start=thirty_days_back, - end=day, - value=count) - - day = day + timedelta(days=1) - - -def update_kb_contributors_metric(day=None): - """Calculate and save the KB (en-US and L10n) contributor counts. - - A KB contributor is a user that has edited or reviewed a Revision - in the last 30 days. - """ - if day: - start = end = day - else: - latest_metric = _get_latest_metric(KB_ENUS_CONTRIBUTORS_METRIC_CODE) - if latest_metric is not None: - # Start updating the day after the last updated. - start = latest_metric.end + timedelta(days=1) - else: - start = date(2011, 01, 01) - - # Update until yesterday. - end = date.today() - timedelta(days=1) - - # Loop through all the days from start to end, calculating and saving. - day = start - while day <= end: - # Figure out the number of contributors from the last 30 days. - thirty_days_back = day - timedelta(days=30) - editors = ( - Revision.objects.filter( - created__gte=thirty_days_back, - created__lt=day) - .values_list('creator', flat=True).distinct()) - reviewers = ( - Revision.objects.filter( - reviewed__gte=thirty_days_back, - reviewed__lt=day) - .values_list('reviewer', flat=True).distinct()) - - en_us_count = len(set( - list(editors.filter(document__locale='en-US')) + - list(reviewers.filter(document__locale='en-US')) - )) - l10n_count = len(set( - list(editors.exclude(document__locale='en-US')) + - list(reviewers.exclude(document__locale='en-US')) - )) - - # Save the values to Metric table. - metric_kind = MetricKind.objects.get( - code=KB_ENUS_CONTRIBUTORS_METRIC_CODE) - Metric.objects.create( - kind=metric_kind, - start=thirty_days_back, - end=day, - value=en_us_count) - - metric_kind = MetricKind.objects.get( - code=KB_L10N_CONTRIBUTORS_METRIC_CODE) - Metric.objects.create( - kind=metric_kind, - start=thirty_days_back, - end=day, - value=l10n_count) - - day = day + timedelta(days=1) - - -def update_aoa_contributors_metric(day=None): - """Calculate and save the AoA contributor counts. - - An AoA contributor is a user that has replied in the last 30 days. - """ - if day: - start = end = day - else: - latest_metric = _get_latest_metric(AOA_CONTRIBUTORS_METRIC_CODE) - if latest_metric is not None: - # Start updating the day after the last updated. - start = latest_metric.end + timedelta(days=1) - else: - # Start updating 30 days after the first reply we have. - try: - first_reply = Reply.objects.order_by('created')[0] - start = first_reply.created.date() + timedelta(days=30) - except IndexError: - # If there is no data, there is nothing to do here. 
- return - - # Update until yesterday. - end = date.today() - timedelta(days=1) - - # Loop through all the days from start to end, calculating and saving. - day = start - while day <= end: - # Figure out the number of contributors from the last 30 days. - thirty_days_back = day - timedelta(days=30) - contributors = ( - Reply.objects.filter( - created__gte=thirty_days_back, - created__lt=day) - .values_list('twitter_username').distinct()) - count = contributors.count() - - # Save the value to Metric table. - metric_kind = MetricKind.objects.get(code=AOA_CONTRIBUTORS_METRIC_CODE) - Metric.objects.create( - kind=metric_kind, - start=thirty_days_back, - end=day, - value=count) - - day = day + timedelta(days=1) - - -@cronjobs.register -def update_search_ctr_metric(): - """Get new search CTR data from Google Analytics and save.""" - if settings.STAGE: - # Let's be nice to GA and skip on stage. - return - - # Start updating the day after the last updated. - latest_metric = _get_latest_metric(SEARCH_CLICKS_METRIC_CODE) - if latest_metric is not None: - latest_metric_date = latest_metric.start - else: - latest_metric_date = date(2011, 01, 01) - start = latest_metric_date + timedelta(days=1) - - # Collect up until yesterday - end = date.today() - timedelta(days=1) - - # Get the CTR data from Google Analytics. - ctr_data = googleanalytics.search_ctr(start, end) - - # Create the metrics. - clicks_kind = MetricKind.objects.get(code=SEARCH_CLICKS_METRIC_CODE) - searches_kind = MetricKind.objects.get(code=SEARCH_SEARCHES_METRIC_CODE) - for date_str, ctr in ctr_data.items(): - day = datetime.strptime(date_str, '%Y-%m-%d').date() - - # Note: we've been storing our search data as total number of - # searches and clicks. Google Analytics only gives us the rate, - # so I am normalizing to 1000 searches (multiplying the % by 10). - # I didn't switch everything to a rate because I don't want to - # throw away the historic data. - Metric.objects.create( - kind=searches_kind, - start=day, - end=day + timedelta(days=1), - value=1000) - Metric.objects.create( - kind=clicks_kind, - start=day, - end=day + timedelta(days=1), - value=round(ctr, 1) * 10) - - -def _get_latest_metric(metric_code): - """Returns the date of the latest metric value.""" - try: - # Get the latest metric value and return the date. - last_metric = Metric.objects.filter( - kind__code=metric_code).order_by('-start')[0] - return last_metric - except IndexError: - return None - - -def _get_top_docs(count): - """Get the top documents by visits.""" - top_qs = WikiDocumentVisits.objects.select_related('document').filter( - period=LAST_90_DAYS).order_by('-visits')[:count] - return [v.document for v in top_qs] - - -def _get_up_to_date_count(top_60_docs, locale): - up_to_date_docs = 0 - num_docs = 0 - - for doc in top_60_docs: - if num_docs == MAX_DOCS_UP_TO_DATE: - break - - if not doc.is_localizable: - # Skip non localizable documents. - continue - - num_docs += 1 - cur_rev_id = doc.latest_localizable_revision_id - translation = doc.translated_to(locale) - - if not translation or not translation.current_revision_id: - continue - - if translation.current_revision.based_on_id >= cur_rev_id: - # The latest translation is based on the latest revision - # that is ready for localization or a newer one. - up_to_date_docs += 1 - else: - # Check if the approved revisions that happened between - # the last approved translation and the latest revision - # that is ready for localization are all minor (significance = - # TYPO_SIGNIFICANCE). 
If so, the translation is still - # considered up to date. - revs = doc.revisions.filter( - id__gt=translation.current_revision.based_on_id, - is_approved=True, - id__lte=cur_rev_id).exclude(significance=TYPO_SIGNIFICANCE) - if not revs.exists(): - up_to_date_docs += 1 - # If there is only 1 revision of MEDIUM_SIGNIFICANCE, then we - # count that as half-up-to-date (see bug 790797). - elif (len(revs) == 1 and - revs[0].significance == MEDIUM_SIGNIFICANCE): - up_to_date_docs += 0.5 - - return up_to_date_docs, num_docs - - -@cronjobs.register -def process_exit_surveys(): - """Exit survey handling. - - * Collect new exit survey results. - * Save results to our metrics table. - * Add new emails collected to the exit survey. - """ - - _process_exit_survey_results() - - # Get the email addresses from 4-5 hours ago and add them to the survey - # campaign (skip this on stage). - - # The cron associated with this process is set to run every hour, - # with the intent of providing a 4-5 hour wait period between when a - # visitor enters their email address and is then sent a follow-up - # survey. - # The range here is set between 4 and 8 hours to be sure no emails are - # missed should a particular cron run be skipped (e.g. during a deployment) - startdatetime = datetime.now() - timedelta(hours=8) - enddatetime = datetime.now() - timedelta(hours=4) - - for survey in SURVEYS.keys(): - if not SURVEYS[survey]['active'] or 'email_collection_survey_id' not in SURVEYS[survey]: - # Some surveys don't have email collection on the site - # (the askers survey, for example). - continue - - emails = get_email_addresses(survey, startdatetime, enddatetime) - for email in emails: - add_email_to_campaign(survey, email) - - statsd.gauge('survey.{0}'.format(survey), len(emails)) - - -def _process_exit_survey_results(): - """Collect and save new exit survey results.""" - # Gather and process up until yesterday's exit survey results. - yes_kind, _ = MetricKind.objects.get_or_create(code=EXIT_SURVEY_YES_CODE) - no_kind, _ = MetricKind.objects.get_or_create(code=EXIT_SURVEY_NO_CODE) - dunno_kind, _ = MetricKind.objects.get_or_create( - code=EXIT_SURVEY_DONT_KNOW_CODE) - - latest_metric = _get_latest_metric(EXIT_SURVEY_YES_CODE) - if latest_metric is not None: - latest_metric_date = latest_metric.start - else: - latest_metric_date = date(2013, 07, 01) - - day = latest_metric_date + timedelta(days=1) - today = date.today() - - while day < today: - # Get the aggregated results. - results = get_exit_survey_results('general', day) - - # Store them. - Metric.objects.create( - kind=yes_kind, - start=day, - end=day + timedelta(days=1), - value=results['yes']) - Metric.objects.create( - kind=no_kind, - start=day, - end=day + timedelta(days=1), - value=results['no']) - Metric.objects.create( - kind=dunno_kind, - start=day, - end=day + timedelta(days=1), - value=results['dont-know']) - - # Move on to next day. - day += timedelta(days=1) - - -@cronjobs.register -def survey_recent_askers(): - """Add question askers to a surveygizmo campaign to get surveyed.""" - # We get the email addresses of all users that asked a question 2 days - # ago. Then, all we have to do is send the email address to surveygizmo - # and it does the rest. 
- two_days_ago = date.today() - timedelta(days=2) - yesterday = date.today() - timedelta(days=1) - - emails = ( - Question.objects - .filter(created__gte=two_days_ago, created__lt=yesterday) - .values_list('creator__email', flat=True)) - - for email in emails: - add_email_to_campaign('askers', email) - - statsd.gauge('survey.askers', len(emails)) - - -@cronjobs.register -def cohort_analysis(): - today = datetime.today().replace(hour=0, minute=0, second=0, microsecond=0) - boundaries = [today - timedelta(days=today.weekday())] - for _ in range(12): - previous_week = boundaries[-1] - timedelta(weeks=1) - boundaries.append(previous_week) - boundaries.reverse() - ranges = zip(boundaries[:-1], boundaries[1:]) - - reports = [ - (CONTRIBUTOR_COHORT_CODE, [ - (Revision.objects.all(), ('creator', 'reviewer',)), - (Answer.objects.not_by_asker(), ('creator',)), - (Reply.objects.all(), ('user',))]), - (KB_ENUS_CONTRIBUTOR_COHORT_CODE, [ - (Revision.objects.filter(document__locale='en-US'), ('creator', 'reviewer',))]), - (KB_L10N_CONTRIBUTOR_COHORT_CODE, [ - (Revision.objects.exclude(document__locale='en-US'), ('creator', 'reviewer',))]), - (SUPPORT_FORUM_HELPER_COHORT_CODE, [ - (Answer.objects.not_by_asker(), ('creator',))]), - (AOA_CONTRIBUTOR_COHORT_CODE, [ - (Reply.objects.all(), ('user',))]) - ] - - for kind, querysets in reports: - cohort_kind, _ = CohortKind.objects.get_or_create(code=kind) - - for i, cohort_range in enumerate(ranges): - cohort_users = _get_cohort(querysets, cohort_range) - - # Sometimes None will be added to the cohort_users list, so remove it - if None in cohort_users: - cohort_users.remove(None) - - cohort, _ = Cohort.objects.update_or_create( - kind=cohort_kind, start=cohort_range[0], end=cohort_range[1], - defaults={'size': len(cohort_users)}) - - for retention_range in ranges[i:]: - retained_user_count = _count_contributors_in_range(querysets, cohort_users, - retention_range) - RetentionMetric.objects.update_or_create( - cohort=cohort, start=retention_range[0], end=retention_range[1], - defaults={'size': retained_user_count}) - - -def _count_contributors_in_range(querysets, users, date_range): - """Of the group ``users``, count how many made a contribution in ``date_range``.""" - start, end = date_range - retained_users = set() - - for queryset, fields in querysets: - for field in fields: - filters = {'%s__in' % field: users, 'created__gte': start, 'created__lt': end} - retained_users |= set(getattr(o, field) for o in queryset.filter(**filters)) - - return len(retained_users) - - -def _get_cohort(querysets, date_range): - start, end = date_range - cohort = set() - - for queryset, fields in querysets: - contributions_in_range = queryset.filter(created__gte=start, created__lt=end) - potential_users = set() - - for field in fields: - potential_users |= set(getattr(cont, field) for cont in contributions_in_range) - - def is_in_cohort(u): - qs = [Q(**{field: u}) for field in fields] - filters = reduce(operator.or_, qs) - - first_contrib = queryset.filter(filters).order_by('id')[0] - return start <= first_contrib.created < end - - cohort |= set(filter(is_in_cohort, potential_users)) - - return cohort - - -@cronjobs.register -def calculate_csat_metrics(): - user = settings.SURVEYGIZMO_USER - password = settings.SURVEYGIZMO_PASSWORD - startdate = date.today() - timedelta(days=2) - enddate = date.today() - timedelta(days=1) - page = 1 - more_pages = True - survey_id = SURVEYS['general']['community_health'] - - csat = { - CONTRIBUTORS_CSAT_METRIC_CODE: 0, - 
SUPPORT_FORUM_CONTRIBUTORS_CSAT_METRIC_CODE: 0, - AOA_CONTRIBUTORS_CSAT_METRIC_CODE: 0, - KB_ENUS_CONTRIBUTORS_CSAT_METRIC_CODE: 0, - KB_L10N_CONTRIBUTORS_CSAT_METRIC_CODE: 0, - } - - counts = { - CONTRIBUTORS_CSAT_METRIC_CODE: 0, - SUPPORT_FORUM_CONTRIBUTORS_CSAT_METRIC_CODE: 0, - AOA_CONTRIBUTORS_CSAT_METRIC_CODE: 0, - KB_ENUS_CONTRIBUTORS_CSAT_METRIC_CODE: 0, - KB_L10N_CONTRIBUTORS_CSAT_METRIC_CODE: 0, - } - - while more_pages: - response = requests.get( - 'https://restapi.surveygizmo.com/v2/survey/{survey}' - '/surveyresponse?' - 'filter[field][0]=datesubmitted' - '&filter[operator][0]=>=&filter[value][0]={start}+0:0:0' - '&filter[field][1]=datesubmitted' - '&filter[operator][1]=<&filter[value][1]={end}+0:0:0' - '&filter[field][2]=status&filter[operator][2]==' - '&filter[value][2]=Complete' - '&resultsperpage=500' - '&page={page}' - '&user:pass={user}:{password}'.format( - survey=survey_id, start=startdate, - end=enddate, page=page, user=user, password=password), - timeout=300) - - results = json.loads(response.content) - total_pages = results.get('total_pages', 1) - more_pages = page < total_pages - - if 'data' in results: - for r in results['data']: - try: - rating = int(r['[question(3)]']) - except ValueError: - # CSAT question was not answered - pass - else: - csat[CONTRIBUTORS_CSAT_METRIC_CODE] += rating - counts[CONTRIBUTORS_CSAT_METRIC_CODE] += 1 - - if len(r['[question(4), option(10010)]']): # Army of Awesome - csat[AOA_CONTRIBUTORS_CSAT_METRIC_CODE] += rating - counts[AOA_CONTRIBUTORS_CSAT_METRIC_CODE] += 1 - - if len(r['[question(4), option(10011)]']): # Support Forum - csat[SUPPORT_FORUM_CONTRIBUTORS_CSAT_METRIC_CODE] += rating - counts[SUPPORT_FORUM_CONTRIBUTORS_CSAT_METRIC_CODE] += 1 - - if len(r['[question(4), option(10012)]']): # KB EN-US - csat[KB_ENUS_CONTRIBUTORS_CSAT_METRIC_CODE] += rating - counts[KB_ENUS_CONTRIBUTORS_CSAT_METRIC_CODE] += 1 - - if len(r['[question(4), option(10013)]']): # KB L10N - csat[KB_L10N_CONTRIBUTORS_CSAT_METRIC_CODE] += rating - counts[KB_L10N_CONTRIBUTORS_CSAT_METRIC_CODE] += 1 - - page += 1 - - for code in csat: - metric_kind, _ = MetricKind.objects.get_or_create(code=code) - value = csat[code] / counts[code] if counts[code] else 50 # If no responses assume neutral - Metric.objects.update_or_create(kind=metric_kind, start=startdate, end=enddate, - defaults={'value': value}) - - -@cronjobs.register -def csat_survey_emails(): - querysets = [(Revision.objects.all(), ('creator', 'reviewer',)), - (Answer.objects.not_by_asker(), ('creator',)), - (Reply.objects.all(), ('user',))] - - end = datetime.today().replace(hour=0, minute=0, second=0, microsecond=0) - start = end - timedelta(days=30) - - users = _get_cohort(querysets, (start, end)) - - for u in users: - p = u.profile - if p.csat_email_sent is None or p.csat_email_sent < start: - survey_id = SURVEYS['general']['community_health'] - campaign_id = SURVEYS['general']['community_health_campaign_id'] - - try: - requests.put( - 'https://restapi.surveygizmo.com/v4/survey/{survey}/surveycampaign/' - '{campaign}/contact?semailaddress={email}&api_token={token}' - '&api_token_secret={secret}&allowdupe=true'.format( - survey=survey_id, campaign=campaign_id, email=u.email, - token=settings.SURVEYGIZMO_API_TOKEN, - secret=settings.SURVEYGIZMO_API_TOKEN_SECRET), - timeout=30) - except requests.exceptions.Timeout: - print 'Timed out adding: %s' % u.email - else: - p.csat_email_sent = datetime.now() - p.save() diff --git a/kitsune/kpi/management/__init__.py b/kitsune/kpi/management/__init__.py new file mode 
100644 index 000000000..e69de29bb diff --git a/kitsune/kpi/management/commands/__init__.py b/kitsune/kpi/management/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/kitsune/kpi/management/commands/calculate_csat_metrics.py b/kitsune/kpi/management/commands/calculate_csat_metrics.py new file mode 100644 index 000000000..41b7bcc0d --- /dev/null +++ b/kitsune/kpi/management/commands/calculate_csat_metrics.py @@ -0,0 +1,111 @@ +import json +from datetime import date, timedelta + +import requests +from django.conf import settings +from django.core.management.base import BaseCommand +from kitsune.kpi.models import ( + AOA_CONTRIBUTORS_CSAT_METRIC_CODE, + CONTRIBUTORS_CSAT_METRIC_CODE, + KB_ENUS_CONTRIBUTORS_CSAT_METRIC_CODE, + KB_L10N_CONTRIBUTORS_CSAT_METRIC_CODE, + SUPPORT_FORUM_CONTRIBUTORS_CSAT_METRIC_CODE, + Metric, + MetricKind, +) +from kitsune.kpi.surveygizmo_utils import SURVEYS + + +class Command(BaseCommand): + def handle(self, **options): + user = settings.SURVEYGIZMO_USER + password = settings.SURVEYGIZMO_PASSWORD + startdate = date.today() - timedelta(days=2) + enddate = date.today() - timedelta(days=1) + page = 1 + more_pages = True + survey_id = SURVEYS["general"]["community_health"] + + csat = { + CONTRIBUTORS_CSAT_METRIC_CODE: 0, + SUPPORT_FORUM_CONTRIBUTORS_CSAT_METRIC_CODE: 0, + AOA_CONTRIBUTORS_CSAT_METRIC_CODE: 0, + KB_ENUS_CONTRIBUTORS_CSAT_METRIC_CODE: 0, + KB_L10N_CONTRIBUTORS_CSAT_METRIC_CODE: 0, + } + + counts = { + CONTRIBUTORS_CSAT_METRIC_CODE: 0, + SUPPORT_FORUM_CONTRIBUTORS_CSAT_METRIC_CODE: 0, + AOA_CONTRIBUTORS_CSAT_METRIC_CODE: 0, + KB_ENUS_CONTRIBUTORS_CSAT_METRIC_CODE: 0, + KB_L10N_CONTRIBUTORS_CSAT_METRIC_CODE: 0, + } + + while more_pages: + response = requests.get( + "https://restapi.surveygizmo.com/v2/survey/{survey}" + "/surveyresponse?" 
+ "filter[field][0]=datesubmitted" + "&filter[operator][0]=>=&filter[value][0]={start}+0:0:0" + "&filter[field][1]=datesubmitted" + "&filter[operator][1]=<&filter[value][1]={end}+0:0:0" + "&filter[field][2]=status&filter[operator][2]==" + "&filter[value][2]=Complete" + "&resultsperpage=500" + "&page={page}" + "&user:pass={user}:{password}".format( + survey=survey_id, + start=startdate, + end=enddate, + page=page, + user=user, + password=password, + ), + timeout=300, + ) + + results = json.loads(response.content) + total_pages = results.get("total_pages", 1) + more_pages = page < total_pages + + if "data" in results: + for r in results["data"]: + try: + rating = int(r["[question(3)]"]) + except ValueError: + # CSAT question was not answered + pass + else: + csat[CONTRIBUTORS_CSAT_METRIC_CODE] += rating + counts[CONTRIBUTORS_CSAT_METRIC_CODE] += 1 + + if len(r["[question(4), option(10010)]"]): # Army of Awesome + csat[AOA_CONTRIBUTORS_CSAT_METRIC_CODE] += rating + counts[AOA_CONTRIBUTORS_CSAT_METRIC_CODE] += 1 + + if len(r["[question(4), option(10011)]"]): # Support Forum + csat[SUPPORT_FORUM_CONTRIBUTORS_CSAT_METRIC_CODE] += rating + counts[SUPPORT_FORUM_CONTRIBUTORS_CSAT_METRIC_CODE] += 1 + + if len(r["[question(4), option(10012)]"]): # KB EN-US + csat[KB_ENUS_CONTRIBUTORS_CSAT_METRIC_CODE] += rating + counts[KB_ENUS_CONTRIBUTORS_CSAT_METRIC_CODE] += 1 + + if len(r["[question(4), option(10013)]"]): # KB L10N + csat[KB_L10N_CONTRIBUTORS_CSAT_METRIC_CODE] += rating + counts[KB_L10N_CONTRIBUTORS_CSAT_METRIC_CODE] += 1 + + page += 1 + + for code in csat: + metric_kind, _ = MetricKind.objects.get_or_create(code=code) + value = ( + csat[code] / counts[code] if counts[code] else 50 + ) # If no responses assume neutral + Metric.objects.update_or_create( + kind=metric_kind, + start=startdate, + end=enddate, + defaults={"value": value}, + ) diff --git a/kitsune/kpi/management/commands/cohort_analysis.py b/kitsune/kpi/management/commands/cohort_analysis.py new file mode 100644 index 000000000..3df60ffe7 --- /dev/null +++ b/kitsune/kpi/management/commands/cohort_analysis.py @@ -0,0 +1,91 @@ +from datetime import datetime, timedelta + +from django.core.management.base import BaseCommand + +from kitsune.customercare.models import Reply +from kitsune.kpi.management import utils +from kitsune.kpi.models import ( + AOA_CONTRIBUTOR_COHORT_CODE, + CONTRIBUTOR_COHORT_CODE, + KB_ENUS_CONTRIBUTOR_COHORT_CODE, + KB_L10N_CONTRIBUTOR_COHORT_CODE, + SUPPORT_FORUM_HELPER_COHORT_CODE, + Cohort, + CohortKind, + RetentionMetric, +) +from kitsune.questions.models import Answer +from kitsune.wiki.models import Revision + + +class Command(BaseCommand): + def handle(self, **options): + today = datetime.today().replace(hour=0, minute=0, second=0, microsecond=0) + boundaries = [today - timedelta(days=today.weekday())] + for _ in range(12): + previous_week = boundaries[-1] - timedelta(weeks=1) + boundaries.append(previous_week) + boundaries.reverse() + ranges = zip(boundaries[:-1], boundaries[1:]) + + reports = [ + ( + CONTRIBUTOR_COHORT_CODE, + [ + (Revision.objects.all(), ("creator", "reviewer")), + (Answer.objects.not_by_asker(), ("creator",)), + (Reply.objects.all(), ("user",)), + ], + ), + ( + KB_ENUS_CONTRIBUTOR_COHORT_CODE, + [ + ( + Revision.objects.filter(document__locale="en-US"), + ("creator", "reviewer"), + ) + ], + ), + ( + KB_L10N_CONTRIBUTOR_COHORT_CODE, + [ + ( + Revision.objects.exclude(document__locale="en-US"), + ("creator", "reviewer"), + ) + ], + ), + ( + SUPPORT_FORUM_HELPER_COHORT_CODE, + 
[(Answer.objects.not_by_asker(), ("creator",))], + ), + (AOA_CONTRIBUTOR_COHORT_CODE, [(Reply.objects.all(), ("user",))]), + ] + + for kind, querysets in reports: + cohort_kind, _ = CohortKind.objects.get_or_create(code=kind) + + for i, cohort_range in enumerate(ranges): + cohort_users = utils._get_cohort(querysets, cohort_range) + + # Sometimes None will be added to the cohort_users list, so remove it + if None in cohort_users: + cohort_users.remove(None) + + cohort, _ = Cohort.objects.update_or_create( + kind=cohort_kind, + start=cohort_range[0], + end=cohort_range[1], + defaults={"size": len(cohort_users)}, + ) + + for retention_range in ranges[i:]: + retained_user_count = utils._count_contributors_in_range( + querysets, cohort_users, retention_range + ) + RetentionMetric.objects.update_or_create( + cohort=cohort, + start=retention_range[0], + end=retention_range[1], + defaults={"size": retained_user_count}, + ) diff --git a/kitsune/kpi/management/commands/csat_survey_emails.py b/kitsune/kpi/management/commands/csat_survey_emails.py new file mode 100644 index 000000000..2a25cd75b --- /dev/null +++ b/kitsune/kpi/management/commands/csat_survey_emails.py @@ -0,0 +1,50 @@ +from datetime import datetime, timedelta + +import requests +from django.conf import settings +from django.core.management.base import BaseCommand + +from kitsune.customercare.models import Reply +from kitsune.kpi.management import utils +from kitsune.kpi.surveygizmo_utils import SURVEYS +from kitsune.questions.models import Answer +from kitsune.wiki.models import Revision + + +class Command(BaseCommand): + def handle(self, **options): + querysets = [ + (Revision.objects.all(), ("creator", "reviewer")), + (Answer.objects.not_by_asker(), ("creator",)), + (Reply.objects.all(), ("user",)), + ] + + end = datetime.today().replace(hour=0, minute=0, second=0, microsecond=0) + start = end - timedelta(days=30) + + users = utils._get_cohort(querysets, (start, end)) + + for u in users: + p = u.profile + if p.csat_email_sent is None or p.csat_email_sent < start: + survey_id = SURVEYS["general"]["community_health"] + campaign_id = SURVEYS["general"]["community_health_campaign_id"] + + try: + requests.put( + "https://restapi.surveygizmo.com/v4/survey/{survey}/surveycampaign/" + "{campaign}/contact?semailaddress={email}&api_token={token}" + "&api_token_secret={secret}&allowdupe=true".format( + survey=survey_id, + campaign=campaign_id, + email=u.email, + token=settings.SURVEYGIZMO_API_TOKEN, + secret=settings.SURVEYGIZMO_API_TOKEN_SECRET, + ), + timeout=30, + ) + except requests.exceptions.Timeout: + print("Timed out adding: %s" % u.email) + else: + p.csat_email_sent = datetime.now() + p.save() diff --git a/kitsune/kpi/management/commands/process_exit_surveys.py b/kitsune/kpi/management/commands/process_exit_surveys.py new file mode 100644 index 000000000..1378f0ce7 --- /dev/null +++ b/kitsune/kpi/management/commands/process_exit_surveys.py @@ -0,0 +1,51 @@ +from datetime import datetime, timedelta + +from django.core.management.base import BaseCommand +from django_statsd.clients import statsd + +from kitsune.kpi.management import utils +from kitsune.kpi.surveygizmo_utils import ( + SURVEYS, + add_email_to_campaign, + get_email_addresses, +) + + +class Command(BaseCommand): + help = "Exit survey handling." + + def handle(self, **options): + """ + * Collect new exit survey results. + * Save results to our metrics table. + * Add new emails collected to the exit survey. 
+ """ + + utils._process_exit_survey_results() + + # Get the email addresses from 4-5 hours ago and add them to the survey + # campaign (skip this on stage). + + # The cron associated with this process is set to run every hour, + # with the intent of providing a 4-5 hour wait period between when a + # visitor enters their email address and is then sent a follow-up + # survey. + # The range here is set between 4 and 8 hours to be sure no emails are + # missed should a particular cron run be skipped (e.g. during a deployment) + startdatetime = datetime.now() - timedelta(hours=8) + enddatetime = datetime.now() - timedelta(hours=4) + + for survey in SURVEYS.keys(): + if ( + not SURVEYS[survey]["active"] or + "email_collection_survey_id" not in SURVEYS[survey] + ): + # Some surveys don't have email collection on the site + # (the askers survey, for example). + continue + + emails = get_email_addresses(survey, startdatetime, enddatetime) + for email in emails: + add_email_to_campaign(survey, email) + + statsd.gauge("survey.{0}".format(survey), len(emails)) diff --git a/kitsune/kpi/management/commands/survey_recent_askers.py b/kitsune/kpi/management/commands/survey_recent_askers.py new file mode 100644 index 000000000..607757f8d --- /dev/null +++ b/kitsune/kpi/management/commands/survey_recent_askers.py @@ -0,0 +1,29 @@ +from datetime import date, timedelta + +from django.core.management.base import BaseCommand +from django_statsd.clients import statsd + +from kitsune.kpi.surveygizmo_utils import ( + add_email_to_campaign, +) +from kitsune.questions.models import Question + + +class Command(BaseCommand): + help = "Add question askers to a surveygizmo campaign to get surveyed." + + def handle(self, **options): + # We get the email addresses of all users that asked a question 2 days + # ago. Then, all we have to do is send the email address to surveygizmo + # and it does the rest. + two_days_ago = date.today() - timedelta(days=2) + yesterday = date.today() - timedelta(days=1) + + emails = Question.objects.filter( + created__gte=two_days_ago, created__lt=yesterday + ).values_list("creator__email", flat=True) + + for email in emails: + add_email_to_campaign("askers", email) + + statsd.gauge("survey.askers", len(emails)) diff --git a/kitsune/kpi/management/commands/update_contributor_metrics.py b/kitsune/kpi/management/commands/update_contributor_metrics.py new file mode 100644 index 000000000..05f12c250 --- /dev/null +++ b/kitsune/kpi/management/commands/update_contributor_metrics.py @@ -0,0 +1,189 @@ +import argparse +from datetime import date, datetime, timedelta + +from django.core.management.base import BaseCommand +from django.db.models import Count, F + +from kitsune.customercare.models import Reply +from kitsune.kpi.management import utils +from kitsune.kpi.models import ( + AOA_CONTRIBUTORS_METRIC_CODE, + KB_ENUS_CONTRIBUTORS_METRIC_CODE, + KB_L10N_CONTRIBUTORS_METRIC_CODE, + SUPPORT_FORUM_CONTRIBUTORS_METRIC_CODE, + Metric, + MetricKind, +) +from kitsune.questions.models import Answer +from kitsune.wiki.models import Revision + + +def valid_date(s): + try: + return datetime.strptime(s, "%Y-%m-%d") + except ValueError: + msg = "Not a valid date: '{0}'.".format(s) + raise argparse.ArgumentTypeError(msg) + + +class Command(BaseCommand): + help = "Calculate and save contributor metrics." 
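The 'day' argument added just below is parsed by valid_date above, so it is supplied as a YYYY-MM-DD string; a hypothetical invocation, mirroring the call_command usage in the updated tests later in this patch (the concrete date is only an example):

    from django.core.management import call_command

    # shell: python manage.py update_contributor_metrics 2019-04-23
    call_command('update_contributor_metrics', '2019-04-23')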
+ + def add_arguments(self, parser): + parser.add_argument('day', type=valid_date) + + def handle(self, day=None, **options): + update_support_forum_contributors_metric(day) + update_kb_contributors_metric(day) + update_aoa_contributors_metric(day) + + +def update_support_forum_contributors_metric(day=None): + """Calculate and save the support forum contributor counts. + + An support forum contributor is a user that has replied 10 times + in the past 30 days to questions that aren't his/her own. + """ + if day: + start = end = day + else: + latest_metric = utils._get_latest_metric(SUPPORT_FORUM_CONTRIBUTORS_METRIC_CODE) + if latest_metric is not None: + # Start updating the day after the last updated. + start = latest_metric.end + timedelta(days=1) + else: + start = date(2011, 1, 1) + + # Update until yesterday. + end = date.today() - timedelta(days=1) + + # Loop through all the days from start to end, calculating and saving. + day = start + while day <= end: + # Figure out the number of contributors from the last 30 days. + thirty_days_back = day - timedelta(days=30) + contributors = ( + Answer.objects.exclude(creator=F("question__creator")) + .filter(created__gte=thirty_days_back, created__lt=day) + .values("creator") + .annotate(count=Count("creator")) + .filter(count__gte=10) + ) + count = contributors.count() + + # Save the value to Metric table. + metric_kind = MetricKind.objects.get( + code=SUPPORT_FORUM_CONTRIBUTORS_METRIC_CODE + ) + Metric.objects.create( + kind=metric_kind, start=thirty_days_back, end=day, value=count + ) + + day = day + timedelta(days=1) + + +def update_kb_contributors_metric(day=None): + """Calculate and save the KB (en-US and L10n) contributor counts. + + A KB contributor is a user that has edited or reviewed a Revision + in the last 30 days. + """ + if day: + start = end = day + else: + latest_metric = utils._get_latest_metric(KB_ENUS_CONTRIBUTORS_METRIC_CODE) + if latest_metric is not None: + # Start updating the day after the last updated. + start = latest_metric.end + timedelta(days=1) + else: + start = date(2011, 1, 1) + + # Update until yesterday. + end = date.today() - timedelta(days=1) + + # Loop through all the days from start to end, calculating and saving. + day = start + while day <= end: + # Figure out the number of contributors from the last 30 days. + thirty_days_back = day - timedelta(days=30) + editors = ( + Revision.objects.filter(created__gte=thirty_days_back, created__lt=day) + .values_list("creator", flat=True) + .distinct() + ) + reviewers = ( + Revision.objects.filter(reviewed__gte=thirty_days_back, reviewed__lt=day) + .values_list("reviewer", flat=True) + .distinct() + ) + + en_us_count = len( + set( + list(editors.filter(document__locale="en-US")) + + list(reviewers.filter(document__locale="en-US")) + ) + ) + l10n_count = len( + set( + list(editors.exclude(document__locale="en-US")) + + list(reviewers.exclude(document__locale="en-US")) + ) + ) + + # Save the values to Metric table. + metric_kind = MetricKind.objects.get(code=KB_ENUS_CONTRIBUTORS_METRIC_CODE) + Metric.objects.create( + kind=metric_kind, start=thirty_days_back, end=day, value=en_us_count + ) + + metric_kind = MetricKind.objects.get(code=KB_L10N_CONTRIBUTORS_METRIC_CODE) + Metric.objects.create( + kind=metric_kind, start=thirty_days_back, end=day, value=l10n_count + ) + + day = day + timedelta(days=1) + + +def update_aoa_contributors_metric(day=None): + """Calculate and save the AoA contributor counts. + + An AoA contributor is a user that has replied in the last 30 days. 
+ """ + if day: + start = end = day + else: + latest_metric = utils._get_latest_metric(AOA_CONTRIBUTORS_METRIC_CODE) + if latest_metric is not None: + # Start updating the day after the last updated. + start = latest_metric.end + timedelta(days=1) + else: + # Start updating 30 days after the first reply we have. + try: + first_reply = Reply.objects.order_by("created")[0] + start = first_reply.created.date() + timedelta(days=30) + except IndexError: + # If there is no data, there is nothing to do here. + return + + # Update until yesterday. + end = date.today() - timedelta(days=1) + + # Loop through all the days from start to end, calculating and saving. + day = start + while day <= end: + # Figure out the number of contributors from the last 30 days. + thirty_days_back = day - timedelta(days=30) + contributors = ( + Reply.objects.filter(created__gte=thirty_days_back, created__lt=day) + .values_list("twitter_username") + .distinct() + ) + count = contributors.count() + + # Save the value to Metric table. + metric_kind = MetricKind.objects.get(code=AOA_CONTRIBUTORS_METRIC_CODE) + Metric.objects.create( + kind=metric_kind, start=thirty_days_back, end=day, value=count + ) + + day = day + timedelta(days=1) diff --git a/kitsune/kpi/management/commands/update_l10n_metric.py b/kitsune/kpi/management/commands/update_l10n_metric.py new file mode 100644 index 000000000..7a28af1d2 --- /dev/null +++ b/kitsune/kpi/management/commands/update_l10n_metric.py @@ -0,0 +1,69 @@ +from datetime import date, timedelta + +from django.conf import settings +from django.core.management.base import BaseCommand + +from kitsune.kpi.management import utils +from kitsune.kpi.models import L10N_METRIC_CODE, Metric, MetricKind +from kitsune.sumo import googleanalytics + + +class Command(BaseCommand): + help = "Calculate new l10n coverage numbers and save." + + def handle(self, **options): + """ + L10n coverage is a measure of the amount of translations that are + up to date, weighted by the number of visits for each locale. + + The "algorithm" (see Bug 727084): + SUMO visits = Total SUMO visits for the last 30 days; + Total translated = 0; + + For each locale { + Total up to date = Total up to date + + ((Number of up to date articles in the en-US top 50 visited)/50 ) * + (Visitors for that locale / SUMO visits)); + } + + An up to date article is any of the following: + * An en-US article (by definition it is always up to date) + * The latest en-US revision has been translated + * There are only new revisions with TYPO_SIGNIFICANCE not translated + * There is only one revision of MEDIUM_SIGNIFICANCE not translated + """ + # Get the top 60 visited articles. We will only use the top 50 + # but a handful aren't localizable so we get some extras. + top_60_docs = utils._get_top_docs(60) + + # Get the visits to each locale in the last 30 days. + end = date.today() - timedelta(days=1) # yesterday + start = end - timedelta(days=30) + locale_visits = googleanalytics.visitors_by_locale(start, end) + + # Total visits. + total_visits = sum(locale_visits.itervalues()) + + # Calculate the coverage. + coverage = 0 + for locale, visits in locale_visits.iteritems(): + if locale == settings.WIKI_DEFAULT_LANGUAGE: + num_docs = utils.MAX_DOCS_UP_TO_DATE + up_to_date_docs = utils.MAX_DOCS_UP_TO_DATE + else: + up_to_date_docs, num_docs = utils._get_up_to_date_count(top_60_docs, locale) + + if num_docs and total_visits: + coverage += (float(up_to_date_docs) / num_docs) * ( + float(visits) / total_visits + ) + + # Save the value to Metric table. 
+ metric_kind = MetricKind.objects.get(code=L10N_METRIC_CODE) + day = date.today() + Metric.objects.create( + kind=metric_kind, + start=day, + end=day + timedelta(days=1), + value=int(coverage * 100), + ) # Store as a % int. diff --git a/kitsune/kpi/management/commands/update_search_ctr_metric.py b/kitsune/kpi/management/commands/update_search_ctr_metric.py new file mode 100644 index 000000000..3c359a5c1 --- /dev/null +++ b/kitsune/kpi/management/commands/update_search_ctr_metric.py @@ -0,0 +1,57 @@ +from datetime import date, datetime, timedelta + +from django.conf import settings +from django.core.management.base import BaseCommand + +from kitsune.kpi.management import utils +from kitsune.kpi.models import ( + SEARCH_CLICKS_METRIC_CODE, + SEARCH_SEARCHES_METRIC_CODE, + Metric, + MetricKind, +) +from kitsune.sumo import googleanalytics + + +class Command(BaseCommand): + help = "Get new search CTR data from Google Analytics and save." + + def handle(self, **options): + if settings.STAGE: + # Let's be nice to GA and skip on stage. + return + + # Start updating the day after the last updated. + latest_metric = utils._get_latest_metric(SEARCH_CLICKS_METRIC_CODE) + if latest_metric is not None: + latest_metric_date = latest_metric.start + else: + latest_metric_date = date(2011, 1, 1) + start = latest_metric_date + timedelta(days=1) + + # Collect up until yesterday + end = date.today() - timedelta(days=1) + + # Get the CTR data from Google Analytics. + ctr_data = googleanalytics.search_ctr(start, end) + + # Create the metrics. + clicks_kind = MetricKind.objects.get(code=SEARCH_CLICKS_METRIC_CODE) + searches_kind = MetricKind.objects.get(code=SEARCH_SEARCHES_METRIC_CODE) + for date_str, ctr in ctr_data.items(): + day = datetime.strptime(date_str, "%Y-%m-%d").date() + + # Note: we've been storing our search data as total number of + # searches and clicks. Google Analytics only gives us the rate, + # so I am normalizing to 1000 searches (multiplying the % by 10). + # I didn't switch everything to a rate because I don't want to + # throw away the historic data. + Metric.objects.create( + kind=searches_kind, start=day, end=day + timedelta(days=1), value=1000 + ) + Metric.objects.create( + kind=clicks_kind, + start=day, + end=day + timedelta(days=1), + value=round(ctr, 1) * 10, + ) diff --git a/kitsune/kpi/management/commands/update_visitors_metric.py b/kitsune/kpi/management/commands/update_visitors_metric.py new file mode 100644 index 000000000..179fd425f --- /dev/null +++ b/kitsune/kpi/management/commands/update_visitors_metric.py @@ -0,0 +1,39 @@ +from datetime import date, datetime, timedelta + +from django.conf import settings +from django.core.management.base import BaseCommand + +from kitsune.kpi.management.utils import _get_latest_metric +from kitsune.kpi.models import VISITORS_METRIC_CODE, Metric, MetricKind +from kitsune.sumo import googleanalytics + + +class Command(BaseCommand): + help = """Get new visitor data from Google Analytics and save.""" + + def handle(self, **options): + if settings.STAGE: + # Let's be nice to GA and skip on stage. + return + + # Start updating the day after the last updated. + latest_metric = _get_latest_metric(VISITORS_METRIC_CODE) + if latest_metric is not None: + latest_metric_date = latest_metric.start + else: + latest_metric_date = date(2011, 1, 1) + start = latest_metric_date + timedelta(days=1) + + # Collect up until yesterday + end = date.today() - timedelta(days=1) + + # Get the visitor data from Google Analytics. 
+ visitors = googleanalytics.visitors(start, end) + + # Create the metrics. + metric_kind = MetricKind.objects.get(code=VISITORS_METRIC_CODE) + for date_str, visits in visitors.items(): + day = datetime.strptime(date_str, "%Y-%m-%d").date() + Metric.objects.create( + kind=metric_kind, start=day, end=day + timedelta(days=1), value=visits + ) diff --git a/kitsune/kpi/management/utils.py b/kitsune/kpi/management/utils.py new file mode 100644 index 000000000..e53629acd --- /dev/null +++ b/kitsune/kpi/management/utils.py @@ -0,0 +1,159 @@ +import operator +from datetime import date, timedelta +from functools import reduce + +from django.db.models import Q + +from kitsune.dashboards import LAST_90_DAYS +from kitsune.dashboards.models import WikiDocumentVisits +from kitsune.kpi.models import ( + EXIT_SURVEY_DONT_KNOW_CODE, + EXIT_SURVEY_NO_CODE, + EXIT_SURVEY_YES_CODE, + Metric, + MetricKind, +) +from kitsune.kpi.surveygizmo_utils import get_exit_survey_results +from kitsune.wiki.config import MEDIUM_SIGNIFICANCE, TYPO_SIGNIFICANCE + +MAX_DOCS_UP_TO_DATE = 50 + + +def _get_latest_metric(metric_code): + """Returns the date of the latest metric value.""" + try: + # Get the latest metric value and return the date. + last_metric = Metric.objects.filter( + kind__code=metric_code).order_by('-start')[0] + return last_metric + except IndexError: + return None + + +def _get_top_docs(count): + """Get the top documents by visits.""" + top_qs = WikiDocumentVisits.objects.select_related('document').filter( + period=LAST_90_DAYS).order_by('-visits')[:count] + return [v.document for v in top_qs] + + +def _get_up_to_date_count(top_60_docs, locale): + up_to_date_docs = 0 + num_docs = 0 + + for doc in top_60_docs: + if num_docs == MAX_DOCS_UP_TO_DATE: + break + + if not doc.is_localizable: + # Skip non localizable documents. + continue + + num_docs += 1 + cur_rev_id = doc.latest_localizable_revision_id + translation = doc.translated_to(locale) + + if not translation or not translation.current_revision_id: + continue + + if translation.current_revision.based_on_id >= cur_rev_id: + # The latest translation is based on the latest revision + # that is ready for localization or a newer one. + up_to_date_docs += 1 + else: + # Check if the approved revisions that happened between + # the last approved translation and the latest revision + # that is ready for localization are all minor (significance = + # TYPO_SIGNIFICANCE). If so, the translation is still + # considered up to date. + revs = doc.revisions.filter( + id__gt=translation.current_revision.based_on_id, + is_approved=True, + id__lte=cur_rev_id).exclude(significance=TYPO_SIGNIFICANCE) + if not revs.exists(): + up_to_date_docs += 1 + # If there is only 1 revision of MEDIUM_SIGNIFICANCE, then we + # count that as half-up-to-date (see bug 790797). + elif (len(revs) == 1 and + revs[0].significance == MEDIUM_SIGNIFICANCE): + up_to_date_docs += 0.5 + + return up_to_date_docs, num_docs + + +def _process_exit_survey_results(): + """Collect and save new exit survey results.""" + # Gather and process up until yesterday's exit survey results. 
+ yes_kind, _ = MetricKind.objects.get_or_create(code=EXIT_SURVEY_YES_CODE) + no_kind, _ = MetricKind.objects.get_or_create(code=EXIT_SURVEY_NO_CODE) + dunno_kind, _ = MetricKind.objects.get_or_create( + code=EXIT_SURVEY_DONT_KNOW_CODE) + + latest_metric = _get_latest_metric(EXIT_SURVEY_YES_CODE) + if latest_metric is not None: + latest_metric_date = latest_metric.start + else: + latest_metric_date = date(2013, 07, 01) + + day = latest_metric_date + timedelta(days=1) + today = date.today() + + while day < today: + # Get the aggregated results. + results = get_exit_survey_results('general', day) + + # Store them. + Metric.objects.create( + kind=yes_kind, + start=day, + end=day + timedelta(days=1), + value=results['yes']) + Metric.objects.create( + kind=no_kind, + start=day, + end=day + timedelta(days=1), + value=results['no']) + Metric.objects.create( + kind=dunno_kind, + start=day, + end=day + timedelta(days=1), + value=results['dont-know']) + + # Move on to next day. + day += timedelta(days=1) + + +def _count_contributors_in_range(querysets, users, date_range): + """Of the group ``users``, count how many made a contribution in ``date_range``.""" + start, end = date_range + retained_users = set() + + for queryset, fields in querysets: + for field in fields: + filters = {'%s__in' % field: users, 'created__gte': start, 'created__lt': end} + retained_users |= set(getattr(o, field) for o in queryset.filter(**filters)) + + return len(retained_users) + + +def _get_cohort(querysets, date_range): + start, end = date_range + cohort = set() + + for queryset, fields in querysets: + contributions_in_range = queryset.filter(created__gte=start, created__lt=end) + potential_users = set() + + for field in fields: + potential_users |= set(getattr(cont, field) for cont in contributions_in_range) + + def is_in_cohort(u): + qs = [Q(**{field: u}) for field in fields] + filters = reduce(operator.or_, qs) + + first_contrib = queryset.filter(filters).order_by('id')[0] + return start <= first_contrib.created < end + + cohort |= set(filter(is_in_cohort, potential_users)) + + return cohort diff --git a/kitsune/kpi/tests/test_api.py b/kitsune/kpi/tests/test_api.py index 876df7c17..a673467b0 100644 --- a/kitsune/kpi/tests/test_api.py +++ b/kitsune/kpi/tests/test_api.py @@ -1,25 +1,24 @@ -from datetime import date, datetime, timedelta import json +from datetime import date, datetime, timedelta from django.core.cache import cache - +from django.core.management import call_command from nose.tools import eq_ from kitsune.customercare.tests import ReplyFactory -from kitsune.kpi.cron import update_contributor_metrics -from kitsune.kpi.models import ( - Metric, AOA_CONTRIBUTORS_METRIC_CODE, KB_ENUS_CONTRIBUTORS_METRIC_CODE, - KB_L10N_CONTRIBUTORS_METRIC_CODE, L10N_METRIC_CODE, - SUPPORT_FORUM_CONTRIBUTORS_METRIC_CODE, VISITORS_METRIC_CODE, - EXIT_SURVEY_YES_CODE, EXIT_SURVEY_NO_CODE, EXIT_SURVEY_DONT_KNOW_CODE) +from kitsune.kpi.models import (AOA_CONTRIBUTORS_METRIC_CODE, EXIT_SURVEY_DONT_KNOW_CODE, + EXIT_SURVEY_NO_CODE, EXIT_SURVEY_YES_CODE, + KB_ENUS_CONTRIBUTORS_METRIC_CODE, KB_L10N_CONTRIBUTORS_METRIC_CODE, + L10N_METRIC_CODE, SUPPORT_FORUM_CONTRIBUTORS_METRIC_CODE, + VISITORS_METRIC_CODE, Metric) from kitsune.kpi.tests import MetricFactory, MetricKindFactory from kitsune.products.tests import ProductFactory +from kitsune.questions.tests import AnswerFactory, AnswerVoteFactory, QuestionFactory from kitsune.sumo.templatetags.jinja_helpers import urlparams from kitsune.sumo.tests import TestCase from 
kitsune.sumo.urlresolvers import reverse -from kitsune.questions.tests import AnswerFactory, AnswerVoteFactory, QuestionFactory from kitsune.users.tests import UserFactory -from kitsune.wiki.tests import DocumentFactory, RevisionFactory, HelpfulVoteFactory +from kitsune.wiki.tests import DocumentFactory, HelpfulVoteFactory, RevisionFactory class KpiApiTests(TestCase): @@ -260,7 +259,7 @@ class KpiApiTests(TestCase): # Create metric kinds and update metrics for tomorrow (today's # activity shows up tomorrow). self._make_contributor_metric_kinds() - update_contributor_metrics(day=date.today() + timedelta(days=1)) + call_command('update_contributor_metrics', str(date.today() + timedelta(days=1))) r = self._get_api_result('api.kpi.contributors') @@ -283,7 +282,7 @@ class KpiApiTests(TestCase): # Create metric kinds and update metrics for tomorrow (today's # activity shows up tomorrow). self._make_contributor_metric_kinds() - update_contributor_metrics(day=date.today() + timedelta(days=1)) + call_command('update_contributor_metrics', str(date.today() + timedelta(days=1))) r = self._get_api_result('api.kpi.contributors') eq_(r['objects'][0]['support_forum'], 0) @@ -294,7 +293,7 @@ class KpiApiTests(TestCase): cache.clear() # We need to clear the cache for new results. Metric.objects.all().delete() - update_contributor_metrics(day=date.today() + timedelta(days=1)) + call_command('update_contributor_metrics', str(date.today() + timedelta(days=1))) r = self._get_api_result('api.kpi.contributors') eq_(r['objects'][0]['support_forum'], 1) diff --git a/kitsune/kpi/tests/test_cron.py b/kitsune/kpi/tests/test_cron.py index 8c5a493a8..0d4e63533 100644 --- a/kitsune/kpi/tests/test_cron.py +++ b/kitsune/kpi/tests/test_cron.py @@ -1,26 +1,30 @@ from datetime import date, datetime, timedelta +from django.core.management import call_command from mock import patch from nose.tools import eq_ -import kitsune.kpi.cron +import kitsune.kpi.management.utils from kitsune.customercare.tests import ReplyFactory from kitsune.kpi import surveygizmo_utils -from kitsune.kpi.cron import ( - cohort_analysis, update_visitors_metric, update_l10n_metric, googleanalytics, - update_search_ctr_metric, _process_exit_survey_results) -from kitsune.kpi.models import ( - Metric, Cohort, VISITORS_METRIC_CODE, L10N_METRIC_CODE, SEARCH_CLICKS_METRIC_CODE, - SEARCH_SEARCHES_METRIC_CODE, EXIT_SURVEY_YES_CODE, EXIT_SURVEY_NO_CODE, - EXIT_SURVEY_DONT_KNOW_CODE, CONTRIBUTOR_COHORT_CODE, KB_ENUS_CONTRIBUTOR_COHORT_CODE, - KB_L10N_CONTRIBUTOR_COHORT_CODE, SUPPORT_FORUM_HELPER_COHORT_CODE, AOA_CONTRIBUTOR_COHORT_CODE) -from kitsune.kpi.tests import MetricKindFactory, MetricFactory +from kitsune.kpi.models import (AOA_CONTRIBUTOR_COHORT_CODE, + CONTRIBUTOR_COHORT_CODE, + EXIT_SURVEY_DONT_KNOW_CODE, + EXIT_SURVEY_NO_CODE, EXIT_SURVEY_YES_CODE, + KB_ENUS_CONTRIBUTOR_COHORT_CODE, + KB_L10N_CONTRIBUTOR_COHORT_CODE, + L10N_METRIC_CODE, SEARCH_CLICKS_METRIC_CODE, + SEARCH_SEARCHES_METRIC_CODE, + SUPPORT_FORUM_HELPER_COHORT_CODE, + VISITORS_METRIC_CODE, Cohort, Metric) +from kitsune.kpi.tests import MetricFactory, MetricKindFactory from kitsune.questions.tests import AnswerFactory +from kitsune.sumo import googleanalytics from kitsune.sumo.tests import TestCase from kitsune.users.tests import UserFactory -from kitsune.wiki.config import ( - MAJOR_SIGNIFICANCE, MEDIUM_SIGNIFICANCE, TYPO_SIGNIFICANCE) -from kitsune.wiki.tests import DocumentFactory, ApprovedRevisionFactory +from kitsune.wiki.config import (MAJOR_SIGNIFICANCE, MEDIUM_SIGNIFICANCE, + 
TYPO_SIGNIFICANCE) +from kitsune.wiki.tests import ApprovedRevisionFactory, DocumentFactory class CohortAnalysisTests(TestCase): @@ -58,7 +62,7 @@ class CohortAnalysisTests(TestCase): for r in replies: ReplyFactory(user=r.user, created=self.start_of_first_week + timedelta(weeks=2)) - cohort_analysis() + call_command('cohort_analysis') def test_contributor_cohort_analysis(self): c1 = Cohort.objects.get(kind__code=CONTRIBUTOR_COHORT_CODE, start=self.start_of_first_week) @@ -157,7 +161,7 @@ class CronJobTests(TestCase): '2012-01-14': 193, '2012-01-15': 33} - update_visitors_metric() + call_command('update_visitors_metric') metrics = Metric.objects.filter(kind=visitor_kind).order_by('start') eq_(3, len(metrics)) @@ -165,7 +169,7 @@ class CronJobTests(TestCase): eq_(193, metrics[1].value) eq_(date(2012, 1, 15), metrics[2].start) - @patch.object(kitsune.kpi.cron, '_get_top_docs') + @patch.object(kitsune.kpi.management.utils, '_get_top_docs') @patch.object(googleanalytics, 'visitors_by_locale') def test_update_l10n_metric_cron(self, visitors_by_locale, _get_top_docs): """Verify the cron job creates the correct metric.""" @@ -196,7 +200,7 @@ class CronJobTests(TestCase): # Run it and verify results. # Value should be 75% (1/1 * 25/100 + 1/1 * 50/100) - update_l10n_metric() + call_command('update_l10n_metric') metrics = Metric.objects.filter(kind=l10n_kind) eq_(1, len(metrics)) eq_(75, metrics[0].value) @@ -208,7 +212,7 @@ class CronJobTests(TestCase): significance=TYPO_SIGNIFICANCE, is_ready_for_localization=True) Metric.objects.all().delete() - update_l10n_metric() + call_command('update_l10n_metric') metrics = Metric.objects.filter(kind=l10n_kind) eq_(1, len(metrics)) eq_(75, metrics[0].value) @@ -220,7 +224,7 @@ class CronJobTests(TestCase): significance=MEDIUM_SIGNIFICANCE, is_ready_for_localization=True) Metric.objects.all().delete() - update_l10n_metric() + call_command('update_l10n_metric') metrics = Metric.objects.filter(kind=l10n_kind) eq_(1, len(metrics)) eq_(62, metrics[0].value) @@ -232,7 +236,7 @@ class CronJobTests(TestCase): significance=MEDIUM_SIGNIFICANCE, is_ready_for_localization=True) Metric.objects.all().delete() - update_l10n_metric() + call_command('update_l10n_metric') metrics = Metric.objects.filter(kind=l10n_kind) eq_(1, len(metrics)) eq_(50, metrics[0].value) @@ -246,7 +250,7 @@ class CronJobTests(TestCase): significance=MAJOR_SIGNIFICANCE, is_ready_for_localization=True) Metric.objects.all().delete() - update_l10n_metric() + call_command('update_l10n_metric') metrics = Metric.objects.filter(kind=l10n_kind) eq_(1, len(metrics)) eq_(50, metrics[0].value) @@ -260,7 +264,7 @@ class CronJobTests(TestCase): '2013-06-07': 13.7654321, '2013-06-08': 99.55555} - update_search_ctr_metric() + call_command('update_search_ctr_metric') metrics = Metric.objects.filter(kind=clicks_kind).order_by('start') eq_(3, len(metrics)) @@ -285,7 +289,7 @@ class CronJobTests(TestCase): # Collect and process. with self.settings(SURVEYGIZMO_API_TOKEN='test', SURVEYGIZMO_API_TOKEN_SECRET='test'): - _process_exit_survey_results() + kitsune.kpi.management.utils._process_exit_survey_results() # Verify. 
eq_(4, Metric.objects.count()) diff --git a/kitsune/questions/cron.py b/kitsune/questions/cron.py deleted file mode 100644 index 87dd14b2b..000000000 --- a/kitsune/questions/cron.py +++ /dev/null @@ -1,210 +0,0 @@ -import logging -import textwrap -import time -from datetime import date, datetime, timedelta - -from django.conf import settings -from django.contrib.auth.models import Group -from django.core.mail import send_mail -from django.db import connection, transaction - -import cronjobs - -from kitsune.questions import config -from kitsune.questions.models import ( - Question, QuestionVote, QuestionMappingType, QuestionVisits, Answer) -from kitsune.questions.tasks import ( - escalate_question, update_question_vote_chunk) -from kitsune.search.es_utils import ES_EXCEPTIONS, get_documents -from kitsune.search.tasks import index_task -from kitsune.sumo.utils import chunked - - -log = logging.getLogger('k.cron') - - -@cronjobs.register -def update_weekly_votes(): - """Keep the num_votes_past_week value accurate.""" - - # Get all questions (id) with a vote in the last week. - recent = datetime.now() - timedelta(days=7) - q = QuestionVote.objects.filter(created__gte=recent) - q = q.values_list('question_id', flat=True).order_by('question') - q = q.distinct() - q_with_recent_votes = list(q) - - # Get all questions with num_votes_past_week > 0 - q = Question.objects.filter(num_votes_past_week__gt=0) - q = q.values_list('id', flat=True) - q_with_nonzero_votes = list(q) - - # Union. - qs_to_update = list(set(q_with_recent_votes + q_with_nonzero_votes)) - - # Chunk them for tasks. - for chunk in chunked(qs_to_update, 50): - update_question_vote_chunk.apply_async(args=[chunk]) - - -@cronjobs.register -def auto_archive_old_questions(): - """Archive all questions that were created over 180 days ago""" - # Set up logging so it doesn't send Ricky email. - logging.basicConfig(level=logging.ERROR) - - # Get a list of ids of questions we're going to go change. We need - # a list of ids so that we can feed it to the update, but then - # also know what we need to update in the index. - days_180 = datetime.now() - timedelta(days=180) - q_ids = list(Question.objects.filter(is_archived=False) - .filter(created__lte=days_180) - .values_list('id', flat=True)) - - if q_ids: - log.info('Updating %d questions', len(q_ids)) - - sql = """ - UPDATE questions_question - SET is_archived = 1 - WHERE id IN (%s) - """ % ','.join(map(str, q_ids)) - - cursor = connection.cursor() - cursor.execute(sql) - if not transaction.get_connection().in_atomic_block: - transaction.commit() - - if settings.ES_LIVE_INDEXING: - try: - # So... the first time this runs, it'll handle 160K - # questions or so which stresses everything. Thus we - # do it in chunks because otherwise this won't work. - # - # After we've done this for the first time, we can nix - # the chunking code. - - from kitsune.search.utils import chunked - for chunk in chunked(q_ids, 100): - - # Fetch all the documents we need to update. - es_docs = get_documents(QuestionMappingType, chunk) - - log.info('Updating %d index documents', len(es_docs)) - - documents = [] - - # For each document, update the data and stick it - # back in the index. - for doc in es_docs: - doc[u'question_is_archived'] = True - doc[u'indexed_on'] = int(time.time()) - documents.append(doc) - - QuestionMappingType.bulk_index(documents) - - except ES_EXCEPTIONS: - # Something happened with ES, so let's push index - # updating into an index_task which retries when it - # fails because of ES issues. 
- index_task.delay(QuestionMappingType, q_ids) - - -@cronjobs.register -def reload_question_traffic_stats(): - """Reload question views from the analytics.""" - QuestionVisits.reload_from_analytics(verbose=settings.DEBUG) - - -@cronjobs.register -def escalate_questions(): - """Escalate questions needing attention. - - Escalate questions where the status is "needs attention" and - still have no replies after 24 hours, but not that are older - than 25 hours (this runs every hour). - """ - # Get all the questions that need attention and haven't been escalated. - qs = Question.objects.needs_attention().exclude( - tags__slug__in=[config.ESCALATE_TAG_NAME]) - - # Only include English. - qs = qs.filter(locale=settings.WIKI_DEFAULT_LANGUAGE) - - # Exclude certain products. - qs = qs.exclude(product__slug__in=config.ESCALATE_EXCLUDE_PRODUCTS) - - # Exclude those by inactive users. - qs = qs.exclude(creator__is_active=False) - - # Filter them down to those that haven't been replied to and are over - # 24 hours old but less than 25 hours old. We run this once an hour. - start = datetime.now() - timedelta(hours=24) - end = datetime.now() - timedelta(hours=25) - qs_no_replies_yet = qs.filter( - last_answer__isnull=True, - created__lt=start, - created__gt=end) - - for question in qs_no_replies_yet: - escalate_question.delay(question.id) - - return len(qs_no_replies_yet) - - -@cronjobs.register -def report_employee_answers(): - """Send an email about employee answered questions. - - We report on the users in the "Support Forum Tracked" group. - We send the email to the users in the "Support Forum Metrics" group. - """ - tracked_group = Group.objects.get(name='Support Forum Tracked') - report_group = Group.objects.get(name='Support Forum Metrics') - - tracked_users = tracked_group.user_set.all() - report_recipients = report_group.user_set.all() - - if len(tracked_users) == 0 or len(report_recipients) == 0: - return - - yesterday = date.today() - timedelta(days=1) - day_before_yesterday = yesterday - timedelta(days=1) - - # Total number of questions asked the day before yesterday - questions = Question.objects.filter( - creator__is_active=True, - created__gte=day_before_yesterday, - created__lt=yesterday) - num_questions = questions.count() - - # Total number of answered questions day before yesterday - num_answered = questions.filter(num_answers__gt=0).count() - - # Total number of questions answered by user in tracked_group - num_answered_by_tracked = {} - for user in tracked_users: - num_answered_by_tracked[user.username] = Answer.objects.filter( - question__in=questions, - creator=user).values_list('question_id').distinct().count() - - email_subject = 'Support Forum answered report for {date}'.format(date=day_before_yesterday) - - email_body_tmpl = textwrap.dedent("""\ - Date: {date} - Number of questions asked: {num_questions} - Number of questions answered: {num_answered} - """) - email_body = email_body_tmpl.format( - date=day_before_yesterday, - num_questions=num_questions, - num_answered=num_answered) - - for username, count in num_answered_by_tracked.items(): - email_body += 'Number of questions answered by {username}: {count}\n'.format( - username=username, count=count) - - email_addresses = [u.email for u in report_recipients] - - send_mail(email_subject, email_body, settings.TIDINGS_FROM_ADDRESS, email_addresses, - fail_silently=False) diff --git a/kitsune/questions/management/commands/auto_archive_old_questions.py b/kitsune/questions/management/commands/auto_archive_old_questions.py new file 
mode 100644 index 000000000..06c374e93 --- /dev/null +++ b/kitsune/questions/management/commands/auto_archive_old_questions.py @@ -0,0 +1,78 @@ +import logging +import time +from datetime import datetime, timedelta + +from django.conf import settings +from django.core.management.base import BaseCommand +from django.db import connection, transaction + +from kitsune.questions.models import Question, QuestionMappingType +from kitsune.search.es_utils import ES_EXCEPTIONS, get_documents +from kitsune.search.tasks import index_task + +log = logging.getLogger('k.cron') + + +class Command(BaseCommand): + help = "Archive all questions that were created over 180 days ago." + + def handle(self, **options): + # Set up logging so it doesn't send Ricky email. + logging.basicConfig(level=logging.ERROR) + + # Get a list of ids of questions we're going to go change. We need + # a list of ids so that we can feed it to the update, but then + # also know what we need to update in the index. + days_180 = datetime.now() - timedelta(days=180) + q_ids = list( + Question.objects.filter(is_archived=False) + .filter(created__lte=days_180) + .values_list('id', flat=True)) + + if q_ids: + log.info('Updating %d questions', len(q_ids)) + + sql = """ + UPDATE questions_question + SET is_archived = 1 + WHERE id IN (%s) + """ % ','.join(map(str, q_ids)) + + cursor = connection.cursor() + cursor.execute(sql) + if not transaction.get_connection().in_atomic_block: + transaction.commit() + + if settings.ES_LIVE_INDEXING: + try: + # So... the first time this runs, it'll handle 160K + # questions or so which stresses everything. Thus we + # do it in chunks because otherwise this won't work. + # + # After we've done this for the first time, we can nix + # the chunking code. + + from kitsune.search.utils import chunked + for chunk in chunked(q_ids, 100): + + # Fetch all the documents we need to update. + es_docs = get_documents(QuestionMappingType, chunk) + + log.info('Updating %d index documents', len(es_docs)) + + documents = [] + + # For each document, update the data and stick it + # back in the index. + for doc in es_docs: + doc[u'question_is_archived'] = True + doc[u'indexed_on'] = int(time.time()) + documents.append(doc) + + QuestionMappingType.bulk_index(documents) + + except ES_EXCEPTIONS: + # Something happened with ES, so let's push index + # updating into an index_task which retries when it + # fails because of ES issues. + index_task.delay(QuestionMappingType, q_ids) diff --git a/kitsune/questions/management/commands/escalate_questions.py b/kitsune/questions/management/commands/escalate_questions.py new file mode 100644 index 000000000..b428fce90 --- /dev/null +++ b/kitsune/questions/management/commands/escalate_questions.py @@ -0,0 +1,45 @@ +from datetime import datetime, timedelta + +from django.conf import settings +from django.core.management.base import BaseCommand + +from kitsune.questions import config +from kitsune.questions.models import Question +from kitsune.questions.tasks import escalate_question + + +class Command(BaseCommand): + help = "Escalate questions needing attention." + + def handle(self, **options): + """ + Escalate questions where the status is "needs attention" and + still have no replies after 24 hours, but not that are older + than 25 hours (this runs every hour). + """ + # Get all the questions that need attention and haven't been escalated. + qs = Question.objects.needs_attention().exclude( + tags__slug__in=[config.ESCALATE_TAG_NAME]) + + # Only include English. 
+ qs = qs.filter(locale=settings.WIKI_DEFAULT_LANGUAGE) + + # Exclude certain products. + qs = qs.exclude(product__slug__in=config.ESCALATE_EXCLUDE_PRODUCTS) + + # Exclude those by inactive users. + qs = qs.exclude(creator__is_active=False) + + # Filter them down to those that haven't been replied to and are over + # 24 hours old but less than 25 hours old. We run this once an hour. + start = datetime.now() - timedelta(hours=24) + end = datetime.now() - timedelta(hours=25) + qs_no_replies_yet = qs.filter( + last_answer__isnull=True, + created__lt=start, + created__gt=end) + + for question in qs_no_replies_yet: + escalate_question.delay(question.id) + + return str(len(qs_no_replies_yet)) diff --git a/kitsune/questions/management/commands/reload_question_traffic_stats.py b/kitsune/questions/management/commands/reload_question_traffic_stats.py new file mode 100644 index 000000000..600ae758e --- /dev/null +++ b/kitsune/questions/management/commands/reload_question_traffic_stats.py @@ -0,0 +1,11 @@ +from django.conf import settings +from django.core.management.base import BaseCommand + +from kitsune.questions.models import QuestionVisits + + +class Command(BaseCommand): + help = "Reload question views from the analytics." + + def handle(self, **options): + QuestionVisits.reload_from_analytics(verbose=settings.DEBUG) diff --git a/kitsune/questions/management/commands/report_employee_answers.py b/kitsune/questions/management/commands/report_employee_answers.py new file mode 100644 index 000000000..86369108f --- /dev/null +++ b/kitsune/questions/management/commands/report_employee_answers.py @@ -0,0 +1,70 @@ +import textwrap +from datetime import date, timedelta + +from django.conf import settings +from django.contrib.auth.models import Group +from django.core.mail import send_mail +from django.core.management.base import BaseCommand + +from kitsune.questions.models import (Answer, Question) + + +class Command(BaseCommand): + help = "Send an email about employee answered questions." + + def handle(self, **options): + """ + We report on the users in the "Support Forum Tracked" group. + We send the email to the users in the "Support Forum Metrics" group. 
+ """ + tracked_group = Group.objects.get(name='Support Forum Tracked') + report_group = Group.objects.get(name='Support Forum Metrics') + + tracked_users = tracked_group.user_set.all() + report_recipients = report_group.user_set.all() + + if len(tracked_users) == 0 or len(report_recipients) == 0: + return + + yesterday = date.today() - timedelta(days=1) + day_before_yesterday = yesterday - timedelta(days=1) + + # Total number of questions asked the day before yesterday + questions = Question.objects.filter( + creator__is_active=True, + created__gte=day_before_yesterday, + created__lt=yesterday) + num_questions = questions.count() + + # Total number of answered questions day before yesterday + num_answered = questions.filter(num_answers__gt=0).count() + + # Total number of questions answered by user in tracked_group + num_answered_by_tracked = {} + for user in tracked_users: + num_answered_by_tracked[user.username] = Answer.objects.filter( + question__in=questions, + creator=user).values_list('question_id').distinct().count() + + email_subject = 'Support Forum answered report for {date}'.format( + date=day_before_yesterday) + + email_body_tmpl = textwrap.dedent("""\ + Date: {date} + Number of questions asked: {num_questions} + Number of questions answered: {num_answered} + """) + email_body = email_body_tmpl.format( + date=day_before_yesterday, + num_questions=num_questions, + num_answered=num_answered) + + for username, count in num_answered_by_tracked.items(): + email_body += 'Number of questions answered by {username}: {count}\n'.format( + username=username, count=count) + + email_addresses = [u.email for u in report_recipients] + + send_mail( + email_subject, email_body, settings.TIDINGS_FROM_ADDRESS, email_addresses, + fail_silently=False) diff --git a/kitsune/questions/management/commands/update_weekly_votes.py b/kitsune/questions/management/commands/update_weekly_votes.py new file mode 100644 index 000000000..28c17d910 --- /dev/null +++ b/kitsune/questions/management/commands/update_weekly_votes.py @@ -0,0 +1,31 @@ +from datetime import datetime, timedelta + +from django.core.management.base import BaseCommand + +from kitsune.questions.models import Question, QuestionVote +from kitsune.questions.tasks import update_question_vote_chunk +from kitsune.sumo.utils import chunked + + +class Command(BaseCommand): + help = "Keep the num_votes_past_week value accurate." + + def handle(self, **options): + # Get all questions (id) with a vote in the last week. + recent = datetime.now() - timedelta(days=7) + q = QuestionVote.objects.filter(created__gte=recent) + q = q.values_list('question_id', flat=True).order_by('question') + q = q.distinct() + q_with_recent_votes = list(q) + + # Get all questions with num_votes_past_week > 0 + q = Question.objects.filter(num_votes_past_week__gt=0) + q = q.values_list('id', flat=True) + q_with_nonzero_votes = list(q) + + # Union. + qs_to_update = list(set(q_with_recent_votes + q_with_nonzero_votes)) + + # Chunk them for tasks. 
+ for chunk in chunked(qs_to_update, 50): + update_question_vote_chunk.apply_async(args=[chunk]) diff --git a/kitsune/questions/tests/test_cron.py b/kitsune/questions/tests/test_cron.py index f6781e6ab..69f8dcb40 100644 --- a/kitsune/questions/tests/test_cron.py +++ b/kitsune/questions/tests/test_cron.py @@ -1,14 +1,13 @@ from datetime import datetime, timedelta -from django.core import mail - import mock +from django.core import mail +from django.core.management import call_command from nose.tools import eq_ import kitsune.questions.tasks from kitsune.products.tests import ProductFactory from kitsune.questions import config -from kitsune.questions.cron import escalate_questions, report_employee_answers from kitsune.questions.tests import AnswerFactory, QuestionFactory from kitsune.sumo.tests import TestCase from kitsune.users.models import Group @@ -68,7 +67,7 @@ class TestEscalateCron(TestCase): q = QuestionFactory(created=datetime.now() - timedelta(hours=24, minutes=10), product=tb) # Run the cron job and verify only 3 questions were escalated. - eq_(len(questions_to_escalate), escalate_questions()) + eq_(str(len(questions_to_escalate)), call_command('escalate_questions')) class TestEmployeeReportCron(TestCase): @@ -104,7 +103,7 @@ class TestEmployeeReportCron(TestCase): AnswerFactory(question=q) QuestionFactory() - report_employee_answers() + call_command('report_employee_answers') # Get the last email and verify contents email = mail.outbox[len(mail.outbox) - 1] diff --git a/kitsune/questions/tests/test_models.py b/kitsune/questions/tests/test_models.py index 99c383983..488a875e3 100644 --- a/kitsune/questions/tests/test_models.py +++ b/kitsune/questions/tests/test_models.py @@ -1,27 +1,24 @@ # -*- coding: utf-8 -*- from datetime import datetime, timedelta -from django.db.models import Q - import mock from actstream.models import Action, Follow +from django.core.management import call_command +from django.db.models import Q from nose.tools import eq_, ok_, raises from taggit.models import Tag import kitsune.sumo.models from kitsune.flagit.models import FlaggedObject -from kitsune.search.tests.test_es import ElasticTestCase -from kitsune.questions.cron import auto_archive_old_questions +from kitsune.questions import config, models from kitsune.questions.events import QuestionReplyEvent -from kitsune.questions import models -from kitsune.questions.models import ( - Answer, Question, QuestionMetaData, QuestionVisits, - _tenths_version, _has_beta, VoteMetadata, InvalidUserException, - AlreadyTakenException) +from kitsune.questions.models import (AlreadyTakenException, Answer, InvalidUserException, + Question, QuestionMetaData, QuestionVisits, VoteMetadata, + _has_beta, _tenths_version) from kitsune.questions.tasks import update_answer_pages -from kitsune.questions.tests import ( - TestCaseBase, tags_eq, QuestionFactory, AnswerFactory, QuestionVoteFactory) -from kitsune.questions import config +from kitsune.questions.tests import (AnswerFactory, QuestionFactory, QuestionVoteFactory, + TestCaseBase, tags_eq) +from kitsune.search.tests.test_es import ElasticTestCase from kitsune.sumo import googleanalytics from kitsune.sumo.tests import TestCase from kitsune.tags.tests import TagFactory @@ -532,7 +529,7 @@ class OldQuestionsArchiveTest(ElasticTestCase): self.refresh() - auto_archive_old_questions() + call_command('auto_archive_old_questions') # There are three questions. 
eq_(len(list(Question.objects.all())), 3) diff --git a/kitsune/questions/tests/test_votes.py b/kitsune/questions/tests/test_votes.py index 6cbbc5897..884c71241 100644 --- a/kitsune/questions/tests/test_votes.py +++ b/kitsune/questions/tests/test_votes.py @@ -1,8 +1,8 @@ +from django.core.management import call_command from nose.tools import eq_ from kitsune.questions.models import Question, QuestionMappingType -from kitsune.questions.tests import TestCaseBase, QuestionFactory, QuestionVoteFactory -from kitsune.questions.cron import update_weekly_votes +from kitsune.questions.tests import QuestionFactory, QuestionVoteFactory, TestCaseBase from kitsune.search.tests.test_es import ElasticTestCase @@ -27,7 +27,7 @@ class TestVotes(TestCaseBase): q.num_votes_past_week = 0 q.save() - update_weekly_votes() + call_command('update_weekly_votes') q = Question.objects.get(pk=q.pk) eq_(1, q.num_votes_past_week) @@ -51,7 +51,7 @@ class TestVotesWithElasticSearch(ElasticTestCase): q.num_votes_past_week = 0 q.save() - update_weekly_votes() + call_command('update_weekly_votes') self.refresh() q = Question.objects.get(pk=q.pk) diff --git a/kitsune/settings.py b/kitsune/settings.py index 80112dbf8..d83b25cfd 100644 --- a/kitsune/settings.py +++ b/kitsune/settings.py @@ -632,7 +632,6 @@ INSTALLED_APPS = ( 'kitsune.search', 'kitsune.forums', 'djcelery', - 'cronjobs', 'tidings', 'rest_framework.authtoken', 'kitsune.questions', diff --git a/kitsune/sumo/cron.py b/kitsune/sumo/cron.py deleted file mode 100644 index d737a54c7..000000000 --- a/kitsune/sumo/cron.py +++ /dev/null @@ -1,11 +0,0 @@ -from datetime import datetime - -import cronjobs - -from kitsune.sumo.tasks import measure_queue_lag - - -@cronjobs.register -def enqueue_lag_monitor_task(): - """Fires a task that measures the queue lag.""" - measure_queue_lag.delay(datetime.now()) diff --git a/kitsune/sumo/management/commands/enqueue_lag_monitor_task.py b/kitsune/sumo/management/commands/enqueue_lag_monitor_task.py new file mode 100644 index 000000000..fd0e8d9fe --- /dev/null +++ b/kitsune/sumo/management/commands/enqueue_lag_monitor_task.py @@ -0,0 +1,12 @@ +from datetime import datetime + +from django.core.management.base import BaseCommand + +from kitsune.sumo.tasks import measure_queue_lag + + +class Command(BaseCommand): + help = "Fire a task that measures the queue lag." + + def handle(self, **options): + measure_queue_lag.delay(datetime.now()) diff --git a/kitsune/users/cron.py b/kitsune/users/cron.py deleted file mode 100644 index 689e20b79..000000000 --- a/kitsune/users/cron.py +++ /dev/null @@ -1,53 +0,0 @@ -import cronjobs -from datetime import datetime, timedelta - -from rest_framework.authtoken.models import Token - -from kitsune.questions.models import Answer -from kitsune.search.models import generate_tasks -from kitsune.search.tasks import index_task -from kitsune.users.models import RegistrationProfile, UserMappingType -from kitsune.wiki.models import Revision - - -@cronjobs.register -def remove_expired_registration_profiles(): - """"Cleanup expired registration profiles and users that not activated.""" - RegistrationProfile.objects.delete_expired_users() - generate_tasks() - - -@cronjobs.register -def reindex_users_that_contributed_yesterday(): - """Update the users (in ES) that contributed yesterday. - - The idea is to update the last_contribution_date field. 
- """ - today = datetime.now() - yesterday = today - timedelta(days=1) - - # Support Forum answers - user_ids = list(Answer.objects.filter( - created__gte=yesterday, - created__lt=today).values_list('creator_id', flat=True)) - - # KB Edits - user_ids += list(Revision.objects.filter( - created__gte=yesterday, - created__lt=today).values_list('creator_id', flat=True)) - - # KB Reviews - user_ids += list(Revision.objects.filter( - reviewed__gte=yesterday, - reviewed__lt=today).values_list('reviewer_id', flat=True)) - - # Note: - # Army of Awesome replies are live indexed. No need to do anything here. - - index_task.delay(UserMappingType, list(set(user_ids))) - - -@cronjobs.register -def clear_expired_auth_tokens(): - too_old = datetime.now() - timedelta(days=30) - Token.objects.filter(created__lt=too_old).delete() diff --git a/kitsune/users/management/commands/clear_expired_auth_tokens.py b/kitsune/users/management/commands/clear_expired_auth_tokens.py new file mode 100644 index 000000000..85f2184d8 --- /dev/null +++ b/kitsune/users/management/commands/clear_expired_auth_tokens.py @@ -0,0 +1,11 @@ +from datetime import datetime, timedelta + +from django.core.management.base import BaseCommand +from rest_framework.authtoken.models import Token + + +class Command(BaseCommand): + + def handle(self, **options): + too_old = datetime.now() - timedelta(days=30) + Token.objects.filter(created__lt=too_old).delete() diff --git a/kitsune/users/management/commands/reindex_users_that_contributed_yesterday.py b/kitsune/users/management/commands/reindex_users_that_contributed_yesterday.py new file mode 100644 index 000000000..9561cec77 --- /dev/null +++ b/kitsune/users/management/commands/reindex_users_that_contributed_yesterday.py @@ -0,0 +1,39 @@ +from datetime import datetime, timedelta + +from django.core.management.base import BaseCommand + +from kitsune.questions.models import Answer +from kitsune.search.tasks import index_task +from kitsune.users.models import UserMappingType +from kitsune.wiki.models import Revision + + +class Command(BaseCommand): + help = "Update the users (in ES) that contributed yesterday." + + def handle(self, **options): + """ + The idea is to update the last_contribution_date field. + """ + today = datetime.now() + yesterday = today - timedelta(days=1) + + # Support Forum answers + user_ids = list(Answer.objects.filter( + created__gte=yesterday, + created__lt=today).values_list('creator_id', flat=True)) + + # KB Edits + user_ids += list(Revision.objects.filter( + created__gte=yesterday, + created__lt=today).values_list('creator_id', flat=True)) + + # KB Reviews + user_ids += list(Revision.objects.filter( + reviewed__gte=yesterday, + reviewed__lt=today).values_list('reviewer_id', flat=True)) + + # Note: + # Army of Awesome replies are live indexed. No need to do anything here. + + index_task.delay(UserMappingType, list(set(user_ids))) diff --git a/kitsune/users/management/commands/remove_expired_registration_profiles.py b/kitsune/users/management/commands/remove_expired_registration_profiles.py new file mode 100644 index 000000000..cd6555f42 --- /dev/null +++ b/kitsune/users/management/commands/remove_expired_registration_profiles.py @@ -0,0 +1,12 @@ +from django.core.management.base import BaseCommand + +from kitsune.search.models import generate_tasks +from kitsune.users.models import RegistrationProfile + + +class Command(BaseCommand): + help = "Cleanup expired registration profiles and users that not activated." 
+ + def handle(self, **options): + RegistrationProfile.objects.delete_expired_users() + generate_tasks() diff --git a/kitsune/users/tests/test_es.py b/kitsune/users/tests/test_es.py index f37e70136..5e845e8b5 100644 --- a/kitsune/users/tests/test_es.py +++ b/kitsune/users/tests/test_es.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- from datetime import datetime, timedelta +from django.core.management import call_command from nose.tools import eq_ from kitsune.customercare.tests import ReplyFactory from kitsune.questions.tests import AnswerFactory from kitsune.search.tests.test_es import ElasticTestCase -from kitsune.users.cron import reindex_users_that_contributed_yesterday from kitsune.users.models import UserMappingType from kitsune.users.tests import ProfileFactory, UserFactory from kitsune.wiki.tests import RevisionFactory @@ -177,7 +177,7 @@ class UserSearchTests(ElasticTestCase): u = UserFactory(username='answerer') AnswerFactory(creator=u, created=yesterday) - reindex_users_that_contributed_yesterday() + call_command('reindex_users_that_contributed_yesterday') self.refresh() data = UserMappingType.search().query(username__match='answerer')[0] @@ -187,7 +187,7 @@ class UserSearchTests(ElasticTestCase): u = UserFactory(username='editor') RevisionFactory(creator=u, created=yesterday) - reindex_users_that_contributed_yesterday() + call_command('reindex_users_that_contributed_yesterday') self.refresh() data = UserMappingType.search().query(username__match='editor')[0] @@ -197,7 +197,7 @@ class UserSearchTests(ElasticTestCase): u = UserFactory(username='reviewer') RevisionFactory(reviewer=u, reviewed=yesterday) - reindex_users_that_contributed_yesterday() + call_command('reindex_users_that_contributed_yesterday') self.refresh() data = UserMappingType.search().query(username__match='reviewer')[0] diff --git a/kitsune/wiki/cron.py b/kitsune/wiki/cron.py deleted file mode 100644 index 15a71d536..000000000 --- a/kitsune/wiki/cron.py +++ /dev/null @@ -1,150 +0,0 @@ -import cronjobs -import waffle - -from itertools import chain - -from django.conf import settings -from django.contrib.sites.models import Site -from django.db.models import F, Q, ObjectDoesNotExist - -from multidb.pinning import pin_this_thread, unpin_this_thread -from django_statsd.clients import statsd -from django.utils.translation import ugettext as _, pgettext - -from kitsune.products.models import Product -from kitsune.search.tasks import index_task -from kitsune.sumo import email_utils -from kitsune.wiki import tasks -from kitsune.wiki.config import REDIRECT_HTML -from kitsune.wiki.models import Document, DocumentMappingType, Revision, Locale -from kitsune.wiki.config import (HOW_TO_CATEGORY, TROUBLESHOOTING_CATEGORY, - TEMPLATES_CATEGORY) - - -@cronjobs.register -def generate_missing_share_links(): - """Generate share links for documents that may be missing them.""" - document_ids = (Document.objects.select_related('revision') - .filter(parent=None, - share_link='', - is_template=False, - is_archived=False, - category__in=settings.IA_DEFAULT_CATEGORIES) - .exclude(slug='', - current_revision=None, - html__startswith=REDIRECT_HTML) - .values_list('id', flat=True)) - - tasks.add_short_links.delay(list(document_ids)) - - -@cronjobs.register -def rebuild_kb(): - # If rebuild on demand switch is on, do nothing. 
- if waffle.switch_is_active('wiki-rebuild-on-demand'): - return - - tasks.rebuild_kb() - - -@cronjobs.register -def reindex_kb(): - """Reindex wiki_document.""" - index_task.delay(DocumentMappingType, DocumentMappingType.get_indexable()) - - -@cronjobs.register -def send_weekly_ready_for_review_digest(): - """Sends out the weekly "Ready for review" digest email.""" - - @email_utils.safe_translation - def _send_mail(locale, user, context): - subject = _('[Reviews Pending: %s] SUMO needs your help!') % locale - - mail = email_utils.make_mail( - subject=subject, - text_template='wiki/email/ready_for_review_weekly_digest.ltxt', - html_template='wiki/email/ready_for_review_weekly_digest.html', - context_vars=context, - from_email=settings.TIDINGS_FROM_ADDRESS, - to_email=user.email) - - email_utils.send_messages([mail]) - - # Get the list of revisions ready for review - categories = (HOW_TO_CATEGORY, TROUBLESHOOTING_CATEGORY, - TEMPLATES_CATEGORY) - - revs = Revision.objects.filter(reviewed=None, document__is_archived=False, - document__category__in=categories) - - revs = revs.filter(Q(document__current_revision_id__lt=F('id')) | - Q(document__current_revision_id=None)) - - locales = revs.values_list('document__locale', flat=True).distinct() - products = Product.objects.all() - - for l in locales: - docs = revs.filter(document__locale=l).values_list( - 'document', flat=True).distinct() - docs = Document.objects.filter(id__in=docs) - - try: - leaders = Locale.objects.get(locale=l).leaders.all() - reviewers = Locale.objects.get(locale=l).reviewers.all() - users = list(set(chain(leaders, reviewers))) - except ObjectDoesNotExist: - # Locale does not exist, so skip to the next locale - continue - - for u in users: - docs_list = [] - for p in products: - product_docs = docs.filter(Q(parent=None, products__in=[p]) | - Q(parent__products__in=[p])) - if product_docs: - docs_list.append(dict( - product=pgettext('DB: products.Product.title', p.title), - docs=product_docs)) - - product_docs = docs.filter(Q(parent=None, products=None) | - Q(parent__products=None)) - - if product_docs: - docs_list.append(dict(product=_('Other products'), - docs=product_docs)) - - _send_mail(l, u, { - 'host': Site.objects.get_current().domain, - 'locale': l, - 'recipient': u, - 'docs_list': docs_list, - 'products': products - }) - - statsd.incr('wiki.cron.weekly-digest-mail') - - -@cronjobs.register -def fix_current_revisions(): - """Fixes documents that have the current_revision set incorrectly.""" - try: - # Sends all writes to the master DB. Slaves are readonly. 
- pin_this_thread() - - docs = Document.objects.all() - - for d in docs: - revs = Revision.objects.filter(document=d, is_approved=True) - revs = list(revs.order_by('-reviewed')[:1]) - - if len(revs): - rev = revs[0] - - if d.current_revision != rev: - d.current_revision = rev - d.save() - print d.get_absolute_url() - statsd.incr('wiki.cron.fix-current-revision') - finally: - unpin_this_thread() diff --git a/kitsune/wiki/management/commands/fix_current_revisions.py b/kitsune/wiki/management/commands/fix_current_revisions.py new file mode 100644 index 000000000..0abe9e7d4 --- /dev/null +++ b/kitsune/wiki/management/commands/fix_current_revisions.py @@ -0,0 +1,31 @@ +from django.core.management.base import BaseCommand +from django_statsd.clients import statsd +from multidb.pinning import pin_this_thread, unpin_this_thread + +from kitsune.wiki.models import Document, Revision + + +class Command(BaseCommand): + help = "Fixes documents that have the current_revision set incorrectly." + + def handle(self, **options): + try: + # Sends all writes to the master DB. Slaves are readonly. + pin_this_thread() + + docs = Document.objects.all() + + for d in docs: + revs = Revision.objects.filter(document=d, is_approved=True) + revs = list(revs.order_by('-reviewed')[:1]) + + if len(revs): + rev = revs[0] + + if d.current_revision != rev: + d.current_revision = rev + d.save() + print d.get_absolute_url() + statsd.incr('wiki.cron.fix-current-revision') + finally: + unpin_this_thread() diff --git a/kitsune/wiki/management/commands/generate_missing_share_links.py b/kitsune/wiki/management/commands/generate_missing_share_links.py new file mode 100644 index 000000000..f6a8fa145 --- /dev/null +++ b/kitsune/wiki/management/commands/generate_missing_share_links.py @@ -0,0 +1,27 @@ +from django.conf import settings +from django.core.management.base import BaseCommand + +from kitsune.wiki import tasks +from kitsune.wiki.config import REDIRECT_HTML +from kitsune.wiki.models import Document + + +class Command(BaseCommand): + help = "Generate share links for documents that may be missing them." + + def handle(self, **options): + document_ids = ( + Document.objects.select_related('revision') + .filter( + parent=None, + share_link='', + is_template=False, + is_archived=False, + category__in=settings.IA_DEFAULT_CATEGORIES) + .exclude( + slug='', + current_revision=None, + html__startswith=REDIRECT_HTML) + .values_list('id', flat=True)) + + tasks.add_short_links.delay(list(document_ids)) diff --git a/kitsune/wiki/management/commands/rebuild_kb.py b/kitsune/wiki/management/commands/rebuild_kb.py new file mode 100644 index 000000000..26426fce2 --- /dev/null +++ b/kitsune/wiki/management/commands/rebuild_kb.py @@ -0,0 +1,14 @@ +import waffle +from django.core.management.base import BaseCommand + +from kitsune.wiki import tasks + + +class Command(BaseCommand): + + def handle(self, **options): + # If rebuild on demand switch is on, do nothing. + if waffle.switch_is_active('wiki-rebuild-on-demand'): + return + + tasks.rebuild_kb() diff --git a/kitsune/wiki/management/commands/reindex_kb.py b/kitsune/wiki/management/commands/reindex_kb.py new file mode 100644 index 000000000..2f3f603ac --- /dev/null +++ b/kitsune/wiki/management/commands/reindex_kb.py @@ -0,0 +1,11 @@ +from django.core.management.base import BaseCommand + +from kitsune.search.tasks import index_task +from kitsune.wiki.models import DocumentMappingType + + +class Command(BaseCommand): + help = "Reindex wiki_document." 
+ + def handle(self, **options): + index_task.delay(DocumentMappingType, DocumentMappingType.get_indexable()) diff --git a/kitsune/wiki/management/commands/send_weekly_ready_for_review_digest.py b/kitsune/wiki/management/commands/send_weekly_ready_for_review_digest.py new file mode 100644 index 000000000..852028611 --- /dev/null +++ b/kitsune/wiki/management/commands/send_weekly_ready_for_review_digest.py @@ -0,0 +1,89 @@ +from itertools import chain + +from django.conf import settings +from django.contrib.sites.models import Site +from django.core.management.base import BaseCommand +from django.db.models import F, ObjectDoesNotExist, Q +from django.utils.translation import pgettext +from django.utils.translation import ugettext as _ +from django_statsd.clients import statsd + +from kitsune.products.models import Product +from kitsune.sumo import email_utils +from kitsune.wiki.config import (HOW_TO_CATEGORY, TEMPLATES_CATEGORY, + TROUBLESHOOTING_CATEGORY) +from kitsune.wiki.models import Document, Locale, Revision + + +class Command(BaseCommand): + help = 'Sends out the weekly "Ready for review" digest email.' + + def handle(self, **options): + + @email_utils.safe_translation + def _send_mail(locale, user, context): + subject = _('[Reviews Pending: %s] SUMO needs your help!') % locale + + mail = email_utils.make_mail( + subject=subject, + text_template='wiki/email/ready_for_review_weekly_digest.ltxt', + html_template='wiki/email/ready_for_review_weekly_digest.html', + context_vars=context, + from_email=settings.TIDINGS_FROM_ADDRESS, + to_email=user.email) + + email_utils.send_messages([mail]) + + # Get the list of revisions ready for review + categories = (HOW_TO_CATEGORY, TROUBLESHOOTING_CATEGORY, TEMPLATES_CATEGORY) + + revs = Revision.objects.filter( + reviewed=None, document__is_archived=False, document__category__in=categories) + + revs = revs.filter( + Q(document__current_revision_id__lt=F('id')) | + Q(document__current_revision_id=None)) + + locales = revs.values_list('document__locale', flat=True).distinct() + products = Product.objects.all() + + for l in locales: + docs = revs.filter(document__locale=l).values_list( + 'document', flat=True).distinct() + docs = Document.objects.filter(id__in=docs) + + try: + leaders = Locale.objects.get(locale=l).leaders.all() + reviewers = Locale.objects.get(locale=l).reviewers.all() + users = list(set(chain(leaders, reviewers))) + except ObjectDoesNotExist: + # Locale does not exist, so skip to the next locale + continue + + for u in users: + docs_list = [] + for p in products: + product_docs = docs.filter( + Q(parent=None, products__in=[p]) | + Q(parent__products__in=[p])) + if product_docs: + docs_list.append(dict( + product=pgettext('DB: products.Product.title', p.title), + docs=product_docs)) + + product_docs = docs.filter( + Q(parent=None, products=None) | + Q(parent__products=None)) + + if product_docs: + docs_list.append(dict(product=_('Other products'), docs=product_docs)) + + _send_mail(l, u, { + 'host': Site.objects.get_current().domain, + 'locale': l, + 'recipient': u, + 'docs_list': docs_list, + 'products': products + }) + + statsd.incr('wiki.cron.weekly-digest-mail') diff --git a/requirements/default.txt b/requirements/default.txt index 0de4df994..c8cf2d66b 100644 --- a/requirements/default.txt +++ b/requirements/default.txt @@ -137,8 +137,6 @@ django-celery==3.2.2 \ django-cors-headers==2.4.0 \ --hash=sha256:5545009c9b233ea7e70da7dbab7cb1c12afa01279895086f98ec243d7eab46fa \ 
--hash=sha256:c4c2ee97139d18541a1be7d96fe337d1694623816d83f53cb7c00da9b94acae1 -django-cronjobs==0.2.3 \ - --hash=sha256:177295b1442400c92cdb67e8e18f9ff5946fb442f85813b9d0837823722ea08d https://github.com/mozilla/django-csp/archive/5c5f5a6b55fb78e99db939c79f0f0d8003a62c98.tar.gz#egg=django-csp \ --hash=sha256:d6ed80738b07d60d2e985ccc284448e7dbd7c41682dc4cf65734d0da576f700e django-filter==1.1.0 \ diff --git a/scripts/cron.py b/scripts/cron.py index c4b73696b..6b62a637c 100644 --- a/scripts/cron.py +++ b/scripts/cron.py @@ -57,7 +57,7 @@ class scheduled_job(object): max_instances=1, coalesce=True) @babis.decorator(ping_after=settings.DMS_ENQUEUE_LAG_MONITOR_TASK) def job_enqueue_lag_monitor_task(): - call_command('cron enqueue_lag_monitor_task') + call_command('enqueue_lag_monitor_task') # Every hour. @@ -65,7 +65,7 @@ def job_enqueue_lag_monitor_task(): max_instances=1, coalesce=True, skip=settings.READ_ONLY) @babis.decorator(ping_after=settings.DMS_SEND_WELCOME_EMAILS) def job_send_welcome_emails(): - call_command('cron send_welcome_emails') + call_command('send_welcome_emails') # Every hour. @@ -73,7 +73,7 @@ def job_send_welcome_emails(): max_instances=1, coalesce=True, skip=(settings.READ_ONLY or settings.STAGE)) @babis.decorator(ping_after=settings.DMS_PROCESS_EXIT_SURVEYS) def job_process_exit_surveys(): - call_command('cron process_exit_surveys') + call_command('process_exit_surveys') @scheduled_job('cron', month='*', day='*', hour='*', minute='45', max_instances=1, coalesce=True) @@ -81,7 +81,7 @@ def job_process_exit_surveys(): def job_reindex(): # Look back 90 minutes for new items to avoid racing conditions between # cron execution and db updates. - call_command('esreindex --minutes-ago 90') + call_command('esreindex', '--minutes-ago 90') # Every 6 hours. @@ -96,7 +96,7 @@ def job_update_product_details(): max_instances=1, coalesce=True, skip=settings.READ_ONLY) @babis.decorator(ping_after=settings.DMS_GENERATE_MISSING_SHARE_LINKS) def job_generate_missing_share_links(): - call_command('cron generate_missing_share_links') + call_command('generate_missing_share_links') # Once per day. 
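The scripts/cron.py hunks that follow all make the same substitution: with django-cronjobs removed, each job is a first-class Django management command, so the scheduler calls it by its own name instead of passing a job name through the old `cron` wrapper. A minimal sketch of the new invocation pattern, using only command names and arguments that appear elsewhere in this patch (the shell form at the end is an assumption inferred from the removed call_command('cron ...') strings, not something this patch states):

    from datetime import date, timedelta
    from django.core.management import call_command

    # Each former cronjob is now addressed directly by command name.
    call_command('rebuild_kb')

    # Commands that take a date receive it as an ordinary positional argument,
    # as the reworked update_contributor_metrics tests earlier in the diff do.
    call_command('update_contributor_metrics', str(date.today() + timedelta(days=1)))

    # In this Django version call_command returns whatever handle() returns, and
    # any return value is written to stdout, so handle() must return text (or None);
    # that is why escalate_questions returns str(len(...)) and its test compares
    # against a string.
    result = call_command('escalate_questions')  # e.g. '3' when three questions match

From the shell the equivalent is ./manage.py <command_name> rather than the former ./manage.py cron <job_name> entry point.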
@@ -104,124 +104,124 @@ def job_generate_missing_share_links(): max_instances=1, coalesce=True, skip=settings.READ_ONLY) @babis.decorator(ping_after=settings.DMS_REBUILD_KB) def job_rebuild_kb(): - call_command('cron rebuild_kb') + call_command('rebuild_kb') @scheduled_job('cron', month='*', day='*', hour='00', minute='42', max_instances=1, coalesce=True, skip=settings.READ_ONLY) @babis.decorator(ping_after=settings.DMS_UPDATE_TOP_CONTRIBUTORS) def job_update_top_contributors(): - call_command('cron update_top_contributors') + call_command('update_top_contributors') @scheduled_job('cron', month='*', day='*', hour='01', minute='00', max_instances=1, coalesce=True, skip=settings.READ_ONLY) @babis.decorator(ping_after=settings.DMS_UPDATE_L10N_COVERAGE_METRICS) def job_update_l10n_coverage_metrics(): - call_command('cron update_l10n_coverage_metrics') + call_command('update_l10n_coverage_metrics') @scheduled_job('cron', month='*', day='*', hour='01', minute='00', max_instances=1, coalesce=True, skip=(settings.READ_ONLY or settings.STAGE)) @babis.decorator(ping_after=settings.DMS_CALCULATE_CSAT_METRICS) def job_calculate_csat_metrics(): - call_command('cron calculate_csat_metrics') + call_command('calculate_csat_metrics') @scheduled_job('cron', month='*', day='*', hour='01', minute='11', max_instances=1, coalesce=True, skip=settings.READ_ONLY) @babis.decorator(ping_after=settings.DMS_REPORT_EMPLOYEE_ANSWERS) def job_report_employee_answers(): - call_command('cron report_employee_answers') + call_command('report_employee_answers') @scheduled_job('cron', month='*', day='*', hour='01', minute='30', max_instances=1, coalesce=True, skip=settings.STAGE) @babis.decorator(ping_after=settings.DMS_REINDEX_USERS_THAT_CONTRIBUTED_YESTERDAY) def job_reindex_users_that_contributed_yesterday(): - call_command('cron reindex_users_that_contributed_yesterday') + call_command('reindex_users_that_contributed_yesterday') @scheduled_job('cron', month='*', day='*', hour='01', minute='40', max_instances=1, coalesce=True, skip=settings.READ_ONLY) @babis.decorator(ping_after=settings.DMS_UPDATE_WEEKLY_VOTES) def job_update_weekly_votes(): - call_command('cron update_weekly_votes') + call_command('update_weekly_votes') # @scheduled_job('cron', month='*', day='*', hour='02', minute='00', max_instances=1, coalesce=True) # @babis.decorator(ping_after=settings.DMS_UPDATE_SEARCH_CTR_METRIC) # def job_update_search_ctr_metric(): -# call_command('cron update_search_ctr_metric') +# call_command('update_search_ctr_metric') @scheduled_job('cron', month='*', day='*', hour='02', minute='47', max_instances=1, coalesce=True, skip=settings.READ_ONLY) @babis.decorator(ping_after=settings.DMS_REMOVE_EXPIRED_REGISTRATION_PROFILES) def job_remove_expired_registration_profiles(): - call_command('cron remove_expired_registration_profiles') + call_command('remove_expired_registration_profiles') @scheduled_job('cron', month='*', day='*', hour='03', minute='00', max_instances=1, coalesce=True, skip=(settings.READ_ONLY or settings.STAGE)) @babis.decorator(ping_after=settings.DMS_UPDATE_CONTRIBUTOR_METRICS) def job_update_contributor_metrics(): - call_command('cron update_contributor_metrics') + call_command('update_contributor_metrics') @scheduled_job('cron', month='*', day='*', hour='04', minute='00', max_instances=1, coalesce=True, skip=settings.READ_ONLY) @babis.decorator(ping_after=settings.DMS_AUTO_ARCHIVE_OLD_QUESTIONS) def job_auto_archive_old_questions(): - call_command('cron auto_archive_old_questions') + 
call_command('auto_archive_old_questions') @scheduled_job('cron', month='*', day='*', hour='07', minute='00', max_instances=1, coalesce=True, skip=(settings.READ_ONLY or settings.STAGE)) @babis.decorator(ping_after=settings.DMS_SURVEY_RECENT_ASKERS) def job_survey_recent_askers(): - call_command('cron survey_recent_askers') + call_command('survey_recent_askers') @scheduled_job('cron', month='*', day='*', hour='08', minute='00', max_instances=1, coalesce=True, skip=settings.READ_ONLY) @babis.decorator(ping_after=settings.DMS_CLEAR_EXPIRED_AUTH_TOKENS) def job_clear_expired_auth_tokens(): - call_command('cron clear_expired_auth_tokens') + call_command('clear_expired_auth_tokens') # @scheduled_job('cron', month='*', day='*', hour='09', minute='00', max_instances=1, coalesce=True) # @babis.decorator(ping_after=settings.DMS_UPDATE_VISITORS_METRIC) # def job_update_visitors_metric(): -# call_command('cron update_visitors_metric') +# call_command('update_visitors_metric') @scheduled_job('cron', month='*', day='*', hour='10', minute='00', max_instances=1, coalesce=True, skip=(settings.READ_ONLY or settings.STAGE)) @babis.decorator(ping_after=settings.DMS_UPDATE_L10N_METRIC) def job_update_l10n_metric(): - call_command('cron update_l10n_metric') + call_command('update_l10n_metric') @scheduled_job('cron', month='*', day='*', hour='16', minute='00', max_instances=1, coalesce=True, skip=(settings.READ_ONLY or settings.STAGE)) @babis.decorator(ping_after=settings.DMS_RELOAD_WIKI_TRAFFIC_STATS) def job_reload_wiki_traffic_stats(): - call_command('cron reload_wiki_traffic_stats') + call_command('reload_wiki_traffic_stats') @scheduled_job('cron', month='*', day='*', hour='21', minute='00', max_instances=1, coalesce=True, skip=settings.READ_ONLY) @babis.decorator(ping_after=settings.DMS_CACHE_MOST_UNHELPFUL_KB_ARTICLES) def job_cache_most_unhelpful_kb_articles(): - call_command('cron cache_most_unhelpful_kb_articles') + call_command('cache_most_unhelpful_kb_articles') @scheduled_job('cron', month='*', day='*', hour='23', minute='00', max_instances=1, coalesce=True, skip=(settings.READ_ONLY or settings.STAGE)) @babis.decorator(ping_after=settings.DMS_RELOAD_QUESTION_TRAFFIC_STATS) def job_reload_question_traffic_stats(): - call_command('cron reload_question_traffic_stats') + call_command('reload_question_traffic_stats') # Once per week @@ -236,21 +236,21 @@ def job_purge_hashes(): max_instances=1, coalesce=True, skip=(settings.READ_ONLY or settings.STAGE)) @babis.decorator(ping_after=settings.DMS_SEND_WEEKLY_READY_FOR_REVIEW_DIGEST) def job_send_weekly_ready_for_review_digest(): - call_command('cron send_weekly_ready_for_review_digest') + call_command('send_weekly_ready_for_review_digest') @scheduled_job('cron', month='*', day='*', hour='00', minute='00', day_of_week=0, max_instances=1, coalesce=True, skip=settings.READ_ONLY) @babis.decorator(ping_after=settings.DMS_FIX_CURRENT_REVISIONS) def job_fix_current_revisions(): - call_command('cron fix_current_revisions') + call_command('fix_current_revisions') @scheduled_job('cron', month='*', day='*', hour='00', minute='30', day_of_week=1, max_instances=1, coalesce=True, skip=settings.READ_ONLY) @babis.decorator(ping_after=settings.DMS_COHORT_ANALYSIS) def job_cohort_analysis(): - call_command('cron cohort_analysis') + call_command('cohort_analysis') # Once per month @@ -258,7 +258,7 @@ def job_cohort_analysis(): max_instances=1, coalesce=True, skip=settings.READ_ONLY) @babis.decorator(ping_after=settings.DMS_UPDATE_L10N_CONTRIBUTOR_METRICS) def 
job_update_l10n_contributor_metrics(): - call_command('cron update_l10n_contributor_metrics') + call_command('update_l10n_contributor_metrics') def run():