[bug 757028] Add daily calculation of contributor metrics.

Add a new cron job that updates the number of contributors on a daily
basis. Because this data is now graphed with stock charts, we can see
daily, weekly and monthly groupings of it.
This commit is contained in:
Ricky Rosario 2012-06-14 21:39:14 -04:00
Родитель 5af6aafe67
Коммит 114a1053fc
7 изменённых файлов: 267 добавлений и 79 удалений

Просмотреть файл

@ -10,11 +10,15 @@ from tastypie.authorization import Authorization
from tastypie.cache import SimpleCache
from tastypie.resources import Resource
from customercare.models import Reply
from kpi.models import (Metric, MetricKind, VISITORS_METRIC_CODE,
L10N_METRIC_CODE)
from kpi.models import (Metric, MetricKind,
AOA_CONTRIBUTORS_METRIC_CODE,
KB_ENUS_CONTRIBUTORS_METRIC_CODE,
KB_L10N_CONTRIBUTORS_METRIC_CODE,
L10N_METRIC_CODE,
SUPPORT_FORUM_CONTRIBUTORS_METRIC_CODE,
VISITORS_METRIC_CODE)
from questions.models import Question, Answer, AnswerVote
from wiki.models import HelpfulVote, Revision
from wiki.models import HelpfulVote
class CachedResource(Resource):
@ -247,73 +251,37 @@ class ActiveContributorsResource(CachedResource):
aoa = fields.IntegerField('aoa', default=0)
def get_object_list(self, request):
# TODO: This whole method is yucky... Is there a nicer way to do this?
# It will probably get soon nuked in favor of using the Metric model
# when we need to go more granular than monthly.
revisions = _monthly_qs_for(Revision)
# Set up the queries for the data we need
kind = MetricKind.objects.get(code=KB_ENUS_CONTRIBUTORS_METRIC_CODE)
en_us = Metric.objects.filter(kind=kind).order_by('-start')
creators = revisions.values('year', 'month', 'creator').distinct()
reviewers = revisions.values('year', 'month', 'reviewer').distinct()
kind = MetricKind.objects.get(code=KB_L10N_CONTRIBUTORS_METRIC_CODE)
l10n = Metric.objects.filter(kind=kind).order_by('-start')
def _add_user(monthly_dict, year, month, userid):
if userid:
yearmonth = (year, month)
if yearmonth not in monthly_dict:
monthly_dict[yearmonth] = set()
monthly_dict[yearmonth].add(userid)
kind = MetricKind.objects.get(
code=SUPPORT_FORUM_CONTRIBUTORS_METRIC_CODE)
answers = Metric.objects.filter(kind=kind).order_by('-start')
def _add_users(monthly_dict, values, column):
for r in values:
_add_user(monthly_dict, r['year'], r['month'], r[column])
kind = MetricKind.objects.get(code=AOA_CONTRIBUTORS_METRIC_CODE)
aoa = Metric.objects.filter(kind=kind).order_by('-start')
# Build the en-US KB contributors count list
d = {}
_add_users(d, creators.filter(document__locale='en-US'), 'creator')
_add_users(d, reviewers.filter(document__locale='en-US'), 'reviewer')
en_us_list = [{'month': k[1], 'year': k[0], 'count': len(v)} for
k, v in d.items()]
# Put all the results in a dict with the date as the key.
results_dict = {}
# Build the non en-US KB contributors count list
d = {}
_add_users(d, creators.exclude(document__locale='en-US'), 'creator')
_add_users(d, reviewers.exclude(document__locale='en-US'), 'reviewer')
non_en_us_list = [{'month': k[1], 'year': k[0], 'count': len(v)} for
k, v in d.items()]
def merge_results(metrics_qs, label):
for metric in metrics_qs:
results_dict.setdefault(metric.end, {})[label] = metric.value
# Build the support forum contributors count list aggregated by month
answerers = (Answer.objects
.exclude(creator=F('question__creator'))
.filter(created__gte=_start_date())
.extra(
select={
'month': 'extract( month from questions_answer.created )',
'year': 'extract( year from questions_answer.created )',
})
.values('year', 'month', 'creator')
.annotate(count=Count('creator'))
.filter(count__gte=10))
d = {}
for a in answerers:
_add_user(d, a['year'], a['month'], a['creator'])
support_forum = [{'month': k[1], 'year': k[0], 'count': len(v)} for
k, v in d.items()]
merge_results(en_us, 'en_us')
merge_results(l10n, 'non_en_us')
merge_results(answers, 'support_forum')
merge_results(aoa, 'aoa')
# Build the AoA contributors count list aggregated by month
qs = _monthly_qs_for(Reply).values(
'year', 'month', 'twitter_username')
contributors = qs.distinct()
d = {}
for c in contributors:
_add_user(d, c['year'], c['month'], c['twitter_username'])
aoa = [{'month': k[1], 'year': k[0], 'count': len(v)} for
k, v in d.items()]
# Convert that to a list of dicts.
results_list = [dict(date=k, **v) for k, v in results_dict.items()]
# Merge and return
return merge_results(
en_us=en_us_list,
non_en_us=non_en_us_list,
support_forum=support_forum,
aoa=aoa)
return [Struct(**x) for x in sorted(
results_list, key=itemgetter('date'), reverse=True)]
class Meta:
cache = SimpleCache()

Просмотреть файл

@ -1,26 +1,34 @@
from datetime import datetime, date, timedelta
from django.db.models import Count, F
import cronjobs
from customercare.models import Reply
from dashboards import LAST_90_DAYS
from dashboards.models import WikiDocumentVisits
from kpi.models import (Metric, MetricKind, VISITORS_METRIC_CODE,
L10N_METRIC_CODE)
from kpi.models import (Metric, MetricKind,
AOA_CONTRIBUTORS_METRIC_CODE,
KB_ENUS_CONTRIBUTORS_METRIC_CODE,
KB_L10N_CONTRIBUTORS_METRIC_CODE,
L10N_METRIC_CODE,
SUPPORT_FORUM_CONTRIBUTORS_METRIC_CODE,
VISITORS_METRIC_CODE)
from questions.models import Answer
from sumo.webtrends import Webtrends
from wiki.models import Revision
@cronjobs.register
def update_visitors_metric():
"""Get new visitor data from webtrends and save."""
try:
# Get the latest metric value.
last_metric = Metric.objects.filter(
kind__code=VISITORS_METRIC_CODE).order_by('-start')[0]
# Start updating the day after the last updated.
start = last_metric.start + timedelta(days=1)
except IndexError:
# There are no metrics yet, start from 2011-01-01
start = date(2011, 01, 01)
# Start updating the day after the last updated.
latest_metric = _get_latest_metric(VISITORS_METRIC_CODE)
if latest_metric is not None:
latest_metric_date = latest_metric.start
else:
latest_metric_date = date(2011, 01, 01)
start = latest_metric_date + timedelta(days=1)
# Collect up until yesterday
end = date.today() - timedelta(days=1)
@ -106,6 +114,179 @@ def update_l10n_metric():
value=int(coverage * 100)) # Store as a % int.
@cronjobs.register
def update_contributor_metrics(day=None):
    """Calculate and save all of the contributor metrics.

    Runs the support forum, KB and AoA updaters, in that order, passing
    ``day`` through to each of them unchanged.
    """
    updaters = (
        update_support_forum_contributors_metric,
        update_kb_contributors_metric,
        update_aoa_contributors_metric,
    )
    for update in updaters:
        update(day)
def update_support_forum_contributors_metric(day=None):
    """Calculate and save the support forum contributor counts.

    A support forum contributor is a user that has replied 10 times
    in the past 30 days to questions that aren't his/her own.

    :arg day: if given, only the metric for that single day is
        calculated. Otherwise every day from the day after the latest
        saved metric's ``end`` (or 2011-01-01 when nothing is saved yet)
        through yesterday is calculated.

    The metric saved for a day covers the 30 days before it: answers
    created on ``day`` itself are not counted.
    """
    if day:
        start = end = day
    else:
        latest_metric = _get_latest_metric(
            SUPPORT_FORUM_CONTRIBUTORS_METRIC_CODE)
        if latest_metric is not None:
            # Start updating the day after the last updated.
            start = latest_metric.end + timedelta(days=1)
        else:
            # Plain decimal literals: ``01`` is an octal literal in
            # Python 2 and a syntax error in Python 3.
            start = date(2011, 1, 1)

        # Update until yesterday.
        end = date.today() - timedelta(days=1)

    if start > end:
        # Already up to date, nothing to calculate or save.
        return

    # The kind is the same for every day, so look it up only once
    # instead of once per loop iteration.
    metric_kind = MetricKind.objects.get(
        code=SUPPORT_FORUM_CONTRIBUTORS_METRIC_CODE)

    # Loop through all the days from start to end, calculating and saving.
    day = start
    while day <= end:
        # Figure out the number of contributors from the last 30 days.
        thirty_days_back = day - timedelta(days=30)
        contributors = (
            Answer.objects
            # Answers to one's own question are not contributions.
            .exclude(creator=F('question__creator'))
            .filter(created__gte=thirty_days_back, created__lt=day)
            # Group by creator and keep those with at least 10 answers.
            .values('creator')
            .annotate(count=Count('creator'))
            .filter(count__gte=10))
        count = contributors.count()

        # Save the value to Metric table.
        Metric.objects.create(
            kind=metric_kind,
            start=thirty_days_back,
            end=day,
            value=count)

        day = day + timedelta(days=1)
def update_kb_contributors_metric(day=None):
    """Calculate and save the KB (en-US and L10n) contributor counts.

    A KB contributor is a user that has edited or reviewed a Revision
    in the last 30 days.

    :arg day: if given, only the metrics for that single day are
        calculated. Otherwise every day from the day after the latest
        saved en-US metric's ``end`` (or 2011-01-01 when nothing is
        saved yet) through yesterday is calculated.

    Two metrics are saved per day: one for revisions of en-US documents
    and one for revisions of documents in any other locale. Each covers
    the 30 days before the day; activity on ``day`` itself is excluded.
    """
    if day:
        start = end = day
    else:
        latest_metric = _get_latest_metric(KB_ENUS_CONTRIBUTORS_METRIC_CODE)
        if latest_metric is not None:
            # Start updating the day after the last updated.
            start = latest_metric.end + timedelta(days=1)
        else:
            # Plain decimal literals: ``01`` is an octal literal in
            # Python 2 and a syntax error in Python 3.
            start = date(2011, 1, 1)

        # Update until yesterday.
        end = date.today() - timedelta(days=1)

    if start > end:
        # Already up to date, nothing to calculate or save.
        return

    # The kinds are the same for every day, so look them up only once
    # instead of once per loop iteration.
    en_us_kind = MetricKind.objects.get(
        code=KB_ENUS_CONTRIBUTORS_METRIC_CODE)
    l10n_kind = MetricKind.objects.get(
        code=KB_L10N_CONTRIBUTORS_METRIC_CODE)

    # Loop through all the days from start to end, calculating and saving.
    day = start
    while day <= end:
        # Figure out the number of contributors from the last 30 days.
        thirty_days_back = day - timedelta(days=30)
        editors = (
            Revision.objects
            .filter(created__gte=thirty_days_back, created__lt=day)
            .values_list('creator', flat=True).distinct())
        reviewers = (
            Revision.objects
            .filter(reviewed__gte=thirty_days_back, reviewed__lt=day)
            .values_list('reviewer', flat=True).distinct())

        # A user that both edited and reviewed is only counted once,
        # hence the set union of the two id lists.
        en_us_count = len(
            set(editors.filter(document__locale='en-US')) |
            set(reviewers.filter(document__locale='en-US')))
        l10n_count = len(
            set(editors.exclude(document__locale='en-US')) |
            set(reviewers.exclude(document__locale='en-US')))

        # Save the values to Metric table.
        Metric.objects.create(
            kind=en_us_kind,
            start=thirty_days_back,
            end=day,
            value=en_us_count)
        Metric.objects.create(
            kind=l10n_kind,
            start=thirty_days_back,
            end=day,
            value=l10n_count)

        day = day + timedelta(days=1)
def update_aoa_contributors_metric(day=None):
    """Calculate and save the AoA contributor counts.

    An AoA contributor is a user that has replied in the last 30 days.

    :arg day: if given, only the metric for that single day is
        calculated. Otherwise every day from the day after the latest
        saved metric's ``end`` through yesterday is calculated. When
        nothing is saved yet, calculation starts 30 days after the
        first Reply; if there are no replies at all, nothing is saved.

    The metric saved for a day covers the 30 days before it: replies
    created on ``day`` itself are not counted.
    """
    if day:
        start = end = day
    else:
        latest_metric = _get_latest_metric(AOA_CONTRIBUTORS_METRIC_CODE)
        if latest_metric is not None:
            # Start updating the day after the last updated.
            start = latest_metric.end + timedelta(days=1)
        else:
            # Start updating 30 days after the first reply we have.
            try:
                first_reply = Reply.objects.order_by('created')[0]
            except IndexError:
                # No replies yet, so there is no history to calculate.
                # Previously this raised and failed the whole cron job.
                return
            start = first_reply.created.date() + timedelta(days=30)

        # Update until yesterday.
        end = date.today() - timedelta(days=1)

    if start > end:
        # Already up to date, nothing to calculate or save.
        return

    # The kind is the same for every day, so look it up only once
    # instead of once per loop iteration.
    metric_kind = MetricKind.objects.get(code=AOA_CONTRIBUTORS_METRIC_CODE)

    # Loop through all the days from start to end, calculating and saving.
    day = start
    while day <= end:
        # Figure out the number of contributors from the last 30 days.
        thirty_days_back = day - timedelta(days=30)
        contributors = (
            Reply.objects
            .filter(created__gte=thirty_days_back, created__lt=day)
            .values_list('twitter_username').distinct())
        count = contributors.count()

        # Save the value to Metric table.
        Metric.objects.create(
            kind=metric_kind,
            start=thirty_days_back,
            end=day,
            value=count)

        day = day + timedelta(days=1)
def _get_latest_metric(metric_code):
    """Return the latest Metric saved for ``metric_code``, or None.

    "Latest" means the Metric with the greatest ``start``. The Metric
    object itself is returned (not a date), so callers read ``.start``
    or ``.end`` from it as needed. None is returned when no Metric of
    that kind has been saved yet.
    """
    metrics = Metric.objects.filter(
        kind__code=metric_code).order_by('-start')
    try:
        # Indexing an empty queryset raises IndexError.
        return metrics[0]
    except IndexError:
        return None
def _get_top_docs(count):
"""Get the top documents by visits."""
top_qs = WikiDocumentVisits.objects.select_related('document').filter(

Просмотреть файл

@ -6,6 +6,10 @@ from sumo.models import ModelBase
VISITORS_METRIC_CODE = 'general keymetrics:visitors'
L10N_METRIC_CODE = 'general l10n:coverage'
AOA_CONTRIBUTORS_METRIC_CODE = 'general aoa:contributors'
SUPPORT_FORUM_CONTRIBUTORS_METRIC_CODE = 'general supportforum:contributors'
KB_ENUS_CONTRIBUTORS_METRIC_CODE = 'general kb:en-US:contributors'
KB_L10N_CONTRIBUTORS_METRIC_CODE = 'general kb:l10n:contributors'
class MetricKind(ModelBase):
@ -49,4 +53,5 @@ class Metric(ModelBase):
unique_together = [('kind', 'start', 'end')]
def __unicode__(self):
return '%s (%s thru %s): %s' % (self.kind, self.start, self.end, self.value)
return '%s (%s thru %s): %s' % (
self.kind, self.start, self.end, self.value)

Просмотреть файл

@ -1,5 +1,5 @@
from base64 import b64encode
from datetime import date, datetime
from datetime import date, datetime, timedelta
import json
from django.core.cache import cache
@ -7,7 +7,15 @@ from django.core.cache import cache
from nose.tools import eq_
from customercare.tests import reply
from kpi.models import Metric, VISITORS_METRIC_CODE, L10N_METRIC_CODE
from kpi.cron import update_contributor_metrics
from kpi.models import (Metric,
AOA_CONTRIBUTORS_METRIC_CODE,
KB_ENUS_CONTRIBUTORS_METRIC_CODE,
KB_L10N_CONTRIBUTORS_METRIC_CODE,
L10N_METRIC_CODE,
SUPPORT_FORUM_CONTRIBUTORS_METRIC_CODE,
VISITORS_METRIC_CODE)
from kpi.tests import metric, metric_kind
from sumo.tests import TestCase, LocalizingClient
from sumo.urlresolvers import reverse
@ -26,6 +34,12 @@ class KpiApiTests(TestCase):
save=True)
return click_kind, search_kind
def _make_contributor_metric_kinds(self):
    """Create and save a metric kind for each contributor metric code."""
    contributor_codes = (
        AOA_CONTRIBUTORS_METRIC_CODE,
        KB_ENUS_CONTRIBUTORS_METRIC_CODE,
        KB_L10N_CONTRIBUTORS_METRIC_CODE,
        SUPPORT_FORUM_CONTRIBUTORS_METRIC_CODE,
    )
    for code in contributor_codes:
        metric_kind(code=code, save=True)
def _get_api_result(self, resource_name):
"""Helper to make API calls, parse the json and return the result."""
url = reverse('api_dispatch_list',
@ -118,7 +132,13 @@ class KpiApiTests(TestCase):
# An AoA reply (1 contributor):
reply(save=True)
# Create metric kinds and update metrics for tomorrow (today's
# activity shows up tomorrow).
self._make_contributor_metric_kinds()
update_contributor_metrics(day=date.today() + timedelta(days=1))
r = self._get_api_result('kpi_active_contributors')
eq_(r['objects'][0]['en_us'], 2)
eq_(r['objects'][0]['non_en_us'], 2)
eq_(r['objects'][0]['support_forum'], 1)
@ -136,16 +156,25 @@ class KpiApiTests(TestCase):
for x in range(10):
answer(creator=u, question=q, save=True)
# Create metric kinds and update metrics for tomorrow (today's
# activity shows up tomorrow).
self._make_contributor_metric_kinds()
update_contributor_metrics(day=date.today() + timedelta(days=1))
r = self._get_api_result('kpi_active_contributors')
eq_(len(r['objects']), 0)
eq_(r['objects'][0]['support_forum'], 0)
# Change the question creator, now we should have 1 contributor.
q.creator = user(save=True)
q.save()
cache.clear() # We need to clear the cache for new results.
Metric.objects.all().delete()
update_contributor_metrics(day=date.today() + timedelta(days=1))
r = self._get_api_result('kpi_active_contributors')
eq_(r['objects'][0]['support_forum'], 1)
test_asker_replies_arent_a_contribution.xx = 1
def test_sphinx_clickthrough_get(self):
"""Test Sphinx clickthrough read API."""

Просмотреть файл

@ -396,7 +396,7 @@ window.KpiDashboard = Backbone.View.extend({
}*/]
});
this.activeContributorsView = new BasicChartView({
this.activeContributorsView = new StockChartView({
model: this.activeContributorsChart,
title: gettext('Active Contributors'),
series: [{

Просмотреть файл

@ -0,0 +1,4 @@
INSERT INTO kpi_metrickind (`code`) VALUES ('general aoa:contributors');
INSERT INTO kpi_metrickind (`code`) VALUES ('general supportforum:contributors');
INSERT INTO kpi_metrickind (`code`) VALUES ('general kb:en-US:contributors');
INSERT INTO kpi_metrickind (`code`) VALUES ('general kb:l10n:contributors');

Просмотреть файл

@ -30,6 +30,7 @@ HOME = /tmp
47 2 * * * {{ cron }} remove_expired_registration_profiles
0 9 * * * {{ cron }} update_visitors_metric
0 10 * * * {{ cron }} update_l10n_metric
0 3 * * * {{ cron }} update_contributor_metrics
# Twice per week.
#05 01 * * 1,4 {{ cron }} update_weekly_votes