import csv
import cStringIO
import itertools
import time
from types import GeneratorType
from datetime import date, timedelta

from django import http
from django.db.models import Avg, Count, Sum, Q
from django.utils import simplejson
from django.utils.cache import add_never_cache_headers, patch_cache_control
from django.core.serializers.json import DjangoJSONEncoder
from django.core.exceptions import PermissionDenied

import jingo
from product_details import product_details

from access import acl
from addons.decorators import addon_view, addon_view_factory
from addons.models import Addon
from zadmin.models import SiteEvent

import amo
from amo.decorators import json_view
from amo.urlresolvers import reverse

from .decorators import allow_cross_site_request
from .models import DownloadCount, UpdateCount, Contribution


SERIES_GROUPS = ('day', 'week', 'month')
SERIES_FORMATS = ('json', 'csv')
SERIES = ('downloads', 'usage', 'contributions', 'overview',
          'sources', 'os', 'locales', 'statuses', 'versions', 'apps')

def get_series(model, extra_field=None, **filters):
    """
    Get a generator of dicts for the stats model given by the filters.

    Returns {'date': ..., 'count': ...} by default. Add an extra field
    (such as application faceting) by passing ``extra_field='apps'``;
    ``apps`` must be present in the query result.
    """
    extra = () if extra_field is None else (extra_field,)
    # Put a slice on it so we get more than 10 (the default), but limit
    # to 365.
    qs = (model.search().order_by('-date').filter(**filters)
          .values_dict('date', 'count', *extra))[:365]
    for val in qs:
        # Convert the datetimes to a date.
        date_ = date(*val['date'].timetuple()[:3])
        rv = dict(count=val['count'], date=date_, end=date_)
        if extra_field:
            rv['data'] = extract(val[extra_field])
        yield rv

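# Illustrative sketch, not part of the original module: with hypothetical
# values, each row yielded by get_series() looks like
#
#   {'count': 10,
#    'date': datetime.date(2011, 1, 1),
#    'end': datetime.date(2011, 1, 1)}
#
# and with an extra_field such as '_source.sources' it also carries a
# 'data' key holding the extract()ed breakdown, e.g. {'search': 3, 'api': 7}.
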
def csv_fields(series):
    """
    Figure out all the keys in the `data` dict for csv columns.

    Returns (series, fields). The series only contains the `data` dicts,
    plus `count` and `date` from the top level.
    """
    rv = []
    fields = set()
    for row in series:
        fields.update(row['data'])
        rv.append(row['data'])
        row['data'].update(count=row['count'], date=row['date'])
    return rv, fields

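# Illustrative sketch with hypothetical values: csv_fields() flattens a
# series like
#
#   [{'count': 10, 'date': date(2011, 1, 1), 'data': {'search': 3}}]
#
# into ([{'search': 3, 'count': 10, 'date': date(2011, 1, 1)}],
# set(['search'])), i.e. one row dict per day plus the union of all data
# keys, so render_csv() can emit one column per key.
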
def extract(dicts):
    """Turn a list of dicts like we store in ES into one big dict.

    Also works if the list of dicts is nested inside another dict.

    >>> extract([{'k': 'a', 'v': 1}, {'k': 'b', 'v': 2}])
    {'a': 1, 'b': 2}
    """
    if hasattr(dicts, 'items'):
        return dict((k, extract(v)) for k, v in dicts.items())
    return dict((d['k'], d['v']) for d in dicts)

@addon_view
def overview_series(request, addon, group, start, end, format):
    """Combines downloads_series and updates_series into one payload."""
    date_range = check_series_params_or_404(group, start, end, format)
    check_stats_permission(request, addon)

    dls = get_series(DownloadCount, addon=addon.id, date__range=date_range)
    updates = get_series(UpdateCount, addon=addon.id, date__range=date_range)

    series = zip_overview(dls, updates)

    return render_json(request, addon, series)

def zip_overview(downloads, updates):
    # Jump through some hoops to make sure we're matching dates across the
    # download and update series and inserting zeroes for any missing days.
    downloads, updates = list(downloads), list(updates)
    if not (downloads or updates):
        return
    start_date = None
    if downloads:
        start_date = downloads[0]['date']
    if updates:
        d = updates[0]['date']
        start_date = max(start_date, d) if start_date else d
    downloads, updates = iter(downloads), iter(updates)

    def iterator(series):
        item = next(series)
        next_date = start_date
        while True:
            if item['date'] == next_date:
                yield item['count']
                item = next(series)
            else:
                yield 0
            next_date = next_date - timedelta(days=1)

    series = itertools.izip_longest(iterator(downloads), iterator(updates))
    for idx, (dl_count, up_count) in enumerate(series):
        yield {'date': start_date - timedelta(days=idx),
               'data': {'downloads': dl_count, 'updates': up_count}}

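# Illustrative sketch with hypothetical counts: given
#
#   downloads = [{'date': date(2011, 1, 3), 'count': 5},
#                {'date': date(2011, 1, 1), 'count': 2}]   # no Jan 2 row
#   updates = [{'date': date(2011, 1, 3), 'count': 8}]
#
# zip_overview(downloads, updates) yields, newest first:
#
#   {'date': date(2011, 1, 3), 'data': {'downloads': 5, 'updates': 8}}
#   {'date': date(2011, 1, 2), 'data': {'downloads': 0, 'updates': None}}
#   {'date': date(2011, 1, 1), 'data': {'downloads': 2, 'updates': None}}
#
# Gaps inside a series become zeroes; a series that has run out entirely
# is padded with None by izip_longest.
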
@addon_view
def downloads_series(request, addon, group, start, end, format):
    """Generate download counts grouped by ``group`` in ``format``."""
    date_range = check_series_params_or_404(group, start, end, format)
    check_stats_permission(request, addon)

    series = get_series(DownloadCount, addon=addon.id, date__range=date_range)

    if format == 'csv':
        return render_csv(request, addon, series, ['date', 'count'])
    elif format == 'json':
        return render_json(request, addon, series)

@addon_view
def sources_series(request, addon, group, start, end, format):
    """Generate download source breakdown."""
    date_range = check_series_params_or_404(group, start, end, format)
    check_stats_permission(request, addon)

    series = get_series(DownloadCount, extra_field='_source.sources',
                        addon=addon.id, date__range=date_range)

    if format == 'csv':
        series, fields = csv_fields(series)
        return render_csv(request, addon, series,
                          ['date', 'count'] + list(fields))
    elif format == 'json':
        return render_json(request, addon, series)

@addon_view
def usage_series(request, addon, group, start, end, format):
    """Generate ADU counts grouped by ``group`` in ``format``."""
    date_range = check_series_params_or_404(group, start, end, format)
    check_stats_permission(request, addon)

    series = get_series(UpdateCount, addon=addon.id, date__range=date_range)

    if format == 'csv':
        return render_csv(request, addon, series, ['date', 'count'])
    elif format == 'json':
        return render_json(request, addon, series)

@addon_view
def usage_breakdown_series(request, addon, group,
                           start, end, format, field):
    """Generate ADU breakdown of ``field``."""
    date_range = check_series_params_or_404(group, start, end, format)
    check_stats_permission(request, addon)

    fields = {
        'applications': '_source.apps',
        'locales': '_source.locales',
        'oses': '_source.os',
        'versions': '_source.versions',
        'statuses': '_source.status',
    }
    series = get_series(UpdateCount, extra_field=fields[field],
                        addon=addon.id, date__range=date_range)
    if field == 'locales':
        series = process_locales(series)

    if format == 'csv':
        if field == 'applications':
            series = flatten_applications(series)
        series, fields = csv_fields(series)
        return render_csv(request, addon, series,
                          ['date', 'count'] + list(fields))
    elif format == 'json':
        return render_json(request, addon, series)

def flatten_applications(series):
    """Convert app guids to pretty names, flatten count structure."""
    for row in series:
        if 'data' in row:
            new = {}
            for app, versions in row['data'].items():
                # unicode() to decode the gettext proxy.
                appname = unicode(amo.APP_GUIDS[app].pretty)
                for ver, count in versions.items():
                    key = ' '.join([appname, ver])
                    new[key] = count
            row['data'] = new
        yield row

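# Illustrative sketch: assuming '{ec8030f7-c20a-464f-9b0e-13a3a9e97384}'
# maps to Firefox in amo.APP_GUIDS, flatten_applications() turns a row like
#
#   {'data': {'{ec8030f7-c20a-464f-9b0e-13a3a9e97384}': {'4.0': 12}}}
#
# into {'data': {'Firefox 4.0': 12}}, one flat "<app> <version>" key per
# count.
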
def process_locales(series):
    """Convert locale codes to pretty names, skip any unknown locales."""
    languages = dict((k.lower(), v['native'])
                     for k, v in product_details.languages.items())
    for row in series:
        if 'data' in row:
            new = {}
            for key, count in row['data'].items():
                if key in languages:
                    k = u'%s (%s)' % (languages[key], key)
                    new[k] = count
            row['data'] = new
        yield row

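# Illustrative sketch: assuming product_details lists 'en-us' with the
# native name u'English (US)', process_locales() turns
#
#   {'data': {'en-us': 30, 'xx-bogus': 2}}
#
# into {'data': {u'English (US) (en-us)': 30}}; the unrecognized
# 'xx-bogus' key is dropped.
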
def check_series_params_or_404(group, start, end, format):
    """Check common series parameters."""
    if (group not in SERIES_GROUPS) or (format not in SERIES_FORMATS):
        raise http.Http404
    return get_daterange_or_404(start, end)

def check_stats_permission(request, addon, for_contributions=False):
    """Check if the user is allowed to view stats for ``addon``.

    Raises PermissionDenied if the user is not allowed.
    """
    if for_contributions or not addon.public_stats:
        # Only authenticated admins and authors.
        if (request.user.is_authenticated() and (
                acl.action_allowed(request, 'Admin', 'ViewAnyStats') or
                addon.has_author(request.amo_user))):
            return
    elif addon.public_stats:
        # Non-contributions, public: everybody can view.
        return
    raise PermissionDenied

@addon_view_factory(Addon.objects.valid)
def stats_report(request, addon, report):
    check_stats_permission(request, addon,
                           for_contributions=(report == 'contributions'))
    stats_base_url = reverse('stats.overview', args=[addon.slug])
    view = get_report_view(request)
    return jingo.render(request, 'stats/%s.html' % report,
                        {'addon': addon,
                         'report': report,
                         'view': view,
                         'stats_base_url': stats_base_url})

def get_report_view(request):
    """Parse the report view parameters from the request: either a custom
    YYYYMMDD start/end range or a last-N-days window."""
    if 'start' in request.GET and 'end' in request.GET:
        try:
            start = request.GET.get('start')
            end = request.GET.get('end')

            assert len(start) == 8
            assert len(end) == 8

            s_year = int(start[0:4])
            s_month = int(start[4:6])
            s_day = int(start[6:8])
            e_year = int(end[0:4])
            e_month = int(end[4:6])
            e_day = int(end[6:8])

            # Make sure both parse as real calendar dates.
            date(s_year, s_month, s_day)
            date(e_year, e_month, e_day)

            return {'range': 'custom',
                    'start': start,
                    'end': end}
        except (KeyError, AssertionError, ValueError):
            pass

    if 'last' in request.GET:
        daterange = request.GET.get('last')

        return {'range': daterange, 'last': daterange + ' days'}
    else:
        return {}

def get_daterange_or_404(start, end):
    """Parse and validate a pair of YYYYMMDD date strings."""
    try:
        assert len(start) == 8
        assert len(end) == 8

        s_year = int(start[0:4])
        s_month = int(start[4:6])
        s_day = int(start[6:8])
        e_year = int(end[0:4])
        e_month = int(end[4:6])
        e_day = int(end[6:8])

        start_date = date(s_year, s_month, s_day)
        end_date = date(e_year, e_month, e_day)
    except (AssertionError, ValueError):
        raise http.Http404
    return (start_date, end_date)

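# Illustrative sketch: get_daterange_or_404('20110101', '20110131') returns
# (date(2011, 1, 1), date(2011, 1, 31)), while a malformed value such as
# '2011-01-01' (wrong length) or '20111301' (no 13th month) raises Http404.
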
@json_view
def site_events(request, start, end):
    """Return site events in the given timeframe."""
    start, end = get_daterange_or_404(start, end)
    qs = SiteEvent.objects.filter(
        Q(start__gte=start, start__lte=end) |
        Q(end__gte=start, end__lte=end))

    events = list(site_event_format(request, qs))

    type_pretty = unicode(amo.SITE_EVENT_CHOICES[amo.SITE_EVENT_RELEASE])

    releases = product_details.firefox_history_major_releases

    # `release_date`, not `date`, so we don't shadow the datetime.date
    # import above.
    for version, release_date in releases.items():
        events.append({
            'start': release_date,
            'type_pretty': type_pretty,
            'type': amo.SITE_EVENT_RELEASE,
            'description': 'Firefox %s released' % version,
        })
    return events

def site_event_format(request, events):
    for e in events:
        yield {
            'start': e.start.isoformat(),
            'end': e.end.isoformat() if e.end else None,
            'type_pretty': unicode(amo.SITE_EVENT_CHOICES[e.event_type]),
            'type': e.event_type,
            'description': e.description,
            'url': e.more_info_url,
        }

@addon_view
def contributions_series(request, addon, group, start, end, format):
    """Generate summarized contributions grouped by ``group`` in ``format``."""
    date_range = check_series_params_or_404(group, start, end, format)
    check_stats_permission(request, addon, for_contributions=True)

    # Beware: this needs to scan all the matching rows to do aggregates.
    qs = (Contribution.objects.extra(select={'date_created': 'date(created)'})
          .filter(addon=addon, amount__gt=0, transaction_id__isnull=False,
                  created__range=date_range)
          .values('date_created')
          .annotate(count=Count('amount'), average=Avg('amount'),
                    total=Sum('amount')))

    # Add `date` and `end` keys for legacy compat.
    series = sorted(qs, key=lambda x: x['date_created'], reverse=True)
    for row in series:
        row['end'] = row['date'] = row.pop('date_created')

    if format == 'csv':
        return render_csv(request, addon, series,
                          ['date', 'count', 'total', 'average'])
    elif format == 'json':
        return render_json(request, addon, series)

def fudge_headers(response, stats):
    """Alter cache headers. Don't cache content where data could be missing."""
    if not stats:
        add_never_cache_headers(response)
    else:
        seven_days = 60 * 60 * 24 * 7
        patch_cache_control(response, max_age=seven_days)

class UnicodeCSVDictWriter(csv.DictWriter):
    """A DictWriter that writes a unicode stream."""

    def __init__(self, stream, fields, **kw):
        # We have the csv module write into our buffer as bytes and then we
        # dump the buffer to the real stream as unicode.
        self.buffer = cStringIO.StringIO()
        csv.DictWriter.__init__(self, self.buffer, fields, **kw)
        self.stream = stream

    def writeheader(self):
        self.writerow(dict(zip(self.fieldnames, self.fieldnames)))

    def try_encode(self, obj):
        return obj.encode('utf-8') if isinstance(obj, unicode) else obj

    def writerow(self, rowdict):
        row = self._dict_to_list(rowdict)
        # Write to the buffer as utf-8 bytes.
        self.writer.writerow(map(self.try_encode, row))
        # Dump the buffer to the real stream as unicode.
        self.stream.write(self.buffer.getvalue().decode('utf-8'))
        # Clear the buffer.
        self.buffer.truncate(0)

    def writerows(self, rowdicts):
        for rowdict in rowdicts:
            self.writerow(rowdict)

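# Illustrative sketch, not part of the original module: the writer works
# with any file-like object that accepts unicode, e.g.
#
#   out = StringIO.StringIO()
#   w = UnicodeCSVDictWriter(out, ['date', 'count'], restval=0)
#   w.writeheader()
#   w.writerow({'date': u'2011-01-01', 'count': 10})
#
# Each row is encoded to utf-8 bytes for the csv module, then the buffered
# bytes are decoded back to unicode before landing in `out`.
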
@allow_cross_site_request
def render_csv(request, addon, stats, fields):
    """Render a stats series in CSV."""
    # Start with a header from the template.
    ts = time.strftime('%c %z')
    response = jingo.render(request, 'stats/csv_header.txt',
                            {'addon': addon, 'timestamp': ts})

    # Materialize the series up front so we can still inspect it for the
    # cache headers after writing it out.
    stats = list(stats)
    writer = UnicodeCSVDictWriter(response, fields, restval=0,
                                  extrasaction='ignore')
    writer.writeheader()
    writer.writerows(stats)

    fudge_headers(response, stats)
    response['Content-Type'] = 'text/csv; charset=utf-8'
    return response

@allow_cross_site_request
def render_json(request, addon, stats):
    """Render a stats series in JSON."""
    response = http.HttpResponse(mimetype='text/json')

    # XXX: Subclass DjangoJSONEncoder to handle generators.
    if isinstance(stats, GeneratorType):
        stats = list(stats)

    # Django's encoder supports date and datetime.
    fudge_headers(response, stats)
    simplejson.dump(stats, response, cls=DjangoJSONEncoder)
    return response