This commit is contained in:
William Durand 2020-10-13 11:19:28 +02:00 committed by GitHub
Parent 929356d523
Commit 5f00968a39
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
35 changed files with 87 additions and 1494 deletions

View file

@@ -31,14 +31,9 @@ HOME=/tmp
30 14 * * * %(z_cron)s category_totals
0 22 * * * %(z_cron)s gc
# Update ADI metrics from S3 once per day
00 12 * * * %(django)s download_counts_from_file
# Once per day after metrics import is done
30 12 * * * %(z_cron)s update_addon_weekly_downloads
30 13 * * * %(z_cron)s update_addon_average_daily_users
00 14 * * * %(z_cron)s index_latest_stats
# Once per week
1 9 * * 1 %(django)s review_reports

View file

@@ -1,10 +1,5 @@
import itertools
from datetime import date
import waffle
from django.db import connection
from django.db.models import F, Q, Value, IntegerField
from celery import group
@@ -25,7 +20,6 @@ from olympia.stats.utils import (
get_addons_and_average_daily_users_from_bigquery,
get_addons_and_weekly_downloads_from_bigquery,
get_averages_by_addon_from_bigquery)
from olympia.lib.es.utils import raise_if_reindex_in_progress
log = olympia.core.logger.getLogger('z.cron')
@@ -203,62 +197,25 @@ def update_addon_weekly_downloads(chunk_size=250):
"""
Update 7-day add-on download counts.
"""
if waffle.switch_is_active('use-bigquery-for-download-stats-cron'):
counts = dict(
# In order to reset the `weekly_downloads` values of add-ons that
# don't exist in BigQuery, we prepare a set of `(hashed_guid, 0)`
# for most add-ons.
Addon.objects
.filter(type__in=amo.ADDON_TYPES_WITH_STATS)
.exclude(guid__isnull=True)
.exclude(guid__exact='')
.exclude(weekly_downloads=0)
.annotate(count=Value(0, IntegerField()))
.values_list('addonguid__hashed_guid', 'count')
)
# Update the `counts` with values from BigQuery.
counts.update(get_addons_and_weekly_downloads_from_bigquery())
counts = list(counts.items())
counts = dict(
# In order to reset the `weekly_downloads` values of add-ons that
# don't exist in BigQuery, we prepare a set of `(hashed_guid, 0)`
# for most add-ons.
Addon.objects
.filter(type__in=amo.ADDON_TYPES_WITH_STATS)
.exclude(guid__isnull=True)
.exclude(guid__exact='')
.exclude(weekly_downloads=0)
.annotate(count=Value(0, IntegerField()))
.values_list('addonguid__hashed_guid', 'count')
)
# Update the `counts` with values from BigQuery.
counts.update(get_addons_and_weekly_downloads_from_bigquery())
counts = list(counts.items())
log.info('Preparing update of `weekly_downloads` for %s add-ons.',
len(counts))
log.info('Preparing update of `weekly_downloads` for %s add-ons.',
len(counts))
create_chunked_tasks_signatures(
_update_addon_weekly_downloads, counts, chunk_size
).apply_async()
else:
raise_if_reindex_in_progress('amo')
with connection.cursor() as cursor:
cursor.execute("""
SELECT addon_id, SUM(count) AS weekly_count
FROM download_counts
WHERE `date` >= DATE_SUB(CURDATE(), INTERVAL 7 DAY)
GROUP BY addon_id
ORDER BY addon_id""")
counts = cursor.fetchall()
addon_ids = [r[0] for r in counts]
if not addon_ids:
return
with connection.cursor() as cursor:
cursor.execute("""
SELECT id, 0
FROM addons
WHERE id NOT IN %s""", (addon_ids,))
counts += cursor.fetchall()
cursor.execute("""
CREATE TEMPORARY TABLE tmp_wd
(addon_id INT PRIMARY KEY, count INT)""")
cursor.execute('INSERT INTO tmp_wd VALUES %s' %
','.join(['(%s,%s)'] * len(counts)),
list(itertools.chain(*counts)))
cursor.execute("""
UPDATE addons INNER JOIN tmp_wd
ON addons.id = tmp_wd.addon_id
SET weeklydownloads = tmp_wd.count""")
cursor.execute("DROP TABLE IF EXISTS tmp_wd")
create_chunked_tasks_signatures(
_update_addon_weekly_downloads, counts, chunk_size
).apply_async()
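The cron above relies on a reset-then-overlay pattern: seed a zero count for every add-on that currently has downloads, then overwrite with the fresh BigQuery values, so add-ons that dropped out of BigQuery get reset to zero. A minimal sketch of that pattern, with plain dicts standing in for the ORM queryset and the BigQuery helper (all names illustrative):

def merge_weekly_downloads(existing_guids, bigquery_counts):
    # Seed every known add-on with 0 so anything missing from
    # BigQuery has its stale `weekly_downloads` value reset.
    counts = {guid: 0 for guid in existing_guids}
    # Overlay fresh values; only add-ons present in BigQuery
    # end up with a non-zero count.
    counts.update(bigquery_counts)
    return list(counts.items())

# 'guid-b' is absent from BigQuery, so it resets to 0.
assert merge_weekly_downloads(['guid-a', 'guid-b'], {'guid-a': 42}) == \
    [('guid-a', 42), ('guid-b', 0)]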

View file

@@ -444,11 +444,6 @@ class TestUpdateAddonHotness(TestCase):
class TestUpdateAddonWeeklyDownloads(TestCase):
def setUp(self):
super().setUp()
self.create_switch('use-bigquery-for-download-stats-cron')
@mock.patch('olympia.addons.cron.create_chunked_tasks_signatures')
@mock.patch(
'olympia.addons.cron.get_addons_and_weekly_downloads_from_bigquery'

View file

@@ -57,7 +57,6 @@ from olympia.lib.es.utils import timestamp_index
from olympia.promoted.models import (
PromotedAddon, PromotedApproval, update_es_for_promoted,
update_es_for_promoted_approval)
from olympia.stats.indexers import DownloadCountIndexer
from olympia.tags.models import Tag
from olympia.translations.models import Translation
from olympia.versions.models import ApplicationsVersions, License, Version
@@ -127,16 +126,12 @@ def setup_es_test_data(es):
# indexing things in tests.
AddonIndexer.create_new_index(
actual_indices['default'])
DownloadCountIndexer.create_new_index(
actual_indices['stats_download_counts'])
# Alias it to the name the code is going to use (which is suffixed by
# pytest to avoid clashing with the real thing).
actions = [
{'add': {'index': actual_indices['default'],
'alias': settings.ES_INDEXES['default']}},
{'add': {'index': actual_indices['stats_download_counts'],
'alias': settings.ES_INDEXES['stats_download_counts']}},
]
es.indices.update_aliases({'actions': actions})

View file

@@ -18,7 +18,6 @@ from .collection import generate_collection
from .images import generate_addon_preview
from .names import generate_names
from .ratings import generate_ratings
from .stats import generate_download_counts
from .translations import generate_translations
from .user import generate_addon_user_and_category, generate_user
from .version import generate_version
@@ -91,7 +90,6 @@ def generate_addons(num, owner, app_name, addon_type=ADDON_EXTENSION):
generate_collection(addon, app)
featured_categories[category] += 1
generate_ratings(addon, 5)
generate_download_counts(addon)
def generate_themes(num, owner, **kwargs):

View file

@@ -1,25 +0,0 @@
import random
from datetime import datetime, timedelta
from olympia.stats.models import DownloadCount
def generate_download_counts(addon, x_days=100):
"""Generate download counts for the last X days."""
for days in range(x_days):
date = datetime.now().replace(microsecond=0) - timedelta(days=days)
source = {
0: None,
1: 'search',
2: 'dp-btn-primary',
3: None,
4: 'homepagepromo',
5: 'discovery-promo',
}[days % 5]
DownloadCount.objects.create(
addon=addon,
count=random.randrange(0, 1000),
date=date,
sources={source: random.randrange(0, 500)} if source else None,
)

View file

@@ -1,23 +0,0 @@
from olympia import amo
from olympia.addons.models import Addon
from olympia.amo.tests import TestCase
from olympia.stats.models import DownloadCount
from olympia.landfill.stats import generate_download_counts
class TestGenerateDownloadCounts(TestCase):
def setUp(self):
super().setUp()
self.addon = Addon.objects.create(type=amo.ADDON_EXTENSION)
def test_generate_download_counts(self):
x_days = 10
generate_download_counts(self.addon, x_days)
assert DownloadCount.objects.all().count() == x_days
download_count = DownloadCount.objects.all()[0]
assert not download_count.sources
download_count = DownloadCount.objects.all()[1]
assert 'search' in download_count.sources

View file

@@ -15,7 +15,6 @@ from olympia.amo.search import get_es
from olympia.lib.es.utils import (
flag_reindexing_amo, is_reindexing_amo, timestamp_index,
unflag_reindexing_amo)
from olympia.stats.indexers import DownloadCountIndexer
logger = olympia.core.logger.getLogger('z.elasticsearch')
@@ -32,7 +31,6 @@ def get_indexer(alias):
# The keys are the index alias names, the values the indexer classes.
# The 'default' in ES_INDEXES is actually named 'addons'
settings.ES_INDEXES['default']: AddonIndexer,
settings.ES_INDEXES['stats_download_counts']: DownloadCountIndexer,
}
return modules[alias]
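The comment above describes a plain alias-to-class registry. A self-contained sketch of the lookup, with stand-in definitions (AddonIndexer here is a dummy):

ES_INDEXES = {'default': 'addons'}  # alias key -> concrete index name

class AddonIndexer:  # stand-in for olympia.addons.indexers.AddonIndexer
    pass

def get_indexer(alias):
    modules = {ES_INDEXES['default']: AddonIndexer}
    return modules[alias]  # unknown aliases raise KeyError

assert get_indexer('addons') is AddonIndexer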

View file

@@ -175,17 +175,6 @@ class TestIndexCommand(ESTestCase):
self.check_results(self.expected)
self._test_reindexation(wipe=True)
def test_stats_download_counts(self):
old_indices = self.get_indices_aliases()
stdout = io.StringIO()
management.call_command(
'reindex', key='stats_download_counts', stdout=stdout)
stdout.seek(0)
buf = stdout.read()
new_indices = self.get_indices_aliases()
assert len(new_indices)
assert old_indices != new_indices, (buf, old_indices, new_indices)
@mock.patch.object(reindex, 'gather_index_data_tasks')
def _test_workflow(self, key, gather_index_data_tasks_mock):
command = reindex.Command()
@@ -244,10 +233,3 @@ class TestIndexCommand(ESTestCase):
for addons.
"""
self._test_workflow('default')
def test_create_workflow_stats_download_counts(self):
"""
Test tasks returned by create_workflow() as used by reindex command,
for stats_download_counts.
"""
self._test_workflow('stats_download_counts')

View file

@@ -1052,7 +1052,6 @@ CELERY_TASK_SOFT_TIME_LIMIT = 60 * 30
CELERY_IMPORTS = (
'olympia.lib.crypto.tasks',
'olympia.lib.es.management.commands.reindex',
'olympia.stats.management.commands.index_stats',
)
CELERY_TASK_QUEUES = (
@@ -1069,7 +1068,6 @@ CELERY_TASK_QUEUES = (
Queue('ratings', routing_key='ratings'),
Queue('reviewers', routing_key='reviewers'),
Queue('search', routing_key='search'),
Queue('stats', routing_key='stats'),
Queue('tags', routing_key='tags'),
Queue('users', routing_key='users'),
Queue('zadmin', routing_key='zadmin'),
@@ -1224,9 +1222,6 @@ CELERY_TASK_ROUTES = {
'olympia.ratings.tasks.addon_rating_aggregates': {'queue': 'ratings'},
'olympia.ratings.tasks.update_denorm': {'queue': 'ratings'},
# Stats
'olympia.stats.tasks.index_download_counts': {'queue': 'stats'},
# Tags
'olympia.tags.tasks.update_all_tag_stats': {'queue': 'tags'},
'olympia.tags.tasks.update_tag_stat': {'queue': 'tags'},
@@ -1520,7 +1515,6 @@ ES_HOSTS = [os.environ.get('ELASTICSEARCH_LOCATION', '127.0.0.1:9200')]
ES_URLS = ['http://%s' % h for h in ES_HOSTS]
ES_INDEXES = {
'default': 'addons',
'stats_download_counts': 'addons_stats_download_counts',
}
ES_TIMEOUT = 30
@@ -1857,8 +1851,6 @@ CRON_JOBS = {
'update_blog_posts': 'olympia.devhub.cron',
'index_latest_stats': 'olympia.stats.cron',
'update_user_ratings': 'olympia.users.cron',
}
@@ -1886,9 +1878,6 @@ FXA_SQS_AWS_QUEUE_URL = (
'amo-account-change-dev')
FXA_SQS_AWS_WAIT_TIME = 20 # Seconds.
AWS_STATS_S3_BUCKET = env('AWS_STATS_S3_BUCKET', default=None)
AWS_STATS_S3_PREFIX = env('AWS_STATS_S3_PREFIX', default='amo_stats')
BASKET_URL = env('BASKET_URL', default='https://basket.allizom.org')
BASKET_API_KEY = env('BASKET_API_KEY', default=None)
# Default is 10, the API usually answers in 0.5 - 1.5 seconds.

View file

@@ -1,26 +0,0 @@
import datetime
from django.core.management import call_command
import olympia.core.logger
from olympia.lib.es.utils import raise_if_reindex_in_progress
from .models import DownloadCount
log = olympia.core.logger.getLogger('z.cron')
def index_latest_stats(index=None):
def fmt(d):
return d.strftime('%Y-%m-%d')
raise_if_reindex_in_progress('amo')
latest = DownloadCount.search(index).order_by('-date').values_dict('date')
if latest:
latest = latest[0]['date']
else:
latest = fmt(datetime.date.today() - datetime.timedelta(days=1))
date_range = '%s:%s' % (latest, fmt(datetime.date.today()))
log.info('index_stats --date=%s' % date_range)
call_command('index_stats', addons=None, date=date_range)
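The deleted cron resumed indexing from the newest date already present in ES and fell back to yesterday when the index was empty. A self-contained sketch of just the date-range computation (function name illustrative):

import datetime

def stats_date_range(latest_indexed=None, today=None):
    today = today or datetime.date.today()

    def fmt(d):
        return d.strftime('%Y-%m-%d')

    # Resume from the last indexed date, or from yesterday.
    start = latest_indexed or fmt(today - datetime.timedelta(days=1))
    return '%s:%s' % (start, fmt(today))

assert stats_date_range('2020-10-01', datetime.date(2020, 10, 13)) == \
    '2020-10-01:2020-10-13'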

View file

@@ -1,11 +0,0 @@
2014-07-10 1 67442 sync
2014-07-10 1 67442 cb-dl-bob
2014-07-10 1 67442 \N
2014-07-10 1 67442 search
2014-07-10 1 67442 gp
2014-07-10 1 666 search
2014-07-10 1 7661 search
2014-07-10 1 a3615 search
2014-07-10 1 disabled-addon search
2014-07-10 1 incomplete-addon search
2014-07-10 1 7661 \N

View file

@@ -1,139 +0,0 @@
[
{
"pk": 282,
"model": "applications.appversion",
"fields": {
"application": 1,
"version": "4.1",
"created": "2007-03-05 13:09:26",
"version_int": 4010002001000,
"modified": "2010-01-02 12:34:56"
}
},
{
"pk": 4,
"model": "addons.addon",
"fields": {
"slug": "4",
"type": 1,
"status": 4,
"description": null,
"modified": "2008-05-22 11:59:13",
"name": null,
"created": "2004-06-11 18:23:31"
}
},
{
"pk": 5,
"model": "addons.addon",
"fields": {
"slug": "5",
"type": 1,
"status": 4,
"description": null,
"modified": "2008-05-22 11:59:13",
"name": null,
"created": "2004-06-11 18:23:31"
}
},
{
"pk": 1,
"model": "stats.downloadcount",
"fields": {
"addon": 4,
"count": 10,
"sources": "{\"search\": 3, \"api\": 2}",
"date": "2009-06-01"
}
},
{
"pk": 2,
"model": "stats.downloadcount",
"fields": {
"addon": 4,
"count": 10,
"sources": "{\"search\": 3, \"api\": 2}",
"date": "2009-06-07"
}
},
{
"pk": 3,
"model": "stats.downloadcount",
"fields": {
"addon": 4,
"count": 10,
"sources": "{\"search\": 3, \"api\": 2}",
"date": "2009-06-12"
}
},
{
"pk": 4,
"model": "stats.downloadcount",
"fields": {
"addon": 4,
"count": 10,
"sources": "{\"search\": 3, \"api\": 2}",
"date": "2009-06-20"
}
},
{
"pk": 5,
"model": "stats.downloadcount",
"fields": {
"addon": 4,
"count": 10,
"sources": "{\"search\": 3, \"api\": 2}",
"date": "2009-06-28"
}
},
{
"pk": 6,
"model": "stats.downloadcount",
"fields": {
"addon": 4,
"count": 10,
"sources": "{\"search\": 3, \"api\": 2}",
"date": "2009-07-03"
}
},
{
"pk": 7,
"model": "stats.downloadcount",
"fields": {
"addon": 4,
"count": 10,
"sources": "{\"search\": 3, \"api\": 2}",
"date": "2009-08-03"
}
},
{
"pk": 8,
"model": "stats.downloadcount",
"fields": {
"addon": 4,
"count": 10,
"sources": "{\"search\": 3, \"api\": 2}",
"date": "2009-09-03"
}
},
{
"pk": 9,
"model": "stats.downloadcount",
"fields": {
"addon": 5,
"count": 10,
"sources": "{\"search\": 3, \"api\": 2}",
"date": "2009-10-03"
}
},
{
"pk": 10,
"model": "stats.downloadcount",
"fields": {
"addon": 4,
"count": 10,
"sources": "{\"search\": 3, \"api\": 2}",
"date": "2009-10-03"
}
}
]

View file

@@ -1,139 +0,0 @@
from datetime import timedelta
from django.db.models import Max, Min
from celery import group
from olympia.lib.es.utils import create_index
from olympia.amo.celery import create_chunked_tasks_signatures
from olympia.amo.indexers import BaseSearchIndexer
class StatsIndexer(BaseSearchIndexer):
# Number of elements to index at once in ES. The size of a dict to send to
# ES should be less than 1000 bytes, and the max size of messages to send
# to ES can be retrieved with the following command (look for
# "max_content_length_in_bytes"): curl http://HOST:PORT/_nodes/?pretty
CHUNK_SIZE = 5000
# Number of days to process at once when doing a full reindex.
FULL_REINDEX_DAYS_SLICE = 6
@classmethod
def es_dict(cls, items):
if not items:
return {}
if hasattr(items, 'items'):
items = items.items()
return [{'k': key, 'v': value} for key, value in items]
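# Output shape, for example: es_dict({'search': 3}) == [{'k': 'search', 'v': 3}].
# Key/value pairs become explicit sub-documents, matching the dynamic 'data'
# mapping below (presumably so arbitrary source names don't grow the mapping).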
@classmethod
def get_mapping(cls):
return {
'properties': {
'id': {'type': 'long'},
'boost': {'type': 'float', 'null_value': 1.0},
'count': {'type': 'long'},
'data': {
'dynamic': 'true',
'properties': {
'v': {'type': 'long'},
'k': {'type': 'keyword'}
}
},
'date': {
'format': 'dateOptionalTime',
'type': 'date'
}
}
}
@classmethod
def create_new_index(cls, index_name):
config = {
'mappings': {
cls.get_doctype_name(): cls.get_mapping()
}
}
create_index(index_name, config)
@classmethod
def reindex_tasks_group(cls, index_name, addons=None, dates=None):
"""
Return tasks group to execute to index statistics for the given
index/dates/addons.
"""
def get_indexing_tasks_for_qs(qs):
index_data_tasks = create_chunked_tasks_signatures(
cls.get_indexing_task(), qs, cls.CHUNK_SIZE,
task_args=(index_name,))
# Unwrap the tasks from the group create_chunked_tasks_signatures()
# returned; we'll create our own flat group with all the tasks,
# since there's no need for unnecessary nesting.
return index_data_tasks.tasks
qs = cls.get_model().objects.all()
tasks = []
if dates or addons:
qs = qs.order_by('-date')
qs = qs.values_list('id', flat=True)
if addons:
pks = [int(a.strip()) for a in addons.split(',')]
qs = qs.filter(addon__in=pks)
if dates:
if ':' in dates:
qs = qs.filter(date__range=dates.split(':'))
else:
qs = qs.filter(date=dates)
if not (dates or addons):
# We're loading the whole world. Do it in stages so we get most
# recent stats first and don't do huge queries.
limits = (qs.model.objects.filter(date__isnull=False)
.extra(where=['date <> "0000-00-00"'])
.aggregate(min=Min('date'), max=Max('date')))
# If there isn't any data at all, skip over.
if limits['max'] and limits['min']:
num_days = (limits['max'] - limits['min']).days
# We'll re-assign `qs` in each iteration of the loop, so first keep
# a copy around; it will be the base queryset to filter from.
base_qs = qs
for start in range(0, num_days, cls.FULL_REINDEX_DAYS_SLICE):
stop = start + cls.FULL_REINDEX_DAYS_SLICE - 1
date_range = (limits['max'] - timedelta(days=stop),
limits['max'] - timedelta(days=start))
qs = base_qs.filter(date__range=date_range)
if qs.exists():
tasks.extend(get_indexing_tasks_for_qs(qs))
else:
if qs.exists():
tasks.extend(get_indexing_tasks_for_qs(qs))
return group(tasks)
class DownloadCountIndexer(StatsIndexer):
@classmethod
def get_model(cls):
from olympia.stats.models import DownloadCount
return DownloadCount
@classmethod
def get_indexing_task(cls):
from olympia.stats.tasks import index_download_counts
return index_download_counts
@classmethod
def extract_document(cls, obj):
return {
'addon': obj.addon_id,
'date': obj.date,
'count': obj.count,
'sources': cls.es_dict(obj.sources) if obj.sources else {},
'id': obj.id,
'_id': '{0}-{1}'.format(obj.addon_id, obj.date)
}
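The full-reindex branch above walks backwards from the newest date in FULL_REINDEX_DAYS_SLICE-day windows so the most recent stats are indexed first; the last window may reach past the oldest date, where the date__range filter simply matches nothing. A sketch of the window arithmetic:

from datetime import date, timedelta

def reindex_windows(min_date, max_date, slice_days=6):
    num_days = (max_date - min_date).days
    for start in range(0, num_days, slice_days):
        stop = start + slice_days - 1
        yield (max_date - timedelta(days=stop),
               max_date - timedelta(days=start))

assert list(reindex_windows(date(2020, 1, 1), date(2020, 1, 10))) == [
    (date(2020, 1, 5), date(2020, 1, 10)),
    (date(2019, 12, 30), date(2020, 1, 4)),
]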

View file

View file

@@ -1,46 +0,0 @@
import boto3
import codecs
from io import StringIO
from urllib.parse import urlparse
from django.core.files.storage import get_storage_class
from django.utils.encoding import force_text
storage = get_storage_class()()
def get_date(path, sep):
parsed = urlparse(path)
if parsed.scheme == 's3':
obj = get_object_from_s3(parsed.netloc, parsed.path,
range='bytes=0-4096')
line = obj.splitlines()[0]
else:
with open(path) as f:
line = f.readline()
try:
return line.split(sep)[0]
except IndexError:
return None
def get_stats_data(path):
parsed = urlparse(path)
if parsed.scheme == 's3':
return get_object_from_s3(parsed.netloc, parsed.path).splitlines(True)
else:
with codecs.open(parsed.path, encoding='utf8') as count_file:
return StringIO(count_file.read())
def get_object_from_s3(bucket, object_key, range=''):
"""Get the ojbect from the s3"""
client = boto3.client('s3')
obj = client.get_object(Bucket=bucket, Key=object_key.lstrip('/'),
Range=range)
return force_text(obj['Body'].read())
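A usage sketch for get_date() with a local file (path and contents illustrative); the S3 branch does the same but fetches only the first 4 KiB via the Range header:

with open('/tmp/download_counts.hive', 'w') as f:
    f.write('2014-07-10\t1\t67442\tsearch\n')

# The first tab-separated field of the first line is the date.
assert get_date('/tmp/download_counts.hive', '\t') == '2014-07-10'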

View file

@@ -1,216 +0,0 @@
from datetime import datetime, timedelta
from os import path, unlink
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.db import close_old_connections
import olympia.core.logger
from olympia import amo
from olympia.addons.models import Addon
from olympia.files.models import File
from olympia.stats.models import DownloadCount, update_inc
from . import get_date, get_stats_data
log = olympia.core.logger.getLogger('adi.downloadcounts')
HIVE_NULL = '\\N'
def is_valid_source(src, fulls, prefixes):
"""Return True if the source is valid.
A source is valid if it is either NULL or it is in the list of valid full
sources or prefixed by a prefix in the list of valid prefix sources.
NOTE: this doesn't actually check for a prefix; `p in src` is a substring test.
"""
return src == HIVE_NULL or src in fulls or any(p in src for p in prefixes)
class Command(BaseCommand):
"""Update download count metrics from stats_source in the database.
Usage:
./manage.py download_counts_from_file \
<folder> --date=YYYY-MM-DD --stats_source={s3,file}
If no date is specified, the default is the day before.
If no stats_source is specified, the default is set to s3.
If stats_source is file:
If no folder is specified, the default is `hive_results/YYYY-MM-DD/`.
This folder will be located in `<settings.SHARED_STORAGE>/tmp`.
If stats_source is s3:
This file will be located in
`<settings.AWS_STATS_S3_BUCKET>/<settings.AWS_STATS_S3_PREFIX>`.
File processed:
- download_counts/YYYY-MM-DD/000000_0
We get a row for each "addon download" request, in this format:
<date> <count> <file id or add-on id or add-on slug> <click source>
We insert one DownloadCount entry per addon per day, and each row holds
the json-ified dict of click sources/counters.
Eg, for the above request:
date: <the date of the day the queries were made>
count: <the number of requests for this addon, for this day>
addon: <the addon that has this id>
src: {'dp-btn-primary': 1}
"""
help = __doc__
def add_arguments(self, parser):
"""Handle command arguments."""
parser.add_argument('folder_name', default='hive_results', nargs='?')
parser.add_argument(
'--stats_source', default='s3',
choices=['s3', 'file'],
help='Source of stats data')
parser.add_argument(
'--date', action='store', type=str,
dest='date', help='Date in the YYYY-MM-DD format.')
parser.add_argument(
'--separator', action='store', type=str, default='\t',
dest='separator', help='Field separator in file.')
def handle(self, *args, **options):
start = datetime.now() # Measure the time it takes to run the script.
day = options['date']
if not day:
day = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
sep = options['separator']
if options['stats_source'] == 's3':
filepath = 's3://' + '/'.join([settings.AWS_STATS_S3_BUCKET,
settings.AWS_STATS_S3_PREFIX,
'download_counts',
day, '000000_0'])
elif options['stats_source'] == 'file':
folder = options['folder_name']
folder = path.join(settings.TMP_PATH, folder, day)
filepath = path.join(folder, 'download_counts.hive')
# Make sure we're not trying to update with mismatched data.
if get_date(filepath, sep) != day:
raise CommandError('%s file contains data for another day' %
filepath)
# First, make sure we don't have any existing counts for the same day,
# or it would just increment again the same data.
DownloadCount.objects.filter(date=day).delete()
# Memoize the files to addon relations and the DownloadCounts.
download_counts = {}
# Perf: preload all the files and slugs once and for all.
# This builds two dicts:
# - One where each key (the file_id we get from the hive query) has
# the addon_id as value.
# - One where each key (the add-on slug) has the add-on_id as value.
files_to_addon = dict(File.objects.values_list('id',
'version__addon_id'))
slugs_to_addon = dict(
Addon.unfiltered.exclude(status=amo.STATUS_NULL)
.values_list('slug', 'id'))
# The source must either be exactly one of the "full" valid sources, or
# prefixed by one of the "prefix" valid sources, or NULL.
fulls = amo.DOWNLOAD_SOURCES_FULL
prefixes = amo.DOWNLOAD_SOURCES_PREFIX
count_file = get_stats_data(filepath)
for index, line in enumerate(count_file):
if index and (index % 1000000) == 0:
log.debug('Processed %s lines' % index)
splitted = line[:-1].split(sep)
if len(splitted) != 4:
log.debug('Badly formatted row: %s' % line)
continue
day, counter, id_or_slug, src = splitted
try:
# Clean up data.
id_or_slug = id_or_slug.strip()
counter = int(counter)
except ValueError:
# Ignore completely invalid data.
continue
if not is_valid_source(src, fulls=fulls, prefixes=prefixes):
log.debug('Invalid source: {}.'.format(src))
continue
# This is needed to have a better source than the HIVE_NULL
# representation in the frontend.
if src == HIVE_NULL:
src = None
if id_or_slug.isdigit():
# If it's a digit, then it should be a file id.
try:
id_or_slug = int(id_or_slug)
except ValueError:
continue
addon_id = (
# Does this file exist?
files_to_addon.get(id_or_slug) or
# Maybe it's an add-on ?
(id_or_slug if id_or_slug in files_to_addon.values()
# otherwise it doesn't exist
else None))
else:
# If it's not numeric it's probably a slug.
addon_id = slugs_to_addon.get(id_or_slug)
if not addon_id:
# We've exhausted all possibilities, ignore this row.
continue
# Memoize the DownloadCount.
if addon_id in download_counts:
dc = download_counts[addon_id]
# update the DownloadCount object.
dc.count += counter
dc.sources = update_inc(dc.sources, src, counter)
else:
dc = DownloadCount(
date=day,
addon_id=addon_id,
count=counter,
sources={src: counter})
download_counts[addon_id] = dc
# Close all old connections in this thread before we start creating the
# `DownloadCount` values.
# https://github.com/mozilla/addons-server/issues/6886
# If the calculation above takes too long it might happen that we run
# into `wait_timeout` problems and django doesn't reconnect properly
# (potentially because of misconfiguration).
# Django will re-connect properly after it notices that all
# connections are closed.
close_old_connections()
# Create in bulk: this is much faster.
DownloadCount.objects.bulk_create(download_counts.values(), 100)
log.info('Processed a total of %s lines' % (index + 1))
log.info('Total processing time: %s' % (datetime.now() - start))
if options['stats_source'] == 'file':
# Clean up file.
log.info('Deleting {path}'.format(path=filepath))
unlink(filepath)
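A condensed sketch of the aggregation loop above: each valid hive row increments one per-add-on, per-day record, accumulating per-source counters. Plain dicts stand in for DownloadCount instances and the file/slug resolver; all names are illustrative:

def aggregate_rows(rows, resolve_addon_id):
    download_counts = {}
    for day, counter, id_or_slug, src in rows:
        addon_id = resolve_addon_id(id_or_slug)
        if addon_id is None:
            continue  # unknown file id or slug: ignore the row
        src = None if src == '\\N' else src  # HIVE_NULL -> None
        dc = download_counts.setdefault(
            addon_id, {'date': day, 'count': 0, 'sources': {}})
        dc['count'] += counter
        dc['sources'][src] = dc['sources'].get(src, 0) + counter
    return download_counts

result = aggregate_rows(
    [('2014-07-10', 1, '67442', 'search'),
     ('2014-07-10', 1, '67442', '\\N')],
    {'67442': 3615}.get)
assert result[3615] == {
    'date': '2014-07-10', 'count': 2, 'sources': {'search': 1, None: 1}}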

View file

@@ -1,50 +0,0 @@
from django.core.management.base import BaseCommand
from celery import group
import olympia.core.logger
from olympia.stats.indexers import DownloadCountIndexer
log = olympia.core.logger.getLogger('z.stats')
HELP = """\
Start tasks to index stats. Without constraints, everything will be
processed.
To limit the add-ons:
`--addons=1865,2848,..,1843`
To limit the date range:
`--date=2011-08-15` or `--date=2011-08-15:2011-08-22`
"""
class Command(BaseCommand):
help = HELP
def add_arguments(self, parser):
"""Handle command arguments."""
parser.add_argument(
'--addons',
help='Add-on ids to process. Use commas to separate multiple ids.')
parser.add_argument(
'--date',
help='The date or date range to process. Use the format '
'YYYY-MM-DD for a single date or '
'YYYY-MM-DD:YYYY-MM-DD to index a range of dates '
'(inclusive).')
parser.add_argument(
'--index',
help='Optional index name to use.')
def handle(self, *args, **kw):
addons, dates, index = kw['addons'], kw['date'], kw['index']
index_data_tasks = DownloadCountIndexer.reindex_tasks_group(
index_name=index, addons=addons, dates=dates)
group(index_data_tasks.tasks).apply_async()
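Usage sketch for the deleted command, matching the forms described in HELP (ids and dates illustrative):

# Index a single day for two add-ons:
#   ./manage.py index_stats --addons=1865,2848 --date=2011-08-15
# Index an inclusive date range for everything:
#   ./manage.py index_stats --date=2011-08-15:2011-08-22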

View file

@@ -0,0 +1,16 @@
# Generated by Django 2.2.16 on 2020-10-06 16:00
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('stats', '0006_create_switch_for_fenix_build_ids'),
]
operations = [
migrations.DeleteModel(
name='DownloadCount',
),
]

View file

@@ -0,0 +1,21 @@
# Generated by Django 2.2.16 on 2020-10-07 08:39
from django.db import migrations
def delete_waffles(apps, schema_editor):
Switch = apps.get_model("waffle", "Switch")
Switch.objects.filter(name="use-bigquery-for-download-stats-cron").delete()
Switch.objects.filter(name="use-bigquery-for-addon-adu").delete()
Flag = apps.get_model("waffle", "Flag")
Flag.objects.filter(name="bigquery-download-stats").delete()
class Migration(migrations.Migration):
dependencies = [
("stats", "0007_delete_downloadcount"),
]
operations = [migrations.RunPython(delete_waffles)]

View file

@@ -1,36 +0,0 @@
from django.db import models
from django_extensions.db.fields.json import JSONField
from olympia.amo.fields import PositiveAutoField
from olympia.amo.models import SearchMixin
def update_inc(initial, key, count):
"""Update or create a dict of `int` counters, for JSONField."""
initial = initial or {}
initial[key] = count + initial.get(key, 0)
return initial
class DownloadCount(SearchMixin, models.Model):
id = PositiveAutoField(primary_key=True)
addon = models.ForeignKey('addons.Addon', on_delete=models.CASCADE)
count = models.PositiveIntegerField()
date = models.DateField()
sources = JSONField(db_column='src', null=True)
ES_ALIAS_KEY = 'stats_download_counts'
class Meta:
db_table = 'download_counts'
indexes = [
# FIXME: some of these might be redundant. See #5712
models.Index(fields=('count',), name='count'),
models.Index(fields=('addon',), name='addon_id'),
models.Index(fields=('addon', 'count'), name='addon_and_count'),
models.Index(fields=('addon', 'date'), name='addon_date_idx'),
]
constraints = [
models.UniqueConstraint(fields=['date', 'addon'], name='date_2')
]
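update_inc() in action (values illustrative): a None initial dict is created on the fly, existing counters increment, and new keys appear alongside old ones:

assert update_inc(None, 'search', 2) == {'search': 2}
assert update_inc({'search': 3, 'api': 2}, 'search', 2) == {'search': 5, 'api': 2}
assert update_inc({'search': 3}, 'api', 1) == {'search': 3, 'api': 1}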

View file

@@ -1,33 +0,0 @@
from elasticsearch.helpers import bulk as bulk_index
import olympia.core.logger
from olympia.amo import search as amo_search
from olympia.amo.celery import task
from .indexers import DownloadCountIndexer
from .models import DownloadCount
log = olympia.core.logger.getLogger('z.task')
@task
def index_download_counts(ids, index=None, **kw):
index = index or DownloadCountIndexer.get_index_alias()
es = amo_search.get_es()
qs = DownloadCount.objects.filter(id__in=ids)
if qs.exists():
log.info('Indexing %s downloads for %s.' % (qs.count(), qs[0].date))
try:
data = []
for obj in qs:
data.append(DownloadCountIndexer.extract_document(obj))
bulk_index(es, data, index=index,
doc_type=DownloadCountIndexer.get_doctype_name(),
refresh=True)
except Exception as exc:
index_download_counts.retry(args=[ids, index], exc=exc)
raise

View file

@@ -11,17 +11,15 @@
<li data-report="sources">
<a href="{{ url('stats.sources', addon.slug) }}">{{ _('by Source') }}</a>
</li>
{% if bigquery_download_stats %}
<li data-report="mediums">
<a href="{{ url('stats.mediums', addon.slug) }}">{{ _('by Medium') }}</a>
</li>
<li data-report="contents">
<a href="{{ url('stats.contents', addon.slug) }}">{{ _('by Content') }}</a>
</li>
<li data-report="campaigns">
<a href="{{ url('stats.campaigns', addon.slug) }}">{{ _('by Campaign') }}</a>
</li>
{% endif %}
<li data-report="mediums">
<a href="{{ url('stats.mediums', addon.slug) }}">{{ _('by Medium') }}</a>
</li>
<li data-report="contents">
<a href="{{ url('stats.contents', addon.slug) }}">{{ _('by Content') }}</a>
</li>
<li data-report="campaigns">
<a href="{{ url('stats.campaigns', addon.slug) }}">{{ _('by Campaign') }}</a>
</li>
</ul>
{% endif %}
<li data-report="usage">

View file

@@ -3,9 +3,7 @@
{% set title = _('Download campaigns by Date') %}
{% block stats_note_link %}
{% if bigquery_download_stats %}
<a href="{{ settings.EXTENSION_WORKSHOP_URL }}/documentation/manage/monitoring-extension-usage-statistics/" target="_blank" rel="noopener noreferrer">
{{ _('About tracking external sources...') }}
</a>
{% endif %}
{% endblock %}

View file

@@ -3,9 +3,7 @@
{% set title = _('Download contents by Date') %}
{% block stats_note_link %}
{% if bigquery_download_stats %}
<a href="{{ settings.EXTENSION_WORKSHOP_URL }}/documentation/manage/monitoring-extension-usage-statistics/" target="_blank" rel="noopener noreferrer">
{{ _('About tracking external sources...') }}
</a>
{% endif %}
{% endblock %}

View file

@@ -1,22 +1,3 @@
{% extends 'stats/report.html' %}
{% set title = _('Downloaded by Date') %}
{% block stats_note_link %}
{% if not bigquery_download_stats %}
<a href="#" id="stats-note-link">{{ _('How are downloads counted?') }}</a>
{% endif %}
{% endblock %}
{% block stats_note %}
{% if not bigquery_download_stats %}
{% trans %}
<h2>How are downloads counted?</h2>
<p>
Download counts are updated every evening and only include original add-on
downloads, not updates. Downloads can be broken down by the specific source
referring the download.
</p>
{% endtrans %}
{% endif %}
{% endblock %}

View file

@@ -3,9 +3,7 @@
{% set title = _('Download mediums by Date') %}
{% block stats_note_link %}
{% if bigquery_download_stats %}
<a href="{{ settings.EXTENSION_WORKSHOP_URL }}/documentation/manage/monitoring-extension-usage-statistics/" target="_blank" rel="noopener noreferrer">
{{ _('About tracking external sources...') }}
</a>
{% endif %}
{% endblock %}

View file

@@ -3,37 +3,7 @@
{% set title = _('Download sources by Date') %}
{% block stats_note_link %}
{% with link_text=_('About tracking external sources...') %}
{% if bigquery_download_stats %}
<a href="{{ settings.EXTENSION_WORKSHOP_URL }}/documentation/manage/monitoring-extension-usage-statistics/" target="_blank" rel="noopener noreferrer">{{ link_text }}</a>
{% else %}
<a href="#" id="stats-note-link">{{ link_text }}</a>
{% endif %}
{% endwith %}
{% endblock %}
{% block stats_note %}
{% if not bigquery_download_stats %}
{% trans slug=addon.slug, id=addon.id %}
<h2>Tracking external sources</h2>
<p>
If you link to your add-on's details page or directly to its file from an
external site, such as your blog or website, you can append a parameter to be
tracked as an additional download source on this page. For example, the
following links would appear as sourced by your blog:
</p>
<dl>
<dt>Add-on Details Page</dt>
<dd>https://addons.mozilla.org/addon/{{ slug }}?src=<b>external-blog</b></dd>
<dt>Direct File Link</dt>
<dd>https://addons.mozilla.org/downloads/latest/{{ id }}/addon-{{ id }}-latest.xpi?src=<b>external-blog</b></dd>
</dl>
<p>
Only src parameters that begin with "external-" will be tracked, up to 61
additional characters. Any text after "external-" can be used to describe the
source, such as "external-blog", "external-sidebar", "external-campaign225",
etc. The following URL-safe characters are allowed: <code>a-z A-Z - . _ ~ % +</code>
</p>
{% endtrans %}
{% endif %}
<a href="{{ settings.EXTENSION_WORKSHOP_URL }}/documentation/manage/monitoring-extension-usage-statistics/" target="_blank" rel="noopener noreferrer">
{{ _('About tracking external sources...') }}
</a>
{% endblock %}

View file

@@ -116,7 +116,6 @@
data-start_date="{{ view.start }}"
data-end_date="{{ view.end }}"
{% endif %}
data-bigquery-download-stats="{{ bigquery_download_stats }}"
data-use-fenix-build-ids="{{ use_fenix_build_ids }}"
data-base_url="{{ stats_base_url }}">
<div class="island chart">

View file

@@ -1,7 +1,6 @@
from django.template import loader
import jinja2
import waffle
from django_jinja import library
@@ -16,8 +15,5 @@ def report_menu(context, request, report, obj):
tpl = loader.get_template('stats/addon_report_menu.html')
ctx = {
'addon': obj,
'bigquery_download_stats': waffle.flag_is_active(
request, 'bigquery-download-stats'
),
}
return jinja2.Markup(tpl.render(ctx))

View file

@@ -1,176 +0,0 @@
import boto3
import os
import shutil
from botocore.stub import Stubber, ANY
from datetime import date
from django.conf import settings
from django.core import management
from django.test.testcases import TransactionTestCase
from django.test.utils import override_settings
from unittest import mock
from olympia import amo
from olympia.amo.storage_utils import rm_stored_dir
from olympia.amo.tests import addon_factory
from olympia.stats.management.commands import get_stats_data
from olympia.stats.management.commands.download_counts_from_file import \
is_valid_source # noqa
from olympia.stats.models import DownloadCount
hive_folder = os.path.join(settings.ROOT, 'src/olympia/stats/fixtures/files')
class FixturesFolderMixin(object):
# You have to define these values in your subclasses.
date = 'YYYY-MM-DD'
source_folder = 'dummy'
stats_source = 'dummy'
def get_tmp_hive_folder(self):
return os.path.join(hive_folder, self.id())
def clean_up_files(self):
tmp_hive_folder = self.get_tmp_hive_folder()
if os.path.isdir(tmp_hive_folder):
rm_stored_dir(tmp_hive_folder)
def setUp(self):
super(FixturesFolderMixin, self).setUp()
self.clean_up_files()
shutil.copytree(os.path.join(hive_folder, self.source_folder),
os.path.join(self.get_tmp_hive_folder(), self.date))
def tearDown(self):
self.clean_up_files()
super(FixturesFolderMixin, self).tearDown()
class TestADICommand(FixturesFolderMixin, TransactionTestCase):
fixtures = ('base/addon_3615', 'base/featured', 'base/appversion.json')
date = '2014-07-10'
source_folder = 'src'
stats_source = 'file'
def setUp(self):
super(TestADICommand, self).setUp()
def test_download_counts_from_file(self):
management.call_command('download_counts_from_file',
self.get_tmp_hive_folder(), date=self.date,
stats_source=self.stats_source)
assert DownloadCount.objects.all().count() == 2
download_count_1 = DownloadCount.objects.get(addon_id=3615)
assert download_count_1.count == 4
assert download_count_1.date == date(2014, 7, 10)
# In the hive file, 67442 refers to the file for the current version of
# this add-on.
assert download_count_1.sources == {u'search': 2, u'cb-dl-bob': 1,
'null': 1}
download_count_2 = DownloadCount.objects.get(addon_id=7661)
assert download_count_2.count == 2
assert download_count_2.sources == {'search': 1, 'null': 1}
def test_download_counts_from_file_includes_disabled_addons(self):
# We only exclude STATUS_NULL add-ons
addon_factory(slug='disabled-addon', status=amo.STATUS_DISABLED)
addon_factory(slug='incomplete-addon', status=amo.STATUS_NULL)
management.call_command('download_counts_from_file',
self.get_tmp_hive_folder(), date=self.date,
stats_source=self.stats_source)
assert DownloadCount.objects.all().count() == 3
assert DownloadCount.objects.get(addon_id=3615)
assert DownloadCount.objects.get(addon_id=7661)
download_count = DownloadCount.objects.get(
addon__slug='disabled-addon'
)
assert download_count.count == 1
assert download_count.date == date(2014, 7, 10)
assert download_count.sources == {u'search': 1}
# Make sure we didn't generate any stats for incomplete add-ons
assert not DownloadCount.objects.filter(
addon__slug='incomplete-addon'
).exists()
@mock.patch(
'olympia.stats.management.commands.download_counts_from_file.'
'close_old_connections')
def test_download_counts_from_file_closes_old_connections(
self, close_old_connections_mock):
management.call_command('download_counts_from_file',
self.get_tmp_hive_folder(), date=self.date,
stats_source=self.stats_source)
assert DownloadCount.objects.all().count() == 2
close_old_connections_mock.assert_called_once()
def test_is_valid_source(self):
assert is_valid_source('foo',
fulls=['foo', 'bar'],
prefixes=['baz', 'cruux'])
assert not is_valid_source('foob',
fulls=['foo', 'bar'],
prefixes=['baz', 'cruux'])
assert is_valid_source('foobaz',
fulls=['foo', 'bar'],
prefixes=['baz', 'cruux'])
assert not is_valid_source('ba',
fulls=['foo', 'bar'],
prefixes=['baz', 'cruux'])
assert is_valid_source(
'\\N', fulls=['foo', 'bar'], prefixes=['baz', 'cruux']
)
class TestADICommandS3(TransactionTestCase):
fixtures = ('base/addon_3615', 'base/featured', 'base/appversion.json')
date = '2014-07-10'
stats_source = 's3'
def add_response(self, stat):
stat_path = os.path.join(hive_folder, 'src', '%s.hive' % stat)
data = get_stats_data(stat_path)
response = {
'Body': data,
}
expected_params = {'Bucket': 'test-bucket',
'Key': os.path.join('amo_stats', stat,
self.date, '000000_0'),
'Range': ANY}
self.stubber.add_response('get_object', response, expected_params)
def setUp(self):
self.client = boto3.client('s3')
self.stubber = Stubber(self.client)
self.stubber.activate()
def tearDown(self):
self.stubber.deactivate()
@override_settings(AWS_STATS_S3_BUCKET='test-bucket')
@mock.patch('olympia.stats.management.commands.boto3')
def test_download_counts_from_s3(self, mock_boto3):
for x in range(2):
self.add_response('download_counts')
mock_boto3.client.return_value = self.client
management.call_command('download_counts_from_file',
date=self.date, stats_source=self.stats_source)
assert DownloadCount.objects.all().count() == 2
download_count = DownloadCount.objects.get(addon_id=3615)
assert download_count.count == 4
assert download_count.date == date(2014, 7, 10)
# In the hive file, 67442 refers to the file for the current version of
# this add-on.
assert download_count.sources == {u'search': 2, u'cb-dl-bob': 1,
'null': 1}

View file

@@ -1,88 +0,0 @@
import datetime
from django.core.management import call_command
from unittest import mock
from olympia import amo
from olympia.amo.tests import TestCase
from olympia.stats import cron
from olympia.stats.models import DownloadCount
@mock.patch('olympia.stats.management.commands.index_stats.group')
class TestIndexStats(TestCase):
fixtures = ['stats/download_counts']
def setUp(self):
super(TestIndexStats, self).setUp()
self.downloads = (DownloadCount.objects.order_by('-date')
.values_list('id', flat=True))
def test_by_date(self, group_mock):
call_command('index_stats', addons=None, date='2009-06-01')
qs = self.downloads.filter(date='2009-06-01')
calls = group_mock.call_args[0][0]
assert calls[0].task == 'olympia.stats.tasks.index_download_counts'
assert calls[0].args == (list(qs), None)
def test_by_addon_and_date_no_match(self, group_mock):
call_command('index_stats', addons='5', date='2009-06-01')
calls = group_mock.call_args[0][0]
assert len(calls) == 0
def test_by_date_range(self, group_mock):
call_command('index_stats', addons=None,
date='2009-06-01:2009-06-07')
qs = self.downloads.filter(date__range=('2009-06-01', '2009-06-07'))
calls = group_mock.call_args[0][0]
assert calls[0].task == 'olympia.stats.tasks.index_download_counts'
assert calls[0].args == (list(qs), None)
def test_by_addon(self, group_mock):
call_command('index_stats', addons='5', date=None)
qs = self.downloads.filter(addon=5)
calls = group_mock.call_args[0][0]
assert calls[0].task == 'olympia.stats.tasks.index_download_counts'
assert calls[0].args == (list(qs), None)
def test_by_addon_and_date(self, group_mock):
call_command('index_stats', addons='4', date='2009-06-01')
qs = self.downloads.filter(addon=4, date='2009-06-01')
calls = group_mock.call_args[0][0]
assert calls[0].args == (list(qs), None)
def test_multiple_addons_and_date(self, group_mock):
call_command('index_stats', addons='4, 5', date='2009-10-03')
qs = self.downloads.filter(addon__in=[4, 5], date='2009-10-03')
calls = group_mock.call_args[0][0]
assert calls[0].task == 'olympia.stats.tasks.index_download_counts'
assert calls[0].args == (list(qs), None)
def test_no_addon_or_date(self, group_mock):
call_command('index_stats', addons=None, date=None)
calls = group_mock.call_args[0][0]
# There should be 10 downloads, but 2 of them have a date close enough
# together that they'll be indexed in the same chunk, so we should have
# 9 calls.
download_counts_calls = [
call.args for call in calls
if call.task == 'olympia.stats.tasks.index_download_counts'
]
assert len(download_counts_calls) == 9
class TestIndexLatest(amo.tests.ESTestCase):
def test_index_latest(self):
latest = datetime.date.today() - datetime.timedelta(days=5)
DownloadCount.index({'date': latest})
self.refresh('stats_download_counts')
start = latest.strftime('%Y-%m-%d')
finish = datetime.date.today().strftime('%Y-%m-%d')
with mock.patch('olympia.stats.cron.call_command') as call:
cron.index_latest_stats()
call.assert_called_with('index_stats', addons=None,
date='%s:%s' % (start, finish))

View file

@@ -8,7 +8,7 @@ from unittest import mock
from django.http import Http404
from django.test.client import RequestFactory
from django.utils.encoding import force_text
from waffle.testutils import override_flag, override_switch
from waffle.testutils import override_switch
from olympia import amo
from olympia.access.models import Group, GroupUser
@@ -21,12 +21,10 @@ from olympia.amo.tests import (
)
from olympia.amo.urlresolvers import reverse
from olympia.constants.applications import FIREFOX
from olympia.stats import tasks, views
from olympia.stats.models import DownloadCount
from olympia.stats import views
from olympia.users.models import UserProfile
@override_flag('bigquery-download-stats', active=True)
class StatsTestCase(TestCase):
fixtures = [
# Create two configured users:
@@ -34,16 +32,14 @@ class StatsTestCase(TestCase):
# - admin: jbalogh@mozilla.com
# - simple user: nobodyspecial@mozilla.com
'stats/users.json',
# Create add-ons `4` and `5` and `DownloadCount` entries.
'stats/download_counts.json',
]
def setUp(self):
super().setUp()
self.addon_4 = Addon.objects.get(pk=4)
self.addon_4 = Addon.objects.create(pk=4, slug='4')
version_factory(addon=self.addon_4)
self.addon_5 = Addon.objects.get(pk=5)
self.addon_5 = Addon.objects.create(pk=5, slug='5')
version_factory(addon=self.addon_5)
# Default url_args to an addon and range with data.
@@ -260,20 +256,7 @@ class TestLayout(StatsTestCase):
assert response.status_code == 403
class ESStatsTestCase(StatsTestCase, amo.tests.ESTestCase):
"""Test class with some ES setup."""
def setUp(self):
super().setUp()
self.empty_index('stats_download_counts')
self.index()
def index(self):
downloads = DownloadCount.objects.values_list('id', flat=True)
tasks.index_download_counts(list(downloads))
self.refresh('stats_download_counts')
class TestCsvAndJsonViews(StatsTestCase):
def csv_eq(self, response, expected):
content = force_text(response.content)
content_csv = csv.DictReader(
@@ -288,8 +271,6 @@ class ESStatsTestCase(StatsTestCase, amo.tests.ESTestCase):
)
assert tuple(content_csv) == tuple(expected_csv)
class TestCsvAndJsonViews(ESStatsTestCase):
def test_usage_series_no_data_json(self):
self.get_updates_series_mock.return_value = []
@@ -627,97 +608,6 @@ class TestCsvAndJsonViews(ESStatsTestCase):
2009-06-01,1000,200,800""",
)
@override_flag('bigquery-download-stats', active=False)
def test_overview(self):
self.get_updates_series_mock.return_value = [
{'date': date(2009, 6, 2), 'end': date(2009, 6, 2), 'count': 1500},
{'date': date(2009, 6, 1), 'end': date(2009, 6, 1), 'count': 1000},
]
response = self.get_view_response(
'stats.overview_series', group='day', format='json'
)
assert response.status_code == 200
# These are the dates from the fixtures. The return value will have
# dates in between filled with zeroes.
expected_data = [
{"date": "2009-09-03", "data": {"downloads": 10, "updates": 0}},
{"date": "2009-08-03", "data": {"downloads": 10, "updates": 0}},
{"date": "2009-07-03", "data": {"downloads": 10, "updates": 0}},
{"date": "2009-06-28", "data": {"downloads": 10, "updates": 0}},
{"date": "2009-06-20", "data": {"downloads": 10, "updates": 0}},
{"date": "2009-06-12", "data": {"downloads": 10, "updates": 0}},
{"date": "2009-06-07", "data": {"downloads": 10, "updates": 0}},
{"date": "2009-06-02", "data": {"downloads": 0, "updates": 1500}},
{"date": "2009-06-01", "data": {"downloads": 10, "updates": 1000}},
]
actual_data = json.loads(force_text(response.content))
# Make sure they match up at the front and back.
assert actual_data[0]['date'] == expected_data[0]['date']
assert actual_data[-1]['date'] == expected_data[-1]['date']
end_date = expected_data[-1]['date']
expected, actual = iter(expected_data), iter(actual_data)
next_expected, next_actual = next(expected), next(actual)
while True:
if next_expected['date'] == next_actual['date']:
# If they match it's a date we have data for.
self.assertDictEqual(next_expected, next_actual)
if next_expected['date'] == end_date:
break
next_expected, next_actual = next(expected), next(actual)
else:
# Otherwise just check that the data is zeroes.
self.assertDictEqual(
next_actual['data'], {'downloads': 0, 'updates': 0}
)
next_actual = next(actual)
self.get_download_series_mock.assert_not_called()
@override_flag('bigquery-download-stats', active=False)
def test_downloads_json_legacy(self):
response = self.get_view_response(
'stats.downloads_series', group='day', format='json'
)
assert response.status_code == 200
self.assertListEqual(
json.loads(force_text(response.content)),
[
{"count": 10, "date": "2009-09-03", "end": "2009-09-03"},
{"count": 10, "date": "2009-08-03", "end": "2009-08-03"},
{"count": 10, "date": "2009-07-03", "end": "2009-07-03"},
{"count": 10, "date": "2009-06-28", "end": "2009-06-28"},
{"count": 10, "date": "2009-06-20", "end": "2009-06-20"},
{"count": 10, "date": "2009-06-12", "end": "2009-06-12"},
{"count": 10, "date": "2009-06-07", "end": "2009-06-07"},
{"count": 10, "date": "2009-06-01", "end": "2009-06-01"},
],
)
self.get_download_series_mock.assert_not_called()
@override_flag('bigquery-download-stats', active=False)
def test_downloads_csv_legacy(self):
response = self.get_view_response(
'stats.downloads_series', group='day', format='csv'
)
assert response.status_code == 200
self.csv_eq(
response,
"""date,count
2009-09-03,10
2009-08-03,10
2009-07-03,10
2009-06-28,10
2009-06-20,10
2009-06-12,10
2009-06-07,10
2009-06-01,10""",
)
self.get_download_series_mock.assert_not_called()
def test_downloads_json(self):
self.get_download_series_mock.return_value = [
{
@@ -771,88 +661,6 @@ class TestCsvAndJsonViews(ESStatsTestCase):
2009-06-01,1000""",
)
@override_flag('bigquery-download-stats', active=False)
def test_downloads_sources_json_legacy(self):
response = self.get_view_response(
'stats.sources_series', group='day', format='json'
)
assert response.status_code == 200
self.assertListEqual(
json.loads(force_text(response.content)),
[
{
"count": 10,
"date": "2009-09-03",
"end": "2009-09-03",
"data": {"api": 2, "search": 3},
},
{
"count": 10,
"date": "2009-08-03",
"end": "2009-08-03",
"data": {"api": 2, "search": 3},
},
{
"count": 10,
"date": "2009-07-03",
"end": "2009-07-03",
"data": {"api": 2, "search": 3},
},
{
"count": 10,
"date": "2009-06-28",
"end": "2009-06-28",
"data": {"api": 2, "search": 3},
},
{
"count": 10,
"date": "2009-06-20",
"end": "2009-06-20",
"data": {"api": 2, "search": 3},
},
{
"count": 10,
"date": "2009-06-12",
"end": "2009-06-12",
"data": {"api": 2, "search": 3},
},
{
"count": 10,
"date": "2009-06-07",
"end": "2009-06-07",
"data": {"api": 2, "search": 3},
},
{
"count": 10,
"date": "2009-06-01",
"end": "2009-06-01",
"data": {"api": 2, "search": 3},
},
],
)
self.get_download_series_mock.assert_not_called()
@override_flag('bigquery-download-stats', active=False)
def test_downloads_sources_csv_legacy(self):
response = self.get_view_response(
'stats.sources_series', group='day', format='csv'
)
assert response.status_code == 200
self.csv_eq(
response,
"""date,count,api,search
2009-09-03,10,2,3
2009-08-03,10,2,3
2009-07-03,10,2,3
2009-06-28,10,2,3
2009-06-20,10,2,3
2009-06-12,10,2,3
2009-06-07,10,2,3
2009-06-01,10,2,3""",
)
self.get_download_series_mock.assert_not_called()
def test_download_by_source_json(self):
self.get_download_series_mock.return_value = [
{
@@ -1142,33 +950,6 @@ class TestCsvAndJsonViews(ESStatsTestCase):
2009-06-02,1500,550,950""",
)
@override_flag('bigquery-download-stats', active=False)
def test_no_download_by_content_if_not_bigquery(self):
response = self.get_view_response(
'stats.contents_series', group='day', format='csv'
)
assert response.status_code == 404
self.get_download_series_mock.assert_not_called()
@override_flag('bigquery-download-stats', active=False)
def test_no_download_by_campaign_if_not_bigquery(self):
response = self.get_view_response(
'stats.campaigns_series', group='day', format='csv'
)
assert response.status_code == 404
self.get_download_series_mock.assert_not_called()
@override_flag('bigquery-download-stats', active=False)
def test_no_download_by_medium_if_not_bigquery(self):
response = self.get_view_response(
'stats.mediums_series', group='day', format='csv'
)
assert response.status_code == 404
self.get_download_series_mock.assert_not_called()
class TestXss(amo.tests.TestXss):
def test_stats_page(self):
@@ -1187,7 +968,6 @@ class TestXss(amo.tests.TestXss):
assert views.get_report_view(req) == {}
@override_flag('bigquery-download-stats', active=True)
class TestStatsWithBigQuery(TestCase):
def setUp(self):
super().setUp()
@@ -1212,24 +992,6 @@ class TestStatsWithBigQuery(TestCase):
assert b'by Country' in response.content
@override_flag('bigquery-download-stats', active=False)
@mock.patch('olympia.stats.views.get_updates_series')
@mock.patch('olympia.stats.views.get_download_series')
def test_overview_series(
self, get_download_series, get_updates_series_mock
):
get_updates_series_mock.return_value = []
url = reverse('stats.overview_series', args=self.series_args)
self.client.get(url)
get_download_series.assert_not_called()
get_updates_series_mock.assert_called_once_with(
addon=self.addon,
start_date=self.start_date,
end_date=self.end_date,
)
@mock.patch('olympia.stats.views.get_updates_series')
def test_usage_series(self, get_updates_series_mock):
get_updates_series_mock.return_value = []
@@ -1306,18 +1068,6 @@ class TestStatsWithBigQuery(TestCase):
end_date=self.end_date,
)
@override_flag('bigquery-download-stats', active=False)
def test_overview_does_not_show_some_links_when_flag_is_disabled(self):
url = reverse('stats.overview', args=[self.addon.slug])
response = self.client.get(url)
assert b'Weekly Downloads' in response.content
assert b'by Source' in response.content
assert b'by Medium' not in response.content
assert b'by Content' not in response.content
assert b'by Campaign' not in response.content
def test_overview_shows_links_to_bigquery_download_stats(self):
url = reverse('stats.overview', args=[self.addon.slug])

View file

@@ -26,7 +26,6 @@ from olympia.core.languages import ALL_LANGUAGES
from olympia.stats.decorators import addon_view_stats
from olympia.stats.forms import DateForm
from .models import DownloadCount
from .utils import get_updates_series, get_download_series
@@ -136,15 +135,9 @@ def overview_series(request, addon, group, start, end, format):
start_date, end_date = date_range
check_stats_permission(request, addon)
if waffle.flag_is_active(request, 'bigquery-download-stats'):
downloads = get_download_series(
addon=addon, start_date=start_date, end_date=end_date
)
else:
downloads = get_series(
DownloadCount, addon=addon.id, date__range=date_range
)
downloads = get_download_series(
addon=addon, start_date=start_date, end_date=end_date
)
updates = get_updates_series(
addon=addon, start_date=start_date, end_date=end_date
)
@@ -196,14 +189,9 @@ def downloads_series(request, addon, group, start, end, format):
start_date, end_date = date_range
check_stats_permission(request, addon)
if waffle.flag_is_active(request, 'bigquery-download-stats'):
series = get_download_series(
addon=addon, start_date=start_date, end_date=end_date
)
else:
series = get_series(
DownloadCount, addon=addon.id, date__range=date_range
)
series = get_download_series(
addon=addon, start_date=start_date, end_date=end_date
)
if format == 'csv':
return render_csv(request, addon, series, ['date', 'count'])
@@ -221,25 +209,13 @@ def download_breakdown_series(
start_date, end_date = date_range
check_stats_permission(request, addon)
if waffle.flag_is_active(request, 'bigquery-download-stats'):
series = get_download_series(
addon=addon,
start_date=start_date,
end_date=end_date,
source=source,
)
series = rename_unknown_values(series)
else:
# Legacy stats only have download stats "by source".
if source != 'sources':
raise http.Http404
series = get_series(
DownloadCount,
addon=addon.id,
date__range=date_range,
source=source
)
series = get_download_series(
addon=addon,
start_date=start_date,
end_date=end_date,
source=source,
)
series = rename_unknown_values(series)
if format == 'csv':
series, fields = csv_fields(series)
@@ -389,9 +365,6 @@ def stats_report(request, addon, report):
'report': report,
'stats_base_url': stats_base_url,
'view': view,
'bigquery_download_stats': waffle.flag_is_active(
request, 'bigquery-download-stats'
),
'use_fenix_build_ids': (
waffle.switch_is_active('use-fenix-build-ids')
),

View file

@@ -10,12 +10,6 @@ z.StatsManager = (function () {
var $primary = $('.primary');
// Make sure to use a different session cache when users have the waffle
// flag enabled or disabled.
if ($primary.data('bigquery-download-stats') === 'True') {
STATS_VERSION += '-preview';
}
var useFenixBuildIDs = $primary.data('use-fenix-build-ids') === 'True';
var storage = z.Storage('stats'),