Bug 1594111 - Change data source to BigQuery (#376)

This commit is contained in:
Ben Wu 2019-11-22 16:00:20 -05:00 коммит произвёл GitHub
Родитель 99e8415ac3
Коммит 5956649bb5
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
9 изменённых файлов: 137 добавлений и 85 удалений

Просмотреть файл

@ -6,4 +6,4 @@ SECRET_KEY=59114b6a-2858-4caf-8878-482a24ee9542
CACHE_URL=redis://redis:6379/1
FETCH_MEASURE_DATA=0
LOGGING_USE_JSON=0
MISSION_CONTROL_TABLE=error_aggregates_v2
MISSION_CONTROL_TABLE=telemetry_derived.error_aggregates

Просмотреть файл

@ -19,12 +19,6 @@ CREATE TABLE hive.default.error_aggregates_v1 (
gpu_crashes bigint,
plugin_crashes bigint,
gmplugin_crashes bigint,
content_shutdown_crashes bigint,
browser_shim_usage_blocked bigint,
permissions_sql_corrupted bigint,
defective_permissions_sql_removed bigint,
slow_script_notice_count bigint,
slow_script_page_count bigint,
submission_date varchar,
window_start timestamp,
window_end timestamp

Просмотреть файл

@ -15,12 +15,12 @@
pk: 1
fields:
name: windows
telemetry_name: Windows_NT
telemetry_name: Windows
- model: base.Platform
pk: 2
fields:
name: mac
telemetry_name: Darwin
telemetry_name: Mac
- model: base.Platform
pk: 3
fields:

Просмотреть файл

@ -23,22 +23,13 @@ class Command(BaseCommand):
'content_shutdown_crashes',
'startup_crashes'
]
UNIVERSAL_DESKTOP_QUALITY_MEASURES = [
'browser_shim_usage_blocked',
]
PRERELEASE_DESKTOP_QUALITY_MEASURES = [
'slow_script_notice_count',
'slow_script_page_count',
'permissions_sql_corrupted',
'defective_permissions_sql_removed',
]
FIREFOX_APPLICATION = Application.objects.get(name='firefox')
# all crash measures and a small number of quality measures are
# applicable to all desktop platforms and channels
for platform in Platform.objects.exclude(name='android'):
for measure_name in (DESKTOP_CRASH_MEASURES +
UNIVERSAL_DESKTOP_QUALITY_MEASURES):
for measure_name in DESKTOP_CRASH_MEASURES:
measure, _ = Measure.objects.update_or_create(
name=measure_name, application=FIREFOX_APPLICATION,
platform=platform, defaults={'enabled': True})
@ -55,17 +46,6 @@ class Command(BaseCommand):
gpu_crashes_measure.channels.set(Channel.objects.all())
gpu_crashes_measure.save()
# most desktop quality measures are on beta/nightly only
development_channels = Channel.objects.filter(
name__in=['nightly', 'beta'])
for platform in Platform.objects.exclude(name='android'):
for measure_name in PRERELEASE_DESKTOP_QUALITY_MEASURES:
measure, _ = Measure.objects.update_or_create(
name=measure_name, application=FIREFOX_APPLICATION,
platform=platform, defaults={'enabled': True})
measure.channels.set(development_channels)
measure.save()
# create a set of non-platform-specific crash measures for experiments
for measure_name in DESKTOP_CRASH_MEASURES + DESKTOP_CRASH_MEASURES:
Measure.objects.get_or_create(name=measure_name, platform=None)

Просмотреть файл

@ -0,0 +1,5 @@
from google.cloud import bigquery
def get_bigquery_client():
    """Create and return a new BigQuery client.

    The client is built with no explicit arguments, so any project or
    credential selection is left to the library's own defaults.
    """
    client = bigquery.Client()
    return client

Просмотреть файл

@ -8,6 +8,7 @@ from django.db import transaction
from django.db.models import Max
from django.db.utils import IntegrityError
from . import bigquery
from missioncontrol.celery import celery
from missioncontrol.base.models import (Application,
Build,
@ -30,11 +31,6 @@ def update_measures(application_name, platform_name, channel_name,
Updates (or creates) a local cache entry for a specific platform/channel/measure
aggregate, which can later be retrieved by the API
'''
# hack: importing raw_query here to make monkeypatching work
# (if we put it on top it is impossible to override if something
# else imports this module first)
from .presto import raw_query
logger.info('Updating measures: %s %s (date: %s)', channel_name, platform_name,
submission_date or 'latest')
@ -47,7 +43,8 @@ def update_measures(application_name, platform_name, channel_name,
channel = Channel.objects.get(name=channel_name)
measures = Measure.objects.filter(channels=channel,
application=application,
platform=platform)
platform=platform,
enabled=True)
if submission_date is None:
now = datetime.datetime.utcnow()
submission_date = datetime.datetime(year=now.year, month=now.month,
@ -90,20 +87,7 @@ def update_measures(application_name, platform_name, channel_name,
# query from incorrect parameters
measure_sums = ', '.join([
'sum({})'.format(measure.name) for measure in measures])
query_template = f'''
select window_start, build_id, display_version, sum(usage_hours),
sum(count),
{measure_sums}
from {MISSION_CONTROL_TABLE} where
application=%(application_name)s and
display_version > %(min_version)s and display_version < %(max_version)s and
build_id > %(min_build_id)s and build_id < %(max_build_id)s and
os_name=%(os_name)s and
channel=%(channel_name)s and
window_start > timestamp %(min_timestamp)s and
submission_date_s3 = %(submission_date)s
group by (window_start, build_id, display_version)
having sum(usage_hours) > 0'''.replace('\n', '').strip()
params = {
'application_name': application.telemetry_name,
'min_version': str(min_version),
@ -113,14 +97,46 @@ def update_measures(application_name, platform_name, channel_name,
'os_name': platform.telemetry_name,
'channel_name': channel_name,
'min_timestamp': min_timestamp.strftime("%Y-%m-%d %H:%M:%S"),
'submission_date': submission_date.strftime("%Y%m%d")
'submission_date': submission_date.strftime("%Y-%m-%d")
}
logger.info('Querying: %s', query_template % params)
query_sql = ' '.join(f'''
SELECT
window_start,
build_id,
display_version,
SUM(usage_hours) summed_usage_hours,
SUM(count),
{measure_sums}
FROM
{MISSION_CONTROL_TABLE}
WHERE
submission_date = \'{submission_date.strftime('%Y-%m-%d')}\'
AND application = \'{params['application_name']}\'
AND display_version > \'{params['min_version']}\'
AND display_version < \'{params['max_version']}\'
AND build_id > \'{params['min_build_id']}\'
AND build_id < \'{params['max_build_id']}\'
AND os_name = \'{params['os_name']}\'
AND channel = \'{params['channel_name']}\'
AND window_start > \'{params['min_timestamp']}\'
GROUP BY
window_start,
build_id,
display_version
HAVING
summed_usage_hours > 0
'''.split())
logger.info('Querying: %s', query_sql)
client = bigquery.get_bigquery_client()
query_job = client.query(query=query_sql)
# bulk create any new datum objects from the returned results
build_cache = {}
datum_objs = []
for row in raw_query(query_template, params):
for row in query_job:
(window_start, build_id, version, usage_hours, client_count) = row[:5]
for (measure, measure_count) in zip(measures, row[5:]):
if measure_count is None:

Просмотреть файл

@ -97,7 +97,7 @@ CACHES = {'default': django_cache_url.config()}
PRESTO_URL = config('PRESTO_URL')
MISSION_CONTROL_TABLE = config('MISSION_CONTROL_TABLE',
default='telemetry.error_aggregates_v2')
default='telemetry_derived.error_aggregates')
PRESTO_EXPERIMENTS_ERROR_AGGREGATES_TABLE = config(
'PRESTO_EXPERIMENTS_ERROR_AGGREGATES_TABLE',
default='telemetry.experiment_error_aggregates_v1')

Просмотреть файл

@ -101,16 +101,9 @@ Brotli==0.6.0 \
django-csp==3.3 \
--hash=sha256:a95eeeb04df155b6270612238700f16abd2b0cc3d32dbcfca8c7fd06d26dacba \
--hash=sha256:db082b75075672fb9cf4a622c75c94ee624cb566ffe9d2a645687e58598b6d39
pytz==2017.2 \
--hash=sha256:c883c2d6670042c7bc1688645cac73dd2b03193d1f7a6847b6154e96890be06d \
--hash=sha256:03c9962afe00e503e2d96abab4e8998a0f84d4230fa57afe1e0528473698cdd9 \
--hash=sha256:487e7d50710661116325747a9cd1744d3323f8e49748e287bc9e659060ec6bf9 \
--hash=sha256:43f52d4c6a0be301d53ebd867de05e2926c35728b3260157d274635a0a947f1c \
--hash=sha256:d1d6729c85acea5423671382868627129432fba9a89ecbb248d8d1c7a9f01c67 \
--hash=sha256:54a935085f7bf101f86b2aff75bd9672b435f51c3339db2ff616e66845f2b8f9 \
--hash=sha256:39504670abb5dae77f56f8eb63823937ce727d7cdd0088e6909e6dcac0f89043 \
--hash=sha256:ddc93b6d41cfb81266a27d23a79e13805d4a5521032b512643af8729041a81b4 \
--hash=sha256:f5c056e8f62d45ba8215e5cb8f50dfccb198b4b9fbea8500674f3443e4689589
pytz==2019.3 \
--hash=sha256:1c557d7d0e871de1f5ccd5833f60fb2550652da6be2693c1e02300743d21500d \
--hash=sha256:b02c06db6cf09c12dd25137e563b31700d3b80fcc4ad23abb7a315f2789819be
newrelic==4.10.0.112 \
--hash=sha256:e837010e8052484ba4070f9197d87e077a62f44a12bfa0ab27548d256c3e3882
django-session-csrf==0.7.1 \
@ -149,9 +142,9 @@ tenacity==5.0.2 \
boto3==1.4.5 \
--hash=sha256:a4552ebaee08d1cc77c7f0e2756bb9e51b3f40076cf304c8c42e51e8b85d5e8f \
--hash=sha256:6d570df0f692e82b35e9abafbb4584b899b2803e8cfcb70d1f371ca08919831d
six==1.10.0 \
--hash=sha256:0ff78c403d9bccf5a425a6d31a12aa6b47f1c21ca4dc2573a7e2f32a97335eb1 \
--hash=sha256:105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a
six==1.13.0 \
--hash=sha256:1f1b7d42e254082a9db6279deae68afb421ceba6158efa6131de7b3003ee93fd \
--hash=sha256:30f610279e8b2578cab6db20741130331735c781b56053c59c4076da27f06b66
monotonic==1.3 \
--hash=sha256:a8c7690953546c6bc8a4f05d347718db50de1225b29f4b9f346c0c6f19bdc286 \
--hash=sha256:2b469e2d7dd403f7f7f79227fe5ad551ee1e76f8bb300ae935209884b93c7c1b
@ -183,23 +176,23 @@ django-configurations==2.0 \
django-cors-headers==2.1.0 \
--hash=sha256:4e02be61ffaaab5917f1fd7cc3c305c4fb7ccd0156a649c96f49bc0a09c5f572 \
--hash=sha256:451bc37a514792c2b46c52362368f7985985933ecdbf1a85f82652579a5cbe01
requests==2.20.1 \
--hash=sha256:65b3a120e4329e33c9889db89c80976c5272f56ea92d3e74da8a463992e3ff54 \
--hash=sha256:ea881206e59f41dbd0bd445437d792e43906703fff75ca8ff43ccdb11f33f263
requests==2.22.0 \
--hash=sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4 \
--hash=sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31
# required by requests
certifi==2018.8.13 \
--hash=sha256:b6e8b28b2b7e771a41ecdd12d4d43262ecab52adebbafa42c77d6b57fb6ad3a4 \
--hash=sha256:4c1d68a1408dd090d2f3a869aa94c3947cc1d967821d1ed303208c9f41f0f2f4
certifi==2019.9.11 \
--hash=sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50 \
--hash=sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef
chardet==3.0.4 \
--hash=sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691 \
--hash=sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae
idna==2.7 \
--hash=sha256:156a6814fb5ac1fc6850fb002e0852d56c0c8d2531923a51032d1b70760e186e \
--hash=sha256:684a38a6f903c1d71d6d5fac066b58d7768af4de2b832e426ec79c30daa94a16
urllib3==1.24.1 \
--hash=sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39 \
--hash=sha256:de9529817c93f27c8ccbfead6985011db27bd0ddfcdb2d86f3f663385c6a9c22
idna==2.8 \
--hash=sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407 \
--hash=sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c
urllib3==1.25.6 \
--hash=sha256:3de946ffbed6e6746608990594d08faac602528ac7015ac28d33cee6a45b7398 \
--hash=sha256:9a107b99a5393caf59c7aa3c1249c16e6879447533d0887f4336dde834c7be86
celery==4.2.1 \
--hash=sha256:ad7a7411772b80a4d6c64f2f7f723200e39fb66cf614a7fdfab76d345acc7b13 \
@ -284,3 +277,66 @@ yarl==1.1.1 \
--hash=sha256:d9ca55a5a297408f08e5401c23ad22bd9f580dab899212f0d5dc1830f0909404 \
--hash=sha256:e072edbd1c5628c0b8f97d00cf6c9fcd6a4ee2b5ded10d463fcb6eaa066cf40c \
--hash=sha256:e9a6a319c4bbfb57618f207e86a7c519ab0f637be3d2366e4cdac271577834b8
# Packages needed for bigquery
cachetools==3.1.1 \
--hash=sha256:428266a1c0d36dc5aca63a2d7c5942e88c2c898d72139fca0e97fdd2380517ae \
--hash=sha256:8ea2d3ce97850f31e4a08b0e2b5e6c34997d7216a9d2c98e0f3978630d4da69a \
# via google-auth
google-api-core==1.14.3 \
--hash=sha256:b95895a9398026bc0500cf9b4a3f82c3f72c3f9150b26ff53af40c74e91c264a \
--hash=sha256:df8adc4b97f5ab4328a0e745bee77877cf4a7d4601cb1cd5959d2bbf8fba57aa \
# via google-cloud-core
google-auth==1.7.0 \
--hash=sha256:8b67e34a07055b9785948ff9d3e044f93be9019f4f69711b04450087ae150817 \
--hash=sha256:cf60c71698f90177e044c8df1e2915a6da372a99d2af0e236d76c426aaf4f114 \
# via google-api-core
google-cloud-bigquery==1.21.0 \
--hash=sha256:8682dc2ae6e6e74af0624cde921a0153e8fcbefbdd281bd1c68de2dc28c9e543 \
--hash=sha256:b38d5669235583ee4334d468b3719ea4a381da4b2abbedbf13cb926d893a11ab
google-cloud-core==1.0.3 \
--hash=sha256:0ee17abc74ff02176bee221d4896a00a3c202f3fb07125a7d814ccabd20d7eb5 \
--hash=sha256:10750207c1a9ad6f6e082d91dbff3920443bdaf1c344a782730489a9efa802f1 \
# via google-cloud-bigquery
google-resumable-media==0.4.1 \
--hash=sha256:5fd2e641f477e50be925a55bcfdf0b0cb97c2b92aacd7b15c1d339f70d55c1c7 \
--hash=sha256:cdeb8fbb3551a665db921023603af2f0d6ac59ad8b48259cb510b8799505775f \
# via google-cloud-bigquery
googleapis-common-protos==1.6.0 \
--hash=sha256:e61b8ed5e36b976b487c6e7b15f31bb10c7a0ca7bd5c0e837f4afab64b53a0c6 \
# via google-api-core
protobuf==3.10.0 \
--hash=sha256:125713564d8cfed7610e52444c9769b8dcb0b55e25cc7841f2290ee7bc86636f \
--hash=sha256:1accdb7a47e51503be64d9a57543964ba674edac103215576399d2d0e34eac77 \
--hash=sha256:27003d12d4f68e3cbea9eb67427cab3bfddd47ff90670cb367fcd7a3a89b9657 \
--hash=sha256:3264f3c431a631b0b31e9db2ae8c927b79fc1a7b1b06b31e8e5bcf2af91fe896 \
--hash=sha256:3c5ab0f5c71ca5af27143e60613729e3488bb45f6d3f143dc918a20af8bab0bf \
--hash=sha256:45dcf8758873e3f69feab075e5f3177270739f146255225474ee0b90429adef6 \
--hash=sha256:56a77d61a91186cc5676d8e11b36a5feb513873e4ae88d2ee5cf530d52bbcd3b \
--hash=sha256:5984e4947bbcef5bd849d6244aec507d31786f2dd3344139adc1489fb403b300 \
--hash=sha256:6b0441da73796dd00821763bb4119674eaf252776beb50ae3883bed179a60b2a \
--hash=sha256:6f6677c5ade94d4fe75a912926d6796d5c71a2a90c2aeefe0d6f211d75c74789 \
--hash=sha256:84a825a9418d7196e2acc48f8746cf1ee75877ed2f30433ab92a133f3eaf8fbe \
--hash=sha256:b842c34fe043ccf78b4a6cf1019d7b80113707d68c88842d061fa2b8fb6ddedc \
--hash=sha256:ca33d2f09dae149a1dcf942d2d825ebb06343b77b437198c9e2ef115cf5d5bc1 \
--hash=sha256:db83b5c12c0cd30150bb568e6feb2435c49ce4e68fe2d7b903113f0e221e58fe \
--hash=sha256:f50f3b1c5c1c1334ca7ce9cad5992f098f460ffd6388a3cabad10b66c2006b09 \
--hash=sha256:f99f127909731cafb841c52f9216e447d3e4afb99b17bebfad327a75aee206de \
# via google-api-core, google-cloud-bigquery, googleapis-common-protos
pyasn1-modules==0.2.7 \
--hash=sha256:0c35a52e00b672f832e5846826f1fb7507907f7d52fba6faa9e3c4cbe874fe4b \
--hash=sha256:b6ada4f840fe51abf5a6bd545b45bf537bea62221fa0dde2e8a553ed9f06a4e3 \
# via google-auth
pyasn1==0.4.7 \
--hash=sha256:62cdade8b5530f0b185e09855dd422bc05c0bbff6b72ff61381c09dac7befd8c \
--hash=sha256:a9495356ca1d66ed197a0f72b41eb1823cf7ea8b5bd07191673e8147aecf8604 \
# via pyasn1-modules, rsa
pytz==2019.3 \
--hash=sha256:1c557d7d0e871de1f5ccd5833f60fb2550652da6be2693c1e02300743d21500d \
--hash=sha256:b02c06db6cf09c12dd25137e563b31700d3b80fcc4ad23abb7a315f2789819be \
# via google-api-core
rsa==4.0 \
--hash=sha256:14ba45700ff1ec9eeb206a2ce76b32814958a98e372006c8fb76ba820211be66 \
--hash=sha256:1a836406405730121ae9823e19c6e806c62bbad73f890574fff50efa4122c487 \
# via google-auth
# via google-api-core, google-auth, google-resumable-media, protobuf

Просмотреть файл

@ -25,15 +25,16 @@ def mock_raw_query_data(monkeypatch, base_datapoint_time):
@pytest.fixture
def mock_raw_query(monkeypatch, mock_raw_query_data):
import missioncontrol.etl.presto
import missioncontrol.etl.bigquery
def _raw_query(sql, params):
return mock_raw_query_data
class MockClient:
def query(self, query):
return mock_raw_query_data
monkeypatch.setattr(missioncontrol.etl.presto, 'raw_query', _raw_query)
monkeypatch.setattr(missioncontrol.etl.bigquery, 'get_bigquery_client', MockClient)
def test_update_measures_no_build_data(initial_data, mock_raw_query,
def test_update_measures_no_build_data(initial_data,
mock_raw_query_data):
(application, platform, channel) = ('firefox', 'linux', 'release')
with pytest.raises(Exception, match='No valid versions'):