Fix flake8 errors and add flake8 tests to tox

This commit is contained in:
Ryan Harter 2017-05-15 19:58:54 -04:00
Родитель 3e3fdd644e
Коммит efac040b70
6 изменённых файлов: 43 добавлений и 15 удалений

Просмотреть файл

@ -5,15 +5,22 @@ from datetime import datetime, timedelta
from moztelemetry.dataset import Dataset
from .utils import get_short_and_long_spinners
def run_spinner_etl(sc):
nightly_build_channels = ["nightly", "aurora"]
sample_size = 1.0
probe_available = datetime(2016, 9, 8)
days_to_look_back = 180
start_date = max(probe_available, datetime.today() - timedelta(days=days_to_look_back)).strftime("%Y%m%d")
look_back_date = datetime.today() - timedelta(days=180)
start_date = max(probe_available, look_back_date).strftime("%Y%m%d")
end_date = datetime.today().strftime("%Y%m%d")
def appBuildId_filter(b):
    """Tell whether build id ``b`` lies between ``start_date`` and ``end_date``.

    A build id matches a bound when it either begins with that date string
    or compares lexicographically on the correct side of it.  Both bounds
    are read from the enclosing scope.
    """
    not_before_start = b.startswith(start_date) or b > start_date
    not_after_end = b.startswith(end_date) or b < end_date
    return not_before_start and not_after_end
print "Start Date: {}, End Date: {}".format(start_date, end_date)
build_results = {}
@ -24,14 +31,14 @@ def run_spinner_etl(sc):
old_infra_pings = Dataset.from_source("telemetry-oldinfra") \
.where(docType='main') \
.where(submissionDate=lambda b: b < "20161201") \
.where(appBuildId=lambda b: (b.startswith(start_date) or b > start_date) and (b.startswith(end_date) or b < end_date)) \
.where(appBuildId=appBuildId_filter) \
.where(appUpdateChannel=build_type) \
.records(sc, sample=sample_size)
new_infra_pings = Dataset.from_source("telemetry") \
.where(docType='main') \
.where(submissionDate=lambda b: (b.startswith("20161201") or b > "20161201")) \
.where(appBuildId=lambda b: (b.startswith(start_date) or b > start_date) and (b.startswith(end_date) or b < end_date)) \
.where(appBuildId=appBuildId_filter) \
.where(appUpdateChannel=build_type) \
.records(sc, sample=sample_size)
@ -47,7 +54,7 @@ def run_spinner_etl(sc):
f.write(results_json)
s3_client.upload_file(
filename,
'telemetry-public-analysis-2',
filename,
'telemetry-public-analysis-2',
'spinner-severity-generator/data/{}'.format(filename)
)

Просмотреть файл

@ -2,18 +2,22 @@ import pandas as pd
from moztelemetry import get_pings_properties
def windows_only(p):
    """Return True when the ping reports ``Windows_NT`` as its OS name.

    ``p`` is indexed with the ``environment/system/os/name`` key.
    """
    os_name = p["environment/system/os/name"]
    return os_name == "Windows_NT"
def e10s_enabled_only(p):
    """Return the ping's ``environment/settings/e10sEnabled`` value as-is.

    The value is handed back untouched, so its truthiness decides the
    outcome when this function is used as a filter predicate.
    """
    e10s_flag = p["environment/settings/e10sEnabled"]
    return e10s_flag
def long_spinners_keyed_by_build_and_client(ping):
    """Convert a ping into a ``(key, value)`` pair.

    The key is ``(first eight characters of application/buildId, clientId)``.
    The value is the pair of histograms
    ``(FX_TAB_SWITCH_SPINNER_VISIBLE_LONG_MS, FX_TAB_SWITCH_SPINNER_VISIBLE_MS)``.
    """
    build_prefix = ping["application/buildId"][:8]
    key = (build_prefix, ping["clientId"])
    long_hist = ping["payload/histograms/FX_TAB_SWITCH_SPINNER_VISIBLE_LONG_MS"]
    short_hist = ping["payload/histograms/FX_TAB_SWITCH_SPINNER_VISIBLE_MS"]
    return (key, (long_hist, short_hist))
def add_tuple_series(x, y):
long_x = x[0]
long_y = y[0]
@ -31,6 +35,7 @@ def add_tuple_series(x, y):
return (long_x.add(long_y, fill_value=0.0), short_x.add(short_y, fill_value=0.0))
def bucket_by_long_severity_per_client(spinner_pair):
buildId = spinner_pair[0][0]
hist = spinner_pair[1][0]
@ -64,6 +69,7 @@ def bucket_by_long_severity_per_client(spinner_pair):
return (buildId, severity)
def bucket_by_short_severity_per_client(spinner_pair):
buildId = spinner_pair[0][0]
long_hist = spinner_pair[1][0]
@ -99,12 +105,14 @@ def bucket_by_short_severity_per_client(spinner_pair):
return (buildId, severity)
def to_percentages(build_severities):
    """Scale a build's severity counts so they express fractions of the total.

    ``build_severities`` is indexed as ``(build id, severities)``; the
    severities object must support ``.sum()`` and division by a scalar.

    Returns ``(build id, severities / total)`` when the total is positive,
    otherwise ``None``.
    """
    counts = build_severities[1]
    client_total = counts.sum()
    if not client_total > 0:
        # Nothing to normalise against; the caller receives None.
        return None
    return (build_severities[0], counts / client_total)
def collect_aggregated_spinners(rdd, map_func):
collected_percentages = rdd \
.map(map_func) \
@ -115,6 +123,7 @@ def collect_aggregated_spinners(rdd, map_func):
return sorted(collected_percentages, key=lambda result: result[0])
def get_short_and_long_spinners(pings):
properties = ["clientId",
@ -129,9 +138,9 @@ def get_short_and_long_spinners(pings):
windows_pings_only = ping_props.filter(windows_only)
e10s_enabled_on_windows_pings_only = windows_pings_only.filter(e10s_enabled_only)
grouped_spinners = e10s_enabled_on_windows_pings_only \
.repartition(200) \
.map(long_spinners_keyed_by_build_and_client) \
.reduceByKey(add_tuple_series)
.repartition(200) \
.map(long_spinners_keyed_by_build_and_client) \
.reduceByKey(add_tuple_series)
final_result_long = collect_aggregated_spinners(
grouped_spinners,

Просмотреть файл

@ -11,14 +11,17 @@ def transform_pings(sqlContext, pings):
DataFrameConfig([
("uuid", "payload/payload/uuid", None, StringType()),
("userContextId", "payload/payload/userContextId", None, StringType()),
("clickedContainerTabCount", "payload/payload/clickedContainerTabCount", None, LongType()),
("clickedContainerTabCount",
"payload/payload/clickedContainerTabCount", None, LongType()),
("eventSource", "payload/payload/eventSource", None, StringType()),
("event", "payload/payload/event", None, StringType()),
("hiddenContainersCount", "payload/payload/hiddenContainersCount", None, LongType()),
("shownContainersCount", "payload/payload/shownContainersCount", None, LongType()),
("totalContainersCount", "payload/payload/totalContainersCount", None, LongType()),
("totalContainerTabsCount", "payload/payload/totalContainerTabsCount", None, LongType()),
("totalNonContainerTabsCount", "payload/payload/totalNonContainerTabsCount", None, LongType()),
("totalContainerTabsCount",
"payload/payload/totalContainerTabsCount", None, LongType()),
("totalNonContainerTabsCount",
"payload/payload/totalNonContainerTabsCount", None, LongType()),
("pageRequestCount", "payload/payload/pageRequestCount", None, LongType()),
("test", "payload/test", None, StringType()),
], lambda ping: ping['payload/test'] == "@testpilot-containers")

Просмотреть файл

@ -19,7 +19,10 @@ def test_simple_transform(simple_rdd, spark_context):
results = get_short_and_long_spinners(simple_rdd)
# get rid of pd index
result = {k: {build_id: series.values for build_id, series in v} for k, v in results.iteritems()}
result = {
k: {build_id: series.values for build_id, series in v}
for k, v in results.iteritems()
}
expected = {
"short": {"20170101": pd.Series([0, 1.0, 0, 0, 0, 0, 0, 0])},

Просмотреть файл

@ -1,5 +1,4 @@
import json
import os
from click.testing import CliRunner

Просмотреть файл

@ -4,7 +4,7 @@
# and then run "tox" from this directory.
[tox]
envlist = py27
envlist = py27, flake8
[pytest]
addopts = --cov=mozetl tests/
@ -21,3 +21,10 @@ deps =
coverage
moto
mock
[testenv:flake8]
basepython = python2.7
deps =
flake8
commands =
flake8 mozetl tests --max-line-length=100