Remove excess list calls and use python3 features

This commit is contained in:
Anthony Miyaguchi 2019-11-08 10:08:16 -05:00 committed by Anthony Miyaguchi
Parent dd25ecafec
Commit 446a168154
7 changed files: 42 additions and 38 deletions

View file

@ -55,14 +55,14 @@ def run_aggregator(
path = create_path(credentials_protocol, credentials_bucket, credentials_prefix)
creds = spark.read.json(path, multiLine=True).first().asDict()
for k, v in list(creds.items()):
for k, v in creds.items():
environ[k] = v
# Attempt a database connection now so we can fail fast if credentials are broken.
db._preparedb()
channels = [channel.strip() for channel in channels.split(",")]
print(("Running job for {}".format(date)))
print(f"Running job for {date}")
aggregates = aggregator.aggregate_metrics(
spark.sparkContext,
channels,
@ -72,8 +72,8 @@ def run_aggregator(
project_id=project_id,
dataset_id=dataset_id,
)
print(("Number of build-id aggregates: {}".format(aggregates[0].count())))
print(("Number of submission date aggregates: {}".format(aggregates[1].count())))
print(f"Number of build-id aggregates: {aggregates[0].count()}")
print(f"Number of submission date aggregates: {aggregates[1].count()}")
# Store the results in Postgres.
db.submit_aggregates(aggregates)
@ -105,8 +105,8 @@ def run_parquet(date, channels, output, num_partitions, source, project_id, data
project_id=project_id,
dataset_id=dataset_id,
)
print(("Number of build-id aggregates: {}".format(aggregates[0].count())))
print(("Number of submission date aggregates: {}".format(aggregates[1].count())))
print(f"Number of build-id aggregates: {aggregates[0].count()}")
print(f"Number of submission date aggregates: {aggregates[1].count()}")
parquet.write_aggregates(spark, aggregates, output, "append")
@ -131,7 +131,7 @@ def run_parquet(date, channels, output, num_partitions, source, project_id, data
def run_mobile(date, output, num_partitions, source, project_id, dataset_id):
spark = SparkSession.builder.getOrCreate()
print(("Running job for {}".format(date)))
print(f"Running job for {date}")
agg_metrics = mobile.aggregate_metrics(
spark.sparkContext,
date,

View file

@ -108,7 +108,7 @@ def _aggregate_ping(state, metrics):
% type(metrics)
)
for process in list(metrics.keys()):
for process in metrics.keys():
process_metrics = metrics.get(process, {})
_extract_process_histograms(state, process_metrics, process)
_extract_process_scalars(state, process_metrics, process)

View file

@ -436,7 +436,7 @@ def get_dates_metrics(prefix, channel):
# We received an unsupported query string to filter by, return 405.
valid_url = '{}?{}'.format(
request.path,
urlencode({k: v for k, v in list(dimensions.items()) if k in ALLOWED_DIMENSIONS}))
urlencode({k: v for k, v in dimensions.items() if k in ALLOWED_DIMENSIONS}))
raise MethodNotAllowed(valid_methods=[valid_url])
if 'child' in dimensions:

View file

@ -203,10 +203,12 @@ def generate_payload(dimensions, aggregated_child_histograms):
child_payloads = [{"simpleMeasurements": simple_measurements_template}
for i in range(NUM_CHILDREN_PER_PING)]
scalars = dict(chain(iter(scalars_template.items()), iter(ignored_scalars_template.items())))
keyed_scalars = dict(chain(iter(keyed_scalars_template.items()),
iter(ignored_keyed_scalars_template.items()),
iter(private_keyed_scalars_template.items())))
scalars = {**scalars_template, **ignored_scalars_template}
keyed_scalars = {
**keyed_scalars_template,
**ignored_keyed_scalars_template,
**private_keyed_scalars_template,
}
processes_payload = {
"parent": {
@ -236,8 +238,10 @@ def generate_payload(dimensions, aggregated_child_histograms):
payload = {
"simpleMeasurements": simple_measurements_template,
"histograms": histograms_template,
"keyedHistograms": dict(list(keyed_histograms_template.items()) +
list(ignored_keyed_histograms_template.items())),
"keyedHistograms": {
**keyed_histograms_template,
**ignored_keyed_histograms_template
},
"childPayloads": child_payloads,
"processes": processes_payload,
}

View file

@ -88,17 +88,17 @@ def test_simple_measurements(build_id_aggregates):
assert(value["histogram"][str(d.SIMPLE_SCALAR_BUCKET)] == value["count"])
assert len(metric_count) == len(d.simple_measurements_template)
for process_counts in list(metric_count.values()):
for process_counts in metric_count.values():
assert(len(process_counts) == 2) # 1 for parent, 1 for childPayloads
for v in list(process_counts.values()):
for v in process_counts.values():
assert(v == len(build_id_aggregates))
def test_numerical_scalars(build_id_aggregates):
metric_count = defaultdict(lambda: defaultdict(int))
scalar_metrics = set([k.upper() for k in list(d.scalars_template.keys())])
keyed_scalar_metrics = set([k.upper() for k in list(d.keyed_scalars_template.keys())])
keyed_scalar_metrics |= set([k.upper() for k in list(d.private_keyed_scalars_template.keys())])
scalar_metrics = set([k.upper() for k in d.scalars_template.keys()])
keyed_scalar_metrics = set([k.upper() for k in d.keyed_scalars_template.keys()])
keyed_scalar_metrics |= set([k.upper() for k in d.private_keyed_scalars_template.keys()])
for aggregate in build_id_aggregates:
for key, value in aggregate[1].items():
@ -172,9 +172,9 @@ def test_count_histograms(build_id_aggregates):
assert(value["histogram"][str(d.COUNT_SCALAR_BUCKET)] == value["count"])
assert len(metric_count) == len(histograms)
for process_counts in list(metric_count.values()):
for process_counts in metric_count.values():
assert(set(process_counts.keys()) == PROCESS_TYPES)
for v in list(process_counts.values()):
for v in process_counts.values():
assert(v == len(build_id_aggregates))
@ -185,8 +185,8 @@ def test_keyed_histograms(build_id_aggregates):
for key, value in aggregate[1].items():
metric, label, process_type = key
if metric in list(d.keyed_histograms_template.keys()):
metric_label = "{}_{}".format(metric, label)
if metric in d.keyed_histograms_template.keys():
metric_label = f"{metric}_{label}"
if metric_label not in metric_count:
metric_count[metric_label] = defaultdict(int)
metric_count[metric_label][process_type] += 1
@ -198,10 +198,10 @@ def test_keyed_histograms(build_id_aggregates):
assert(set(histogram_template.keys()) == set(value["histogram"].keys()))
assert((pd.Series(histogram_template) * value["count"] == pd.Series(value["histogram"])).all())
assert(metric not in list(d.ignored_keyed_histograms_template.keys()))
assert(metric not in d.ignored_keyed_histograms_template.keys())
assert(len(metric_count) == len(d.keyed_histograms_template)) # Assume one label per keyed histogram
for process_counts in list(metric_count.values()):
for process_counts in metric_count.values():
assert(set(process_counts.keys()) == PROCESS_TYPES)
for v in list(process_counts.values()):
for v in process_counts.values():
assert(v == len(build_id_aggregates))

View file

@ -46,35 +46,35 @@ def test_keys(aggregates):
def test_histograms(aggregates):
n = d.NUM_PINGS_PER_DIMENSIONS
for aggregate in aggregates:
for metric_data in list(aggregate[1].items()):
for metric_data in aggregate[1].items():
metric_name, metric_key, process = metric_data[0]
# A regular histogram.
if metric_name in list(d.histograms_template.keys()):
if metric_name in d.histograms_template.keys():
tpl = d.histograms_template[metric_name]
assert(metric_data[1]['count'] == n)
assert(metric_data[1]['sum'] == tpl['sum'] * n)
for k, v in list(tpl['values'].items()):
for k, v in tpl['values'].items():
assert(metric_data[1]['histogram'][k] == v * n)
# A keyed histogram.
elif metric_name in list(d.keyed_histograms_template.keys()):
elif metric_name in d.keyed_histograms_template.keys():
tpl = d.keyed_histograms_template[metric_name]
assert(metric_data[1]['count'] == n)
assert(metric_data[1]['sum'] == tpl[metric_key]['sum'] * n)
for k, v in list(tpl[metric_key]['values'].items()):
for k, v in tpl[metric_key]['values'].items():
assert(metric_data[1]['histogram'][k] == v * n)
def test_scalars(aggregates):
n = d.NUM_PINGS_PER_DIMENSIONS
for aggregate in aggregates:
for metric_data in list(aggregate[1].items()):
for metric_data in aggregate[1].items():
metric_name, metric_key, process = metric_data[0]
metric_name = metric_name.split('_')[1].lower()
# A regular scalar.
if metric_name in list(d.scalars_template.keys()):
if metric_name in d.scalars_template.keys():
value = d.scalars_template[metric_name]
# A keyed scalar.
elif metric_name in list(d.keyed_scalars_template.keys()):
elif metric_name in d.keyed_scalars_template.keys():
value = d.keyed_scalars_template[metric_name][metric_key]
else:
continue

View file

@ -156,7 +156,7 @@ class ServiceTestCase(unittest.TestCase):
def test_cached_auth(self):
token = "cached-token"
auth0_cache[token] = True
for metric in list(histograms_template.keys()):
for metric in histograms_template.keys():
resp = self.app.get(
'/aggregates_by/build_id/channels/release/?version=41&dates={}&metric={}'.format(self.build_id_1, metric),
headers={'If-None-Match': SUBMISSION_DATE_ETAG, 'Authorization': ' Bearer ' + token})
@ -238,14 +238,14 @@ class ServiceTestCase(unittest.TestCase):
assert cache.get((url, False)) is None
def test_auth_header(self):
for metric in list(histograms_template.keys()):
for metric in histograms_template.keys():
resp = self.app.get(
'/aggregates_by/build_id/channels/release/?version=41&dates={}&metric={}'.format(self.build_id_1, metric),
headers={'If-None-Match': SUBMISSION_DATE_ETAG, 'Authorization': ' Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6IlJqUkZSREpFT0RnMk16UTNSRE0zT0VSRFFrWkZOalJETmpGQ05qZzBOVVEzTW9.eyJpc3MiOiJodHRwczovL2NodXR0ZW4uYXV0aDAuY29tLyIsInN1YiI6ImF1dGgwfDViYTNiOTVkYmExM2UyMzMxYzVmYzMzOCIsImF1ZCI6ImFnZ3JlZ2F0ZXMudGVsZW1ldHJ5Lm1vemlsbGEub3JnIiwiaWF0IjoxNTM3NTQ4Nzc2LCJleHAiOjE1Mzc1NTU5NzYsImF6cCI6InY0MjdDQmlwNjZoUzRxMm10SmVaSldpWUsxYUV2UVRLIiwic2NvcGUiOiJyZWFkOmFnZ3JlZ2F0ZXMifQ.ETE6m-fevEYxAoeg1lrER0Jm0nAMc-G_EgXsSF4t5at4RX6oYidcCT3dkbhWm3MYZrG3KHBYcGh8FRsdw2oxgJYsEFCKGxlA2lCta-3yrebIy_SmLGNjrXEWYpwXQ_yeCaOMz3aQ5hSvoIbUYDdaqEWqibfFvwD2Gu2cjsoXmoHKPVpBiwERUDIjfAfuW3-NP0qirpCR3LyuY2Iw7oOZB-uqdd_zeoD1IosliT7JhkRjzrQnYJN93Zx392KI3H_E08Assv_d9gUqFEiKvDQ7b10iB5A4fWnVYjtYqugvOmkDlQHTUY5Y7zbT8DJ4SYarXiJBxijwPeGpo4cslJVe5c'})
self.assertEqual(resp.status_code, 403)
def test_release_nonwhitelist(self):
for metric in list(histograms_template.keys()):
for metric in histograms_template.keys():
resp = self.app.get(
'/aggregates_by/build_id/channels/release/?version=41&dates={}&metric={}'
.format(self.build_id_1, metric),