Merge pull request #24 from fbertsch/handle_null
Handle null unicode char in labels
This commit is contained in:
Коммит
62e0a03b5c
|
@ -61,12 +61,14 @@ def submit_aggregates(aggregates, dry_run=False):
|
|||
|
||||
build_id_count = aggregates[0].\
|
||||
map(lambda x: (x[0][:4], _aggregate_to_sql(x))).\
|
||||
filter(lambda x: x[1]).\
|
||||
reduceByKey(lambda x, y: x + y).\
|
||||
map(lambda x: _upsert_build_id_aggregates(x[0], x[1], connection_string, dry_run=dry_run)).\
|
||||
count()
|
||||
|
||||
submission_date_count = aggregates[1].\
|
||||
map(lambda x: (x[0][:3], _aggregate_to_sql(x))).\
|
||||
filter(lambda x: x[1]).\
|
||||
reduceByKey(lambda x, y: x + y).\
|
||||
map(lambda x: _upsert_submission_date_aggregates(x[0], x[1], connection_string, dry_run=dry_run)).\
|
||||
count()
|
||||
|
@ -111,6 +113,9 @@ def _aggregate_to_sql(aggregate):
|
|||
if not set(metric).issubset(_metric_printable):
|
||||
continue # Ignore metrics with non printable characters...
|
||||
|
||||
if u'\u0000' in label:
|
||||
continue # Ignore labels with null character
|
||||
|
||||
try:
|
||||
# Make sure values fit within a pgsql bigint
|
||||
# TODO: we should probably log this event
|
||||
|
@ -119,7 +124,8 @@ def _aggregate_to_sql(aggregate):
|
|||
|
||||
histogram = _get_complete_histogram(channel, metric, payload["histogram"]) + [payload["sum"], payload["count"]]
|
||||
histogram = [str(long(x)) for x in histogram]
|
||||
except KeyError:
|
||||
except KeyError as e:
|
||||
# Should eventually log errors
|
||||
continue
|
||||
|
||||
dimensions["metric"] = metric
|
||||
|
@ -133,7 +139,6 @@ def _aggregate_to_sql(aggregate):
|
|||
json_dimensions = json_dimensions.replace("\\", "\\\\")
|
||||
|
||||
result.write("{}\t{}\n".format(json_dimensions, "{" + ",".join(histogram) + "}"))
|
||||
|
||||
return result.getvalue()
|
||||
|
||||
|
||||
|
|
2
setup.py
2
setup.py
|
@ -8,7 +8,7 @@
|
|||
from setuptools import setup
|
||||
|
||||
setup(name='python_mozaggregator',
|
||||
version='0.2.5.11',
|
||||
version='0.2.5.12',
|
||||
author='Roberto Agostino Vitillo',
|
||||
author_email='rvitillo@mozilla.com',
|
||||
description='Telemetry aggregation job',
|
||||
|
|
|
@ -149,6 +149,19 @@ def test_submission_dates_metrics():
|
|||
test_keyed_histogram("submission_date", channel, version, template_submission_date, metric, histograms, expected_count)
|
||||
|
||||
|
||||
def test_null_label_character_submit():
    # Regression test: an aggregate whose label contains the null character
    # (u'\u0000') must not be submitted, so both returned counts are expected
    # to be zero.
    metric_info = ("SIMPLE_MEASURES_NULL_METRIC_LABEL", u"\u0001\u0000\u0000\u0000\u7000\ub82c", False)
    payload = {"sum": 4, "count": 2, "histogram": {2: 2}}
    key = ('20161111', 'nightly', '52', '20161111', 'Firefox', 'arch', 'linux', '42', 'false')
    aggregate = (key, {metric_info: payload})

    # The same single aggregate is used for both RDDs passed to
    # submit_aggregates (index 0: build-id, index 1: submission-date).
    aggregates = [sc.parallelize([aggregate]), sc.parallelize([aggregate])]
    build_id_count, submission_date_count = submit_aggregates(aggregates)

    assert build_id_count == 0, "Build id count should be 0, was {}".format(build_id_count)
    # Report the submission-date count here; the original message formatted
    # build_id_count by mistake, hiding the actual offending value on failure.
    assert submission_date_count == 0, "submission date count should be 0, was {}".format(submission_date_count)
|
||||
|
||||
|
||||
@nottest
|
||||
def test_histogram(prefix, channel, version, dates, metric, value, expected_count):
|
||||
if metric.endswith("CONTENT_DOCUMENTS_DESTROYED"): # Ignore USE_COUNTER2_ support histograms
|
||||
|
|
Загрузка…
Ссылка в новой задаче