Merge pull request #24 from fbertsch/handle_null

Handle null unicode char in labels
This commit is contained in:
Roberto Agostino Vitillo 2016-12-08 15:46:23 -10:00 коммит произвёл GitHub
Родитель 2ef938d4d1 eb968c5ca3
Коммит 62e0a03b5c
3 изменённых файлов: 21 добавлений и 3 удалений

Просмотреть файл

@ -61,12 +61,14 @@ def submit_aggregates(aggregates, dry_run=False):
build_id_count = aggregates[0].\
map(lambda x: (x[0][:4], _aggregate_to_sql(x))).\
filter(lambda x: x[1]).\
reduceByKey(lambda x, y: x + y).\
map(lambda x: _upsert_build_id_aggregates(x[0], x[1], connection_string, dry_run=dry_run)).\
count()
submission_date_count = aggregates[1].\
map(lambda x: (x[0][:3], _aggregate_to_sql(x))).\
filter(lambda x: x[1]).\
reduceByKey(lambda x, y: x + y).\
map(lambda x: _upsert_submission_date_aggregates(x[0], x[1], connection_string, dry_run=dry_run)).\
count()
@ -111,6 +113,9 @@ def _aggregate_to_sql(aggregate):
if not set(metric).issubset(_metric_printable):
continue # Ignore metrics with non printable characters...
if u'\u0000' in label:
continue # Ignore labels with null character
try:
# Make sure values fit within a pgsql bigint
# TODO: we should probably log this event
@ -119,7 +124,8 @@ def _aggregate_to_sql(aggregate):
histogram = _get_complete_histogram(channel, metric, payload["histogram"]) + [payload["sum"], payload["count"]]
histogram = [str(long(x)) for x in histogram]
except KeyError:
except KeyError as e:
# Should eventually log errors
continue
dimensions["metric"] = metric
@ -133,7 +139,6 @@ def _aggregate_to_sql(aggregate):
json_dimensions = json_dimensions.replace("\\", "\\\\")
result.write("{}\t{}\n".format(json_dimensions, "{" + ",".join(histogram) + "}"))
return result.getvalue()

Просмотреть файл

@ -8,7 +8,7 @@
from setuptools import setup
setup(name='python_mozaggregator',
version='0.2.5.11',
version='0.2.5.12',
author='Roberto Agostino Vitillo',
author_email='rvitillo@mozilla.com',
description='Telemetry aggregation job',

Просмотреть файл

@ -149,6 +149,19 @@ def test_submission_dates_metrics():
test_keyed_histogram("submission_date", channel, version, template_submission_date, metric, histograms, expected_count)
def test_null_label_character_submit():
    """Check that an aggregate whose label contains a null character is not submitted.

    A single aggregate is built whose metric label includes ``u"\\u0000"`` and
    is fed to ``submit_aggregates`` as both the build-id and the
    submission-date dataset. Both returned counts are expected to be 0.
    """
    # The label (second tuple element) deliberately embeds null characters.
    metric_info = ("SIMPLE_MEASURES_NULL_METRIC_LABEL", u"\u0001\u0000\u0000\u0000\u7000\ub82c", False)
    payload = {"sum": 4, "count": 2, "histogram": {2: 2}}
    key = ('20161111', 'nightly', '52', '20161111', 'Firefox', 'arch', 'linux', '42', 'false')
    aggregate = (key, {metric_info: payload})

    # The same one-record dataset is used for both inputs.
    aggregates = [sc.parallelize([aggregate]), sc.parallelize([aggregate])]
    build_id_count, submission_date_count = submit_aggregates(aggregates)

    assert build_id_count == 0, "Build id count should be 0, was {}".format(build_id_count)
    # Report the submission-date count here (the message previously showed build_id_count).
    assert submission_date_count == 0, "submission date count should be 0, was {}".format(submission_date_count)
@nottest
def test_histogram(prefix, channel, version, dates, metric, value, expected_count):
if metric.endswith("CONTENT_DOCUMENTS_DESTROYED"): # Ignore USE_COUNTER2_ support histograms