diff --git a/mozaggregator/aggregator.py b/mozaggregator/aggregator.py index bf4fe08..5b4ea69 100644 --- a/mozaggregator/aggregator.py +++ b/mozaggregator/aggregator.py @@ -123,20 +123,20 @@ def _extract_main_histograms(state, histograms, is_child): return # Deal with USE_COUNTER2_ histograms, see Bug 1204994 - docs_destroyed = histograms.get("CONTENT_DOCUMENTS_DESTROYED", {}).get("sum", None) - top_docs_destroyed = histograms.get("TOP_LEVEL_CONTENT_DOCUMENTS_DESTROYED", {}).get("sum", None) - - if docs_destroyed is not None and top_docs_destroyed is not None: - total_destroyed = docs_destroyed - top_docs_destroyed - else: - total_destroyed = None + pages_destroyed = histograms.get("TOP_LEVEL_CONTENT_DOCUMENTS_DESTROYED", {}).get("sum", -1) + docs_destroyed = histograms.get("CONTENT_DOCUMENTS_DESTROYED", {}).get("sum", -1) for histogram_name, histogram in histograms.iteritems(): - if total_destroyed is not None and histogram_name.startswith("USE_COUNTER2_"): - used = histogram.get("values", {}).get("1", None) - if used is None: + if pages_destroyed >= 0 and histogram_name.startswith("USE_COUNTER2_") and histogram_name.endswith("_PAGE"): + used = histogram.get("values", {}).get("1", -1) + if used <= 0: continue - histogram["values"]["0"] = total_destroyed - used + histogram["values"]["0"] = pages_destroyed - used + elif docs_destroyed >= 0 and histogram_name.startswith("USE_COUNTER2_") and histogram_name.endswith("_DOCUMENT"): + used = histogram.get("values", {}).get("1", -1) + if used <= 0: + continue + histogram["values"]["0"] = docs_destroyed - used _extract_histogram(state, histogram, histogram_name, u"", is_child) diff --git a/tests/dataset.py b/tests/dataset.py index c5875ce..6bb3e8f 100644 --- a/tests/dataset.py +++ b/tests/dataset.py @@ -36,21 +36,26 @@ histograms_template = {u"EVENTLOOP_UI_LAG_EXP_MS": {u'bucket_count': 20, u'range': [1, 2], u'sum': SCALAR_VALUE, u'values': {u'0': SCALAR_VALUE, u'1': 0}}, + u"USE_COUNTER2_PROPERTY_FILL_PAGE": {u'bucket_count': 3, + u'histogram_type': 2, + u'range': [1, 2], + u'sum': 2, + u'values': {u'0': 0, u'1': 2, u'2': 0}}, u"USE_COUNTER2_PROPERTY_FILL_DOCUMENT": {u'bucket_count': 3, u'histogram_type': 2, u'range': [1, 2], - u'sum': SCALAR_VALUE, - u'values': {u'0': 0, u'1': SCALAR_VALUE, u'2': 0}}, + u'sum': 1, + u'values': {u'0': 0, u'1': 1, u'2': 0}}, u"CONTENT_DOCUMENTS_DESTROYED": {u'bucket_count': 3, u'histogram_type': 4, u'range': [1, 2], - u'sum': 2*SCALAR_VALUE, - u'values': {u'0': 2*SCALAR_VALUE, u'1': 0}}, + u'sum': 17, + u'values': {u'0': 17, u'1': 0}}, u"TOP_LEVEL_CONTENT_DOCUMENTS_DESTROYED": {u'bucket_count': 3, u'histogram_type': 4, u'range': [1, 2], - u'sum': SCALAR_VALUE, - u'values': {u'0': SCALAR_VALUE, u'1': 0}}, + u'sum': 19, + u'values': {u'0': 19, u'1': 0}}, u"USE_COUNTER_PROPERTY_FILL_DOCUMENT": {u'bucket_count': 3, u'histogram_type': 2, u'range': [1, 2], diff --git a/tests/test_aggregator.py b/tests/test_aggregator.py index 6134733..3850f88 100644 --- a/tests/test_aggregator.py +++ b/tests/test_aggregator.py @@ -130,9 +130,8 @@ def test_use_counter2_histogram(): metric_count = defaultdict(int) histograms = {k: v for k, v in histograms_template.iteritems() if k.startswith("USE_COUNTER2_")} + pages_destroyed = histograms_template["TOP_LEVEL_CONTENT_DOCUMENTS_DESTROYED"]["sum"] docs_destroyed = histograms_template["CONTENT_DOCUMENTS_DESTROYED"]["sum"] - top_docs_destroyed = histograms_template["TOP_LEVEL_CONTENT_DOCUMENTS_DESTROYED"]["sum"] - partial = docs_destroyed - top_docs_destroyed for aggregate in build_id_aggregates: for key, value in aggregate[1].iteritems(): @@ -144,7 +143,12 @@ def test_use_counter2_histogram(): assert(label == "") assert(value["count"] == NUM_PINGS_PER_DIMENSIONS*(NUM_CHILDREN_PER_PING if child else 1)) assert(value["sum"] == value["count"]*histogram["sum"]) - assert(value["histogram"]["0"] == value["count"]*(partial - histogram["values"]["1"])) + + if metric.endswith("_DOCUMENT"): + assert(value["histogram"]["0"] == value["count"]*(docs_destroyed - histogram["values"]["1"])) + else: + assert(value["histogram"]["0"] == value["count"]*(pages_destroyed - histogram["values"]["1"])) + assert len(metric_count) == len(histograms) for v in metric_count.values(): diff --git a/tests/test_db.py b/tests/test_db.py index c54e87b..8607f1a 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -169,6 +169,18 @@ def test_histogram(prefix, channel, version, dates, metric, value, expected_coun assert(res["histogram"][bucket_index] == res["count"]) assert(res["sum"] == value["sum"]*res["count"]) assert((current == expected).all()) + elif metric.startswith("USE_COUNTER2_"): + if metric.endswith("_PAGE"): + destroyed = histograms_template["TOP_LEVEL_CONTENT_DOCUMENTS_DESTROYED"]["sum"] + else: + destroyed = histograms_template["CONTENT_DOCUMENTS_DESTROYED"]["sum"] + value["values"]["0"] = destroyed - value["values"]["1"] + + current = pd.Series(res["histogram"], index=map(int, reply["buckets"])) + expected = Histogram(metric, value).get_value()*res["count"] + + assert((current == expected).all()) + assert(res["sum"] == value["sum"]*res["count"]) else: current = pd.Series(res["histogram"], index=map(int, reply["buckets"])) expected = Histogram(metric, value).get_value()*res["count"]