Bug 1655032 - Add splitting functionality to metrics module. r=tarek

This patch adds the `--<LAYER>-split-by` option to the metrics layers. It lets users split the data they obtain by the values of a given data field. For instance, if `browserScripts.pageinfo.url` is provided, the data will be split based on the unique URLs that are found.

Differential Revision: https://phabricator.services.mozilla.com/D84822
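As a usage sketch (the test path here is a placeholder, and the exact invocation follows the existing mozperftest conventions), splitting Perfherder results by page URL would look roughly like:

./mach perftest perf_test.js --perfherder --perfherder-split-by browserScripts.pageinfo.url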
This commit is contained in:
Gregory Mierzwinski 2020-07-30 23:12:28 +00:00
Parent c8cd713611
Commit d93acb6b75
5 changed files with 174 additions and 101 deletions

View file

@@ -8,10 +8,28 @@ from mozperftest.metrics.exceptions import (
MetricsMultipleTransformsError,
MetricsMissingResultsError,
)
from mozperftest.metrics.utils import validate_intermediate_results
from mozperftest.metrics.utils import validate_intermediate_results, metric_fields
from mozperftest.metrics.notebook import PerftestETL
COMMON_ARGS = {
"metrics": {
"type": metric_fields,
"nargs": "*",
"default": [],
"help": "The metrics that should be retrieved from the data.",
},
"prefix": {"type": str, "default": "", "help": "Prefix used by the output files."},
"split-by": {
"type": str,
"default": None,
"help": "A metric name to use for splitting the data. For instance, "
"using browserScripts.pageinfo.url will split the data by the unique "
"URLs that are found.",
},
}
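Assuming the existing `<layer>-<option>` naming used by these layers (e.g. `perfherder-prefix` in the tests below), this shared entry surfaces on the command line roughly as:

--console-split-by browserScripts.pageinfo.url
--notebook-split-by browserScripts.pageinfo.url
--perfherder-split-by browserScripts.pageinfo.url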
class MetricsStorage(object):
"""Holds data that is commonly used across all metrics layers.
@@ -153,6 +171,7 @@ class MetricsStorage(object):
transformer="SingleJsonRetriever",
metrics=None,
exclude=None,
split_by=None,
):
"""Filters the metrics to only those that were requested by `metrics`.
@@ -175,6 +194,20 @@ class MetricsStorage(object):
if not exclude:
exclude = []
# Get the field to split the results by (if any)
if split_by is not None:
splitting_entry = None
for data_type, data_info in results.items():
for res in data_info:
if split_by in res["subtest"]:
splitting_entry = res
break
if splitting_entry is not None:
split_by = defaultdict(list)
for c, entry in enumerate(splitting_entry["data"]):
split_by[entry["value"]].append(c)
# Filter metrics
filtered = {}
for data_type, data_info in results.items():
newresults = []
@@ -185,6 +218,29 @@ class MetricsStorage(object):
newresults.append(res)
filtered[data_type] = newresults
# Split the filtered results
if split_by is not None:
newfilt = {}
total_iterations = sum([len(inds) for _, inds in split_by.items()])
for data_type in filtered:
if not filtered[data_type]:
# Ignore empty data types
continue
newresults = []
newfilt[data_type] = newresults
for split, indices in split_by.items():
for res in filtered[data_type]:
if len(res["data"]) != total_iterations:
# Skip data that cannot be split
continue
splitres = {key: val for key, val in res.items()}
splitres["subtest"] += " " + split
splitres["data"] = [res["data"][i] for i in indices]
newresults.append(splitres)
filtered = newfilt
return filtered
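
To make the index bookkeeping above concrete, here is a minimal, self-contained sketch (the replicate values and URLs are made up, and the result dicts are simplified relative to the real intermediate format) of how each unique value of the split-by field is mapped to iteration indices and then used to slice a metric into per-URL subtests:

from collections import defaultdict

# Hypothetical per-iteration values of the split-by field (browserScripts.pageinfo.url).
url_entries = [
    {"value": "https://www.sitespeed.io/"},
    {"value": "https://www.mozilla.org/en-US/"},
]

# Map each unique URL to the indices of the iterations it came from.
split_by = defaultdict(list)
for c, entry in enumerate(url_entries):
    split_by[entry["value"]].append(c)
# split_by == {"https://www.sitespeed.io/": [0], "https://www.mozilla.org/en-US/": [1]}

# A metric with one replicate per iteration is then split into one subtest per URL,
# keeping only the replicates at the matching indices.
res = {"subtest": "browserScripts.timings.firstPaint", "data": [250, 310]}
for split, indices in split_by.items():
    splitres = dict(res)
    splitres["subtest"] += " " + split
    splitres["data"] = [res["data"][i] for i in indices]
    print(splitres["subtest"], splitres["data"])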
@@ -200,6 +256,7 @@ def filtered_metrics(
metrics=None,
settings=False,
exclude=None,
split_by=None,
):
"""Returns standardized data extracted from the metadata instance.
@@ -214,7 +271,11 @@ def filtered_metrics(
storage = _metrics[key]
results = storage.filtered_metrics(
group_name=group_name, transformer=transformer, metrics=metrics, exclude=exclude
group_name=group_name,
transformer=transformer,
metrics=metrics,
exclude=exclude,
split_by=split_by,
)
# XXX returning two different types is a problem

View file

@@ -1,7 +1,7 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mozperftest.metrics.common import filtered_metrics
from mozperftest.metrics.common import filtered_metrics, COMMON_ARGS
from mozperftest.layers import Layer
@@ -22,20 +22,7 @@ class ConsoleOutput(Layer):
name = "console"
activated = False
arguments = {
"metrics": {
"nargs": "*",
"default": [],
"help": "The metrics that should be retrieved from the data.",
},
# XXX can we guess this by asking the metrics storage ??
"prefix": {
"type": str,
"default": "",
"help": "Prefix used by the output files.",
},
}
arguments = COMMON_ARGS
def run(self, metadata):
# Get filtered metrics
@@ -44,6 +31,7 @@ class ConsoleOutput(Layer):
self.get_arg("output"),
self.get_arg("prefix"),
metrics=self.get_arg("metrics"),
split_by=self.get_arg("split-by"),
)
if not results:

View file

@@ -4,9 +4,9 @@
import pathlib
from mozperftest.layers import Layer
from mozperftest.metrics.common import filtered_metrics
from mozperftest.metrics.common import filtered_metrics, COMMON_ARGS
from mozperftest.metrics.notebook import PerftestNotebook
from mozperftest.metrics.utils import is_number, metric_fields
from mozperftest.metrics.utils import is_number
class Notebook(Layer):
@@ -15,49 +15,43 @@ class Notebook(Layer):
name = "notebook"
activated = False
arguments = {
"metrics": {
"type": metric_fields,
"nargs": "*",
"default": [],
"help": "The metrics that should be retrieved from the data.",
},
"prefix": {
"type": str,
"default": "",
"help": "Prefix used by the output files.",
},
"analysis": {
"nargs": "*",
"default": [],
"help": "List of analyses to run in Iodide.",
},
"analyze-strings": {
"action": "store_true",
"default": False,
"help": "If set, strings won't be filtered out of the results to analyze in Iodide.",
},
"no-server": {
"action": "store_true",
"default": False,
"help": "f set, the data won't be opened in Iodide.",
},
"compare-to": {
"nargs": "*",
"default": [],
"help": (
"Compare the results from this test to the historical data in the folder(s) "
"specified through this option. Only JSON data can be processed for the moment."
"Each folder containing those JSONs is considered as a distinct data point "
"to compare with the newest run."
),
},
"stats": {
"action": "store_true",
"default": False,
"help": "If set, browsertime statistics will be reported.",
},
}
arguments = COMMON_ARGS
arguments.update(
{
"analysis": {
"nargs": "*",
"default": [],
"help": "List of analyses to run in Iodide.",
},
"analyze-strings": {
"action": "store_true",
"default": False,
"help": (
"If set, strings won't be filtered out of the results to analyze in Iodide."
),
},
"no-server": {
"action": "store_true",
"default": False,
"help": "If set, the data won't be opened in Iodide.",
},
"compare-to": {
"nargs": "*",
"default": [],
"help": (
"Compare the results from this test to the historical data in the folder(s) "
"specified through this option. Only JSON data can be processed for the "
"moment. Each folder containing those JSONs is considered as a distinct "
"data point to compare with the newest run."
),
},
"stats": {
"action": "store_true",
"default": False,
"help": "If set, browsertime statistics will be reported.",
},
}
)
def run(self, metadata):
exclusions = None
@@ -92,6 +86,7 @@ class Notebook(Layer):
self.get_arg("prefix"),
metrics=self.get_arg("metrics"),
exclude=exclusions,
split_by=self.get_arg("split-by"),
)
if not results:

View file

@@ -10,8 +10,8 @@ import statistics
from mozperftest.utils import strtobool
from mozperftest.layers import Layer
from mozperftest.metrics.exceptions import PerfherderValidDataError
from mozperftest.metrics.common import filtered_metrics
from mozperftest.metrics.utils import write_json, is_number, metric_fields
from mozperftest.metrics.common import filtered_metrics, COMMON_ARGS
from mozperftest.metrics.utils import write_json, is_number
PERFHERDER_SCHEMA = pathlib.Path(
@@ -26,42 +26,34 @@ class Perfherder(Layer):
name = "perfherder"
activated = False
arguments = {
"prefix": {
"type": str,
"default": "",
"help": "Prefix the output files with this string.",
},
"app": {
"type": str,
"default": "firefox",
"choices": [
"firefox",
"chrome-m",
"chrome",
"chromium",
"fennec",
"geckoview",
"fenix",
"refbrow",
],
"help": (
"Shorthand name of application that is "
"being tested (used in perfherder data)."
),
},
"metrics": {
"type": metric_fields,
"nargs": "*",
"default": [],
"help": "The metrics that should be retrieved from the data.",
},
"stats": {
"action": "store_true",
"default": False,
"help": "If set, browsertime statistics will be reported.",
},
}
arguments = COMMON_ARGS
arguments.update(
{
"app": {
"type": str,
"default": "firefox",
"choices": [
"firefox",
"chrome-m",
"chrome",
"chromium",
"fennec",
"geckoview",
"fenix",
"refbrow",
],
"help": (
"Shorthand name of application that is "
"being tested (used in perfherder data)."
),
},
"stats": {
"action": "store_true",
"default": False,
"help": "If set, browsertime statistics will be reported.",
},
}
)
def run(self, metadata):
"""Processes the given results into a perfherder-formatted data blob.
@@ -96,6 +88,7 @@ class Perfherder(Layer):
metrics=metrics,
settings=True,
exclude=exclusions,
split_by=self.get_arg("split-by"),
)
if not any([results[name] for name in results]):

View file

@@ -196,7 +196,8 @@ def test_perfherder_exlude_stats():
assert len(output["suites"][0]["subtests"]) == 1
assert output["suites"][0]["value"] > 0
# Check if only one firstPaint metric was obtained
# Check if only firstPaint metric was obtained with 2 replicates
assert len(output["suites"][0]["subtests"][0]["replicates"]) == 2
assert (
"browserScripts.timings.firstPaint"
== output["suites"][0]["subtests"][0]["name"]
@@ -226,6 +227,41 @@ def test_perfherder_app_name():
assert "version" not in output["application"]
def test_perfherder_split_by():
options = {
"perfherder": True,
"perfherder-prefix": "",
"perfherder-app": "fenix",
"perfherder-metrics": [metric_fields("firstPaint")],
"perfherder-split-by": "browserScripts.pageinfo.url",
}
metrics, metadata, env = setup_env(options)
with temp_file() as output:
env.set_arg("output", output)
with metrics as m, silence():
m(metadata)
output_file = metadata.get_output()
with open(output_file) as f:
output = json.loads(f.read())
# Sanity check
assert len(output["suites"]) == 1
# We should have 2 subtests (1 per URL)
assert len(output["suites"][0]["subtests"]) == 2
# Check to make sure that they were properly split
names = [subtest["name"] for subtest in output["suites"][0]["subtests"]]
assert sorted(names) == [
"browserScripts.timings.firstPaint https://www.mozilla.org/en-US/",
"browserScripts.timings.firstPaint https://www.sitespeed.io/",
]
for i in range(2):
assert len(output["suites"][0]["subtests"][i]["replicates"]) == 1
def test_perfherder_bad_app_name():
options = {
"perfherder": True,