Bug 1636534 - Separate perftestetl from perftestnotebook r=sparky

Differential Revision: https://phabricator.services.mozilla.com/D75684
This commit is contained in:
xuanqi xu 2020-06-02 21:34:49 +00:00
Родитель cf5ee680a4
Коммит 2b9bf62319
11 изменённых файлов: 355 добавлений и 214 удалений

Просмотреть файл

@ -4,10 +4,11 @@
from mozperftest.layers import Layers
from mozperftest.metrics.perfherder import Perfherder
from mozperftest.metrics.consoleoutput import ConsoleOutput
from mozperftest.metrics.notebookupload import Notebook
def get_layers():
    """Return the metrics layer classes made available by this package.

    The Notebook layer is listed after Perfherder and ConsoleOutput. Only one
    return statement is kept: a leading stale return of the two older layers
    made the one that includes Notebook unreachable.
    """
    return Perfherder, ConsoleOutput, Notebook
def pick_metrics(env, flavor, mach_cmd):

Просмотреть файл

@ -9,7 +9,7 @@ from mozperftest.metrics.exceptions import (
MetricsMissingResultsError,
)
from mozperftest.metrics.utils import validate_intermediate_results
from mozperftest.metrics.notebook import PerftestNotebook
from mozperftest.metrics.notebook import PerftestETL
class MetricsStorage(object):
@ -135,10 +135,10 @@ class MetricsStorage(object):
"file_groups": {data_type: data_info["files"]},
}
ptnb = PerftestNotebook(
ptnb = PerftestETL(
config["file_groups"], config, data_info["transformer"]
)
r = ptnb.process(no_iodide=True, **data_info["options"])
r = ptnb.process(**data_info["options"])
self.stddata[data_type] = r["data"]
return self.stddata

Просмотреть файл

@ -1,6 +1,7 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from .perftestetl import PerftestETL
from .perftestnotebook import PerftestNotebook
__all__ = ["PerftestNotebook"]
__all__ = ["PerftestETL", "PerftestNotebook"]

Просмотреть файл

@ -1,51 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from .constant import Constant
from .logger import logger
class NotebookAnalyzer(object):
    """Analyze the standardized data.

    The methods in this class will be injected in an Iodide page in the future.
    """

    def __init__(self, data):
        """Initialize the Analyzer.

        :param list data: Standardized data, post-transformation. May be
            None or empty when no data is available yet.
        """
        self.data = data
        self.const = Constant()

    def split_subtests(self):
        """Group the data entries by their "subtest" field.

        Whether the data carries subtests is decided by looking at the first
        entry only.

        :return dict: Maps each subtest name to the list of entries carrying
            it, in their original order. When the first entry has no
            "subtest" field, all the data is returned under the "" key.
            Empty or missing data yields ``{"": []}``.
        """
        # Guard against empty/None data: indexing the first entry below
        # would otherwise raise before anything could be returned.
        if not self.data:
            return {"": []}

        if "subtest" not in self.data[0]:
            return {"": self.data}

        split_data = {}
        for entry in self.data:
            split_data.setdefault(entry["subtest"], []).append(entry)
        return split_data

    def get_header(self):
        """Return the content of the "header" notebook section.

        The file is read from the "notebook-sections" folder found under
        ``self.const.here``.

        :return str: Content of the header template.
        """
        template_header_path = self.const.here / "notebook-sections" / "header"
        with open(str(template_header_path), "r") as f:
            return f.read()

    def get_notebook_section(self, func):
        """Return the content of the notebook section called ``func``.

        :param str func: Name of a file in the "notebook-sections" folder.
        :return str: Content of the section, or an empty string (after
            logging a warning) when no such file exists.
        """
        template_function_file_path = self.const.here / "notebook-sections" / func
        if not template_function_file_path.exists():
            logger.warning(f"Could not find the notebook-section called {func}")
            return ""

        with open(str(template_function_file_path), "r") as f:
            return f.read()

Просмотреть файл

@ -0,0 +1,12 @@
%% md
# Welcome to PerftestNotebook
Press the :fast_forward: button in the top-left corner to run the whole notebook.
%% fetch
text: data_string = http://127.0.0.1:5000/data
%% js
var data_object = JSON.parse(data_string);

Просмотреть файл

@ -0,0 +1,15 @@
%% py
from js import data_object
import matplotlib.pyplot as plt

# Draw every series on the same figure so they can be compared directly.
plt.figure()

for element in data_object:
    data_array = element["data"]
    xs = [point["xaxis"] for point in data_array]
    ys = [point["value"] for point in data_array]
    # The legend entry shows the data name with its subtest on a second line.
    label = element["name"] + "\n" + element["subtest"]
    plt.scatter(xs, ys, label=label)

plt.legend()
plt.show()

Просмотреть файл

@ -0,0 +1,165 @@
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
import os
import pathlib
from collections import OrderedDict
from .transformer import Transformer, SimplePerfherderTransformer
from .constant import Constant
from .logger import NotebookLogger
logger = NotebookLogger()
class PerftestETL(object):
    """Controller class for the PerftestETL."""

    def __init__(self, file_groups, config, custom_transform=None, sort_files=False):
        """Initializes PerftestETL.

        :param dict file_groups: A dict of file groupings. Each key is the
            name of the data that will be produced, and each value is the
            file grouping (see `parse_file_grouping`) it is produced from.
        :param dict config: Settings for the run. The "prefix" and "output"
            entries are used to pick where the results are written.
        :param str custom_transform: The class name of a custom transformer.
            When not given, the SimplePerfherderTransformer is used.
        :param bool sort_files: If True, the files of each grouping are
            sorted before being processed.
        :raises Exception: If NOTEBOOK_PLUGIN is set in the environment, or
            if the requested transformer is not a predefined one.
        """
        self.fmt_data = {}
        self.file_groups = file_groups
        self.config = config
        self.sort_files = sort_files
        self.const = Constant()

        # Gather the available transformers
        tfms_dict = self.const.predefined_transformers

        # XXX NOTEBOOK_PLUGIN functionality is broken at the moment.
        # This code block will raise an exception if it detects it in
        # the environment.
        if os.getenv("NOTEBOOK_PLUGIN"):
            raise Exception("NOTEBOOK_PLUGIN is currently broken.")

        # Initialize the requested transformer
        if custom_transform:
            tfm_cls = tfms_dict.get(custom_transform)
            if not tfm_cls:
                raise Exception(f"Could not get a {custom_transform} transformer.")
            self.transformer = Transformer(files=[], custom_transformer=tfm_cls())
            logger.info(f"Found {custom_transform} transformer")
        else:
            self.transformer = Transformer(
                files=[], custom_transformer=SimplePerfherderTransformer()
            )

    def parse_file_grouping(self, file_grouping):
        """Handles differences in the file_grouping definitions.

        It can either be a path to a folder containing the files, or a list
        of files. Settings from an artifact_downloader instance (a dict) are
        recognized but currently rejected.

        :param file_grouping: A file grouping entry.
        :return: A list of files to process. When `sort_files` is set this is
            a new sorted list; the list given by the caller is left untouched.
        :raises Exception: If the grouping is a dict, has an unknown type, or
            resolves to no files at all.
        """
        if isinstance(file_grouping, list):
            # A list of files was provided
            files = file_grouping
        elif isinstance(file_grouping, dict):
            # A dictionary of settings from an artifact_downloader instance
            # was provided here
            raise Exception(
                "Artifact downloader tooling is disabled for the time being."
            )
        elif isinstance(file_grouping, str):
            # Assume a path to files was given; everything found under it
            # (recursively) is used.
            files = [
                f.resolve().as_posix() for f in pathlib.Path(file_grouping).rglob("*")
            ]
        else:
            # An f-string is used rather than "%s" % value: a tuple operand
            # would be unpacked by "%" and raise a TypeError instead of
            # this error.
            raise Exception(
                f"Unknown file grouping type provided here: {file_grouping}"
            )

        if self.sort_files:
            # Only lists can reach this point. sorted() avoids reordering
            # the caller's own list as a side effect.
            files = sorted(files)

        if not files:
            raise Exception(
                "Could not find any files in this configuration: %s" % file_grouping
            )

        return files

    def parse_output(self):
        """Return the path where the standardized data should be written.

        Defaults to "<prefix>std-output.json". The "output" config entry
        replaces that path; if the resulting path is an existing directory,
        the default file name is placed inside of it.

        :return str: Path of the output file.
        """
        # XXX Fix up this function, it should only return a directory for output
        # not a directory or a file. Or remove it completely, it's not very useful.
        prefix = self.config["prefix"] if "prefix" in self.config else ""
        default_name = f"{prefix}std-output.json"

        filepath = self.config["output"] if "output" in self.config else default_name
        if os.path.isdir(filepath):
            filepath = os.path.join(filepath, default_name)

        return filepath

    @staticmethod
    def _prefix_subtest(entry, subtest):
        """Tag `entry` with `subtest`, prefixing any subtest it already has."""
        if "subtest" in entry:
            entry["subtest"] = "%s-%s" % (subtest, entry["subtest"])
        else:
            entry["subtest"] = subtest

    def _transform(self, name, files, options):
        """Run the transformer on `files` and return its output as a list."""
        self.transformer.files = files
        trfm_data = self.transformer.process(name, **options)
        return trfm_data if isinstance(trfm_data, list) else [trfm_data]

    def process(self, **kwargs):
        """Process the file groups and write out the standardized data.

        Any keyword argument is passed on to the transformer.

        :return dict: The standardized data under "data", and the path of
            the JSON file it was written to under "file-output".
        """
        fmt_data = []

        for name, file_grouping in self.file_groups.items():
            files = self.parse_file_grouping(file_grouping)
            if isinstance(files, dict):
                # parse_file_grouping only returns lists at the moment; this
                # branch is kept for groupings that are split by subtest.
                for subtest, subtest_files in files.items():
                    entries = self._transform(name, subtest_files, kwargs)
                    for entry in entries:
                        self._prefix_subtest(entry, subtest)
                    fmt_data.extend(entries)
            else:
                # Transform the data
                fmt_data.extend(self._transform(name, files, kwargs))

        self.fmt_data = fmt_data

        # Write formatted data output to filepath
        output_data_filepath = self.parse_output()

        print("Writing results to %s" % output_data_filepath)
        with open(output_data_filepath, "w") as f:
            json.dump(self.fmt_data, f, indent=4, sort_keys=True)

        return {"data": self.fmt_data, "file-output": output_data_filepath}

Просмотреть файл

@ -1,182 +1,76 @@
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
import os
import pathlib
from collections import OrderedDict
import webbrowser
from http.server import BaseHTTPRequestHandler, HTTPServer
from .transformer import Transformer, SimplePerfherderTransformer
from .analyzer import NotebookAnalyzer
from .constant import Constant
from .logger import NotebookLogger
logger = NotebookLogger()
from .logger import logger
class PerftestNotebook(object):
    """Controller class for PerftestNotebook."""

    def __init__(self, data):
        """Initialize the PerftestNotebook.

        :param dict data: Standardized data, post-transformation.
        """
        self.data = data
        self.const = Constant()

    def get_notebook_section(self, func):
        """Fetch notebook content based on analysis name.

        :param str func: analysis or notebook section name
        :return str: Content of the matching file in the "notebook-sections"
            folder, or an empty string (after logging a warning) when there
            is no such file.
        """
        template_path = self.const.here / "notebook-sections" / func
        if not template_path.exists():
            logger.warning(f"Could not find the notebook-section called {func}")
            return ""
        with template_path.open() as f:
            return f.read()

    def post_to_iodide(self, analysis=None, start_local_server=True):
        """Build notebook and post it to iodide.

        The notebook is the "header" section followed by the requested
        analysis sections. It is injected into a copy of
        "template_upload_file.html", written next to it as "upload_file.html".

        :param list analysis: notebook section names, analysis to perform in iodide
        :param bool start_local_server: If True, open the generated page in a
            browser tab and serve the data on port 5000 until interrupted.
            If False, only the page is generated and no server is created.
        """
        # Bound to a local name so the request handler below can reach the
        # data: inside the handler, `self` is the handler instance.
        data = self.data

        template_header_path = self.const.here / "notebook-sections" / "header"
        with template_header_path.open() as f:
            notebook_sections = f.read()

        if analysis:
            for func in analysis:
                notebook_sections += self.get_notebook_section(func)

        # repr() turns the notebook text into a quoted, escaped string
        # literal that replaces the placeholder in the template's script.
        template_upload_file_path = self.const.here / "template_upload_file.html"
        with template_upload_file_path.open() as f:
            html = f.read().replace("replace_me", repr(notebook_sections))

        upload_file_path = self.const.here / "upload_file.html"
        with upload_file_path.open("w") as f:
            f.write(html)

        if not start_local_server:
            # Nothing would ever use or close a server here, so do not bind
            # the port at all.
            return

        # set up local server. Iodide will fetch data from localhost:5000/data
        class DataRequestHandler(BaseHTTPRequestHandler):
            def do_GET(self):
                if self.path != "/data":
                    # Answer unknown paths rather than leaving the client
                    # without any response.
                    self.send_error(404)
                    return
                self.send_response(200)
                self.send_header("Content-type", "application/json")
                self.send_header("Access-Control-Allow-Origin", "*")
                self.end_headers()
                self.wfile.write(json.dumps(data).encode("utf-8"))

        PORT_NUMBER = 5000
        server = HTTPServer(("", PORT_NUMBER), DataRequestHandler)
        try:
            webbrowser.open_new_tab(str(upload_file_path))
            server.serve_forever()
        finally:
            # Release the port however serving ended.
            server.server_close()

Просмотреть файл

@ -0,0 +1,39 @@
<!DOCTYPE html>
<!-- This Source Code Form is subject to the terms of the Mozilla Public
- License, v. 2.0. If a copy of the MPL was not distributed with this
- file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
<html>
<body>
Redirecting to Iodide...
<script>
/**
 * Submit `params` to `path` through a dynamically created form.
 *
 * Each own property of `params` becomes a hidden textarea named after the
 * key and holding its value. The form is appended to the document body and
 * submitted right away.
 *
 * @param {string} path - URL the form is submitted to.
 * @param {Object} params - Field names mapped to the values to send.
 * @param {string} [method='post'] - HTTP method used by the form.
 */
function post(path, params, method='post') {
  const form = document.createElement('form');
  form.method = method;
  form.action = path;
  form.id = 'uploadform';

  for (const key in params) {
    // Inherited properties are not sent.
    if (!params.hasOwnProperty(key)) {
      continue;
    }
    const field = document.createElement('textarea');
    field.name = key;
    field.value = params[key];
    field.style.display = "none";
    form.appendChild(field);
  }

  document.body.appendChild(form);
  form.submit();
}
// TODO Need to escape all `'`,
// otherwise this will result in JavaScript failures.
var template = replace_me
// Create a form object, and send it
// after release, change back to https://alpha.iodide.io/from-template/
post("https://alpha.iodide.io/from-template/", {"iomd": template})
</script>
</body>
</html>

Просмотреть файл

@ -0,0 +1,65 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mozperftest.layers import Layer
from mozperftest.metrics.common import filtered_metrics
from mozperftest.metrics.notebook import PerftestNotebook
class Notebook(Layer):
    """Post standardized data to iodide and run analysis."""

    name = "notebook"
    activated = False

    arguments = {
        "metrics": {
            "nargs": "*",
            "default": [],
            "help": "The metrics that should be retrieved from the data.",
        },
        "prefix": {
            "type": str,
            "default": "",
            "help": "Prefix used by the output files.",
        },
        "analysis": {
            "nargs": "*",
            "default": [],
            "help": "List of analyses to run in Iodide.",
        },
        "analyze-strings": {
            "action": "store_true",
            "default": False,
            "help": "If set, strings won't be filtered out of the results to analyze in Iodide.",
        },
    }

    def __call__(self, metadata):
        """Filter the results and post what is left to Iodide.

        An entry is kept when the value of its first data point is an int or
        a float, or regardless of that value when "analyze-strings" is set.
        Entries without any data point are skipped.

        :param metadata: The metadata the filtered metrics are read from.
        :return: The same `metadata` object, unchanged by this method.
        """
        # Get filtered metrics
        results = filtered_metrics(
            metadata,
            self.get_arg("output"),
            self.get_arg("prefix"),
            metrics=self.get_arg("metrics"),
        )

        if not results:
            self.warning("No results left after filtering")
            return metadata

        analysis = self.get_arg("analysis")
        analyze_strings = self.get_arg("analyze-strings")

        data_to_post = []
        for res in results.values():
            for r in res:
                data = r["data"]
                if not data:
                    # No first data point to inspect, and nothing to analyze.
                    continue
                val = data[0]["value"]
                # An exact type check is used on purpose rather than
                # isinstance(); presumably so that bool values, which are
                # int instances, are not treated as numbers.
                if analyze_strings or type(val) in (int, float):
                    data_to_post.append(r)

        self.ptnb = PerftestNotebook(data_to_post)
        self.ptnb.post_to_iodide(analysis)

        return metadata

Просмотреть файл

@ -114,7 +114,7 @@ class Perfherder(Layer):
vals = [
v["value"]
for v in r["data"]
if isinstance(v["value"], (int, float))
if type(v["value"]) in (int, float)
]
if vals:
subtests[r["subtest"]] = vals