Bug 1772510: Add 'mach perf-data-review' command that generates a template data review for simple performance monitoring probes. r=chutten,ahochheiden

Differential Revision: https://phabricator.services.mozilla.com/D148255
This commit is contained in:
Bas Schouten 2022-07-08 12:40:44 +00:00
Родитель c61fece71c
Коммит 740a43ba09
3 изменённых файлов: 199 добавлений и 0 удалений

Просмотреть файл

@ -333,6 +333,9 @@ def initialize(topsrcdir):
"data-review": MachCommandReference(
"toolkit/components/glean/build_scripts/mach_commands.py"
),
"perf-data-review": MachCommandReference(
"toolkit/components/glean/build_scripts/mach_commands.py"
),
"update-glean-tags": MachCommandReference(
"toolkit/components/glean/build_scripts/mach_commands.py"
),

Просмотреть файл

@ -40,6 +40,32 @@ def data_review(command_context, bug=None):
)
@Command(
    "perf-data-review",
    category="misc",
    description="Generate a skeleton performance data review request form for a given bug's data",
)
@CommandArgument(
    "bug", default=None, nargs="?", type=str, help="bug number or search pattern"
)
def perf_data_review(command_context, bug=None):
    """Print a skeleton performance data review request for ``bug``.

    :param command_context: mach command context; only its ``topsrcdir``
        attribute is read here.
    :param bug: bug number or search pattern forwarded to the generator.
    :return: whatever ``perf_data_review.generate`` returns.
    """
    import sys
    from os import path
    from pathlib import Path

    scripts_dir = path.dirname(__file__)

    # The list of metrics definition files is obtained by importing
    # metrics_index as a module, so make the parent directory importable first.
    sys.path.append(path.join(scripts_dir, path.pardir))
    from metrics_index import metrics_yamls

    # The generator module is imported from this file's own directory.
    sys.path.append(scripts_dir)
    import perf_data_review

    # metrics_yamls entries are joined onto the source tree root.
    src_root = Path(command_context.topsrcdir)
    yaml_paths = [src_root / relative for relative in metrics_yamls]
    return perf_data_review.generate(bug, yaml_paths)
@Command(
"update-glean-tags",
category="misc",

Просмотреть файл

@ -0,0 +1,170 @@
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Produce skeleton Performance Data Review Requests.
This was mostly copied from glean_parser, and should be kept in sync.
"""
from pathlib import Path
from typing import Sequence
import re
from glean_parser import parser
from glean_parser import util
def generate(
    bug: str,
    metrics_files: Sequence[Path],
) -> int:
    """
    Commandline helper for Performance Data Review Request template generation.

    Loads the given metrics files, selects every metric that has a bug entry
    matching `bug`, and prints a pre-filled data review request to stdout.

    :param bug: pattern to match in metrics' bug_numbers lists. It is
        interpolated into a regular expression unescaped, so it may be a
        plain bug number or a search pattern. `None` (no argument given on
        the command line) is reported as an error.
    :param metrics_files: List of Path objects to load metrics from.
    :return: Non-zero if there were any errors.
    """
    # Without this guard a missing argument would be searched for as the
    # literal text "None".
    if bug is None:
        print("Please provide a bug number or search pattern to look for.")
        return 1

    metrics_files = util.ensure_list(metrics_files)

    # Accept any value of expires.
    parser_options = {
        "allow_reserved": True,
        "custom_is_expired": lambda expires: False,
        "custom_validate_expires": lambda expires: True,
    }
    all_objects = parser.parse_objects(metrics_files, parser_options)

    if util.report_validation_errors(all_objects):
        return 1

    # I tried [\W\Z] but it complained. So `|` it is.
    reobj = re.compile(rf"\W{bug}\W|\W{bug}$")

    durations = set()
    responsible_emails = set()
    table_rows = []

    for category_name, metrics in all_objects.value.items():
        category_name = util.snake_case(category_name)
        for metric in metrics.values():
            # Keep the metric only if one of its bug entries contains the
            # pattern exactly once.
            if not any(
                len(reobj.findall(bug_entry)) == 1 for bug_entry in metric.bugs
            ):
                continue

            metric_name = util.snake_case(metric.name)
            one_line_desc = metric.description.replace("\n", " ")
            sensitivity = ", ".join(s.name for s in metric.data_sensitivity)
            last_bug = metric.bugs[-1]
            table_rows.append(
                f"`{category_name}.{metric_name}` | "
                f"{one_line_desc} | {sensitivity} | {last_bug}\n"
            )
            # Events additionally get one row per extra key.
            if metric.type == "event" and len(metric.allowed_extra_keys):
                for extra_name, extra_detail in metric.extra_keys.items():
                    extra_one_line_desc = extra_detail["description"].replace(
                        "\n", " "
                    )
                    table_rows.append(
                        f"`{category_name}.{metric_name}#{extra_name}` | "
                        f"{extra_one_line_desc} | {sensitivity} | {last_bug}\n"
                    )

            durations.add(metric.expires)

            if metric.expires == "never":
                responsible_emails.update(metric.notification_emails)

    # Every matched metric contributes an expiry, so an empty set means
    # nothing matched; bail out before composing any of the report.
    if not durations:
        print(f"I'm sorry, I couldn't find metrics matching the bug number {bug}.")
        return 1

    metrics_table = "".join(table_rows)

    if len(durations) == 1:
        duration = next(iter(durations))
        if duration == "never":
            collection_duration = "This collection will be collected permanently."
        else:
            collection_duration = f"This collection has expiry '{duration}'"
    else:
        # Render like a set literal, but in a stable order. Expiry values may
        # mix strings and integers, hence the string sort key.
        listed = ", ".join(repr(d) for d in sorted(durations, key=str))
        collection_duration = "Parts of this collection expire at different times: "
        collection_duration += "{" + listed + "}"

    if "never" in durations:
        collection_duration += "\n" + ", ".join(sorted(responsible_emails)) + " "
        collection_duration += "will be responsible for the permanent collections."

    # This template is pulled from
    # https://github.com/mozilla/data-review/blob/main/request.md
    print(
        """
!! Reminder: it is your responsibility to complete and check the correctness of
!! this automatically-generated request skeleton before requesting Data
!! Collection Review. See https://wiki.mozilla.org/Data_Collection for details.
DATA REVIEW REQUEST
1. What questions will you answer with this data?
TODO: Fill this in.
2. Why does Mozilla need to answer these questions? Are there benefits for users?
Do we need this information to address product or business requirements?
In order to guarantee the performance of our products, it is vital to monitor
real-world installs used by real-world users.
3. What alternative methods did you consider to answer these questions?
Why were they not sufficient?
Our ability to measure the practical performance impact of changes through CI
and manual testing is limited. Monitoring the performance of our products in
the wild among real users is the only way to be sure we have an accurate
picture.
4. Can current instrumentation answer these questions?
No.
5. List all proposed measurements and indicate the category of data collection for each
measurement, using the Firefox data collection categories found on the Mozilla wiki.
Measurement Name | Measurement Description | Data Collection Category | Tracking Bug
---------------- | ----------------------- | ------------------------ | ------------"""
    )
    print(metrics_table)
    print(
        """
6. Please provide a link to the documentation for this data collection which
describes the ultimate data set in a public, complete, and accurate way.
This collection is Glean so is documented
[in the Glean Dictionary](https://dictionary.telemetry.mozilla.org).
7. How long will this data be collected?
"""
    )
    print(collection_duration)
    print(
        """
8. What populations will you measure?
All channels, countries, and locales. No filters.
9. If this data collection is default on, what is the opt-out mechanism for users?
These collections are Glean. The opt-out can be found in the product's preferences.
10. Please provide a general description of how you will analyze this data.
This will be continuously monitored for regression and improvement detection.
11. Where do you intend to share the results of your analysis?
Internal monitoring (GLAM, Redash, Looker, etc.).
12. Is there a third-party tool (i.e. not Telemetry) that you
are proposing to use for this data collection?
No.
"""
    )

    return 0