Mirror of https://github.com/mozilla/bugbug.git
Parent 893740bda0
Commit f0ef378e49
@@ -3,53 +3,23 @@
 import argparse
 import logging
 import sys
-from os.path import abspath
 
 import requests
+import taskcluster
 
-LATEST_URI = "train_{}.latest"
-VERSIONED_URI = "train_{}.{}"
-DATED_VERSIONED_URI = "train_{}.{}.{}"
-BASE_URL = "https://index.taskcluster.net/v1/task/project.relman.bugbug.{}/artifacts/public/metrics.json"
+from bugbug.utils import get_taskcluster_options
+
+ROOT_URI = "train_{}.per_date"
+DATE_URI = "train_{}.per_date.{}"
+BASE_URL = "https://index.taskcluster.net/v1/task/{}/artifacts/public/metrics.json"
+NAMESPACE_URI = "project.relman.bugbug.{}"
 
 LOGGER = logging.getLogger(__name__)
 
 logging.basicConfig(level=logging.INFO)
 
 
-def main():
-    description = "Retrieve a model training metrics"
-    parser = argparse.ArgumentParser(description=description)
-
-    parser.add_argument("model", help="Which model to retrieve training metrics from.")
-    parser.add_argument(
-        "version",
-        nargs="?",
-        help="Which bugbug version should we retrieve training metrics from.",
-        default=None,
-    )
-    parser.add_argument(
-        "date",
-        nargs="?",
-        help="Which date should we retrieve training metrics from. Default to latest",
-        default=None,
-    )
-    parser.add_argument(
-        "--output",
-        "-o",
-        help="Where to output the metrics.json file. Default to printing its content",
-        default=None,
-    )
-
-    args = parser.parse_args()
-
-    if not args.version:
-        index_uri = LATEST_URI.format(args.model)
-    elif not args.date:
-        index_uri = VERSIONED_URI.format(args.model, args.version)
-    else:
-        index_uri = DATED_VERSIONED_URI.format(args.model, args.version, args.date)
-
+def get_task_metrics_from_uri(index_uri):
     index_url = BASE_URL.format(index_uri)
     LOGGER.info(f"Retrieving metrics from {index_url}")
     r = requests.get(index_url)
@@ -60,13 +30,95 @@ def main():
 
     r.raise_for_status()
 
-    if args.output:
-        file_path = abspath(args.output)
-        with open(file_path, "w") as output_file:
-            output_file.write(r.text)
-        LOGGER.info(f"Metrics saved to {file_path!r}")
-    else:
-        print(r.text)
+    return r
+
+
+def get_namespaces(index, index_uri):
+    index_namespaces = index.listNamespaces(index_uri)
+
+    return index_namespaces["namespaces"]
+
+
+def is_later_or_equal(partial_date, from_date):
+    for partial_date_part, from_date_part in zip(partial_date, from_date):
+        if int(partial_date_part) > int(from_date_part):
+            return True
+        elif int(partial_date_part) < int(from_date_part):
+            return False
+        else:
+            continue
+
+    return True
+
+
+def get_task_metrics_from_date(model, date):
+    options = get_taskcluster_options()
+
+    index = taskcluster.Index(options)
+
+    index.ping()
+
+    # Split the date
+    from_date = date.split(".")
+
+    namespaces = []
+
+    # Start at the root level
+    # We need an empty list in order to append namespaces part to it
+    namespaces.append([])
+
+    # Recursively list all namespaces greater or equals than the given date
+    while namespaces:
+        current_ns = namespaces.pop()
+
+        # Handle version level namespaces
+        if not current_ns:
+            ns_uri = ROOT_URI.format(model)
+        else:
+            current_ns_date = ".".join(current_ns)
+            ns_uri = DATE_URI.format(model, current_ns_date)
+
+        ns_full_uri = NAMESPACE_URI.format(ns_uri)
+
+        tasks = index.listTasks(ns_full_uri)
+        for task in tasks["tasks"]:
+            task_uri = task["namespace"]
+            r = get_task_metrics_from_uri(task_uri)
+
+            # Write the file on disk
+            file_path = f"metric_{'_'.join(task_uri.split('.'))}.json"
+            with open(file_path, "w") as metric_file:
+                metric_file.write(r.text)
+            LOGGER.info(f"Metrics saved to {file_path!r}")
+
+        for namespace in get_namespaces(index, ns_full_uri):
+            new_ns = current_ns.copy()
+            new_ns.append(namespace["name"])
+
+            if not is_later_or_equal(new_ns, from_date):
+                LOGGER.debug("NEW namespace %s is before %s", new_ns, from_date)
+                continue
+
+            # Might not be efficient but size of `namespaces` shouldn't be too
+            # big as we are doing a depth-first traversal
+            if new_ns not in namespaces:
+                namespaces.append(new_ns)
+
+
+def main():
+    description = "Retrieve a model training metrics"
+    parser = argparse.ArgumentParser(description=description)
+
+    parser.add_argument("model", help="Which model to retrieve training metrics from.")
+    parser.add_argument(
+        "date",
+        nargs="?",
+        help="Which date should we retrieve training metrics from. Default to latest",
+    )
+
+    args = parser.parse_args()
+
+    get_task_metrics_from_date(args.model, args.date)
 
 
 if __name__ == "__main__":
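A quick, self-contained Python sketch of what the new is_later_or_equal filter keeps and prunes during the namespace walk; the 2019.6.10 date below is only an illustrative value, not something taken from this commit:

def is_later_or_equal(partial_date, from_date):
    for partial_date_part, from_date_part in zip(partial_date, from_date):
        if int(partial_date_part) > int(from_date_part):
            return True
        elif int(partial_date_part) < int(from_date_part):
            return False
        else:
            continue
    return True

# Target date, split the same way get_task_metrics_from_date splits it.
from_date = "2019.6.10".split(".")

# Partial namespaces that tie on every compared part are kept, so the
# traversal still descends into the parents of the target date.
assert is_later_or_equal(["2019"], from_date)
assert is_later_or_equal(["2019", "6"], from_date)

# Later dates are kept, strictly earlier ones are pruned.
assert is_later_or_equal(["2019", "7"], from_date)
assert not is_later_or_equal(["2019", "5"], from_date)
assert not is_later_or_equal(["2019", "6", "9"], from_date)
assert is_later_or_equal(["2019", "6", "10"], from_date)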
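Assuming the file is scripts/retrieve_training_metrics.py in the bugbug repository (the path and the model name below are illustrative, not taken from this commit), the reworked interface would be invoked along these lines:

python scripts/retrieve_training_metrics.py defect 2019.6.10

Instead of printing a single metrics.json or writing it to --output, the script now walks the train_<model>.per_date index namespaces and writes one metric_<namespace>.json file per matching training task into the current directory; the old positional version argument and the --output/-o flag are gone.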