Retrieve all metrics from a date and later (#734)

Fixes #614
This commit is contained in:
Boris Feld 2019-08-01 17:05:59 +02:00 коммит произвёл Marco
Родитель 893740bda0
Коммит f0ef378e49
1 изменённых файлов: 97 добавлений и 45 удалений

Просмотреть файл

@ -3,53 +3,23 @@
import argparse
import logging
import sys
from os.path import abspath
import requests
import taskcluster
# Index path templates, filled in with str.format().
# Takes the model name only.
LATEST_URI = "train_{}.latest"
# Takes the model name, then the bugbug version.
VERSIONED_URI = "train_{}.{}"
# Takes the model name, the bugbug version, then the date.
DATED_VERSIONED_URI = "train_{}.{}.{}"
# NOTE(review): BASE_URL is assigned a second time further down; that later
# assignment rebinds the name, so this value is not the one in effect once
# the module has finished loading.
BASE_URL = "https://index.taskcluster.net/v1/task/project.relman.bugbug.{}/artifacts/public/metrics.json"
from bugbug.utils import get_taskcluster_options
# Root of the per-date index for a model; takes the model name.
ROOT_URI = "train_{}.per_date"
# Per-date sub-namespace; takes the model name, then the dot-joined date parts.
DATE_URI = "train_{}.per_date.{}"
# URL of a task's metrics.json artifact; takes the task's full index namespace.
BASE_URL = "https://index.taskcluster.net/v1/task/{}/artifacts/public/metrics.json"
# Prefix turning one of the relative URIs above into a full index namespace.
NAMESPACE_URI = "project.relman.bugbug.{}"
LOGGER = logging.getLogger(__name__)
# Configures the root logger when the module is loaded. At INFO level the
# LOGGER.debug() calls in this file are not emitted.
logging.basicConfig(level=logging.INFO)
def main():
description = "Retrieve a model training metrics"
parser = argparse.ArgumentParser(description=description)
parser.add_argument("model", help="Which model to retrieve training metrics from.")
parser.add_argument(
"version",
nargs="?",
help="Which bugbug version should we retrieve training metrics from.",
default=None,
)
parser.add_argument(
"date",
nargs="?",
help="Which date should we retrieve training metrics from. Default to latest",
default=None,
)
parser.add_argument(
"--output",
"-o",
help="Where to output the metrics.json file. Default to printing its content",
default=None,
)
args = parser.parse_args()
if not args.version:
index_uri = LATEST_URI.format(args.model)
elif not args.date:
index_uri = VERSIONED_URI.format(args.model, args.version)
else:
index_uri = DATED_VERSIONED_URI.format(args.model, args.version, args.date)
def get_task_metrics_from_uri(index_uri):
index_url = BASE_URL.format(index_uri)
LOGGER.info(f"Retrieving metrics from {index_url}")
r = requests.get(index_url)
@ -60,13 +30,95 @@ def main():
r.raise_for_status()
if args.output:
file_path = abspath(args.output)
with open(file_path, "w") as output_file:
output_file.write(r.text)
LOGGER.info(f"Metrics saved to {file_path!r}")
return r
def get_namespaces(index, index_uri):
    """Return the child namespaces listed under `index_uri`.

    Args:
        index: Object exposing `listNamespaces(uri)`; the call must return a
            mapping with a "namespaces" key.
        index_uri: Namespace whose direct children should be listed.

    Returns:
        The value stored under the "namespaces" key of the listing response.
    """
    return index.listNamespaces(index_uri)["namespaces"]
def is_later_or_equal(partial_date, from_date):
    """Tell whether `partial_date` is the same as or later than `from_date`.

    Both arguments are sequences of date components ordered from most to
    least significant. Components are compared pairwise as integers, so
    "10" sorts after "9".

    Only the components present in both sequences are compared. A
    `partial_date` that is a prefix of `from_date` (or vice versa) therefore
    counts as "later or equal", which lets a caller keep descending into a
    partially specified date.

    Args:
        partial_date: Sequence of integer-like strings, possibly shorter than
            `from_date`.
        from_date: Sequence of integer-like strings giving the lower bound.

    Returns:
        True if no compared component is earlier than the bound, else False.

    Raises:
        ValueError: If a compared component cannot be converted with `int()`.
    """
    for partial_date_part, from_date_part in zip(partial_date, from_date):
        partial_value = int(partial_date_part)
        from_value = int(from_date_part)
        # The first differing component decides; equal components fall
        # through to the next, less significant one.
        if partial_value != from_value:
            return partial_value > from_value

    return True
def get_task_metrics_from_date(model, date):
    """Download the metrics of every indexed training task dated `date` or later.

    Walks the per-date index namespaces of `model` depth-first. For every
    task found, the metrics are fetched with `get_task_metrics_from_uri` and
    written to `metric_<task namespace with dots replaced by underscores>.json`
    relative to the current working directory.

    Args:
        model: Model name used to build the index namespaces.
        date: Dot-separated lower bound (e.g. "2019.08.01"); it may be a
            prefix such as "2019.08". When None or empty, no lower bound is
            applied and every per-date namespace is visited.
    """
    options = get_taskcluster_options()
    index = taskcluster.Index(options)

    # NOTE(review): presumably a connectivity check before the traversal;
    # confirm against the taskcluster client documentation.
    index.ping()

    # The command line may omit the date, in which case `date` is None.
    # Treat that as "no lower bound" rather than failing on `None.split`.
    from_date = date.split(".") if date else []

    # Stack of namespaces still to visit, each stored as a list of date
    # parts. The empty list stands for the root of the per-date index.
    namespaces = [[]]

    # Iterative depth-first walk over every namespace that is later than or
    # equal to the requested date.
    while namespaces:
        current_ns = namespaces.pop()

        # The root has no date parts and uses its own URI template.
        if not current_ns:
            ns_uri = ROOT_URI.format(model)
        else:
            current_ns_date = ".".join(current_ns)
            ns_uri = DATE_URI.format(model, current_ns_date)

        ns_full_uri = NAMESPACE_URI.format(ns_uri)

        tasks = index.listTasks(ns_full_uri)
        for task in tasks["tasks"]:
            task_uri = task["namespace"]
            r = get_task_metrics_from_uri(task_uri)

            # Write the file on disk
            file_path = f"metric_{task_uri.replace('.', '_')}.json"
            with open(file_path, "w") as metric_file:
                metric_file.write(r.text)
            LOGGER.info(f"Metrics saved to {file_path!r}")

        for namespace in get_namespaces(index, ns_full_uri):
            new_ns = current_ns.copy()
            new_ns.append(namespace["name"])

            if not is_later_or_equal(new_ns, from_date):
                LOGGER.debug("NEW namespace %s is before %s", new_ns, from_date)
                continue

            # Might not be efficient but size of `namespaces` shouldn't be too
            # big as we are doing a depth-first traversal
            if new_ns not in namespaces:
                namespaces.append(new_ns)
def main():
    """Parse the command line and download the requested training metrics.

    Accepts a required `model` positional argument and an optional `date`
    positional argument, then hands both to `get_task_metrics_from_date`.
    """
    parser = argparse.ArgumentParser(description="Retrieve a model training metrics")

    parser.add_argument("model", help="Which model to retrieve training metrics from.")
    parser.add_argument(
        "date",
        help="Which date should we retrieve training metrics from. Default to latest",
        nargs="?",
    )

    cli_args = parser.parse_args()
    get_task_metrics_from_date(cli_args.model, cli_args.date)
if __name__ == "__main__":