2019-06-23 00:18:08 +03:00
|
|
|
# -*- coding: utf-8 -*-
|
2019-08-05 11:22:55 +03:00
|
|
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
|
|
|
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
2019-06-23 00:18:08 +03:00
|
|
|
|
|
|
|
import argparse
|
2019-06-27 11:58:07 +03:00
|
|
|
import logging
|
2019-08-05 11:22:55 +03:00
|
|
|
import os
|
2019-06-23 00:18:08 +03:00
|
|
|
import sys
|
2019-08-05 11:22:55 +03:00
|
|
|
from os.path import abspath, join
|
2019-06-23 00:18:08 +03:00
|
|
|
|
|
|
|
import requests
|
2019-08-01 18:05:59 +03:00
|
|
|
import taskcluster
|
2019-06-23 00:18:08 +03:00
|
|
|
|
2019-08-01 18:05:59 +03:00
|
|
|
from bugbug.utils import get_taskcluster_options
|
|
|
|
|
|
|
|
# Index path template for the root of a model's per-date tree; formatted with the model name.
ROOT_URI = "train_{}.per_date"
|
|
|
|
# Index path template below the per-date root; formatted with the model name
# and a dot-joined date prefix.
DATE_URI = "train_{}.per_date.{}"
|
2019-11-09 00:13:10 +03:00
|
|
|
# URL template of the public metrics.json artifact of an indexed task;
# formatted with the task's index path.
BASE_URL = "https://community-tc.services.mozilla.com/api/index/v1/task/{}/artifacts/public/metrics.json"
|
2020-09-01 15:44:49 +03:00
|
|
|
# Wraps a ROOT_URI/DATE_URI path into the full "project.bugbug." index namespace.
NAMESPACE_URI = "project.bugbug.{}"
|
2019-06-23 00:18:08 +03:00
|
|
|
|
2019-06-27 11:58:07 +03:00
|
|
|
# Module-level logger shared by every function in this script.
LOGGER = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
# Module-level side effect: requests INFO-level logging configuration as soon
# as this file is loaded.
logging.basicConfig(level=logging.INFO)
|
|
|
|
|
2019-06-23 00:18:08 +03:00
|
|
|
|
2019-08-01 18:05:59 +03:00
|
|
|
def get_task_metrics_from_uri(index_uri, timeout=60):
    """Download the ``metrics.json`` artifact of an indexed task.

    Args:
        index_uri: Index path of the task; substituted into ``BASE_URL``.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``. Without a timeout a stalled connection would
            block the script indefinitely.

    Returns:
        The successful ``requests.Response`` holding the artifact.

    Raises:
        SystemExit: With status 1 when the server answers 404.
        requests.HTTPError: For any other unsuccessful HTTP status.
        requests.Timeout: When the server does not answer within ``timeout``.
    """
    index_url = BASE_URL.format(index_uri)
    LOGGER.info("Retrieving metrics from %s", index_url)
    r = requests.get(index_url, timeout=timeout)

    # A 404 almost always means a wrong model/date argument, so report it
    # in plain words and stop instead of surfacing a raw HTTP error.
    if r.status_code == 404:
        LOGGER.error("File not found for URL %s, check your arguments", index_url)
        sys.exit(1)

    r.raise_for_status()

    return r
|
|
|
|
|
|
|
|
|
|
|
|
def get_namespaces(index, index_uri):
    """Return the child namespaces listed under ``index_uri``.

    ``index`` is queried through its ``listNamespaces`` method and the
    ``"namespaces"`` entry of the response is handed back unchanged.
    """
    return index.listNamespaces(index_uri)["namespaces"]
|
|
|
|
|
|
|
|
|
|
|
|
def is_later_or_equal(partial_date, from_date):
    """Tell whether ``partial_date`` is not earlier than ``from_date``.

    Both arguments are sequences of date parts given as integer-like
    strings. Parts are compared pairwise as integers, left to right, and
    the first pair that differs decides the result. Comparison stops at the
    end of the shorter sequence; if no pair differs the answer is ``True``.
    """
    for candidate_part, bound_part in zip(partial_date, from_date):
        # Convert lazily, pair by pair, so later parts are never parsed
        # once an earlier pair has already settled the comparison.
        candidate = int(candidate_part)
        bound = int(bound_part)
        if candidate != bound:
            return candidate > bound

    return True
|
|
|
|
|
|
|
|
|
2019-08-05 11:22:55 +03:00
|
|
|
def get_task_metrics_from_date(model, date, output_directory):
    """Save the training metrics of every task indexed on or after ``date``.

    The per-date index of ``model`` is walked namespace by namespace. For
    each visited namespace the metrics of the tasks listed there are
    downloaded and written to ``output_directory`` as
    ``metric_<task index path with dots replaced by underscores>.json``.

    Args:
        model: Model name substituted into the index path templates.
        date: Dot-separated lower bound whose parts must parse as integers;
            it is compared part by part with the namespace names. ``None``
            or an empty string applies no lower bound, so the metrics of
            every indexed date are retrieved.
        output_directory: Existing directory that receives the files.
    """
    options = get_taskcluster_options()
    index = taskcluster.Index(options)
    index.ping()

    # The command line declares the date as optional, so it can arrive as
    # None. An empty bound makes is_later_or_equal accept every namespace,
    # where calling .split on None used to raise AttributeError.
    from_date = date.split(".") if date else []

    # Stack of namespace paths still to visit, each one a list of date
    # parts. The empty path stands for the root of the per-date index.
    namespaces = [[]]

    # Depth-first walk over all namespaces greater than or equal to the date
    while namespaces:
        current_ns = namespaces.pop()

        # The root level has no date component in its path
        if not current_ns:
            ns_uri = ROOT_URI.format(model)
        else:
            current_ns_date = ".".join(current_ns)
            ns_uri = DATE_URI.format(model, current_ns_date)

        ns_full_uri = NAMESPACE_URI.format(ns_uri)

        tasks = index.listTasks(ns_full_uri)
        for task in tasks["tasks"]:
            task_uri = task["namespace"]
            r = get_task_metrics_from_uri(task_uri)

            # Write the file on disk; the payload is JSON, so pin UTF-8
            # rather than relying on the platform's default encoding.
            file_name = f"metric_{'_'.join(task_uri.split('.'))}.json"
            file_path = abspath(join(output_directory, file_name))
            with open(file_path, "w", encoding="utf-8") as metric_file:
                metric_file.write(r.text)
            LOGGER.info("Metrics saved to %r", file_path)

        for namespace in get_namespaces(index, ns_full_uri):
            new_ns = [*current_ns, namespace["name"]]

            if not is_later_or_equal(new_ns, from_date):
                LOGGER.debug("NEW namespace %s is before %s", new_ns, from_date)
                continue

            # Might not be efficient but size of `namespaces` shouldn't be
            # too big as we are doing a depth-first traversal
            if new_ns not in namespaces:
                namespaces.append(new_ns)
|
|
|
|
|
|
|
|
|
2019-06-23 00:18:08 +03:00
|
|
|
def main():
    """Parse the command line and download the requested training metrics.

    Accepts an optional ``-d/--output-directory`` (defaulting to the current
    working directory), a required ``model`` positional and an optional
    ``date`` positional, then forwards all three to
    ``get_task_metrics_from_date``.
    """
    parser = argparse.ArgumentParser(description="Retrieve a model training metrics")

    # Registration order is kept: it fixes both the positional order
    # (model, then date) and the order shown in --help.
    parser.add_argument(
        "-d",
        "--output-directory",
        help="In which directory the script should save the metrics file. The directory must exists",
        default=os.getcwd(),
    )
    parser.add_argument("model", help="Which model to retrieve training metrics from.")
    parser.add_argument(
        "date",
        help="Which date should we retrieve training metrics from. Default to latest",
        nargs="?",
    )

    cli_args = parser.parse_args()

    get_task_metrics_from_date(
        cli_args.model, cli_args.date, cli_args.output_directory
    )
|
2019-06-23 00:18:08 +03:00
|
|
|
|
|
|
|
|
|
|
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|