Support retrieving some label files at runtime, and do it for the regressor labels

This commit is contained in:
Marco Castelluccio 2019-06-05 00:37:26 +02:00
Родитель 5165524b62
Коммит f5951ad63a
4 изменённых файлов: 39 добавлений и 25 удалений

Просмотреть файл

@ -7,13 +7,24 @@ import csv
import os
import sys
from bugbug import utils
# Maps a label file name (without the ".csv" extension) to the URL it can be
# downloaded from when the file is not present in the labels directory.
LABELS_URLS = {
"regressor": "https://github.com/marco-c/mozilla-central-regressors/raw/master/regressor.csv"
}
def get_labels_dir():
    """Return the path of the ``labels`` directory of the enclosing package.

    The directory is located next to the file the package module was loaded
    from (``sys.modules[__package__].__file__``).
    """
    package_module = sys.modules[__package__]
    package_root = os.path.dirname(package_module.__file__)
    return os.path.join(package_root, "labels")
def get_labels(file_name):
    """Yield the data rows of the ``{file_name}.csv`` label file.

    The file is looked up in the labels directory. When it is not there and
    ``file_name`` has an entry in ``LABELS_URLS``, it is downloaded first.
    Because this is a generator, the lookup and the download only happen once
    iteration starts.

    Args:
        file_name: Name of the label file, without the ``.csv`` extension.

    Yields:
        Each row after the header line, as a list of strings.

    Raises:
        OSError: If the file does not exist and could not be downloaded.
    """
    path = os.path.join(get_labels_dir(), f"{file_name}.csv")

    # Fetch the file before trying to open it; opening first would fail on a
    # fresh install and the download would never be attempted.
    if not os.path.exists(path) and file_name in LABELS_URLS:
        utils.download_check_etag(LABELS_URLS[file_name], path)

    # newline="" lets the csv module handle line endings itself, as its
    # documentation requires.
    with open(path, "r", newline="") as f:
        reader = csv.reader(f)
        # Skip the header row. The default avoids a StopIteration escaping
        # from the generator (a RuntimeError under PEP 479) on an empty file.
        next(reader, None)
        yield from reader

Просмотреть файл

@ -17,10 +17,9 @@ from collections import deque
from datetime import datetime
import hglib
import requests
from tqdm import tqdm
from bugbug import db
from bugbug import db, utils
COMMITS_DB = "data/commits.json"
db.register(
@ -647,26 +646,10 @@ def get_commits_to_ignore(repo_dir, commits):
def download_component_mapping():
    """Refresh the path-to-component mapping and load it into memory.

    The mapping JSON is downloaded to ``data/component_mapping.json`` through
    ``utils.download_check_etag`` and then parsed into the module-level
    ``path_to_component`` global.
    """
    global path_to_component

    mapping_path = "data/component_mapping.json"

    # The ETag bookkeeping lives in the shared helper; repeating it inline
    # here would only issue the same requests twice.
    utils.download_check_etag(
        "https://index.taskcluster.net/v1/task/gecko.v2.mozilla-central.latest.source.source-bugzilla-info/artifacts/public/components.json",
        mapping_path,
    )

    with open(mapping_path, "r") as f:
        path_to_component = json.load(f)

Просмотреть файл

@ -7,6 +7,7 @@ import collections
import os
import numpy as np
import requests
import taskcluster
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
@ -110,3 +111,24 @@ def get_secret(secret_id):
else:
raise ValueError("Failed to find secret {}".format(secret_id))
def download_check_etag(url, path):
    """Download ``url`` to ``path`` unless the local copy is still current.

    The ETag reported by the server is compared with the one saved in
    ``{path}.etag`` by the previous download. The resource is fetched again
    when the two differ, when the server sends no ETag, or when ``path``
    itself is missing.

    Args:
        url: Address of the resource to download.
        path: Destination file; its ETag is stored alongside as
            ``{path}.etag``.

    Raises:
        requests.HTTPError: If the HEAD or the GET request returns an error
            status.
    """
    r = requests.head(url, allow_redirects=True)
    r.raise_for_status()
    # Not every server sends an ETag; treat its absence as "always refresh"
    # instead of failing with a KeyError.
    new_etag = r.headers.get("ETag")

    try:
        with open(f"{path}.etag", "r") as f:
            old_etag = f.read()
    except IOError:
        old_etag = None

    # A matching ETag is only meaningful if the data file is actually there.
    if new_etag is not None and old_etag == new_etag and os.path.exists(path):
        return

    r = requests.get(url)
    r.raise_for_status()

    # Write the raw bytes so the payload is stored exactly as served, without
    # a decode/re-encode round trip through the platform's default encoding.
    with open(path, "wb") as f:
        f.write(r.content)

    if new_etag is not None:
        with open(f"{path}.etag", "w") as f:
            f.write(new_etag)

Просмотреть файл

@ -1,5 +1,3 @@
# Build on top of the bugbug base image.
FROM mozilla/bugbug-base:latest
# Fetch the regressor labels CSV at image build time and place it in the
# labels directory of the installed bugbug package.
ADD https://github.com/marco-c/mozilla-central-regressors/raw/master/regressor.csv /usr/local/lib/python3.7/site-packages/bugbug/labels/regressor.csv
# Default command of the container: run bugbug-train for the regressor.
CMD bugbug-train regressor