зеркало из https://github.com/mozilla/bugbug.git
Support retrieving some label files at runtime, and do it for the regressor labels
This commit is contained in:
Родитель
5165524b62
Коммит
f5951ad63a
|
@ -7,13 +7,24 @@ import csv
|
|||
import os
|
||||
import sys
|
||||
|
||||
from bugbug import utils
|
||||
|
||||
# Label files that can be fetched on demand. Keys are label file names
# without the ".csv" extension; values are the URLs that get_labels()
# downloads from when the file is missing from the labels directory.
LABELS_URLS = {
    "regressor": "https://github.com/marco-c/mozilla-central-regressors/raw/master/regressor.csv"
}
|
||||
|
||||
|
||||
def get_labels_dir():
    """Return the path of the ``labels`` directory beside the package's file.

    The package is looked up in ``sys.modules`` under ``__package__``, and the
    directory containing its ``__file__`` is used as the parent of ``labels``.
    """
    package_module = sys.modules[__package__]
    package_dir = os.path.dirname(package_module.__file__)
    return os.path.join(package_dir, "labels")
|
||||
|
||||
|
||||
def get_labels(file_name):
    """Yield the rows of the ``{file_name}.csv`` label file, minus the first.

    The file is looked up in the directory returned by ``get_labels_dir()``.
    When it is not there and ``file_name`` has an entry in ``LABELS_URLS``,
    it is downloaded to that location first through
    ``utils.download_check_etag``.

    Args:
        file_name: Name of the label file, without the ``.csv`` extension.

    Yields:
        Every CSV row after the first one, as produced by ``csv.reader``.
    """
    path = os.path.join(get_labels_dir(), f"{file_name}.csv")

    if not os.path.exists(path) and file_name in LABELS_URLS:
        utils.download_check_etag(LABELS_URLS[file_name], path)

    # newline="" leaves line-ending handling to the csv module, so quoted
    # fields containing newlines are read back correctly.
    with open(path, "r", newline="") as f:
        reader = csv.reader(f)
        # Skip the first row (presumably the header). The default keeps an
        # empty file from raising StopIteration inside this generator, which
        # Python would surface as a RuntimeError.
        next(reader, None)
        yield from reader
|
||||
|
|
|
@ -17,10 +17,9 @@ from collections import deque
|
|||
from datetime import datetime
|
||||
|
||||
import hglib
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
|
||||
from bugbug import db
|
||||
from bugbug import db, utils
|
||||
|
||||
COMMITS_DB = "data/commits.json"
|
||||
db.register(
|
||||
|
@ -647,26 +646,10 @@ def get_commits_to_ignore(repo_dir, commits):
|
|||
def download_component_mapping():
    """Refresh the component mapping file and load it into ``path_to_component``.

    The download is delegated to ``utils.download_check_etag``, which takes
    care of the ETag comparison, so this function does not issue its own
    HEAD/GET requests. The JSON file is then parsed and stored in the
    module-level ``path_to_component``.
    """
    global path_to_component

    utils.download_check_etag(
        "https://index.taskcluster.net/v1/task/gecko.v2.mozilla-central.latest.source.source-bugzilla-info/artifacts/public/components.json",
        "data/component_mapping.json",
    )

    with open("data/component_mapping.json", "r") as f:
        path_to_component = json.load(f)
|
||||
|
|
|
@ -7,6 +7,7 @@ import collections
|
|||
import os
|
||||
|
||||
import numpy as np
|
||||
import requests
|
||||
import taskcluster
|
||||
from sklearn.base import BaseEstimator, TransformerMixin
|
||||
from sklearn.compose import ColumnTransformer
|
||||
|
@ -110,3 +111,24 @@ def get_secret(secret_id):
|
|||
|
||||
else:
|
||||
raise ValueError("Failed to find secret {}".format(secret_id))
|
||||
|
||||
|
||||
def download_check_etag(url, path):
    """Download ``url`` to ``path`` unless the local copy is still current.

    The ETag returned by a HEAD request is compared with the one saved in
    ``{path}.etag`` by a previous call. The download is skipped only when the
    two match and ``path`` exists; otherwise the resource is fetched, written
    to ``path``, and the new ETag (if any) is saved.

    Args:
        url: Address of the resource to download.
        path: Destination file; its ETag is kept alongside in ``{path}.etag``.

    Raises:
        requests.HTTPError: If the GET request returns an error status.
    """
    r = requests.head(url, allow_redirects=True)
    # Not every server sends an ETag; treat its absence as "cannot tell
    # whether the local copy is current" instead of failing with KeyError.
    new_etag = r.headers.get("ETag")

    try:
        with open(f"{path}.etag", "r") as f:
            old_etag = f.read()
    except IOError:
        old_etag = None

    # A leftover .etag file must not prevent re-downloading a data file that
    # has been deleted, so the file's existence is checked as well.
    if new_etag is not None and old_etag == new_etag and os.path.exists(path):
        return

    r = requests.get(url)
    r.raise_for_status()

    with open(path, "w") as f:
        f.write(r.text)

    # Only record an ETag when the server actually provided one.
    if new_etag is not None:
        with open(f"{path}.etag", "w") as f:
            f.write(new_etag)
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
FROM mozilla/bugbug-base:latest

# regressor.csv is not baked into the image at build time: bugbug's
# get_labels() downloads label files listed in LABELS_URLS when they are
# missing, which also avoids hard-coding the interpreter's site-packages path.
# NOTE(review): assumes `bugbug-train regressor` reads its labels through
# get_labels("regressor") - confirm before relying on this.
CMD bugbug-train regressor
|
||||
|
|
Загрузка…
Ссылка в новой задаче