Make utils.download_check_etag automatically detect the file name from its URL (#1114)

Fixes #1112
This commit is contained in:
Abhay Katheria 2019-11-27 04:24:51 +05:30 committed by Marco Castelluccio
Parent 958c086004
Commit b38b3aa35c
8 changed files: 27 additions and 19 deletions

View file

@ -134,9 +134,12 @@ def get_secret(secret_id):
raise ValueError("Failed to find secret {}".format(secret_id))
def download_check_etag(url, path):
def download_check_etag(url, path=None):
r = requests.head(url, allow_redirects=True)
if path is None:
path = url.split("/")[-1]
new_etag = r.headers["ETag"]
try:

View file

@ -32,8 +32,7 @@ def classify_bugs(model_name, classifier, bug_id):
logger.info(f"{model_file_name} does not exist. Downloading the model....")
try:
download_check_etag(
f"https://community-tc.services.mozilla.com/api/index/v1/task/project.relman.bugbug.train_{model_name}.latest/artifacts/public/{model_file_name}.zst",
f"{model_file_name}.zst",
f"https://community-tc.services.mozilla.com/api/index/v1/task/project.relman.bugbug.train_{model_name}.latest/artifacts/public/{model_file_name}.zst"
)
except requests.HTTPError:
logger.error(

View file

@ -14,7 +14,7 @@ logger = getLogger(__name__)
def download_model(model_url, file_path):
logger.info(f"Downloading model from {model_url!r} and save it in {file_path!r}")
download_check_etag(model_url, f"{file_path}.zst")
download_check_etag(model_url)
zstd_decompress(file_path)
logger.info(f"Written model in {file_path}")

View file

@ -152,8 +152,7 @@ class CommitClassifier(object):
download_check_etag(
URL.format(
model_name=model_name, file_name=f"{model_data_X_path}.zst"
),
f"{model_data_X_path}.zst",
)
)
zstd_decompress(model_data_X_path)
assert os.path.exists(
@ -165,8 +164,7 @@ class CommitClassifier(object):
download_check_etag(
URL.format(
model_name=model_name, file_name=f"{model_data_y_path}.zst"
),
f"{model_data_y_path}.zst",
)
)
zstd_decompress(model_data_y_path)
assert os.path.exists(
@ -190,8 +188,7 @@ class CommitClassifier(object):
model_path = f"{model_name}model"
if not os.path.exists(model_path):
download_check_etag(
URL.format(model_name=model_name, file_name=f"{model_path}.zst"),
f"{model_path}.zst",
URL.format(model_name=model_name, file_name=f"{model_path}.zst")
)
zstd_decompress(model_path)
assert os.path.exists(model_path), "Decompressed model exists"

View file

@ -61,7 +61,7 @@ def download_model(model_name):
if not os.path.exists(f"{model_name}model"):
url = BASE_URL.format(model_name=model_name)
logger.info(f"Downloading {url}...")
download_check_etag(url, f"{model_name}model.zst")
download_check_etag(url)
zstd_decompress(f"{model_name}model")
assert os.path.exists(f"{model_name}model"), "Decompressed file exists"

View file

@ -37,7 +37,7 @@ def main(args):
if not os.path.exists(model_file_name):
logger.info(f"{model_file_name} does not exist. Downloading the model....")
try:
download_check_etag(URL.format(model_file_name), f"{model_file_name}.zst")
download_check_etag(URL.format(model_file_name))
except requests.HTTPError:
logger.error(
f"A pre-trained model is not available, you will need to train it yourself using the trainer script"

View file

@ -96,7 +96,7 @@ file = {{ driver = "file", path = "{os.path.abspath(cache_path)}" }}
def generate_test_scheduling_history(self):
if not os.path.exists("push_data.json"):
download_check_etag(PUSH_DATA_URL, "push_data.json.zst")
download_check_etag(PUSH_DATA_URL)
zstd_decompress("push_data.json")
assert os.path.exists(
"push_data.json"

View file

@ -102,13 +102,22 @@ def test_download_check_etag():
responses.add(responses.GET, url, status=200, body="prova")
assert utils.download_check_etag(url, "prova.txt")
assert utils.download_check_etag(url)
assert os.path.exists("prova.txt")
with open("prova.txt", "r") as f:
assert f.read() == "prova"
assert utils.download_check_etag(url, "data/prova2.txt")
assert os.path.exists("data/prova2.txt")
assert not os.path.exists("prova2.txt")
with open("data/prova2.txt", "r") as f:
assert f.read() == "prova"
def test_download_check_etag_changed():
url = "https://community-tc.services.mozilla.com/api/index/v1/task/project.relman.bugbug/prova.txt"
@ -131,14 +140,14 @@ def test_download_check_etag_changed():
responses.add(responses.GET, url, status=200, body="prova2")
assert utils.download_check_etag(url, "prova.txt")
assert utils.download_check_etag(url)
assert os.path.exists("prova.txt")
with open("prova.txt", "r") as f:
assert f.read() == "prova"
assert utils.download_check_etag(url, "prova.txt")
assert utils.download_check_etag(url)
assert os.path.exists("prova.txt")
@ -167,14 +176,14 @@ def test_download_check_etag_unchanged():
responses.add(responses.GET, url, status=200, body="prova2")
assert utils.download_check_etag(url, "prova.txt")
assert utils.download_check_etag(url)
assert os.path.exists("prova.txt")
with open("prova.txt", "r") as f:
assert f.read() == "prova"
assert not utils.download_check_etag(url, "prova.txt")
assert not utils.download_check_etag(url)
assert os.path.exists("prova.txt")
@ -197,7 +206,7 @@ def test_download_check_missing():
)
with pytest.raises(requests.exceptions.HTTPError, match="HTTP error"):
utils.download_check_etag(url, "prova.txt")
utils.download_check_etag(url)
assert not os.path.exists("prova.txt")