Use utils.download_model for downloading models in the HTTP service instead of reimplementing it

Fixes #1242
2020-03-01 21:33:12 +01:00 · 2020-03-01 21:33:12 +01:00 · 3eee2f8c7a
--- a/http_service/bugbug_http/download_models.py
+++ b/http_service/bugbug_http/download_models.py
@@ -3,12 +3,13 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
 # You can obtain one at http://mozilla.org/MPL/2.0/.

-from bugbug_http.models import MODELS_NAMES, retrieve_model
+from bugbug import utils
+from bugbug_http.models import MODELS_NAMES


 def preload_models():
    for model_name in MODELS_NAMES:
-        retrieve_model(model_name)
+        utils.download_model(model_name)


 if __name__ == "__main__":
--- a/http_service/bugbug_http/models.py
+++ b/http_service/bugbug_http/models.py
@@ -8,16 +8,13 @@ import logging
 import os
 from datetime import datetime
 from typing import Dict
-from urllib.request import urlretrieve

-import requests
 from dateutil.relativedelta import relativedelta
 from redis import Redis

-from bugbug import bugzilla, get_bugbug_version
+from bugbug import bugzilla
 from bugbug.model import Model
 from bugbug.models import load_model
-from bugbug.utils import zstd_decompress
 from bugbug_http import ALLOW_MISSING_MODELS

 logging.basicConfig(level=logging.INFO)
@@ -37,8 +34,6 @@ MODELS_TO_PRELOAD = [
    "testlabelselect",
    "testgroupselect",
 ]
-MODELS_DIR = "models"
-BASE_URL = "https://community-tc.services.mozilla.com/api/index/v1/task/project.relman.bugbug.train_{}.latest/artifacts/public"
 DEFAULT_EXPIRATION_TTL = 7 * 24 * 3600  # A week


@@ -58,7 +53,7 @@ def get_model(model_name):
    if model_name not in MODEL_CACHE:
        print("Recreating the %r model in cache" % model_name)
        try:
-            model = load_model(model_name, MODELS_DIR)
+            model = load_model(model_name)
        except FileNotFoundError:
            if ALLOW_MISSING_MODELS:
                print(
@@ -86,41 +81,6 @@ def preload_models():
        get_model(model)


-def retrieve_model(name):
-    os.makedirs(MODELS_DIR, exist_ok=True)
-
-    file_name = f"{name}model"
-    file_path = os.path.join(MODELS_DIR, file_name)
-
-    base_model_url = BASE_URL.format(name, f"v{get_bugbug_version()}")
-    model_url = f"{base_model_url}/{file_name}.zst"
-    LOGGER.info(f"Checking ETAG of {model_url}")
-
-    r = requests.head(model_url, allow_redirects=True)
-    r.raise_for_status()
-    new_etag = r.headers["ETag"]
-
-    try:
-        with open(f"{file_path}.etag", "r") as f:
-            old_etag = f.read()
-    except IOError:
-        old_etag = None
-
-    if old_etag != new_etag:
-        LOGGER.info(f"Downloading the model from {model_url}")
-        urlretrieve(model_url, f"{file_path}.zst")
-
-        zstd_decompress(file_path)
-        LOGGER.info(f"Written model in {file_path}")
-
-        with open(f"{file_path}.etag", "w") as f:
-            f.write(new_etag)
-    else:
-        LOGGER.info(f"ETAG for {model_url} is ok")
-
-    return file_path
-
-
 def classify_bug(
    model_name, bug_ids, bugzilla_token, expiration=DEFAULT_EXPIRATION_TTL
 ):
--- a/scripts/integration_test.sh
+++ b/scripts/integration_test.sh
@@ -6,9 +6,6 @@ set -euox pipefail

 # Supposed to be run from the repository root directory

-mkdir -p models
-cd models
-
 # Remove the models and any old data
 rm defectenhancementtaskmodel* || true;
 rm backout* || true;
@@ -41,10 +38,7 @@ bugbug-train --limit 30000 --no-download backout
 # Then spin the http service up
 # This part duplicates the http service Dockerfiles because we cannot easily spin Docker containers
 # up on Taskcluster
-cd ../
 pip install --disable-pip-version-check --quiet --no-cache-dir ./http_service
-pwd
-ls models

 export REDIS_URL=redis://localhost:6379/4