Mirror of https://github.com/mozilla/taar.git
Removed all old S3 JSON loader code and references
Parent: 20f09884e0
Commit: e19550b556
taar/cache.py (deleted)
@@ -1,65 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import time
import threading


class Clock:
    def time(self):
        """Return epoch time in seconds like time.time()"""
        return time.time()


class JSONCache:
    """
    This class keeps a cache of JSON blobs and S3 bucket data.

    All data is expired simultaneously.
    """
    def __init__(self, ctx):
        assert 'utils' in ctx
        assert 'clock' in ctx
        self._ctx = ctx

        # Set to 4 hours
        self._ttl = 60 * 60 * 4

        self._json_cache = {}
        self._s3_json_cache = {}

        self.refresh_expiry()

        self._lock = threading.RLock()

    def refresh_expiry(self):
        self._expiry_time = self._ctx['clock'].time() + self._ttl

    def fetch_json(self, url):
        with self._lock:
            utils = self._ctx['utils']
            if url not in self._json_cache:
                self._json_cache[url] = utils.fetch_json(url)
            content = self._json_cache[url]
            self.expire_cache()
            return content

    def get_s3_json_content(self, s3_bucket, s3_key):
        with self._lock:
            utils = self._ctx['utils']
            key = (s3_bucket, s3_key)
            if key not in self._s3_json_cache:
                self._s3_json_cache[key] = utils.get_s3_json_content(s3_bucket, s3_key)
            content = self._s3_json_cache[key]
            self.expire_cache()
            return content

    def expire_cache(self):
        with self._lock:
            clock = self._ctx['clock']

            if clock.time() >= self._expiry_time:
                self._json_cache.clear()
                self._s3_json_cache.clear()
                self.refresh_expiry()
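For context, a minimal usage sketch of the removed JSONCache. It assumes the Clock and JSONCache classes above; FakeUtils is a hypothetical stand-in for the utils module that default_context() installed under ctx['utils'] (see the next hunk).

class FakeUtils:
    """Hypothetical stand-in for taar.recommenders.utils."""
    @staticmethod
    def fetch_json(url):
        return {"source": url}  # pretend HTTP fetch


ctx = {'utils': FakeUtils, 'clock': Clock()}
cache = JSONCache(ctx)

# The first call goes through FakeUtils; repeat calls are served from the
# in-memory dict until the 4-hour TTL elapses and expire_cache() clears it.
blob = cache.fetch_json('https://example.com/model.json')
assert blob is cache.fetch_json('https://example.com/model.json')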
@@ -13,7 +13,6 @@ configuration information as we pass the context through an object
chain.
"""

from taar.recommenders import utils
# Clobber the Context name to prevent messy name collisions
from srgutil.context import Context as _Context


@@ -24,7 +23,6 @@ def default_context():
    from taar.recommenders import SimilarityRecommender
    from taar.recommenders import LocaleRecommender
    from taar.cache import Clock
    from taar.cache import JSONCache

    # Note that the EnsembleRecommender is *not* in this map as it
    # needs to ensure that the recommender_map key is installed in the

@@ -33,7 +31,5 @@ def default_context():
        'similarity': lambda: SimilarityRecommender(ctx.child()),
        'locale': lambda: LocaleRecommender(ctx.child())}

    ctx['utils'] = utils
    ctx['clock'] = Clock()
    ctx['cache'] = JSONCache(ctx)
    return ctx
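The context object acts as a small service locator: default_context() installs the shared JSONCache under the 'cache' key, and each consumer asserts the key before reading through it, as the recommender hunks below show. A hedged sketch of that access pattern (ExampleRecommender and _load_model are hypothetical names):

class ExampleRecommender:
    """Hypothetical consumer mirroring the assert-then-read pattern."""
    def __init__(self, ctx):
        self._ctx = ctx
        assert 'cache' in self._ctx  # same guard as the real recommenders

    def _load_model(self, s3_bucket, s3_key):
        # every S3 JSON read funnels through the shared, TTL-expiring cache
        return self._ctx['cache'].get_s3_json_content(s3_bucket, s3_key)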
@@ -38,8 +38,6 @@ class CollaborativeRecommender(AbstractRecommender):
        self._ctx = ctx
        self.logger = self._ctx[IMozLogging].get_logger('taar')

        assert 'cache' in self._ctx

        self._load_json_models()
        self.model = None
        self._build_model()
@@ -15,7 +15,6 @@ ENSEMBLE_WEIGHTS = 'taar/ensemble/ensemble_weight.json'
class WeightCache:
    def __init__(self, ctx):
        self._ctx = ctx
        assert 'cache' in self._ctx

        self._lock = threading.RLock()
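WeightCache presumably fetches ENSEMBLE_WEIGHTS through ctx['cache']; here is a sketch under that assumption — the accessor name and the bucket constant are hypothetical, as neither appears in this hunk.

import threading

TAAR_S3_BUCKET = 'example-taar-bucket'  # hypothetical; the real bucket is not shown
ENSEMBLE_WEIGHTS = 'taar/ensemble/ensemble_weight.json'


class WeightCacheSketch:
    def __init__(self, ctx):
        self._ctx = ctx
        assert 'cache' in self._ctx
        self._lock = threading.RLock()

    def get_weights(self):  # hypothetical accessor
        with self._lock:
            return self._ctx['cache'].get_s3_json_content(
                TAAR_S3_BUCKET, ENSEMBLE_WEIGHTS)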
@@ -21,7 +21,6 @@ class LocaleRecommender(AbstractRecommender):
    """
    def __init__(self, ctx):
        self._ctx = ctx
        assert 'cache' in self._ctx
        self._init_from_ctx()
        self.logger = self._ctx[IMozLogging].get_logger('taar')
@@ -24,10 +24,6 @@ class CuratedWhitelistCache:
    def __init__(self, ctx):
        self._ctx = ctx
        self._lock = threading.RLock()

        # Enable this check when we start using srgutils
        # assert 'cache' in self._ctx

        self._json_data = None

    def get_whitelist(self):
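CuratedWhitelistCache appears to lazy-load: _json_data starts as None and get_whitelist fills it on first use. A sketch of that pattern under the lock; the class, constructor, and URL below are hypothetical, since the hunk does not show the actual data source.

import threading

WHITELIST_URL = 'https://example.com/whitelist.json'  # hypothetical source


class LazyWhitelist:
    def __init__(self, fetch_json):
        self._fetch_json = fetch_json
        self._lock = threading.RLock()
        self._json_data = None

    def get_whitelist(self):
        with self._lock:
            if self._json_data is None:  # fetch once, then serve from memory
                self._json_data = self._fetch_json(WHITELIST_URL)
            return self._json_data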
@@ -40,8 +40,6 @@ class SimilarityRecommender(AbstractRecommender):
        self._ctx = ctx
        self.logger = self._ctx[IMozLogging].get_logger('taar')

        assert 'cache' in self._ctx

        self._init_from_ctx()

    def _init_from_ctx(self):
taar/recommenders/utils.py (deleted)
@@ -1,61 +0,0 @@
import boto3
import json
import logging
import requests
import requests.exceptions


logger = logging.getLogger(__name__)


def fetch_json(uri):
    """Perform an HTTP GET on the given uri, return the results as JSON.

    Args:
        uri: the string URI to fetch.

    Returns:
        A JSON object with the response or None if the status code of the
        response is an error code.
    """
    try:
        r = requests.get(uri)
        if r.status_code != requests.codes.ok:
            return None
        return r.json()
    except requests.exceptions.ConnectionError:
        return None


def get_s3_json_content(s3_bucket, s3_key):
    """Download and parse a JSON file stored on AWS S3.

    Caching of the parsed content is handled by the JSONCache wrapper.
    """
    raw_data = None
    try:
        s3 = boto3.resource('s3')
        raw_data = (
            s3
            .Object(s3_bucket, s3_key)
            .get()['Body']
            .read()
            .decode('utf-8')
        )
    except Exception:
        logger.exception("Failed to download from S3", extra={
            "bucket": s3_bucket,
            "key": s3_key})
        return None

    # It can happen to have corrupted files. Account for the
    # sad reality of life.
    try:
        return json.loads(raw_data)
    except ValueError:
        logger.error("Cannot parse JSON resource from S3", extra={
            "bucket": s3_bucket,
            "key": s3_key})

    return None
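Both removed helpers swallow failures and return None rather than raising, so a caller needed only one guard; a usage sketch with hypothetical URL and bucket values:

weights = get_s3_json_content('example-bucket',
                              'taar/ensemble/ensemble_weight.json')
if weights is None:
    weights = {}  # covers both download failure and corrupt JSON

addons = fetch_json('https://example.com/addons.json')  # also None on HTTP errors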