Removed all old S3 JSON loader code and references

Victor Ng 2018-08-07 13:35:55 -04:00
Parent 20f09884e0
Commit e19550b556
8 changed files with 0 additions and 140 deletions

View File

@@ -1,65 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import time
import threading


class Clock:
    def time(self):
        """Return epoch time in seconds like time.time()"""
        return time.time()


class JSONCache:
    """
    This class keeps a cache of JSON blobs and S3 bucket data.

    All data is expired simultaneously.
    """

    def __init__(self, ctx):
        assert 'utils' in ctx
        assert 'clock' in ctx

        self._ctx = ctx

        # Set to 4 hours
        self._ttl = 60 * 60 * 4

        self._json_cache = {}
        self._s3_json_cache = {}

        self.refresh_expiry()

        self._lock = threading.RLock()

    def refresh_expiry(self):
        self._expiry_time = self._ctx['clock'].time() + self._ttl

    def fetch_json(self, url):
        with self._lock:
            utils = self._ctx['utils']
            if url not in self._json_cache:
                self._json_cache[url] = utils.fetch_json(url)
            content = self._json_cache[url]
            self.expire_cache()
            return content

    def get_s3_json_content(self, s3_bucket, s3_key):
        with self._lock:
            utils = self._ctx['utils']
            key = (s3_bucket, s3_key)
            if key not in self._s3_json_cache:
                self._s3_json_cache[key] = utils.get_s3_json_content(s3_bucket, s3_key)
            content = self._s3_json_cache[key]
            self.expire_cache()
            return content

    def expire_cache(self):
        with self._lock:
            clock = self._ctx['clock']
            if clock.time() >= self._expiry_time:
                self._json_cache.clear()
                self._s3_json_cache.clear()
                self.refresh_expiry()
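
To make the removed cache's behavior concrete, here is a minimal sketch of how JSONCache expired entries after its four-hour TTL. FakeClock and FakeUtils are hypothetical test doubles invented for this illustration; only the ctx keys ('utils' and 'clock') come from the code above.

# Hypothetical test doubles, not part of the removed module.
class FakeClock:
    """Clock stand-in whose time can be advanced manually."""
    def __init__(self):
        self._now = 0.0

    def time(self):
        return self._now

    def advance(self, seconds):
        self._now += seconds


class FakeUtils:
    """Stand-in for the 'utils' module; counts fetches to expose caching."""
    def __init__(self):
        self.fetch_count = 0

    def fetch_json(self, url):
        self.fetch_count += 1
        return {'url': url}

    def get_s3_json_content(self, s3_bucket, s3_key):
        return {'bucket': s3_bucket, 'key': s3_key}


clock, utils = FakeClock(), FakeUtils()
cache = JSONCache({'utils': utils, 'clock': clock})

cache.fetch_json('http://example.com/a.json')
cache.fetch_json('http://example.com/a.json')
assert utils.fetch_count == 1   # second call was served from the cache

clock.advance(60 * 60 * 4)      # jump past the four-hour TTL
cache.expire_cache()            # both caches are cleared together
cache.fetch_json('http://example.com/a.json')
assert utils.fetch_count == 2   # entry was expired, so it was refetched

Note that expire_cache runs after the lookup inside fetch_json, so one stale read could be served right at the expiry boundary before the caches were cleared.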

View File

@@ -13,7 +13,6 @@ configuration information as we pass the context through an object
chain.
"""
from taar.recommenders import utils

# Clobber the Context name to prevent messy name collisions
from srgutil.context import Context as _Context
@@ -24,7 +23,6 @@ def default_context():
    from taar.recommenders import SimilarityRecommender
    from taar.recommenders import LocaleRecommender
    from taar.cache import Clock
    from taar.cache import JSONCache

    # Note that the EnsembleRecommender is *not* in this map as it
    # needs to ensure that the recommender_map key is installed in the
@@ -33,7 +31,5 @@ def default_context():
        'similarity': lambda: SimilarityRecommender(ctx.child()),
        'locale': lambda: LocaleRecommender(ctx.child())}
    ctx['utils'] = utils
    ctx['clock'] = Clock()
    ctx['cache'] = JSONCache(ctx)
    return ctx
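
For reference, a hedged sketch of how this wiring was consumed before the removal. It relies only on what the diff shows: dict-style access on the srgutil Context, ctx.child(), and the 'cache'/'clock' keys that this commit deletes; the URL is a placeholder.

# Illustrative only: how callers reached the shared cache through the context.
ctx = default_context()

cache = ctx['cache']   # the JSONCache instance this commit removes
clock = ctx['clock']   # the Clock instance this commit removes

# Each recommender was built from ctx.child(), so all of them shared the
# same JSONCache and therefore a single four-hour expiry window.
data = cache.fetch_json('https://example.com/model.json')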

View File

@@ -38,8 +38,6 @@ class CollaborativeRecommender(AbstractRecommender):
        self._ctx = ctx
        self.logger = self._ctx[IMozLogging].get_logger('taar')
        assert 'cache' in self._ctx

        self._load_json_models()
        self.model = None
        self._build_model()

View File

@@ -15,7 +15,6 @@ ENSEMBLE_WEIGHTS = 'taar/ensemble/ensemble_weight.json'
class WeightCache:
    def __init__(self, ctx):
        self._ctx = ctx
        assert 'cache' in self._ctx

        self._lock = threading.RLock()

View File

@@ -21,7 +21,6 @@ class LocaleRecommender(AbstractRecommender):
    """
    def __init__(self, ctx):
        self._ctx = ctx
        assert 'cache' in self._ctx

        self._init_from_ctx()
        self.logger = self._ctx[IMozLogging].get_logger('taar')

View File

@@ -24,10 +24,6 @@ class CuratedWhitelistCache:
    def __init__(self, ctx):
        self._ctx = ctx
        self._lock = threading.RLock()

        # Enable this check when we start using srgutils
        # assert 'cache' in self._ctx

        self._json_data = None

    def get_whitelist(self):

View File

@@ -40,8 +40,6 @@ class SimilarityRecommender(AbstractRecommender):
        self._ctx = ctx
        self.logger = self._ctx[IMozLogging].get_logger('taar')
        assert 'cache' in self._ctx

        self._init_from_ctx()

    def _init_from_ctx(self):

View File

@@ -1,61 +0,0 @@
import boto3
import json
import logging

import requests
import requests.exceptions

logger = logging.getLogger(__name__)


def fetch_json(uri):
    """Perform an HTTP GET on the given uri, return the results as json.

    Args:
        uri: the string URI to fetch.

    Returns:
        A JSON object with the response or None if the status code of the
        response is an error code.
    """
    try:
        r = requests.get(uri)
        if r.status_code != requests.codes.ok:
            return None
        return r.json()
    except requests.exceptions.ConnectionError:
        return None


def get_s3_json_content(s3_bucket, s3_key):
    """Download and parse a json file stored on AWS S3.

    The file is downloaded and then cached for future use.
    """
    raw_data = None
    try:
        s3 = boto3.resource('s3')
        raw_data = (
            s3
            .Object(s3_bucket, s3_key)
            .get()['Body']
            .read()
            .decode('utf-8')
        )
    except Exception:
        logger.exception("Failed to download from S3", extra={
            "bucket": s3_bucket,
            "key": s3_key})
        return None

    # It can happen to have corrupted files. Account for the
    # sad reality of life.
    try:
        return json.loads(raw_data)
    except ValueError:
        logger.error("Cannot parse JSON resource from S3", extra={
            "bucket": s3_bucket,
            "key": s3_key})
        return None
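
Finally, a brief sketch of how these removed helpers were called. The S3 key matches the ENSEMBLE_WEIGHTS constant visible earlier in this diff; the bucket name and URL are hypothetical placeholders.

# Both helpers return None on any failure, so callers must supply a fallback.
weights = get_s3_json_content('example-taar-bucket',   # hypothetical bucket
                              'taar/ensemble/ensemble_weight.json')
if weights is None:
    weights = {}   # download or parse failed; details were already logged

blob = fetch_json('https://example.com/locale_model.json')   # hypothetical URL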