Mirror of https://github.com/mozilla/taar.git
Removed all old S3 JSON loader code and references
Parent: 20f09884e0
Commit: e19550b556
taar/cache.py (deleted)
@@ -1,65 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import time
import threading


class Clock:
    def time(self):
        """Return epoch time in seconds like time.time()"""
        return time.time()


class JSONCache:
    """
    This class keeps a cache of JSON blobs and S3 bucket data.

    All data is expired simultaneously.
    """
    def __init__(self, ctx):
        assert 'utils' in ctx
        assert 'clock' in ctx
        self._ctx = ctx

        # Set to 4 hours
        self._ttl = 60 * 60 * 4

        self._json_cache = {}
        self._s3_json_cache = {}

        self.refresh_expiry()

        self._lock = threading.RLock()

    def refresh_expiry(self):
        self._expiry_time = self._ctx['clock'].time() + self._ttl

    def fetch_json(self, url):
        with self._lock:
            utils = self._ctx['utils']
            if url not in self._json_cache:
                self._json_cache[url] = utils.fetch_json(url)
            content = self._json_cache[url]
            self.expire_cache()
            return content

    def get_s3_json_content(self, s3_bucket, s3_key):
        with self._lock:
            utils = self._ctx['utils']
            key = (s3_bucket, s3_key)
            if key not in self._s3_json_cache:
                self._s3_json_cache[key] = utils.get_s3_json_content(s3_bucket, s3_key)
            content = self._s3_json_cache[key]
            self.expire_cache()
            return content

    def expire_cache(self):
        with self._lock:
            clock = self._ctx['clock']

            if clock.time() >= self._expiry_time:
                self._json_cache.clear()
                self._s3_json_cache.clear()
                self.refresh_expiry()
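For context, a minimal usage sketch of the removed JSONCache. It assumes the Clock and JSONCache classes above; FakeUtils is a hypothetical stand-in for the utils module that default_context() installed under ctx['utils'] (see the next hunk).

class FakeUtils:
    """Hypothetical stand-in for taar.recommenders.utils."""
    @staticmethod
    def fetch_json(url):
        return {"source": url}  # pretend HTTP fetch


ctx = {'utils': FakeUtils, 'clock': Clock()}
cache = JSONCache(ctx)

# The first call goes through FakeUtils; repeat calls are served from the
# in-memory dict until the 4-hour TTL elapses and expire_cache() clears it.
blob = cache.fetch_json('https://example.com/model.json')
assert blob is cache.fetch_json('https://example.com/model.json')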
@@ -13,7 +13,6 @@ configuration information as we pass the context through an object
chain.
"""

from taar.recommenders import utils
# Clobber the Context name to prevent messy name collisions
from srgutil.context import Context as _Context


@@ -24,7 +23,6 @@ def default_context():
    from taar.recommenders import SimilarityRecommender
    from taar.recommenders import LocaleRecommender
    from taar.cache import Clock
    from taar.cache import JSONCache

    # Note that the EnsembleRecommender is *not* in this map as it
    # needs to ensure that the recommender_map key is installed in the

@@ -33,7 +31,5 @@ def default_context():
        'similarity': lambda: SimilarityRecommender(ctx.child()),
        'locale': lambda: LocaleRecommender(ctx.child())}

    ctx['utils'] = utils
    ctx['clock'] = Clock()
    ctx['cache'] = JSONCache(ctx)
    return ctx
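The context object acts as a small service locator: default_context() installs the shared JSONCache under the 'cache' key, and each consumer asserts the key before reading through it, as the recommender hunks below show. A hedged sketch of that access pattern (ExampleRecommender and _load_model are hypothetical names):

class ExampleRecommender:
    """Hypothetical consumer mirroring the assert-then-read pattern."""
    def __init__(self, ctx):
        self._ctx = ctx
        assert 'cache' in self._ctx  # same guard as the real recommenders

    def _load_model(self, s3_bucket, s3_key):
        # every S3 JSON read funnels through the shared, TTL-expiring cache
        return self._ctx['cache'].get_s3_json_content(s3_bucket, s3_key)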
@@ -38,8 +38,6 @@ class CollaborativeRecommender(AbstractRecommender):
        self._ctx = ctx
        self.logger = self._ctx[IMozLogging].get_logger('taar')

        assert 'cache' in self._ctx

        self._load_json_models()
        self.model = None
        self._build_model()
@@ -15,7 +15,6 @@ ENSEMBLE_WEIGHTS = 'taar/ensemble/ensemble_weight.json'
class WeightCache:
    def __init__(self, ctx):
        self._ctx = ctx
        assert 'cache' in self._ctx

        self._lock = threading.RLock()
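WeightCache presumably fetches ENSEMBLE_WEIGHTS through ctx['cache']; here is a sketch under that assumption — the accessor name and the bucket constant are hypothetical, as neither appears in this hunk.

import threading

TAAR_S3_BUCKET = 'example-taar-bucket'  # hypothetical; the real bucket is not shown
ENSEMBLE_WEIGHTS = 'taar/ensemble/ensemble_weight.json'


class WeightCacheSketch:
    def __init__(self, ctx):
        self._ctx = ctx
        assert 'cache' in self._ctx
        self._lock = threading.RLock()

    def get_weights(self):  # hypothetical accessor
        with self._lock:
            return self._ctx['cache'].get_s3_json_content(
                TAAR_S3_BUCKET, ENSEMBLE_WEIGHTS)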
@@ -21,7 +21,6 @@ class LocaleRecommender(AbstractRecommender):
    """
    def __init__(self, ctx):
        self._ctx = ctx
        assert 'cache' in self._ctx
        self._init_from_ctx()
        self.logger = self._ctx[IMozLogging].get_logger('taar')
@@ -24,10 +24,6 @@ class CuratedWhitelistCache:
    def __init__(self, ctx):
        self._ctx = ctx
        self._lock = threading.RLock()

        # Enable this check when we start using srgutils
        # assert 'cache' in self._ctx

        self._json_data = None

    def get_whitelist(self):
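CuratedWhitelistCache appears to lazy-load: _json_data starts as None and get_whitelist fills it on first use. A sketch of that pattern under the lock; the class, constructor, and URL below are hypothetical, since the hunk does not show the actual data source.

import threading

WHITELIST_URL = 'https://example.com/whitelist.json'  # hypothetical source


class LazyWhitelist:
    def __init__(self, fetch_json):
        self._fetch_json = fetch_json
        self._lock = threading.RLock()
        self._json_data = None

    def get_whitelist(self):
        with self._lock:
            if self._json_data is None:  # fetch once, then serve from memory
                self._json_data = self._fetch_json(WHITELIST_URL)
            return self._json_data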
@@ -40,8 +40,6 @@ class SimilarityRecommender(AbstractRecommender):
        self._ctx = ctx
        self.logger = self._ctx[IMozLogging].get_logger('taar')

        assert 'cache' in self._ctx

        self._init_from_ctx()

    def _init_from_ctx(self):
taar/recommenders/utils.py (deleted)
@@ -1,61 +0,0 @@
import boto3
import json
import logging
import requests
import requests.exceptions


logger = logging.getLogger(__name__)


def fetch_json(uri):
    """Perform an HTTP GET on the given uri, return the results as JSON.

    Args:
        uri: the string URI to fetch.

    Returns:
        A JSON object with the response or None if the status code of the
        response is an error code.
    """
    try:
        r = requests.get(uri)
        if r.status_code != requests.codes.ok:
            return None
        return r.json()
    except requests.exceptions.ConnectionError:
        return None


def get_s3_json_content(s3_bucket, s3_key):
    """Download and parse a JSON file stored on AWS S3.

    Caching of the parsed content is handled by the JSONCache wrapper.
    """
    raw_data = None
    try:
        s3 = boto3.resource('s3')
        raw_data = (
            s3
            .Object(s3_bucket, s3_key)
            .get()['Body']
            .read()
            .decode('utf-8')
        )
    except Exception:
        logger.exception("Failed to download from S3", extra={
            "bucket": s3_bucket,
            "key": s3_key})
        return None

    # It can happen to have corrupted files. Account for the
    # sad reality of life.
    try:
        return json.loads(raw_data)
    except ValueError:
        logger.error("Cannot parse JSON resource from S3", extra={
            "bucket": s3_bucket,
            "key": s3_key})

    return None
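Both removed helpers swallow failures and return None rather than raising, so a caller needed only one guard; a usage sketch with hypothetical URL and bucket values:

weights = get_s3_json_content('example-bucket',
                              'taar/ensemble/ensemble_weight.json')
if weights is None:
    weights = {}  # covers both download failure and corrupt JSON

addons = fetch_json('https://example.com/addons.json')  # also None on HTTP errors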