Mirror of https://github.com/mozilla/taar.git

Features/taar in redis (#179)

* Migrated taar locale recommender to use redis
* Added a noop fixture loader module for tests
* Converted TAAR collaborative recommender to use redis
* Dropped hybrid recommender
* Ported similarity recommender to use redis
* Ported ensemble and recommendation manager to use redis
* Dropped LazyJSONLoader
* Dropped moto dependency
* Renamed AddonsCoinstallCache to TAARCache
* Renamed bin/taarlite-redis to bin/taar-redis
* Execute the data preprocess step on redis pointer change
* Bumped version to 0.7.4

Parent: 9773053739
Commit: b33a1b684c
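The unifying change in this commit is that every recommender now reads its model data from a single shared TAARCache singleton backed by Redis, instead of holding its own LazyJSONLoader against S3. The following is a minimal sketch of the new access pattern, assembled from the hunks below; it assumes a Redis instance is reachable at the configured REDIS_HOST/REDIS_PORT.

# Sketch only: mirrors the accessor pattern introduced in this commit.
from taar.context import default_context
from taar.recommenders.redis_cache import TAARCache

ctx = default_context()
cache = TAARCache.get_instance(ctx)  # singleton accessor; do not call TAARCache(ctx) directly

# Model data is read from whichever Redis database is currently marked active.
weights = cache.ensemble_weights()             # per-recommender weights, or None if not loaded
whitelist = cache.whitelist_data()             # curated addon GUID list, or None
top_by_locale = cache.top_addons_per_locale()  # locale -> sorted [guid, weight] pairs, or None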
@ -28,6 +28,7 @@ from shutil import rmtree
|
|||
from subprocess import check_output
|
||||
from sys import exit
|
||||
from tempfile import mkdtemp
|
||||
|
||||
try:
|
||||
from urllib2 import build_opener, HTTPHandler, HTTPSHandler
|
||||
except ImportError:
|
||||
|
@ -40,26 +41,34 @@ except ImportError:
|
|||
|
||||
PACKAGES = [
|
||||
# Pip has no dependencies, as it vendors everything:
|
||||
('https://pypi.python.org/packages/source/p/pip/pip-8.0.2.tar.gz',
|
||||
'46f4bd0d8dfd51125a554568d646fe4200a3c2c6c36b9f2d06d2212148439521'),
|
||||
(
|
||||
"https://pypi.python.org/packages/source/p/pip/pip-8.0.2.tar.gz",
|
||||
"46f4bd0d8dfd51125a554568d646fe4200a3c2c6c36b9f2d06d2212148439521",
|
||||
),
|
||||
# This version of setuptools has only optional dependencies:
|
||||
('https://pypi.python.org/packages/source/s/setuptools/'
|
||||
'setuptools-19.4.tar.gz',
|
||||
'214bf29933f47cf25e6faa569f710731728a07a19cae91ea64f826051f68a8cf'),
|
||||
(
|
||||
"https://pypi.python.org/packages/source/s/setuptools/"
|
||||
"setuptools-19.4.tar.gz",
|
||||
"214bf29933f47cf25e6faa569f710731728a07a19cae91ea64f826051f68a8cf",
|
||||
),
|
||||
# We require Python 2.7 or later because we don't support wheel's
|
||||
# conditional dep on argparse. This version of wheel has no other
|
||||
# dependencies:
|
||||
('https://pypi.python.org/packages/source/w/wheel/wheel-0.26.0.tar.gz',
|
||||
'eaad353805c180a47545a256e6508835b65a8e830ba1093ed8162f19a50a530c')
|
||||
(
|
||||
"https://pypi.python.org/packages/source/w/wheel/wheel-0.26.0.tar.gz",
|
||||
"eaad353805c180a47545a256e6508835b65a8e830ba1093ed8162f19a50a530c",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
class HashError(Exception):
|
||||
def __str__(self):
|
||||
url, path, actual, expected = self.args
|
||||
return ('{url} did not match the expected hash {expected}. Instead, '
|
||||
'it was {actual}. The file (left at {path}) may have been '
|
||||
'tampered with.'.format(**locals()))
|
||||
return (
|
||||
"{url} did not match the expected hash {expected}. Instead, "
|
||||
"it was {actual}. The file (left at {path}) may have been "
|
||||
"tampered with.".format(**locals())
|
||||
)
|
||||
|
||||
|
||||
def hashed_download(url, temp, digest):
|
||||
|
@ -82,9 +91,9 @@ def hashed_download(url, temp, digest):
|
|||
yield chunk
|
||||
|
||||
response = opener().open(url)
|
||||
path = join(temp, urlparse(url).path.split('/')[-1])
|
||||
path = join(temp, urlparse(url).path.split("/")[-1])
|
||||
actual_hash = sha256()
|
||||
with open(path, 'wb') as file:
|
||||
with open(path, "wb") as file:
|
||||
for chunk in read_chunks(response, 4096):
|
||||
file.write(chunk)
|
||||
actual_hash.update(chunk)
|
||||
|
@ -96,13 +105,14 @@ def hashed_download(url, temp, digest):
|
|||
|
||||
|
||||
def main():
|
||||
temp = mkdtemp(prefix='pipstrap-')
|
||||
temp = mkdtemp(prefix="pipstrap-")
|
||||
try:
|
||||
downloads = [hashed_download(url, temp, digest)
|
||||
for url, digest in PACKAGES]
|
||||
check_output('pip install --no-index --no-deps -U ' +
|
||||
' '.join(quote(d) for d in downloads),
|
||||
shell=True)
|
||||
downloads = [hashed_download(url, temp, digest) for url, digest in PACKAGES]
|
||||
check_output(
|
||||
"pip install --no-index --no-deps -U "
|
||||
+ " ".join(quote(d) for d in downloads),
|
||||
shell=True,
|
||||
)
|
||||
except HashError as exc:
|
||||
print(exc)
|
||||
except Exception:
|
||||
|
@ -114,5 +124,5 @@ def main():
|
|||
return 1
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
exit(main())
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
from taar.recommenders.redis_cache import AddonsCoinstallCache
|
||||
from taar.recommenders.redis_cache import TAARCache
|
||||
from taar.context import default_context
|
||||
import click
|
||||
|
||||
|
@ -11,7 +11,7 @@ import click
|
|||
@click.option("--info", is_flag=True, help="Display information about the cache state")
|
||||
def main(reset, load, info):
|
||||
"""
|
||||
Manage the TAARLite redis cache.
|
||||
Manage the TAAR+TAARLite redis cache.
|
||||
|
||||
This expects that the following environment variables are set:
|
||||
|
||||
|
@ -23,7 +23,7 @@ def main(reset, load, info):
|
|||
return
|
||||
|
||||
ctx = default_context()
|
||||
cache = AddonsCoinstallCache(ctx)
|
||||
cache = TAARCache.get_instance(ctx)
|
||||
if reset:
|
||||
if cache.reset():
|
||||
print("Successfully flushed db0 bookkeeping database.")
|
|
@ -83,7 +83,6 @@ dependencies:
|
|||
- markus[datadog]==2.2.0
|
||||
- mock==2.0.0
|
||||
- more-itertools==4.2.0
|
||||
- moto==1.3.14
|
||||
- mozilla-srgutil==0.1.7
|
||||
- mozilla-jsoncache==0.1.7
|
||||
- networkx==2.4
|
||||
|
|
setup.py
|
@ -3,7 +3,7 @@ from setuptools import find_packages, setup
|
|||
setup(
|
||||
name="mozilla-taar3",
|
||||
use_scm_version=False,
|
||||
version="0.7.3",
|
||||
version="0.7.4",
|
||||
setup_requires=["setuptools_scm", "pytest-runner"],
|
||||
tests_require=["pytest"],
|
||||
include_package_data=True,
|
||||
|
@ -29,6 +29,6 @@ setup(
|
|||
[taarapi_app]
|
||||
app=taar.plugin:configure_plugin
|
||||
""",
|
||||
scripts=["bin/taarlite-redis.py"],
|
||||
scripts=["bin/taar-redis.py"],
|
||||
zip_safe=False,
|
||||
)
@ -1,4 +1,4 @@
|
|||
from .profile_fetcher import ProfileFetcher # noqa
|
||||
from .profile_fetcher import ProfileFetcher # noqa
|
||||
import pkg_resources
|
||||
|
||||
__version__ = pkg_resources.require("mozilla-taar3")[0].version
@ -20,8 +20,7 @@ PLUGIN = config("TAAR_API_PLUGIN", default=None)
|
|||
|
||||
|
||||
sentry_sdk.init(
|
||||
dsn=config("SENTRY_DSN", ''),
|
||||
integrations=[FlaskIntegration()],
|
||||
dsn=config("SENTRY_DSN", ""), integrations=[FlaskIntegration()],
|
||||
)
|
||||
|
||||
# There should only be a single registered app for the taar-api
@ -132,9 +132,7 @@ class ProfileFetcher:
|
|||
"locale": profile_data.get("locale", ""),
|
||||
"os": profile_data.get("os", ""),
|
||||
"installed_addons": addon_ids,
|
||||
"disabled_addons_ids": profile_data.get(
|
||||
"disabled_addons_ids", []
|
||||
),
|
||||
"disabled_addons_ids": profile_data.get("disabled_addons_ids", []),
|
||||
"bookmark_count": profile_data.get("places_bookmarks_count", 0),
|
||||
"tab_open_count": profile_data.get(
|
||||
"scalar_parent_browser_engagement_tab_open_event_count", 0
@ -3,38 +3,18 @@
|
|||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
from srgutil.interfaces import IMozLogging
|
||||
from .lazys3 import LazyJSONLoader
|
||||
import numpy as np
|
||||
import operator as op
|
||||
import functools
|
||||
import threading
|
||||
|
||||
from .base_recommender import AbstractRecommender
|
||||
|
||||
from taar.settings import (
|
||||
TAAR_ITEM_MATRIX_BUCKET,
|
||||
TAAR_ITEM_MATRIX_KEY,
|
||||
TAAR_ADDON_MAPPING_BUCKET,
|
||||
TAAR_ADDON_MAPPING_KEY,
|
||||
)
|
||||
from taar.recommenders.redis_cache import TAARCache
|
||||
|
||||
import markus
|
||||
|
||||
metrics = markus.get_metrics("taar")
|
||||
|
||||
|
||||
def synchronized(wrapped):
|
||||
""" Synchronization decorator. """
|
||||
|
||||
@functools.wraps(wrapped)
|
||||
def wrapper(*args, **kwargs):
|
||||
self = args[0]
|
||||
with self._lock:
|
||||
return wrapped(*args, **kwargs)
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
def java_string_hashcode(s):
|
||||
h = 0
|
||||
for c in s:
|
||||
|
@ -58,31 +38,20 @@ class CollaborativeRecommender(AbstractRecommender):
|
|||
def __init__(self, ctx):
|
||||
self._ctx = ctx
|
||||
|
||||
self._lock = threading.RLock()
|
||||
|
||||
self._addon_mapping = LazyJSONLoader(
|
||||
self._ctx,
|
||||
TAAR_ADDON_MAPPING_BUCKET,
|
||||
TAAR_ADDON_MAPPING_KEY,
|
||||
"addon_mapping",
|
||||
)
|
||||
|
||||
self._raw_item_matrix = LazyJSONLoader(
|
||||
self._ctx, TAAR_ITEM_MATRIX_BUCKET, TAAR_ITEM_MATRIX_KEY, "item_matrix",
|
||||
)
|
||||
|
||||
self.logger = self._ctx[IMozLogging].get_logger("taar")
|
||||
|
||||
self._redis_cache = TAARCache.get_instance(self._ctx)
|
||||
|
||||
self.model = None
|
||||
|
||||
@property
|
||||
def addon_mapping(self):
|
||||
return self._addon_mapping.get()[0]
|
||||
return self._redis_cache.collab_addon_mapping()
|
||||
|
||||
@property
|
||||
def raw_item_matrix(self):
|
||||
val, new_copy = self._raw_item_matrix.get()
|
||||
if val is not None and new_copy:
|
||||
val = self._redis_cache.collab_raw_item_matrix()
|
||||
if val not in (None, ""):
|
||||
# Build a dense numpy matrix out of it.
|
||||
num_rows = len(val)
|
||||
num_cols = len(val[0]["features"])
|
||||
|
@ -90,27 +59,10 @@ class CollaborativeRecommender(AbstractRecommender):
|
|||
self.model = np.zeros(shape=(num_rows, num_cols))
|
||||
for index, row in enumerate(val):
|
||||
self.model[index, :] = row["features"]
|
||||
elif val is None and new_copy:
|
||||
else:
|
||||
self.model = None
|
||||
return val
|
||||
|
||||
def _load_json_models(self):
|
||||
# Download the addon mappings.
|
||||
if self.addon_mapping is None:
|
||||
self.logger.error(
|
||||
"Cannot download the addon mapping file {} {}".format(
|
||||
TAAR_ADDON_MAPPING_BUCKET, TAAR_ADDON_MAPPING_KEY
|
||||
)
|
||||
)
|
||||
|
||||
if self.addon_mapping is None:
|
||||
self.logger.error(
|
||||
"Cannot download the model file {} {}".format(
|
||||
TAAR_ITEM_MATRIX_BUCKET, TAAR_ITEM_MATRIX_KEY
|
||||
)
|
||||
)
|
||||
|
||||
@synchronized
|
||||
def can_recommend(self, client_data, extra_data={}):
|
||||
# We can't recommend if we don't have our data files.
|
||||
if (
|
||||
|
@ -178,22 +130,18 @@ class CollaborativeRecommender(AbstractRecommender):
|
|||
@metrics.timer_decorator("collaborative_recommend")
|
||||
def recommend(self, client_data, limit, extra_data={}):
|
||||
# Addons identifiers are stored as positive hash values within the model.
|
||||
with self._lock:
|
||||
try:
|
||||
recommendations = self._recommend(client_data, limit, extra_data)
|
||||
except Exception as e:
|
||||
recommendations = []
|
||||
try:
|
||||
recommendations = self._recommend(client_data, limit, extra_data)
|
||||
except Exception as e:
|
||||
recommendations = []
|
||||
|
||||
self._addon_mapping.force_expiry()
|
||||
self._raw_item_matrix.force_expiry()
|
||||
|
||||
metrics.incr("error_collaborative", value=1)
|
||||
self.logger.exception(
|
||||
"Collaborative recommender crashed for {}".format(
|
||||
client_data.get("client_id", "no-client-id")
|
||||
),
|
||||
e,
|
||||
)
|
||||
metrics.incr("error_collaborative", value=1)
|
||||
self.logger.exception(
|
||||
"Collaborative recommender crashed for {}".format(
|
||||
client_data.get("client_id", "no-client-id")
|
||||
),
|
||||
e,
|
||||
)
|
||||
|
||||
log_data = (
|
||||
client_data["client_id"],
|
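The synchronized decorator retained above simply runs the wrapped method under the instance's _lock. A small self-contained usage sketch follows; the Counter class is illustrative only, not part of TAAR.

# Illustrative only: shows how the synchronized decorator in this file is meant
# to be used. Any method it wraps must belong to an object exposing self._lock.
import functools
import threading

def synchronized(wrapped):
    @functools.wraps(wrapped)
    def wrapper(*args, **kwargs):
        self = args[0]
        with self._lock:
            return wrapped(*args, **kwargs)
    return wrapper

class Counter:
    def __init__(self):
        self._lock = threading.RLock()
        self.value = 0

    @synchronized
    def bump(self):
        # Only one thread at a time can execute this body.
        self.value += 1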
@ -5,16 +5,9 @@
|
|||
from srgutil.interfaces import IMozLogging
|
||||
import itertools
|
||||
from .base_recommender import AbstractRecommender
|
||||
from .lazys3 import LazyJSONLoader
|
||||
|
||||
from taar.settings import (
|
||||
TAAR_WHITELIST_BUCKET,
|
||||
TAAR_WHITELIST_KEY,
|
||||
TAAR_ENSEMBLE_BUCKET,
|
||||
TAAR_ENSEMBLE_KEY,
|
||||
)
|
||||
|
||||
from taar.utils import hasher
|
||||
from taar.recommenders.redis_cache import TAARCache
|
||||
|
||||
import markus
|
||||
|
||||
|
@ -27,18 +20,6 @@ def is_test_client(client_id):
|
|||
return len(set(client_id.replace("-", ""))) == 1
|
||||
|
||||
|
||||
class WeightCache:
|
||||
def __init__(self, ctx):
|
||||
self._ctx = ctx
|
||||
|
||||
self._weights = LazyJSONLoader(
|
||||
self._ctx, TAAR_ENSEMBLE_BUCKET, TAAR_ENSEMBLE_KEY, "ensemble"
|
||||
)
|
||||
|
||||
def getWeights(self):
|
||||
return self._weights.get()[0]["ensemble_weights"]
|
||||
|
||||
|
||||
class EnsembleRecommender(AbstractRecommender):
|
||||
"""
|
||||
The EnsembleRecommender is a collection of recommenders where the
|
||||
|
@ -50,12 +31,17 @@ class EnsembleRecommender(AbstractRecommender):
|
|||
def __init__(self, ctx):
|
||||
self.RECOMMENDER_KEYS = ["collaborative", "similarity", "locale"]
|
||||
self._ctx = ctx
|
||||
|
||||
self._redis_cache = TAARCache.get_instance(self._ctx)
|
||||
self.logger = self._ctx[IMozLogging].get_logger("taar.ensemble")
|
||||
|
||||
assert "recommender_factory" in self._ctx
|
||||
|
||||
self._init_from_ctx()
|
||||
|
||||
def getWeights(self):
|
||||
return self._redis_cache.ensemble_weights()
|
||||
|
||||
def _init_from_ctx(self):
|
||||
# Copy the map of the recommenders
|
||||
self._recommender_map = {}
|
||||
|
@ -64,11 +50,6 @@ class EnsembleRecommender(AbstractRecommender):
|
|||
for rkey in self.RECOMMENDER_KEYS:
|
||||
self._recommender_map[rkey] = recommender_factory.create(rkey)
|
||||
|
||||
self._whitelist_data = LazyJSONLoader(
|
||||
self._ctx, TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY, "whitelist"
|
||||
)
|
||||
|
||||
self._weight_cache = WeightCache(self._ctx.child())
|
||||
self.logger.info("EnsembleRecommender initialized")
|
||||
|
||||
def can_recommend(self, client_data, extra_data={}):
|
||||
|
@ -88,7 +69,7 @@ class EnsembleRecommender(AbstractRecommender):
|
|||
client_id = client_data.get("client_id", "no-client-id")
|
||||
|
||||
if is_test_client(client_id):
|
||||
whitelist = self._whitelist_data.get()[0]
|
||||
whitelist = self._redis_cache.whitelist_data()
|
||||
samples = whitelist[:limit]
|
||||
self.logger.info("Test ID detected [{}]".format(client_id))
|
||||
|
||||
|
@ -102,7 +83,6 @@ class EnsembleRecommender(AbstractRecommender):
|
|||
results = self._recommend(client_data, limit, extra_data)
|
||||
except Exception as e:
|
||||
results = []
|
||||
self._weight_cache._weights.force_expiry()
|
||||
self.logger.exception(
|
||||
"Ensemble recommender crashed for {}".format(client_id), e
|
||||
)
|
||||
|
@ -130,7 +110,7 @@ class EnsembleRecommender(AbstractRecommender):
|
|||
extended_limit = limit + len(preinstalled_addon_ids)
|
||||
|
||||
flattened_results = []
|
||||
ensemble_weights = self._weight_cache.getWeights()
|
||||
ensemble_weights = self._redis_cache.ensemble_weights()
|
||||
|
||||
for rkey in self.RECOMMENDER_KEYS:
|
||||
recommender = self._recommender_map[rkey]
|
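For context, the ensemble weights fetched from the cache act as per-recommender scaling factors. The _recommend body that applies them is outside this diff, so the combination step below is only a rough sketch with assumed names and data shapes, not the actual TAAR implementation.

# Assumed shapes: weights like {"collaborative": 0.7, "similarity": 1.2, "locale": 0.9}
# and each sub-recommender returning a list of (guid, score) pairs.
from collections import defaultdict

def combine(per_recommender_results, ensemble_weights, limit):
    scores = defaultdict(float)
    for rkey, results in per_recommender_results.items():
        weight = ensemble_weights.get(rkey, 0.0)
        for guid, score in results:
            # Amplify or dampen each recommender's score by its ensemble weight.
            scores[guid] += weight * score
    ranked = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
    return ranked[:limit]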
@ -10,7 +10,7 @@ from srgutil.interfaces import IMozLogging
|
|||
|
||||
import markus
|
||||
|
||||
from taar.recommenders.redis_cache import AddonsCoinstallCache
|
||||
from taar.recommenders.redis_cache import TAARCache
|
||||
|
||||
|
||||
metrics = markus.get_metrics("taar")
|
||||
|
@ -76,7 +76,7 @@ class GuidBasedRecommender:
|
|||
self._ctx = ctx
|
||||
self.logger = self._ctx[IMozLogging].get_logger("taarlite")
|
||||
|
||||
self._redis_cache = AddonsCoinstallCache(self._ctx)
|
||||
self._redis_cache = TAARCache.get_instance(self._ctx)
|
||||
self.logger.info("GUIDBasedRecommender is initialized")
|
||||
|
||||
def cache_ready(self):
@ -1,178 +0,0 @@
|
|||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
from .base_recommender import AbstractRecommender
|
||||
from .lazys3 import LazyJSONLoader
|
||||
from srgutil.interfaces import IMozLogging
|
||||
import operator as op
|
||||
import random
|
||||
|
||||
from taar.settings import TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY
|
||||
|
||||
import markus
|
||||
|
||||
metrics = markus.get_metrics("taar")
|
||||
|
||||
|
||||
class CuratedWhitelistCache:
|
||||
"""
|
||||
This fetches the curated whitelist from S3.
|
||||
"""
|
||||
|
||||
def __init__(self, ctx):
|
||||
self._ctx = ctx
|
||||
self._data = LazyJSONLoader(
|
||||
self._ctx, TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY, "whitelist",
|
||||
)
|
||||
|
||||
def get_whitelist(self):
|
||||
return self._data.get()[0]
|
||||
|
||||
def get_randomized_guid_sample(self, item_count):
|
||||
""" Fetch a subset of randomzied GUIDs from the whitelist """
|
||||
dataset = self.get_whitelist()
|
||||
random.shuffle(dataset)
|
||||
return dataset[:item_count]
|
||||
|
||||
|
||||
class CuratedRecommender(AbstractRecommender):
|
||||
"""
|
||||
The curated recommender just delegates to the whitelist
|
||||
that is provided by the AMO team.
|
||||
|
||||
This recommender simply provides a randomized sample of
|
||||
pre-approved addons for recommendation. It does not use any other
|
||||
external data to generate recommendations, nor does it use any
|
||||
information from the Firefox agent.
|
||||
"""
|
||||
|
||||
def __init__(self, ctx):
|
||||
self._ctx = ctx
|
||||
|
||||
self.logger = self._ctx[IMozLogging].get_logger("taar.curated")
|
||||
self._curated_wl = CuratedWhitelistCache(self._ctx)
|
||||
|
||||
def can_recommend(self, client_data, extra_data={}):
|
||||
"""The Curated recommender will always be able to recommend
|
||||
something"""
|
||||
self.logger.info("Curated can_recommend: {}".format(True))
|
||||
return True
|
||||
|
||||
@metrics.timer_decorator("hybrid_recommend")
|
||||
def recommend(self, client_data, limit, extra_data={}):
|
||||
"""
|
||||
Curated recommendations are just random selections
|
||||
from the whitelist and we explicitly set the weighting to 1.0
|
||||
"""
|
||||
guids = self._curated_wl.get_randomized_guid_sample(limit)
|
||||
|
||||
results = [(guid, 1.0) for guid in guids]
|
||||
|
||||
log_data = (client_data["client_id"], str(guids))
|
||||
self.logger.info(
|
||||
"Curated recommendations client_id: [%s], guids: [%s]" % log_data
|
||||
)
|
||||
return results
|
||||
|
||||
|
||||
class HybridRecommender(AbstractRecommender):
|
||||
"""
|
||||
The EnsembleRecommender is a collection of recommenders where the
|
||||
results from each recommendation is amplified or dampened by a
|
||||
factor. The aggregate results are combined and used to recommend
|
||||
addons for users.
|
||||
"""
|
||||
|
||||
def __init__(self, ctx):
|
||||
self._ctx = ctx
|
||||
|
||||
self.logger = self._ctx[IMozLogging].get_logger("taar")
|
||||
|
||||
self._ensemble_recommender = self._ctx["ensemble_recommender"]
|
||||
self._curated_recommender = CuratedRecommender(self._ctx.child())
|
||||
|
||||
def can_recommend(self, client_data, extra_data={}):
|
||||
"""The ensemble recommender is always going to be
|
||||
available if at least one recommender is available"""
|
||||
ensemble_recommend = self._ensemble_recommender.can_recommend(
|
||||
client_data, extra_data
|
||||
)
|
||||
curated_recommend = self._curated_recommender.can_recommend(
|
||||
client_data, extra_data
|
||||
)
|
||||
result = ensemble_recommend and curated_recommend
|
||||
self.logger.info("Hybrid can_recommend: {}".format(result))
|
||||
return result
|
||||
|
||||
def recommend(self, client_data, limit, extra_data={}):
|
||||
"""
|
||||
Hybrid recommendations simply select half recommendations from
|
||||
the ensemble recommender, and half from the curated one.
|
||||
|
||||
Duplicate recommendations are accommodated by rank ordering
|
||||
by weight.
|
||||
"""
|
||||
|
||||
preinstalled_addon_ids = client_data.get("installed_addons", [])
|
||||
|
||||
# Compute an extended limit by adding the length of
|
||||
# the list of any preinstalled addons.
|
||||
extended_limit = limit + len(preinstalled_addon_ids)
|
||||
|
||||
ensemble_suggestions = self._ensemble_recommender.recommend(
|
||||
client_data, extended_limit, extra_data
|
||||
)
|
||||
curated_suggestions = self._curated_recommender.recommend(
|
||||
client_data, extended_limit, extra_data
|
||||
)
|
||||
|
||||
# Generate a set of results from each of the composite
|
||||
# recommenders. We select one item from each recommender
|
||||
# sequentially so that we do not bias one recommender over the
|
||||
# other.
|
||||
merged_results = set()
|
||||
|
||||
while (
|
||||
len(merged_results) < limit
|
||||
and len(ensemble_suggestions) > 0
|
||||
and len(curated_suggestions) > 0
|
||||
):
|
||||
|
||||
r1 = ensemble_suggestions.pop()
|
||||
if r1[0] not in [temp[0] for temp in merged_results]:
|
||||
merged_results.add(r1)
|
||||
|
||||
# Terminate early if we have an odd number for the limit
|
||||
if not (
|
||||
len(merged_results) < limit
|
||||
and len(ensemble_suggestions) > 0
|
||||
and len(curated_suggestions) > 0
|
||||
):
|
||||
break
|
||||
|
||||
r2 = curated_suggestions.pop()
|
||||
if r2[0] not in [temp[0] for temp in merged_results]:
|
||||
merged_results.add(r2)
|
||||
|
||||
if len(merged_results) < limit:
|
||||
msg = (
|
||||
"Defaulting to empty results. Insufficient recommendations found for client: %s"
|
||||
% client_data["client_id"]
|
||||
)
|
||||
self.logger.info(msg)
|
||||
return []
|
||||
|
||||
sorted_results = sorted(
|
||||
list(merged_results), key=op.itemgetter(1), reverse=True
|
||||
)
|
||||
|
||||
log_data = (
|
||||
client_data["client_id"],
|
||||
str([r[0] for r in sorted_results]),
|
||||
)
|
||||
|
||||
self.logger.info(
|
||||
"Hybrid recommendations client_id: [%s], guids: [%s]" % log_data
|
||||
)
|
||||
return sorted_results
|
|
@ -1,139 +0,0 @@
|
|||
import boto3
|
||||
from botocore.client import Config
|
||||
|
||||
from srgutil.interfaces import IMozLogging, IClock
|
||||
import json
|
||||
import threading
|
||||
import time
|
||||
|
||||
import markus
|
||||
|
||||
|
||||
metrics = markus.get_metrics("taar")
|
||||
|
||||
|
||||
class LazyJSONLoader:
|
||||
def __init__(self, ctx, s3_bucket, s3_key, metric_name="", ttl=14400):
|
||||
self._ctx = ctx
|
||||
self.logger = self._ctx[IMozLogging].get_logger("taar")
|
||||
self._clock = self._ctx[IClock]
|
||||
|
||||
self._s3_bucket = s3_bucket
|
||||
self._s3_key = s3_key
|
||||
self._metric_name = metric_name
|
||||
self._ttl = int(ttl)
|
||||
self._expiry_time = 0
|
||||
|
||||
self._key_str = "{}|{}".format(self._s3_bucket, self._s3_key)
|
||||
|
||||
self._cached_copy = None
|
||||
msg = "Cache expiry of {} is set to TTL of {} seconds".format(
|
||||
self._key_str, self._ttl
|
||||
)
|
||||
self.logger.info(msg)
|
||||
|
||||
self._lock = threading.RLock()
|
||||
|
||||
self.logger.info("{} loader is initialized".format(self._key_str))
|
||||
|
||||
def force_expiry(self):
|
||||
msg = "Existing model for {} reset to 0. Model was:".format(
|
||||
self._key_str, str(self._cached_copy)
|
||||
)
|
||||
self.logger.info(msg)
|
||||
self._expiry_time = 0
|
||||
|
||||
def has_expired(self):
|
||||
return self._clock.time() > self._expiry_time
|
||||
|
||||
def get(self, transform=None):
|
||||
"""
|
||||
Return the JSON defined at the S3 location in the constructor.
|
||||
|
||||
The get method will reload the S3 object after the TTL has
|
||||
expired.
|
||||
Fetch the JSON object from cache or S3 if necessary
|
||||
"""
|
||||
if not self.has_expired() and self._cached_copy is not None:
|
||||
return self._cached_copy, False
|
||||
|
||||
return self._refresh_cache(transform), True
|
||||
|
||||
def _refresh_cache(self, transform=None):
|
||||
|
||||
with self._lock:
|
||||
# If some requests get stale data while the S3 bucket is
|
||||
# being reloaded - it's not the end of the world.
|
||||
#
|
||||
# Likewise when the TTL expires, it's possible for
|
||||
# multiple threads to concurrently lock and update the
|
||||
# cache. Again - not world ending.
|
||||
#
|
||||
# Immediately update the expiry time as we don't want other
|
||||
# threads to wait on the lock while we update the
|
||||
# cached_copy
|
||||
#
|
||||
self._expiry_time = self._clock.time() + self._ttl
|
||||
|
||||
raw_data = None
|
||||
raw_bytes = None
|
||||
|
||||
try:
|
||||
# We need to force a data reload from S3
|
||||
config = Config(connect_timeout=10, retries={"max_attempts": 3})
|
||||
s3 = boto3.resource("s3", config=config)
|
||||
|
||||
start_load = time.time()
|
||||
raw_bytes = (
|
||||
s3.Object(self._s3_bucket, self._s3_key).get()["Body"].read()
|
||||
)
|
||||
end_load = time.time()
|
||||
load_time = end_load - start_load
|
||||
|
||||
raw_data = raw_bytes.decode("utf-8")
|
||||
|
||||
msg = "Loaded S3: {}. Byte count: {:d}. Time to Load: {:0.3f}"
|
||||
msg_params = self._key_str, len(raw_bytes), load_time
|
||||
self.logger.info(msg.format(*msg_params))
|
||||
|
||||
# It is possible to have corrupted files in S3, so
|
||||
# protect against that.
|
||||
try:
|
||||
tmp = json.loads(raw_data)
|
||||
if transform is not None:
|
||||
tmp = transform(tmp)
|
||||
self._cached_copy = tmp
|
||||
metrics.timing(
|
||||
self._metric_name,
|
||||
value=load_time * 1000,
|
||||
tags=[
|
||||
f"store:s3",
|
||||
f"bucket:{self._s3_bucket}",
|
||||
f"key:{self._s3_key}",
|
||||
],
|
||||
)
|
||||
except ValueError:
|
||||
# In the event of an error, we want to try to reload
|
||||
# the data so force the expiry to 0, but leave the
|
||||
# existing cached data alone so we can still service
|
||||
# requests.
|
||||
self._expiry_time = 0
|
||||
|
||||
self.logger.error(
|
||||
"Cannot parse JSON resource from S3",
|
||||
extra={"bucket": self._s3_bucket, "key": self._s3_key},
|
||||
)
|
||||
|
||||
return self._cached_copy
|
||||
except Exception:
|
||||
# In the event of an error, we want to try to reload
|
||||
# the data so force the expiry to 0, but leave the
|
||||
# existing cached data alone so we can still service
|
||||
# requests.
|
||||
self._expiry_time = 0
|
||||
|
||||
self.logger.exception(
|
||||
"Failed to download from S3",
|
||||
extra={"bucket": self._s3_bucket, "key": self._s3_key},
|
||||
)
|
||||
return self._cached_copy
|
|
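The deleted LazyJSONLoader implemented a thread-safe TTL cache over S3 JSON objects. Stripped of the S3, metrics, and logging details, the expiry pattern it used amounts to the condensed sketch below; fetch stands in for the S3 download and JSON parse.

# Condensed restatement of the expiry pattern from the deleted LazyJSONLoader.
import threading
import time

class TTLCache:
    def __init__(self, fetch, ttl=14400):
        self._fetch = fetch
        self._ttl = ttl
        self._expiry_time = 0
        self._cached_copy = None
        self._lock = threading.RLock()

    def force_expiry(self):
        self._expiry_time = 0

    def has_expired(self):
        return time.time() > self._expiry_time

    def get(self):
        # Returns (value, refreshed_flag), matching the old loader's contract.
        if not self.has_expired() and self._cached_copy is not None:
            return self._cached_copy, False
        with self._lock:
            # Bump the expiry first so other threads don't queue on the lock.
            self._expiry_time = time.time() + self._ttl
            self._cached_copy = self._fetch()
        return self._cached_copy, True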
@ -2,14 +2,13 @@
|
|||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
from srgutil.interfaces import IMozLogging
|
||||
from .base_recommender import AbstractRecommender
|
||||
from .lazys3 import LazyJSONLoader
|
||||
|
||||
from taar.settings import TAAR_LOCALE_BUCKET, TAAR_LOCALE_KEY
|
||||
|
||||
import markus
|
||||
|
||||
from srgutil.interfaces import IMozLogging
|
||||
|
||||
from .base_recommender import AbstractRecommender
|
||||
from taar.recommenders.redis_cache import TAARCache
|
||||
|
||||
metrics = markus.get_metrics("taar")
|
||||
|
||||
|
||||
|
@ -29,27 +28,12 @@ class LocaleRecommender(AbstractRecommender):
|
|||
|
||||
self.logger = self._ctx[IMozLogging].get_logger("taar")
|
||||
|
||||
self._top_addons_per_locale = LazyJSONLoader(
|
||||
self._ctx, TAAR_LOCALE_BUCKET, TAAR_LOCALE_KEY, "locale"
|
||||
)
|
||||
|
||||
self._init_from_ctx()
|
||||
self._redis_cache = TAARCache.get_instance(self._ctx)
|
||||
|
||||
# DONE removed
|
||||
@property
|
||||
def top_addons_per_locale(self):
|
||||
def presort_locale(data):
|
||||
result = {}
|
||||
for locale, guid_list in data.items():
|
||||
result[locale] = sorted(guid_list, key=lambda x: x[1], reverse=True)
|
||||
return result
|
||||
|
||||
return self._top_addons_per_locale.get(transform=presort_locale)[0]
|
||||
|
||||
def _init_from_ctx(self):
|
||||
if self.top_addons_per_locale is None:
|
||||
self.logger.error(
|
||||
"Cannot download the top per locale file {}".format(TAAR_LOCALE_KEY)
|
||||
)
|
||||
return self._redis_cache.top_addons_per_locale()
|
||||
|
||||
def can_recommend(self, client_data, extra_data={}):
|
||||
# We can't recommend if we don't have our data files.
|
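The presort step dropped from this recommender (and re-created in the cache's _update_locale_data later in this diff) orders each locale's GUID list by weight, descending. A small worked example:

# Worked example of the presort applied to the locale data.
data = {"en-US": [["guid-a", 0.1], ["guid-b", 0.7], ["guid-c", 0.4]]}

result = {}
for locale, guid_list in data.items():
    result[locale] = sorted(guid_list, key=lambda x: x[1], reverse=True)

# result == {"en-US": [["guid-b", 0.7], ["guid-c", 0.4], ["guid-a", 0.1]]}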
@ -8,13 +8,9 @@ from taar.recommenders.ensemble_recommender import (
|
|||
)
|
||||
from taar.recommenders.randomizer import in_experiment, reorder_guids
|
||||
from srgutil.interfaces import IMozLogging
|
||||
from .lazys3 import LazyJSONLoader
|
||||
from taar.recommenders.redis_cache import TAARCache
|
||||
|
||||
from taar.settings import (
|
||||
TAAR_WHITELIST_BUCKET,
|
||||
TAAR_WHITELIST_KEY,
|
||||
TAAR_EXPERIMENT_PROB,
|
||||
)
|
||||
from taar.settings import TAAR_EXPERIMENT_PROB
|
||||
|
||||
import markus
|
||||
|
||||
|
@ -61,9 +57,7 @@ class RecommendationManager:
|
|||
|
||||
# The whitelist data is only used for test client IDs
|
||||
|
||||
self._whitelist_data = LazyJSONLoader(
|
||||
self._ctx, TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY, "whitelist"
|
||||
)
|
||||
self._redis_cache = TAARCache.get_instance(self._ctx)
|
||||
|
||||
self._experiment_prob = ctx.get("TAAR_EXPERIMENT_PROB", TAAR_EXPERIMENT_PROB)
|
||||
|
||||
|
@ -98,7 +92,7 @@ class RecommendationManager:
|
|||
# Fetch back all possible whitelisted addons for this
|
||||
# client
|
||||
extra_data["guid_randomization"] = True
|
||||
whitelist = self._whitelist_data.get()[0]
|
||||
whitelist = self._redis_cache.whitelist_data()
|
||||
results = self._ensemble_recommender.recommend(
|
||||
client_info, len(whitelist), extra_data
|
||||
)
|
||||
|
|
|
@ -8,39 +8,103 @@ import threading
|
|||
import redis
|
||||
import numpy as np
|
||||
from srgutil.interfaces import IMozLogging
|
||||
|
||||
from taar.settings import (
|
||||
REDIS_HOST,
|
||||
REDIS_PORT,
|
||||
)
|
||||
|
||||
|
||||
# TAARLite configuration
|
||||
from taar.settings import (
|
||||
TAARLITE_GUID_COINSTALL_BUCKET,
|
||||
TAARLITE_GUID_COINSTALL_KEY,
|
||||
TAARLITE_GUID_RANKING_KEY,
|
||||
TAARLITE_TTL,
|
||||
TAARLITE_TRUNCATE,
|
||||
TAARLITE_MUTEX_TTL,
|
||||
)
|
||||
|
||||
# TAAR configuration
|
||||
from taar.settings import (
|
||||
# Locale
|
||||
TAAR_LOCALE_BUCKET,
|
||||
TAAR_LOCALE_KEY,
|
||||
# Collaborative data
|
||||
TAAR_ADDON_MAPPING_BUCKET,
|
||||
TAAR_ADDON_MAPPING_KEY,
|
||||
TAAR_ITEM_MATRIX_BUCKET,
|
||||
TAAR_ITEM_MATRIX_KEY,
|
||||
# Similarity data
|
||||
TAAR_SIMILARITY_BUCKET,
|
||||
TAAR_SIMILARITY_DONOR_KEY,
|
||||
TAAR_SIMILARITY_LRCURVES_KEY,
|
||||
# Ensemble data
|
||||
TAAR_ENSEMBLE_BUCKET,
|
||||
TAAR_ENSEMBLE_KEY,
|
||||
# Whitelist data
|
||||
TAAR_WHITELIST_BUCKET,
|
||||
TAAR_WHITELIST_KEY,
|
||||
)
|
||||
|
||||
from jsoncache.loader import s3_json_loader
|
||||
|
||||
|
||||
# This marks which of the redis databases is currently
|
||||
# active for read
|
||||
ACTIVE_DB = "active_db"
|
||||
|
||||
# This is a mutex to block multiple writers from redis
|
||||
UPDATE_CHECK = "update_mutex|"
|
||||
|
||||
|
||||
# taarlite guid->guid coinstallation matrix
|
||||
COINSTALL_PREFIX = "coinstall|"
|
||||
|
||||
# taarlite guid->guid coinstallation matrix filtered by
# minimum installation thresholds
|
||||
FILTERED_COINSTALL_PREFIX = "filtered_coinstall|"
|
||||
|
||||
# taarlite ranking data
|
||||
RANKING_PREFIX = "ranking|"
|
||||
|
||||
# taarlite minimum installation threshold
|
||||
MIN_INSTALLS_PREFIX = "min_installs|"
|
||||
|
||||
# This is a map is guid->sum of coinstall counts
|
||||
# taarlite map of guid->(sum of coinstall counts)
|
||||
NORMDATA_COUNT_MAP_PREFIX = "normdata_count_map_prefix|"
|
||||
|
||||
# Capture the number of times a GUID shows up per row
|
||||
# taarlite number of times a GUID shows up per row
|
||||
# of coinstallation data.
|
||||
NORMDATA_ROWCOUNT_PREFIX = "normdata_rowcount_prefix|"
|
||||
|
||||
# taarlite row normalization data
|
||||
NORMDATA_GUID_ROW_NORM_PREFIX = "normdata_guid_row_norm_prefix|"
|
||||
|
||||
|
||||
# TAAR: Locale data
|
||||
LOCALE_DATA = "taar_locale_data|"
|
||||
|
||||
# TAAR: collaborative data
|
||||
COLLAB_MAPPING_DATA = "taar_collab_mapping|"
|
||||
COLLAB_ITEM_MATRIX = "taar_collab_item_matrix|"
|
||||
|
||||
# TAAR: similarity data
|
||||
SIMILARITY_DONORS = "taar_similarity_donors|"
|
||||
SIMILARITY_LRCURVES = "taar_similarity_lrcurves|"
|
||||
|
||||
# TAAR: similarity preprocessed data
|
||||
SIMILARITY_NUM_DONORS = "taar_similarity_num_donors|"
|
||||
SIMILARITY_CONTINUOUS_FEATURES = "taar_similarity_continuous_features|"
|
||||
SIMILARITY_CATEGORICAL_FEATURES = "taar_similarity_categorical_features|"
|
||||
|
||||
# TAAR: ensemble weights
|
||||
|
||||
ENSEMBLE_WEIGHTS = "taar_ensemble_weights|"
|
||||
|
||||
# TAAR: whitelist data
|
||||
WHITELIST_DATA = "taar_whitelist_data|"
|
||||
|
||||
|
||||
class PrefixStripper:
|
||||
def __init__(self, prefix, iterator, cast_to_str=False):
|
||||
self._prefix = prefix
|
||||
|
@ -58,19 +122,42 @@ class PrefixStripper:
|
|||
return result
|
||||
|
||||
|
||||
class AddonsCoinstallCache:
|
||||
class TAARCache:
|
||||
"""
|
||||
This class manages a redis instance to hold onto the taar-lite
|
||||
GUID->GUID co-installation data
|
||||
"""
|
||||
|
||||
def __init__(self, ctx, ttl=TAARLITE_TTL):
|
||||
_instance = None
|
||||
|
||||
@classmethod
|
||||
def get_instance(cls, ctx):
|
||||
if cls._instance is None:
|
||||
cls._instance = TAARCache(ctx, i_didnt_read_the_docs=False)
|
||||
return cls._instance
|
||||
|
||||
def __init__(self, ctx, i_didnt_read_the_docs=True):
|
||||
"""
|
||||
Don't call this directly - use get_instance instead
|
||||
"""
|
||||
if i_didnt_read_the_docs:
|
||||
raise RuntimeError(
|
||||
"You cannot call this method directly - use get_instance"
|
||||
)
|
||||
|
||||
self._ctx = ctx
|
||||
self._last_db = None
|
||||
self.logger = self._ctx[IMozLogging].get_logger("taar")
|
||||
|
||||
self._ttl = ttl
|
||||
# Keep an integer handle (or None) on the last known database
|
||||
self._last_db = None
|
||||
|
||||
self._similarity_num_donors = 0
|
||||
self._similarity_continuous_features = None
|
||||
self._similarity_categorical_features = None
|
||||
|
||||
rcon = self.init_redis_connections()
|
||||
|
||||
self._r0 = rcon[0]
|
||||
self._r1 = rcon[1]
|
||||
self._r2 = rcon[2]
|
||||
|
@ -136,9 +223,6 @@ class AddonsCoinstallCache:
|
|||
self._r0.delete(UPDATE_CHECK)
|
||||
self.logger.info("UPDATE_CHECK field is cleared")
|
||||
|
||||
def fetch_ranking_data(self):
|
||||
return s3_json_loader(TAARLITE_GUID_COINSTALL_BUCKET, TAARLITE_GUID_RANKING_KEY)
|
||||
|
||||
def guid_maps_count_map(self, guid, default=None):
|
||||
tmp = self._db().get(NORMDATA_COUNT_MAP_PREFIX + guid)
|
||||
if tmp:
|
||||
|
@ -167,11 +251,6 @@ class AddonsCoinstallCache:
|
|||
return 0
|
||||
return float(result.decode("utf8"))
|
||||
|
||||
def fetch_coinstall_data(self):
|
||||
return s3_json_loader(
|
||||
TAARLITE_GUID_COINSTALL_BUCKET, TAARLITE_GUID_COINSTALL_KEY
|
||||
)
|
||||
|
||||
def get_filtered_coinstall(self, guid, default=None):
|
||||
tmp = self._db().get(FILTERED_COINSTALL_PREFIX + guid)
|
||||
if tmp:
|
||||
|
@ -224,7 +303,92 @@ class AddonsCoinstallCache:
|
|||
# Any value in ACTIVE_DB indicates that data is live
|
||||
return self._r0.get(ACTIVE_DB) is not None
|
||||
|
||||
# Private methods below
|
||||
def top_addons_per_locale(self):
|
||||
"""
|
||||
Get locale data
|
||||
"""
|
||||
tmp = self._db().get(LOCALE_DATA)
|
||||
if tmp:
|
||||
return json.loads(tmp.decode("utf8"))
|
||||
return None
|
||||
|
||||
def collab_raw_item_matrix(self):
|
||||
"""
|
||||
Get the taar collaborative item matrix
|
||||
"""
|
||||
tmp = self._db().get(COLLAB_ITEM_MATRIX)
|
||||
if tmp:
|
||||
return json.loads(tmp.decode("utf8"))
|
||||
return None
|
||||
|
||||
def collab_addon_mapping(self):
|
||||
"""
|
||||
Get the taar collaborative addon mapping
|
||||
"""
|
||||
tmp = self._db().get(COLLAB_MAPPING_DATA)
|
||||
if tmp:
|
||||
return json.loads(tmp.decode("utf8"))
|
||||
return None
|
||||
|
||||
def similarity_donors(self):
|
||||
"""
|
||||
Get the taar similarity donors
|
||||
"""
|
||||
tmp = self._db().get(SIMILARITY_DONORS)
|
||||
if tmp:
|
||||
return json.loads(tmp.decode("utf8"))
|
||||
return None
|
||||
|
||||
def similarity_lrcurves(self):
|
||||
"""
|
||||
Get the taar similarity lrcurves
|
||||
"""
|
||||
tmp = self._db().get(SIMILARITY_LRCURVES)
|
||||
if tmp:
|
||||
return json.loads(tmp.decode("utf8"))
|
||||
return None
|
||||
|
||||
def similarity_continuous_features(self):
|
||||
"""
|
||||
precomputed similarity recommender continuous features cache
|
||||
"""
|
||||
_ = self._db() # make sure we've computed data from the live redis instance
|
||||
return self._similarity_continuous_features
|
||||
|
||||
def similarity_categorical_features(self):
|
||||
"""
|
||||
precomputed similarity recommender categorical features cache
|
||||
"""
|
||||
_ = self._db() # make sure we've computed data from the live redis instance
|
||||
return self._similarity_categorical_features
|
||||
|
||||
@property
|
||||
def similarity_num_donors(self):
|
||||
"""
|
||||
precomputed similarity recommender categorical features cache
|
||||
"""
|
||||
_ = self._db() # make sure we've computed data from the live redis instance
|
||||
return self._similarity_num_donors
|
||||
|
||||
def ensemble_weights(self):
|
||||
tmp = self._db().get(ENSEMBLE_WEIGHTS)
|
||||
if tmp:
|
||||
return json.loads(tmp)
|
||||
return None
|
||||
|
||||
def whitelist_data(self):
|
||||
tmp = self._db().get(WHITELIST_DATA)
|
||||
if tmp:
|
||||
return json.loads(tmp)
|
||||
return None
|
||||
|
||||
"""
|
||||
|
||||
################################
|
||||
|
||||
Private methods below
|
||||
|
||||
"""
|
||||
|
||||
def _db(self):
|
||||
"""
|
||||
|
@ -232,21 +396,166 @@ class AddonsCoinstallCache:
|
|||
active redis instance
|
||||
"""
|
||||
active_db = self._r0.get(ACTIVE_DB)
|
||||
|
||||
if active_db is not None:
|
||||
db = int(active_db.decode("utf8"))
|
||||
|
||||
if db == 1:
|
||||
return self._r1
|
||||
# Run all callback functions to preprocess model data
|
||||
live_db = self._r1
|
||||
elif db == 2:
|
||||
return self._r2
|
||||
live_db = self._r2
|
||||
|
||||
self._update_data_callback(db, live_db)
|
||||
return live_db
|
||||
|
||||
def _update_data_callback(self, db_num, db):
|
||||
"""
|
||||
Preprocess data when the current redis instance does not match
|
||||
the last known instance.
|
||||
"""
|
||||
if db_num == self._last_db:
|
||||
return
|
||||
|
||||
self._last_db = db_num
|
||||
self._build_similarity_features_caches(db)
|
||||
self.logger.info("Completed precomputing normalized data")
|
||||
|
||||
def _build_similarity_features_caches(self, db):
|
||||
"""
|
||||
This function build two feature cache matrices and sets the
|
||||
number of donors (self.similarity_num_donors)
|
||||
|
||||
That's the self.categorical_features and
|
||||
self.continuous_features attributes.
|
||||
|
||||
One matrix is for the continuous features and the other is for
|
||||
the categorical features. This is needed to speed up the similarity
|
||||
recommendation process."""
|
||||
from taar.recommenders.similarity_recommender import (
|
||||
CONTINUOUS_FEATURES,
|
||||
CATEGORICAL_FEATURES,
|
||||
)
|
||||
|
||||
tmp = db.get(SIMILARITY_DONORS)
|
||||
if tmp is None:
|
||||
return
|
||||
donors_pool = json.loads(tmp.decode("utf8"))
|
||||
|
||||
self._similarity_num_donors = len(donors_pool)
|
||||
|
||||
# Build a numpy matrix cache for the continuous features.
|
||||
continuous_features = np.zeros(
|
||||
(self.similarity_num_donors, len(CONTINUOUS_FEATURES))
|
||||
)
|
||||
|
||||
for idx, d in enumerate(donors_pool):
|
||||
features = [d.get(specified_key) for specified_key in CONTINUOUS_FEATURES]
|
||||
continuous_features[idx] = features
|
||||
self._similarity_continuous_features = continuous_features
|
||||
|
||||
# Build the cache for categorical features.
|
||||
categorical_features = np.zeros(
|
||||
(self.similarity_num_donors, len(CATEGORICAL_FEATURES)), dtype="object",
|
||||
)
|
||||
for idx, d in enumerate(donors_pool):
|
||||
features = [d.get(specified_key) for specified_key in CATEGORICAL_FEATURES]
|
||||
categorical_features[idx] = np.array([features], dtype="object")
|
||||
|
||||
self._similarity_categorical_features = categorical_features
|
||||
|
||||
self.logger.info("Reconstructed matrices for similarity recommender")
|
||||
|
||||
@property
|
||||
def _ident(self):
|
||||
""" pid/thread identity """
|
||||
return f"{os.getpid()}_{threading.get_ident()}"
|
||||
|
||||
def _update_coinstall_data(self, db):
|
||||
def _fetch_coinstall_data(self):
|
||||
return s3_json_loader(
|
||||
TAARLITE_GUID_COINSTALL_BUCKET, TAARLITE_GUID_COINSTALL_KEY
|
||||
)
|
||||
|
||||
data = self.fetch_coinstall_data()
|
||||
def _fetch_ranking_data(self):
|
||||
return s3_json_loader(TAARLITE_GUID_COINSTALL_BUCKET, TAARLITE_GUID_RANKING_KEY)
|
||||
|
||||
def _fetch_locale_data(self):
|
||||
return s3_json_loader(TAAR_LOCALE_BUCKET, TAAR_LOCALE_KEY)
|
||||
|
||||
def _fetch_collaborative_mapping_data(self):
|
||||
return s3_json_loader(TAAR_ADDON_MAPPING_BUCKET, TAAR_ADDON_MAPPING_KEY)
|
||||
|
||||
def _fetch_collaborative_item_matrix(self):
|
||||
return s3_json_loader(TAAR_ITEM_MATRIX_BUCKET, TAAR_ITEM_MATRIX_KEY)
|
||||
|
||||
def _fetch_similarity_donors(self):
|
||||
return s3_json_loader(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY,)
|
||||
|
||||
def _fetch_similarity_lrcurves(self):
|
||||
return s3_json_loader(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY,)
|
||||
|
||||
def _fetch_ensemble_weights(self):
|
||||
return s3_json_loader(TAAR_ENSEMBLE_BUCKET, TAAR_ENSEMBLE_KEY)
|
||||
|
||||
def _fetch_whitelist(self):
|
||||
return s3_json_loader(TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY)
|
||||
|
||||
def _update_whitelist_data(self, db):
|
||||
"""
|
||||
Load the TAAR whitelist data
|
||||
"""
|
||||
tmp = self._fetch_whitelist()
|
||||
if tmp:
|
||||
db.set(WHITELIST_DATA, json.dumps(tmp))
|
||||
|
||||
def _update_ensemble_data(self, db):
|
||||
"""
|
||||
Load the TAAR ensemble data
|
||||
"""
|
||||
tmp = self._fetch_ensemble_weights()
|
||||
if tmp:
|
||||
db.set(ENSEMBLE_WEIGHTS, json.dumps(tmp["ensemble_weights"]))
|
||||
|
||||
def _update_similarity_data(self, db):
|
||||
"""
|
||||
Load the TAAR similarity data
|
||||
"""
|
||||
donors = self._fetch_similarity_donors()
|
||||
lrcurves = self._fetch_similarity_lrcurves()
|
||||
|
||||
db.set(SIMILARITY_DONORS, json.dumps(donors))
|
||||
db.set(SIMILARITY_LRCURVES, json.dumps(lrcurves))
|
||||
|
||||
def _update_collab_data(self, db):
|
||||
"""
|
||||
Load the TAAR collaborative data. This is two parts: an item
|
||||
matrix and a mapping of GUIDs
|
||||
"""
|
||||
# Load the item matrix into redis
|
||||
item_matrix = self._fetch_collaborative_item_matrix()
|
||||
db.set(COLLAB_ITEM_MATRIX, json.dumps(item_matrix))
|
||||
|
||||
# Load the taar collaborative mapping data
|
||||
mapping_data = self._fetch_collaborative_mapping_data()
|
||||
db.set(COLLAB_MAPPING_DATA, json.dumps(mapping_data))
|
||||
|
||||
def _update_locale_data(self, db):
|
||||
"""
|
||||
Load the TAAR locale data
|
||||
"""
|
||||
data = self._fetch_locale_data()
|
||||
result = {}
|
||||
for locale, guid_list in data.items():
|
||||
result[locale] = sorted(guid_list, key=lambda x: x[1], reverse=True)
|
||||
|
||||
db.set(LOCALE_DATA, json.dumps(result))
|
||||
|
||||
def _update_coinstall_data(self, db):
|
||||
"""
|
||||
Load the TAAR Lite GUID->GUID coinstallation data
|
||||
"""
|
||||
|
||||
data = self._fetch_coinstall_data()
|
||||
|
||||
items = data.items()
|
||||
len_items = len(items)
|
||||
|
@ -302,7 +611,7 @@ class AddonsCoinstallCache:
|
|||
|
||||
def _update_rank_data(self, db):
|
||||
|
||||
data = self.fetch_ranking_data()
|
||||
data = self._fetch_ranking_data()
|
||||
|
||||
items = data.items()
|
||||
len_items = len(items)
|
||||
|
@ -330,10 +639,6 @@ class AddonsCoinstallCache:
|
|||
|
||||
self._copy_data(next_active_db)
|
||||
|
||||
self.logger.info("Completed precomputing normalized data")
|
||||
|
||||
# TODO: should this autoexpire to help indicate that no fresh
|
||||
# data has loaded? Maybe N * update TTL time?
|
||||
self._r0.set(ACTIVE_DB, next_active_db)
|
||||
self.logger.info(f"Active DB is set to {next_active_db}")
|
||||
|
||||
|
@ -345,5 +650,22 @@ class AddonsCoinstallCache:
|
|||
|
||||
# Clear this database before we do anything with it
|
||||
db.flushdb()
|
||||
|
||||
# Update TAARlite
|
||||
self._update_rank_data(db)
|
||||
self._update_coinstall_data(db)
|
||||
|
||||
# Update TAAR locale data
|
||||
self._update_locale_data(db)
|
||||
|
||||
# Update TAAR collaborative data
|
||||
self._update_collab_data(db)
|
||||
|
||||
# Update TAAR similarity data
|
||||
self._update_similarity_data(db)
|
||||
|
||||
# Update TAAR ensemble data
|
||||
self._update_ensemble_data(db)
|
||||
|
||||
# Update TAAR whitelist data
|
||||
self._update_whitelist_data(db)
|
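To summarize the layout this file establishes: db0 holds only bookkeeping (the ACTIVE_DB pointer and the UPDATE_CHECK mutex), while db1 and db2 alternate as the live model store; readers call _db(), which follows the pointer and runs the preprocessing callback whenever the pointer has changed. The swap itself is sketched below; the exact next_active_db selection is elided from the hunks above, so the alternation shown is an assumption.

# Sketch of the pointer swap performed around _copy_data(); load_all_models()
# stands in for the _update_* calls shown above.
ACTIVE_DB = "active_db"  # same bookkeeping key as defined above

def swap(r0, r1, r2, load_all_models):
    # Assumed selection: always write into the database that is not currently active.
    active = r0.get(ACTIVE_DB)
    next_active_db = 2 if active is not None and int(active.decode("utf8")) == 1 else 1
    db = r1 if next_active_db == 1 else r2

    db.flushdb()                         # clear the inactive database first
    load_all_models(db)                  # rank, coinstall, locale, collab, similarity,
                                         # ensemble and whitelist data
    r0.set(ACTIVE_DB, next_active_db)    # flip the pointer; readers pick it up via _db()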
@ -7,13 +7,7 @@ from itertools import groupby
|
|||
from scipy.spatial import distance
|
||||
from srgutil.interfaces import IMozLogging
|
||||
import numpy as np
|
||||
from .lazys3 import LazyJSONLoader
|
||||
|
||||
from taar.settings import (
|
||||
TAAR_SIMILARITY_BUCKET,
|
||||
TAAR_SIMILARITY_DONOR_KEY,
|
||||
TAAR_SIMILARITY_LRCURVES_KEY,
|
||||
)
|
||||
from taar.recommenders.redis_cache import TAARCache
|
||||
|
||||
import markus
|
||||
|
||||
|
@ -52,99 +46,29 @@ class SimilarityRecommender(AbstractRecommender):
|
|||
def __init__(self, ctx):
|
||||
self._ctx = ctx
|
||||
|
||||
if "similarity_donors_pool" in self._ctx:
|
||||
self._donors_pool = self._ctx["similarity_donors_pool"]
|
||||
else:
|
||||
self._donors_pool = LazyJSONLoader(
|
||||
self._ctx,
|
||||
TAAR_SIMILARITY_BUCKET,
|
||||
TAAR_SIMILARITY_DONOR_KEY,
|
||||
"similarity_donor",
|
||||
)
|
||||
|
||||
if "similarity_lr_curves" in self._ctx:
|
||||
self._lr_curves = self._ctx["similarity_lr_curves"]
|
||||
else:
|
||||
self._lr_curves = LazyJSONLoader(
|
||||
self._ctx,
|
||||
TAAR_SIMILARITY_BUCKET,
|
||||
TAAR_SIMILARITY_LRCURVES_KEY,
|
||||
"similarity_curves",
|
||||
)
|
||||
self._redis_cache = TAARCache.get_instance(self._ctx)
|
||||
|
||||
self.logger = self._ctx[IMozLogging].get_logger("taar")
|
||||
|
||||
self._init_from_ctx()
|
||||
@property
|
||||
def categorical_features(self):
|
||||
return self._redis_cache.similarity_categorical_features()
|
||||
|
||||
@property
|
||||
def continuous_features(self):
|
||||
return self._redis_cache.similarity_continuous_features()
|
||||
|
||||
@property
|
||||
def num_donors(self):
|
||||
return self._redis_cache.similarity_num_donors
|
||||
|
||||
@property
|
||||
def donors_pool(self):
|
||||
result, status = self._donors_pool.get()
|
||||
if status:
|
||||
# Force a reconstruction of the features cache on new
|
||||
# donor pool data
|
||||
self._build_features_caches()
|
||||
return result
|
||||
return self._redis_cache.similarity_donors()
|
||||
|
||||
@property
|
||||
def lr_curves(self):
|
||||
result, status = self._lr_curves.get()
|
||||
if status:
|
||||
# Force a reconstruction of the features cache on new
|
||||
# curve data
|
||||
self._build_features_caches()
|
||||
return result
|
||||
|
||||
def _init_from_ctx(self):
|
||||
# Download the addon donors list.
|
||||
if self.donors_pool is None:
|
||||
self.logger.info(
|
||||
"Similarity donors pool has not been fetched from S3: {}".format(
|
||||
TAAR_SIMILARITY_DONOR_KEY
|
||||
)
|
||||
)
|
||||
|
||||
# Download the probability mapping curves from similarity to likelihood of being a good donor.
|
||||
if self.lr_curves is None:
|
||||
self.logger.error(
|
||||
"Similarity LR Curves have not been fetched from S3: {}".format(
|
||||
TAAR_SIMILARITY_LRCURVES_KEY
|
||||
)
|
||||
)
|
||||
|
||||
def _build_features_caches(self):
|
||||
"""This function build two feature cache matrices.
|
||||
|
||||
That's the self.categorical_features and
|
||||
self.continuous_features attributes.
|
||||
|
||||
One matrix is for the continuous features and the other is for
|
||||
the categorical features. This is needed to speed up the similarity
|
||||
recommendation process."""
|
||||
_donors_pool = self._donors_pool.get()[0]
|
||||
_lr_curves = self._lr_curves.get()[0]
|
||||
|
||||
if _donors_pool is None or _lr_curves is None:
|
||||
# We need to have both donors_pool and lr_curves defined
|
||||
# to reconstruct the matrices
|
||||
return None
|
||||
|
||||
self.num_donors = len(_donors_pool)
|
||||
|
||||
# Build a numpy matrix cache for the continuous features.
|
||||
self.continuous_features = np.zeros((self.num_donors, len(CONTINUOUS_FEATURES)))
|
||||
for idx, d in enumerate(_donors_pool):
|
||||
features = [d.get(specified_key) for specified_key in CONTINUOUS_FEATURES]
|
||||
self.continuous_features[idx] = features
|
||||
|
||||
# Build the cache for categorical features.
|
||||
self.categorical_features = np.zeros(
|
||||
(self.num_donors, len(CATEGORICAL_FEATURES)), dtype="object"
|
||||
)
|
||||
for idx, d in enumerate(_donors_pool):
|
||||
features = [d.get(specified_key) for specified_key in CATEGORICAL_FEATURES]
|
||||
self.categorical_features[idx] = np.array([features], dtype="object")
|
||||
|
||||
self.logger.info("Reconstructed matrices for similarity recommender")
|
||||
return self._redis_cache.similarity_lrcurves()
|
||||
|
||||
def can_recommend(self, client_data, extra_data={}):
|
||||
# We can't recommend if we don't have our data files.
|
||||
|
@ -301,8 +225,6 @@ class SimilarityRecommender(AbstractRecommender):
|
|||
recommendations_out = self._recommend(client_data, limit, extra_data)
|
||||
except Exception as e:
|
||||
recommendations_out = []
|
||||
self._donors_pool.force_expiry()
|
||||
self._lr_curves.force_expiry()
|
||||
|
||||
metrics.incr("error_similarity", value=1)
|
||||
self.logger.exception(
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import re
|
||||
|
||||
RE_PLATFORM = re.compile('(linux|windows|macintosh|android|fxios).*firefox')
|
||||
RE_PLATFORM = re.compile("(linux|windows|macintosh|android|fxios).*firefox")
|
||||
|
||||
LINUX = 1
|
||||
WINDOWS = 2
|
||||
|
@ -8,11 +8,13 @@ MACINTOSH = 3
|
|||
ANDROID = 4
|
||||
FXIOS = 5
|
||||
|
||||
OSNAME_TO_ID = {'linux': LINUX,
|
||||
'windows': WINDOWS,
|
||||
'macintosh': MACINTOSH,
|
||||
'android': ANDROID,
|
||||
'fxios': FXIOS}
|
||||
OSNAME_TO_ID = {
|
||||
"linux": LINUX,
|
||||
"windows": WINDOWS,
|
||||
"macintosh": MACINTOSH,
|
||||
"android": ANDROID,
|
||||
"fxios": FXIOS,
|
||||
}
|
||||
|
||||
|
||||
def parse_ua(user_agent):
|
||||
|
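The regex and the OSNAME_TO_ID table are all that this hunk shows of the user-agent parser; parse_ua's body is not visible, so the example below only illustrates how the two pieces fit together and assumes the user-agent string has already been lower-cased.

# Illustration only: parse_ua's actual implementation is outside this hunk.
import re

RE_PLATFORM = re.compile("(linux|windows|macintosh|android|fxios).*firefox")

ua = "mozilla/5.0 (windows nt 10.0; win64; x64; rv:78.0) gecko/20100101 firefox/78.0"
match = RE_PLATFORM.search(ua)
if match:
    osname = match.group(1)   # "windows"
    # OSNAME_TO_ID would then map this to the numeric constant WINDOWS == 2.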
@ -0,0 +1,54 @@
|
|||
"""
|
||||
|
||||
Noop helpers
|
||||
"""
|
||||
|
||||
import mock
|
||||
from taar.recommenders.redis_cache import TAARCache
|
||||
|
||||
|
||||
def noop_taarlite_dataload(stack):
|
||||
# no-op the taarlite rankdata
|
||||
stack.enter_context(
|
||||
mock.patch.object(TAARCache, "_update_rank_data", return_value=None)
|
||||
)
|
||||
# no-op the taarlite guidguid data
|
||||
stack.enter_context(
|
||||
mock.patch.object(TAARCache, "_update_coinstall_data", return_value=None,)
|
||||
)
|
||||
return stack
|
||||
|
||||
|
||||
def noop_taarlocale_dataload(stack):
|
||||
# no-op the taarlite rankdata
|
||||
stack.enter_context(
|
||||
mock.patch.object(TAARCache, "_update_locale_data", return_value=None)
|
||||
)
|
||||
return stack
|
||||
|
||||
|
||||
def noop_taarcollab_dataload(stack):
|
||||
# no-op the taar collab
|
||||
stack.enter_context(
|
||||
mock.patch.object(TAARCache, "_update_collab_data", return_value=None)
|
||||
)
|
||||
return stack
|
||||
|
||||
|
||||
def noop_taarsimilarity_dataload(stack):
|
||||
# no-op the taar collab
|
||||
stack.enter_context(
|
||||
mock.patch.object(TAARCache, "_update_similarity_data", return_value=None)
|
||||
)
|
||||
return stack
|
||||
|
||||
|
||||
def noop_taarensemble_dataload(stack):
|
||||
# no-op the taar collab
|
||||
stack.enter_context(
|
||||
mock.patch.object(TAARCache, "_update_ensemble_data", return_value=None)
|
||||
)
|
||||
stack.enter_context(
|
||||
mock.patch.object(TAARCache, "_update_whitelist_data", return_value=None)
|
||||
)
|
|
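These helpers are composed in the test modules below by threading a single contextlib.ExitStack through each one, so every patch is unwound together when the stack exits. Typical usage, taken from the collaborative recommender tests later in this diff:

import contextlib

from .noop_fixtures import (
    noop_taarlocale_dataload,
    noop_taarlite_dataload,
    noop_taarsimilarity_dataload,
    noop_taarensemble_dataload,
)

with contextlib.ExitStack() as stack:
    stack = noop_taarlocale_dataload(stack)
    stack = noop_taarlite_dataload(stack)
    stack = noop_taarsimilarity_dataload(stack)
    stack = noop_taarensemble_dataload(stack)
    # ... patch in fakeredis and exercise the recommender under test here.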
@ -7,7 +7,12 @@
|
|||
|
||||
CONTINUOUS_FEATURE_FIXTURE_DATA = [
|
||||
{
|
||||
"active_addons": ["{test-guid-1}", "{test-guid-2}", "{test-guid-3}", "{test-guid-4}"],
|
||||
"active_addons": [
|
||||
"{test-guid-1}",
|
||||
"{test-guid-2}",
|
||||
"{test-guid-3}",
|
||||
"{test-guid-4}",
|
||||
],
|
||||
"geo_city": "brasilia-br",
|
||||
"subsession_length": 4911,
|
||||
"locale": "br-PT",
|
||||
|
@ -15,10 +20,15 @@ CONTINUOUS_FEATURE_FIXTURE_DATA = [
|
|||
"bookmark_count": 7,
|
||||
"tab_open_count": 4,
|
||||
"total_uri": 190,
|
||||
"unique_tlds": 21
|
||||
"unique_tlds": 21,
|
||||
},
|
||||
{
|
||||
"active_addons": ["{test-guid-5}", "{test-guid-6}", "{test-guid-1}", "{test-guid-8}"],
|
||||
"active_addons": [
|
||||
"{test-guid-5}",
|
||||
"{test-guid-6}",
|
||||
"{test-guid-1}",
|
||||
"{test-guid-8}",
|
||||
],
|
||||
"geo_city": "brasilia-br",
|
||||
"subsession_length": 4911,
|
||||
"locale": "br-PT",
|
||||
|
@ -26,10 +36,15 @@ CONTINUOUS_FEATURE_FIXTURE_DATA = [
|
|||
"bookmark_count": 7,
|
||||
"tab_open_count": 4,
|
||||
"total_uri": 200,
|
||||
"unique_tlds": 21
|
||||
"unique_tlds": 21,
|
||||
},
|
||||
{
|
||||
"active_addons": ["{test-guid-9}", "{test-guid-10}", "{test-guid-11}", "{test-guid-12}"],
|
||||
"active_addons": [
|
||||
"{test-guid-9}",
|
||||
"{test-guid-10}",
|
||||
"{test-guid-11}",
|
||||
"{test-guid-12}",
|
||||
],
|
||||
"geo_city": "brasilia-br",
|
||||
"subsession_length": 4911,
|
||||
"locale": "br-PT",
|
||||
|
@ -37,7 +52,7 @@ CONTINUOUS_FEATURE_FIXTURE_DATA = [
|
|||
"bookmark_count": 7,
|
||||
"tab_open_count": 4,
|
||||
"total_uri": 222,
|
||||
"unique_tlds": 21
|
||||
"unique_tlds": 21,
|
||||
},
|
||||
{
|
||||
"active_addons": ["{test-guid-13}", "{test-guid-14}"],
|
||||
|
@ -48,8 +63,8 @@ CONTINUOUS_FEATURE_FIXTURE_DATA = [
|
|||
"bookmark_count": 7,
|
||||
"tab_open_count": 4,
|
||||
"total_uri": 210,
|
||||
"unique_tlds": 21
|
||||
}
|
||||
"unique_tlds": 21,
|
||||
},
|
||||
]
|
||||
|
||||
# Match the fixture taar client, but vary the geo_city to test only
|
||||
|
@ -60,7 +75,12 @@ CONTINUOUS_FEATURE_FIXTURE_DATA = [
|
|||
|
||||
CATEGORICAL_FEATURE_FIXTURE_DATA = [
|
||||
{
|
||||
"active_addons": ["{test-guid-1}", "{test-guid-2}", "{test-guid-3}", "{test-guid-4}"],
|
||||
"active_addons": [
|
||||
"{test-guid-1}",
|
||||
"{test-guid-2}",
|
||||
"{test-guid-3}",
|
||||
"{test-guid-4}",
|
||||
],
|
||||
"geo_city": "brasilia-br",
|
||||
"subsession_length": 4911,
|
||||
"locale": "br-PT",
|
||||
|
@ -68,11 +88,16 @@ CATEGORICAL_FEATURE_FIXTURE_DATA = [
|
|||
"bookmark_count": 7,
|
||||
"tab_open_count": 4,
|
||||
"total_uri": 222,
|
||||
"unique_tlds": 21
|
||||
"unique_tlds": 21,
|
||||
},
|
||||
{
|
||||
# "{test-guid-1}" appears in duplicate here.
|
||||
"active_addons": ["{test-guid-5}", "{test-guid-6}", "{test-guid-1}", "{test-guid-8}"],
|
||||
"active_addons": [
|
||||
"{test-guid-5}",
|
||||
"{test-guid-6}",
|
||||
"{test-guid-1}",
|
||||
"{test-guid-8}",
|
||||
],
|
||||
"geo_city": "toronto-ca",
|
||||
"subsession_length": 4911,
|
||||
"locale": "br-PT",
|
||||
|
@ -80,10 +105,15 @@ CATEGORICAL_FEATURE_FIXTURE_DATA = [
|
|||
"bookmark_count": 7,
|
||||
"tab_open_count": 4,
|
||||
"total_uri": 222,
|
||||
"unique_tlds": 21
|
||||
"unique_tlds": 21,
|
||||
},
|
||||
{
|
||||
"active_addons": ["{test-guid-9}", "{test-guid-10}", "{test-guid-11}", "{test-guid-12}"],
|
||||
"active_addons": [
|
||||
"{test-guid-9}",
|
||||
"{test-guid-10}",
|
||||
"{test-guid-11}",
|
||||
"{test-guid-12}",
|
||||
],
|
||||
"geo_city": "brasilia-br",
|
||||
"subsession_length": 4911,
|
||||
"locale": "br-PT",
|
||||
|
@ -91,7 +121,7 @@ CATEGORICAL_FEATURE_FIXTURE_DATA = [
|
|||
"bookmark_count": 7,
|
||||
"tab_open_count": 4,
|
||||
"total_uri": 222,
|
||||
"unique_tlds": 21
|
||||
"unique_tlds": 21,
|
||||
},
|
||||
{
|
||||
"active_addons": ["{test-guid-13}", "{test-guid-1}"],
|
||||
|
@ -102,6 +132,6 @@ CATEGORICAL_FEATURE_FIXTURE_DATA = [
|
|||
"bookmark_count": 7,
|
||||
"tab_open_count": 4,
|
||||
"total_uri": 222,
|
||||
"unique_tlds": 21
|
||||
}
|
||||
"unique_tlds": 21,
|
||||
},
|
||||
]
|
||||
|
|
|
@ -8,22 +8,25 @@ Test cases for the TAAR CollaborativeRecommender
import numpy

from moto import mock_s3
import boto3
from taar.recommenders.collaborative_recommender import (
TAAR_ITEM_MATRIX_BUCKET,
TAAR_ITEM_MATRIX_KEY,
TAAR_ADDON_MAPPING_BUCKET,
TAAR_ADDON_MAPPING_KEY,
)
import fakeredis
import mock
import contextlib
from taar.recommenders.redis_cache import TAARCache

from taar.recommenders.collaborative_recommender import CollaborativeRecommender
from taar.recommenders.collaborative_recommender import positive_hash
import json

from markus import TIMING
from markus.testing import MetricsMock

from .noop_fixtures import (
noop_taarlocale_dataload,
noop_taarlite_dataload,
noop_taarensemble_dataload,
noop_taarsimilarity_dataload,
)

"""
We need to generate a synthetic list of addons and relative weights
|
||||
|
@ -33,29 +36,56 @@ the Java hash function.
|
|||
"""
|
||||
|
||||
|
||||
def install_none_mock_data(ctx):
|
||||
def noop_other_recommenders(stack):
|
||||
stack = noop_taarlocale_dataload(stack)
|
||||
stack = noop_taarlite_dataload(stack)
|
||||
stack = noop_taarsimilarity_dataload(stack)
|
||||
stack = noop_taarensemble_dataload(stack)
|
||||
return stack
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def mock_install_none_mock_data(ctx):
|
||||
"""
|
||||
Overload the 'real' addon model and mapping URLs responses so that
|
||||
we always get 404 errors.
|
||||
"""
|
||||
conn = boto3.resource("s3", region_name="us-west-2")
|
||||
with contextlib.ExitStack() as stack:
|
||||
TAARCache._instance = None
|
||||
|
||||
conn.create_bucket(Bucket=TAAR_ITEM_MATRIX_BUCKET)
|
||||
conn.Object(TAAR_ITEM_MATRIX_BUCKET, TAAR_ITEM_MATRIX_KEY).put(Body="")
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache, "_fetch_collaborative_item_matrix", return_value="",
|
||||
)
|
||||
)
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache, "_fetch_collaborative_mapping_data", return_value="",
|
||||
)
|
||||
)
|
||||
|
||||
# Don't reuse connections with moto. badness happens
|
||||
conn = boto3.resource("s3", region_name="us-west-2")
|
||||
conn.create_bucket(Bucket=TAAR_ADDON_MAPPING_BUCKET)
|
||||
conn.Object(TAAR_ADDON_MAPPING_BUCKET, TAAR_ADDON_MAPPING_KEY).put(Body="")
|
||||
return ctx
|
||||
stack = noop_other_recommenders(stack)
|
||||
|
||||
# Patch fakeredis in
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache,
|
||||
"init_redis_connections",
|
||||
return_value={
|
||||
0: fakeredis.FakeStrictRedis(db=0),
|
||||
1: fakeredis.FakeStrictRedis(db=1),
|
||||
2: fakeredis.FakeStrictRedis(db=2),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
# Initialize redis
|
||||
TAARCache.get_instance(ctx).safe_load_data()
|
||||
yield stack
|
||||
|
||||
|
||||
def install_mock_data(ctx):
|
||||
"""
|
||||
Overload the 'real' addon model and mapping URLs responses so that
|
||||
we always use the fixture data at the top of this test module.
|
||||
"""
|
||||
|
||||
@contextlib.contextmanager
|
||||
def mock_install_mock_data(ctx):
|
||||
addon_space = [
|
||||
{"id": "addon1.id", "name": "addon1.name", "isWebextension": True},
|
||||
{"id": "addon2.id", "name": "addon2.name", "isWebextension": True},
|
||||
|
@ -66,7 +96,10 @@ def install_mock_data(ctx):
|
|||
|
||||
fake_addon_matrix = []
|
||||
for i, addon in enumerate(addon_space):
|
||||
row = {"id": positive_hash(addon["id"]), "features": [0, 0.2, 0.0, 0.1, 0.15]}
|
||||
row = {
|
||||
"id": positive_hash(addon["id"]),
|
||||
"features": [0, 0.2, 0.0, 0.1, 0.15],
|
||||
}
|
||||
row["features"][i] = 1.0
|
||||
fake_addon_matrix.append(row)
|
||||
|
||||
|
@ -75,74 +108,123 @@ def install_mock_data(ctx):
|
|||
java_hash = positive_hash(addon["id"])
|
||||
fake_mapping[str(java_hash)] = addon
|
||||
|
||||
conn = boto3.resource("s3", region_name="us-west-2")
|
||||
conn.create_bucket(Bucket=TAAR_ITEM_MATRIX_BUCKET)
|
||||
conn.Object(TAAR_ITEM_MATRIX_BUCKET, TAAR_ITEM_MATRIX_KEY).put(
|
||||
Body=json.dumps(fake_addon_matrix)
|
||||
)
|
||||
with contextlib.ExitStack() as stack:
|
||||
TAARCache._instance = None
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache,
|
||||
"_fetch_collaborative_item_matrix",
|
||||
return_value=fake_addon_matrix,
|
||||
)
|
||||
)
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache,
|
||||
"_fetch_collaborative_mapping_data",
|
||||
return_value=fake_mapping,
|
||||
)
|
||||
)
|
||||
|
||||
conn = boto3.resource("s3", region_name="us-west-2")
|
||||
conn.create_bucket(Bucket=TAAR_ADDON_MAPPING_BUCKET)
|
||||
conn.Object(TAAR_ADDON_MAPPING_BUCKET, TAAR_ADDON_MAPPING_KEY).put(
|
||||
Body=json.dumps(fake_mapping)
|
||||
)
|
||||
stack = noop_other_recommenders(stack)
|
||||
|
||||
return ctx
|
||||
# Patch fakeredis in
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache,
|
||||
"init_redis_connections",
|
||||
return_value={
|
||||
0: fakeredis.FakeStrictRedis(db=0),
|
||||
1: fakeredis.FakeStrictRedis(db=1),
|
||||
2: fakeredis.FakeStrictRedis(db=2),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
# Initialize redis
|
||||
TAARCache.get_instance(ctx).safe_load_data()
|
||||
yield stack
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_cant_recommend(test_ctx):
|
||||
ctx = install_mock_data(test_ctx)
|
||||
r = CollaborativeRecommender(ctx)
|
||||
with mock_install_mock_data(test_ctx):
|
||||
r = CollaborativeRecommender(test_ctx)
|
||||
|
||||
# Test that we can't recommend if we have not enough client info.
|
||||
assert not r.can_recommend({})
|
||||
assert not r.can_recommend({"installed_addons": []})
|
||||
# Test that we can't recommend if we have not enough client info.
|
||||
assert not r.can_recommend({})
|
||||
assert not r.can_recommend({"installed_addons": []})
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_can_recommend(test_ctx):
|
||||
ctx = install_mock_data(test_ctx)
|
||||
r = CollaborativeRecommender(ctx)
|
||||
with mock_install_mock_data(test_ctx):
|
||||
r = CollaborativeRecommender(test_ctx)
|
||||
|
||||
# For some reason, moto doesn't like to play nice with this call
|
||||
# Check that we can recommend if we the user has at least an addon.
|
||||
assert r.can_recommend(
|
||||
{"installed_addons": ["uBlock0@raymondhill.net"], "client_id": "test-client"}
|
||||
)
|
||||
# Check that we can recommend if the user has at least an addon.
|
||||
assert r.can_recommend(
|
||||
{
|
||||
"installed_addons": ["uBlock0@raymondhill.net"],
|
||||
"client_id": "test-client",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_can_recommend_no_model(test_ctx):
|
||||
ctx = install_none_mock_data(test_ctx)
|
||||
r = CollaborativeRecommender(ctx)
|
||||
with mock_install_none_mock_data(test_ctx):
|
||||
r = CollaborativeRecommender(test_ctx)
|
||||
|
||||
# We should never be able to recommend if something went wrong with the model.
|
||||
assert not r.can_recommend({})
|
||||
assert not r.can_recommend({"installed_addons": []})
|
||||
assert not r.can_recommend({"installed_addons": ["uBlock0@raymondhill.net"]})
|
||||
# We should never be able to recommend if something went wrong with the model.
|
||||
assert not r.can_recommend({})
|
||||
assert not r.can_recommend({"installed_addons": []})
|
||||
assert not r.can_recommend({"installed_addons": ["uBlock0@raymondhill.net"]})
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_empty_recommendations(test_ctx):
|
||||
# Tests that the empty recommender always recommends an empty list
|
||||
# of addons if we have no addons
|
||||
ctx = install_none_mock_data(test_ctx)
|
||||
r = CollaborativeRecommender(ctx)
|
||||
assert not r.can_recommend({})
|
||||
with mock_install_none_mock_data(test_ctx):
|
||||
r = CollaborativeRecommender(test_ctx)
|
||||
assert not r.can_recommend({})
|
||||
|
||||
# Note that calling recommend() if can_recommend has failed is not
|
||||
# defined.
|
||||
# Note that calling recommend() if can_recommend has failed is not
|
||||
# defined.
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_best_recommendation(test_ctx):
|
||||
with MetricsMock() as mm:
|
||||
|
||||
# Make sure the structure of the recommendations is correct and that we
|
||||
# recommended the right addon.
|
||||
ctx = install_mock_data(test_ctx)
|
||||
r = CollaborativeRecommender(ctx)
|
||||
with mock_install_mock_data(test_ctx):
|
||||
r = CollaborativeRecommender(test_ctx)
|
||||
|
||||
# A non-empty set of addons should give a list of recommendations
|
||||
fixture_client_data = {
|
||||
"installed_addons": ["addon4.id"],
|
||||
"client_id": "test_client",
|
||||
}
|
||||
assert r.can_recommend(fixture_client_data)
|
||||
recommendations = r.recommend(fixture_client_data, 1)
|
||||
|
||||
assert isinstance(recommendations, list)
|
||||
assert len(recommendations) == 1
|
||||
|
||||
# Verify that addon2 - the most heavily weighted addon - was
|
||||
# recommended
|
||||
result = recommendations[0]
|
||||
assert type(result) is tuple
|
||||
assert len(result) == 2
|
||||
assert result[0] == "addon2.id"
|
||||
assert type(result[1]) is numpy.float64
|
||||
assert numpy.isclose(result[1], numpy.float64("0.3225"))
|
||||
|
||||
assert mm.has_record(TIMING, stat="taar.collaborative_recommend")
|
||||
|
||||
|
||||
def test_recommendation_weights(test_ctx):
|
||||
"""
|
||||
Weights should be ordered greatest to lowest
|
||||
"""
|
||||
with mock_install_mock_data(test_ctx):
|
||||
r = CollaborativeRecommender(test_ctx)
|
||||
|
||||
# A non-empty set of addons should give a list of recommendations
|
||||
fixture_client_data = {
|
||||
|
@ -150,10 +232,9 @@ def test_best_recommendation(test_ctx):
|
|||
"client_id": "test_client",
|
||||
}
|
||||
assert r.can_recommend(fixture_client_data)
|
||||
recommendations = r.recommend(fixture_client_data, 1)
|
||||
|
||||
recommendations = r.recommend(fixture_client_data, 2)
|
||||
assert isinstance(recommendations, list)
|
||||
assert len(recommendations) == 1
|
||||
assert len(recommendations) == 2
|
||||
|
||||
# Verify that addon2 - the most heavily weighted addon - was
|
||||
# recommended
|
||||
|
@ -164,43 +245,11 @@ def test_best_recommendation(test_ctx):
|
|||
assert type(result[1]) is numpy.float64
|
||||
assert numpy.isclose(result[1], numpy.float64("0.3225"))
|
||||
|
||||
assert mm.has_record(TIMING, stat="taar.item_matrix")
|
||||
assert mm.has_record(TIMING, stat="taar.addon_mapping")
|
||||
assert mm.has_record(TIMING, stat="taar.collaborative_recommend")
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_recommendation_weights(test_ctx):
|
||||
"""
|
||||
Weights should be ordered greatest to lowest
|
||||
"""
|
||||
ctx = install_mock_data(test_ctx)
|
||||
r = CollaborativeRecommender(ctx)
|
||||
|
||||
# A non-empty set of addons should give a list of recommendations
|
||||
fixture_client_data = {
|
||||
"installed_addons": ["addon4.id"],
|
||||
"client_id": "test_client",
|
||||
}
|
||||
assert r.can_recommend(fixture_client_data)
|
||||
recommendations = r.recommend(fixture_client_data, 2)
|
||||
assert isinstance(recommendations, list)
|
||||
assert len(recommendations) == 2
|
||||
|
||||
# Verify that addon2 - the most heavily weighted addon - was
|
||||
# recommended
|
||||
result = recommendations[0]
|
||||
assert type(result) is tuple
|
||||
assert len(result) == 2
|
||||
assert result[0] == "addon2.id"
|
||||
assert type(result[1]) is numpy.float64
|
||||
assert numpy.isclose(result[1], numpy.float64("0.3225"))
|
||||
|
||||
# Verify that addon5 - the second most heavily weighted addon - was
|
||||
# recommended
|
||||
result = recommendations[1]
|
||||
assert type(result) is tuple
|
||||
assert len(result) == 2
|
||||
assert result[0] == "addon5.id"
|
||||
assert type(result[1]) is numpy.float64
|
||||
assert numpy.isclose(result[1], numpy.float64("0.29"))
|
||||
# Verify that addon5 - the second most heavily weighted addon - was
|
||||
# recommended
|
||||
result = recommendations[1]
|
||||
assert type(result) is tuple
|
||||
assert len(result) == 2
|
||||
assert result[0] == "addon5.id"
|
||||
assert type(result[1]) is numpy.float64
|
||||
assert numpy.isclose(result[1], numpy.float64("0.29"))
|
||||
|
|
|
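Aside: every test module touched by this commit repeats the same setup pattern that appears in the hunks above and below - reset the TAARCache singleton, stub the expensive `_fetch_*` loaders with canned data, replace the real redis connections with fakeredis, then call `safe_load_data()`. The sketch below distills that pattern into one reusable context manager. It is illustrative only and not part of the commit; the helper name `fake_taar_cache` and its keyword-argument interface are assumptions, while the patched attributes (`_instance`, `init_redis_connections`, `safe_load_data`, the `_fetch_*` methods) are the ones the diff itself uses.

import contextlib

import fakeredis
import mock

from taar.recommenders.redis_cache import TAARCache


@contextlib.contextmanager
def fake_taar_cache(ctx, **fetch_overrides):
    # fetch_overrides maps TAARCache "_fetch_*" method names to canned return values,
    # e.g. fake_taar_cache(test_ctx, _fetch_locale_data=FAKE_LOCALE_DATA).
    with contextlib.ExitStack() as stack:
        # Drop any previously constructed singleton so the patched loaders are used.
        TAARCache._instance = None
        for method_name, canned in fetch_overrides.items():
            stack.enter_context(
                mock.patch.object(TAARCache, method_name, return_value=canned)
            )
        # Swap the real redis connections for in-process fakeredis databases.
        stack.enter_context(
            mock.patch.object(
                TAARCache,
                "init_redis_connections",
                return_value={db: fakeredis.FakeStrictRedis(db=db) for db in (0, 1, 2)},
            )
        )
        # Preload the fake redis instances the same way production code would.
        TAARCache.get_instance(ctx).safe_load_data()
        yield stack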
@ -2,19 +2,17 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from taar.recommenders.ensemble_recommender import (
WeightCache,
EnsembleRecommender,
from taar.recommenders.ensemble_recommender import EnsembleRecommender
import mock
import contextlib
import fakeredis
from taar.recommenders.redis_cache import TAARCache
from .noop_fixtures import (
noop_taarlocale_dataload,
noop_taarcollab_dataload,
noop_taarlite_dataload,
noop_taarsimilarity_dataload,
)
from taar.settings import (
TAAR_ENSEMBLE_BUCKET,
TAAR_ENSEMBLE_KEY,
TAAR_WHITELIST_BUCKET,
TAAR_WHITELIST_KEY,
)
from moto import mock_s3
import boto3
import json
from .mocks import MockRecommenderFactory

from markus import TIMING
|
||||
|
@ -23,146 +21,177 @@ from markus.testing import MetricsMock
|
|||
EXPECTED = {"collaborative": 1000, "similarity": 100, "locale": 10}
|
||||
|
||||
|
||||
def install_mock_ensemble_data(ctx):
|
||||
def noop_loaders(stack):
|
||||
stack = noop_taarlocale_dataload(stack)
|
||||
stack = noop_taarcollab_dataload(stack)
|
||||
stack = noop_taarlite_dataload(stack)
|
||||
stack = noop_taarsimilarity_dataload(stack)
|
||||
return stack
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def mock_install_mock_ensemble_data(ctx):
|
||||
DATA = {"ensemble_weights": EXPECTED}
|
||||
|
||||
conn = boto3.resource("s3", region_name="us-west-2")
|
||||
conn.create_bucket(Bucket=TAAR_ENSEMBLE_BUCKET)
|
||||
conn.Object(TAAR_ENSEMBLE_BUCKET, TAAR_ENSEMBLE_KEY).put(Body=json.dumps(DATA))
|
||||
WHITELIST_DATA = [
|
||||
"2.0@disconnect.me",
|
||||
"@contain-facebook",
|
||||
"@testpilot-containers",
|
||||
"CookieAutoDelete@kennydo.com",
|
||||
"FirefoxColor@mozilla.com",
|
||||
"adblockultimate@adblockultimate.net",
|
||||
"addon@darkreader.org",
|
||||
"adguardadblocker@adguard.com",
|
||||
"adnauseam@rednoise.org",
|
||||
"clearcache@michel.de.almeida",
|
||||
"copyplaintext@eros.man",
|
||||
"default-bookmark-folder@gustiaux.com",
|
||||
"enhancerforyoutube@maximerf.addons.mozilla.org",
|
||||
"extension@one-tab.com",
|
||||
"extension@tabliss.io",
|
||||
"firefox-addon@myki.co",
|
||||
"firefox@ghostery.com",
|
||||
"forecastfox@s3_fix_version",
|
||||
"forget-me-not@lusito.info",
|
||||
"foxyproxy@eric.h.jung",
|
||||
"foxytab@eros.man",
|
||||
"gmailnoads@mywebber.com",
|
||||
]
|
||||
|
||||
conn.create_bucket(Bucket=TAAR_WHITELIST_BUCKET)
|
||||
conn.Object(TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY).put(
|
||||
Body=json.dumps(
|
||||
[
|
||||
"2.0@disconnect.me",
|
||||
"@contain-facebook",
|
||||
"@testpilot-containers",
|
||||
"CookieAutoDelete@kennydo.com",
|
||||
"FirefoxColor@mozilla.com",
|
||||
"adblockultimate@adblockultimate.net",
|
||||
"addon@darkreader.org",
|
||||
"adguardadblocker@adguard.com",
|
||||
"adnauseam@rednoise.org",
|
||||
"clearcache@michel.de.almeida",
|
||||
"copyplaintext@eros.man",
|
||||
"default-bookmark-folder@gustiaux.com",
|
||||
"enhancerforyoutube@maximerf.addons.mozilla.org",
|
||||
"extension@one-tab.com",
|
||||
"extension@tabliss.io",
|
||||
"firefox-addon@myki.co",
|
||||
"firefox@ghostery.com",
|
||||
"forecastfox@s3_fix_version",
|
||||
"forget-me-not@lusito.info",
|
||||
"foxyproxy@eric.h.jung",
|
||||
"foxytab@eros.man",
|
||||
"gmailnoads@mywebber.com",
|
||||
]
|
||||
with contextlib.ExitStack() as stack:
|
||||
TAARCache._instance = None
|
||||
stack.enter_context(
|
||||
mock.patch.object(TAARCache, "_fetch_ensemble_weights", return_value=DATA,)
|
||||
)
|
||||
)
|
||||
|
||||
return ctx
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache, "_fetch_whitelist", return_value=WHITELIST_DATA,
|
||||
)
|
||||
)
|
||||
|
||||
stack = noop_loaders(stack)
|
||||
|
||||
# Patch fakeredis in
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache,
|
||||
"init_redis_connections",
|
||||
return_value={
|
||||
0: fakeredis.FakeStrictRedis(db=0),
|
||||
1: fakeredis.FakeStrictRedis(db=1),
|
||||
2: fakeredis.FakeStrictRedis(db=2),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
# Initialize redis
|
||||
TAARCache.get_instance(ctx).safe_load_data()
|
||||
yield stack
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_weight_cache(test_ctx):
|
||||
ctx = install_mock_ensemble_data(test_ctx)
|
||||
wc = WeightCache(ctx)
|
||||
actual = wc.getWeights()
|
||||
assert EXPECTED == actual
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_recommendations(test_ctx):
|
||||
with MetricsMock() as mm:
|
||||
ctx = install_mock_ensemble_data(test_ctx)
|
||||
|
||||
EXPECTED_RESULTS = [
|
||||
("ghi", 3430.0),
|
||||
("def", 3320.0),
|
||||
("ijk", 3200.0),
|
||||
("hij", 3100.0),
|
||||
("lmn", 420.0),
|
||||
]
|
||||
|
||||
with mock_install_mock_ensemble_data(test_ctx):
|
||||
factory = MockRecommenderFactory()
|
||||
ctx["recommender_factory"] = factory
|
||||
test_ctx["recommender_factory"] = factory
|
||||
|
||||
ctx["recommender_map"] = {
|
||||
test_ctx["recommender_map"] = {
|
||||
"collaborative": factory.create("collaborative"),
|
||||
"similarity": factory.create("similarity"),
|
||||
"locale": factory.create("locale"),
|
||||
}
|
||||
r = EnsembleRecommender(ctx.child())
|
||||
client = {"client_id": "12345"} # Anything will work here
|
||||
|
||||
r = EnsembleRecommender(test_ctx)
|
||||
actual = r.getWeights()
|
||||
assert EXPECTED == actual
|
||||
|
||||
|
||||
def test_recommendations(test_ctx):
|
||||
with MetricsMock() as mm:
|
||||
with mock_install_mock_ensemble_data(test_ctx):
|
||||
EXPECTED_RESULTS = [
|
||||
("ghi", 3430.0),
|
||||
("def", 3320.0),
|
||||
("ijk", 3200.0),
|
||||
("hij", 3100.0),
|
||||
("lmn", 420.0),
|
||||
]
|
||||
|
||||
factory = MockRecommenderFactory()
|
||||
test_ctx["recommender_factory"] = factory
|
||||
|
||||
test_ctx["recommender_map"] = {
|
||||
"collaborative": factory.create("collaborative"),
|
||||
"similarity": factory.create("similarity"),
|
||||
"locale": factory.create("locale"),
|
||||
}
|
||||
r = EnsembleRecommender(test_ctx)
|
||||
client = {"client_id": "12345"} # Anything will work here
|
||||
|
||||
recommendation_list = r.recommend(client, 5)
|
||||
assert isinstance(recommendation_list, list)
|
||||
assert recommendation_list == EXPECTED_RESULTS
|
||||
|
||||
assert mm.has_record(TIMING, "taar.ensemble_recommend")
|
||||
|
||||
|
||||
def test_preinstalled_guids(test_ctx):
|
||||
with mock_install_mock_ensemble_data(test_ctx):
|
||||
EXPECTED_RESULTS = [
|
||||
("ghi", 3430.0),
|
||||
("ijk", 3200.0),
|
||||
("lmn", 420.0),
|
||||
("klm", 409.99999999999994),
|
||||
("abc", 23.0),
|
||||
]
|
||||
|
||||
factory = MockRecommenderFactory()
|
||||
test_ctx["recommender_factory"] = factory
|
||||
|
||||
test_ctx["recommender_map"] = {
|
||||
"collaborative": factory.create("collaborative"),
|
||||
"similarity": factory.create("similarity"),
|
||||
"locale": factory.create("locale"),
|
||||
}
|
||||
r = EnsembleRecommender(test_ctx)
|
||||
|
||||
# 'hij' should be excluded from the suggestions list
|
||||
# The other two addon GUIDs 'def' and 'jkl' will never be
|
||||
# recommended anyway and should have no impact on results
|
||||
client = {"client_id": "12345", "installed_addons": ["def", "hij", "jkl"]}
|
||||
|
||||
recommendation_list = r.recommend(client, 5)
|
||||
print(recommendation_list)
|
||||
assert isinstance(recommendation_list, list)
|
||||
assert recommendation_list == EXPECTED_RESULTS
|
||||
|
||||
|
||||
def test_mock_client_ids(test_ctx):
|
||||
with mock_install_mock_ensemble_data(test_ctx):
|
||||
|
||||
EXPECTED_RESULTS = [
|
||||
("2.0@disconnect.me", 0.17),
|
||||
("@contain-facebook", 0.25),
|
||||
("@testpilot-containers", 0.72),
|
||||
("CookieAutoDelete@kennydo.com", 0.37),
|
||||
("FirefoxColor@mozilla.com", 0.32),
|
||||
]
|
||||
|
||||
factory = MockRecommenderFactory()
|
||||
test_ctx["recommender_factory"] = factory
|
||||
|
||||
test_ctx["recommender_map"] = {
|
||||
"collaborative": factory.create("collaborative"),
|
||||
"similarity": factory.create("similarity"),
|
||||
"locale": factory.create("locale"),
|
||||
}
|
||||
r = EnsembleRecommender(test_ctx)
|
||||
|
||||
# 'hij' should be excluded from the suggestions list
|
||||
# The other two addon GUIDs 'def' and 'jkl' will never be
|
||||
# recommended anyway and should have no impact on results
|
||||
client = {"client_id": "11111"}
|
||||
|
||||
recommendation_list = r.recommend(client, 5)
|
||||
assert isinstance(recommendation_list, list)
|
||||
assert recommendation_list == EXPECTED_RESULTS
|
||||
|
||||
assert mm.has_record(TIMING, "taar.ensemble")
|
||||
assert mm.has_record(TIMING, "taar.ensemble_recommend")
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_preinstalled_guids(test_ctx):
|
||||
ctx = install_mock_ensemble_data(test_ctx)
|
||||
|
||||
EXPECTED_RESULTS = [
|
||||
("ghi", 3430.0),
|
||||
("ijk", 3200.0),
|
||||
("lmn", 420.0),
|
||||
("klm", 409.99999999999994),
|
||||
("abc", 23.0),
|
||||
]
|
||||
|
||||
factory = MockRecommenderFactory()
|
||||
ctx["recommender_factory"] = factory
|
||||
|
||||
ctx["recommender_map"] = {
|
||||
"collaborative": factory.create("collaborative"),
|
||||
"similarity": factory.create("similarity"),
|
||||
"locale": factory.create("locale"),
|
||||
}
|
||||
r = EnsembleRecommender(ctx.child())
|
||||
|
||||
# 'hij' should be excluded from the suggestions list
|
||||
# The other two addon GUIDs 'def' and 'jkl' will never be
|
||||
# recommended anyway and should have no impact on results
|
||||
client = {"client_id": "12345", "installed_addons": ["def", "hij", "jkl"]}
|
||||
|
||||
recommendation_list = r.recommend(client, 5)
|
||||
print(recommendation_list)
|
||||
assert isinstance(recommendation_list, list)
|
||||
assert recommendation_list == EXPECTED_RESULTS
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_mock_client_ids(test_ctx):
|
||||
ctx = install_mock_ensemble_data(test_ctx)
|
||||
|
||||
EXPECTED_RESULTS = [
|
||||
("2.0@disconnect.me", 0.17),
|
||||
("@contain-facebook", 0.25),
|
||||
("@testpilot-containers", 0.72),
|
||||
("CookieAutoDelete@kennydo.com", 0.37),
|
||||
("FirefoxColor@mozilla.com", 0.32),
|
||||
]
|
||||
|
||||
factory = MockRecommenderFactory()
|
||||
ctx["recommender_factory"] = factory
|
||||
|
||||
ctx["recommender_map"] = {
|
||||
"collaborative": factory.create("collaborative"),
|
||||
"similarity": factory.create("similarity"),
|
||||
"locale": factory.create("locale"),
|
||||
}
|
||||
r = EnsembleRecommender(ctx.child())
|
||||
|
||||
# 'hij' should be excluded from the suggestions list
|
||||
# The other two addon GUIDs 'def' and 'jkl' will never be
|
||||
# recommended anyway and should have no impact on results
|
||||
client = {"client_id": "11111"}
|
||||
|
||||
recommendation_list = r.recommend(client, 5)
|
||||
assert isinstance(recommendation_list, list)
|
||||
assert recommendation_list == EXPECTED_RESULTS
|
||||
|
|
|
@ -5,8 +5,15 @@ import pytest
import mock
import contextlib

from .noop_fixtures import (
noop_taarlocale_dataload,
noop_taarcollab_dataload,
noop_taarsimilarity_dataload,
noop_taarensemble_dataload,
)

from taar.recommenders.guid_based_recommender import GuidBasedRecommender
from taar.recommenders.redis_cache import AddonsCoinstallCache
from taar.recommenders.redis_cache import TAARCache

from taar.recommenders.redis_cache import NORMDATA_GUID_ROW_NORM_PREFIX

@ -85,23 +92,28 @@ RESULTS = {
def mock_coinstall_ranking_context(ctx, mock_coinstall, mock_ranking):

with contextlib.ExitStack() as stack:
TAARCache._instance = None

stack.enter_context(
mock.patch.object(
AddonsCoinstallCache, "fetch_ranking_data", return_value=mock_ranking,
TAARCache, "_fetch_ranking_data", return_value=mock_ranking,
)
)
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
"fetch_coinstall_data",
return_value=mock_coinstall,
TAARCache, "_fetch_coinstall_data", return_value=mock_coinstall,
)
)

stack = noop_taarlocale_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarsimilarity_dataload(stack)
stack = noop_taarensemble_dataload(stack)

# Patch fakeredis in
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
TAARCache,
"init_redis_connections",
return_value={
0: fakeredis.FakeStrictRedis(db=0),

@ -112,7 +124,7 @@ def mock_coinstall_ranking_context(ctx, mock_coinstall, mock_ranking):
)

# Initialize redis
AddonsCoinstallCache(ctx).safe_load_data()
TAARCache.get_instance(ctx).safe_load_data()
yield stack
|
||||
|
||||
|
||||
|
|
|
@ -1,138 +0,0 @@
|
|||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
"""
|
||||
Test cases for the TAAR Hybrid recommender
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from taar.recommenders.hybrid_recommender import CuratedRecommender
|
||||
from taar.recommenders.hybrid_recommender import HybridRecommender
|
||||
from taar.recommenders.ensemble_recommender import EnsembleRecommender
|
||||
|
||||
from taar.settings import TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY
|
||||
|
||||
# from taar.recommenders.hybrid_recommender import ENSEMBLE_WEIGHTS
|
||||
from .test_ensemblerecommender import install_mock_ensemble_data
|
||||
from .mocks import MockRecommenderFactory
|
||||
|
||||
import json
|
||||
from moto import mock_s3
|
||||
import boto3
|
||||
|
||||
from markus import TIMING
|
||||
from markus.testing import MetricsMock
|
||||
|
||||
|
||||
def install_no_curated_data(ctx):
|
||||
ctx = ctx.child()
|
||||
conn = boto3.resource("s3", region_name="us-west-2")
|
||||
|
||||
conn.create_bucket(Bucket=TAAR_WHITELIST_BUCKET)
|
||||
conn.Object(TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY).put(Body="")
|
||||
|
||||
return ctx
|
||||
|
||||
|
||||
def install_mock_curated_data(ctx):
|
||||
mock_data = []
|
||||
for i in range(20):
|
||||
mock_data.append(str(i) * 16)
|
||||
|
||||
ctx = ctx.child()
|
||||
conn = boto3.resource("s3", region_name="us-west-2")
|
||||
|
||||
conn.create_bucket(Bucket=TAAR_WHITELIST_BUCKET)
|
||||
conn.Object(TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY).put(
|
||||
Body=json.dumps(mock_data)
|
||||
)
|
||||
|
||||
return ctx
|
||||
|
||||
|
||||
def install_ensemble_fixtures(ctx):
|
||||
ctx = install_mock_ensemble_data(ctx)
|
||||
|
||||
factory = MockRecommenderFactory()
|
||||
ctx["recommender_factory"] = factory
|
||||
|
||||
ctx["recommender_map"] = {
|
||||
"collaborative": factory.create("collaborative"),
|
||||
"similarity": factory.create("similarity"),
|
||||
"locale": factory.create("locale"),
|
||||
}
|
||||
ctx["ensemble_recommender"] = EnsembleRecommender(ctx.child())
|
||||
return ctx
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_curated_can_recommend(test_ctx):
|
||||
ctx = install_no_curated_data(test_ctx)
|
||||
r = CuratedRecommender(ctx)
|
||||
|
||||
# CuratedRecommender will always recommend something no matter
|
||||
# what
|
||||
assert r.can_recommend({})
|
||||
assert r.can_recommend({"installed_addons": []})
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_curated_recommendations(test_ctx):
|
||||
with MetricsMock() as mm:
|
||||
ctx = install_mock_curated_data(test_ctx)
|
||||
r = CuratedRecommender(ctx)
|
||||
|
||||
# CuratedRecommender will always recommend something no matter
|
||||
# what
|
||||
|
||||
for LIMIT in range(1, 5):
|
||||
guid_list = r.recommend({"client_id": "000000"}, limit=LIMIT)
|
||||
# The curated recommendations should always return with some kind
|
||||
# of recommendations
|
||||
assert len(guid_list) == LIMIT
|
||||
|
||||
assert mm.has_record(TIMING, "taar.whitelist")
|
||||
assert mm.has_record(TIMING, "taar.hybrid_recommend")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="this test seems to break sporadically")
|
||||
@mock_s3
|
||||
def test_hybrid_recommendations(test_ctx):
|
||||
# verify that the recommendations mix the curated and
|
||||
# ensemble results
|
||||
ctx = install_mock_curated_data(test_ctx)
|
||||
ctx = install_ensemble_fixtures(ctx)
|
||||
|
||||
r = HybridRecommender(ctx)
|
||||
|
||||
# Test that we can generate lists of results
|
||||
for LIMIT in range(4, 8):
|
||||
guid_list = r.recommend({"client_id": "000000"}, limit=LIMIT)
|
||||
# The curated recommendations should always return with some kind
|
||||
# of recommendations
|
||||
assert len(guid_list) == LIMIT
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="this test seems to break sporadically")
|
||||
@mock_s3
|
||||
def test_stable_hybrid_results(test_ctx):
|
||||
# verify that the recommendations mix the curated and
|
||||
# ensemble results
|
||||
ctx = install_mock_curated_data(test_ctx)
|
||||
ctx = install_ensemble_fixtures(ctx)
|
||||
|
||||
r = HybridRecommender(ctx)
|
||||
# Test that the results are actually mixed
|
||||
guid_list = r.recommend({"client_id": "000000"}, limit=4)
|
||||
|
||||
assert len(guid_list) == 4
|
||||
|
||||
# A mixed list will have two recommendations with weight = 1.0
|
||||
# (curated) and exactly 2 with weight < 1.0 from the ensemble list
|
||||
|
||||
assert guid_list[0][1] == 1.0
|
||||
assert guid_list[1][1] == 1.0
|
||||
assert guid_list[2][1] < 1.0
|
||||
assert guid_list[3][1] < 1.0
|
|
@ -1,94 +0,0 @@
|
|||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
import json
|
||||
|
||||
from taar.recommenders.lazys3 import LazyJSONLoader
|
||||
|
||||
import boto3
|
||||
from moto import mock_s3
|
||||
|
||||
|
||||
from taar.settings import (
|
||||
TAAR_SIMILARITY_BUCKET,
|
||||
TAAR_SIMILARITY_DONOR_KEY,
|
||||
)
|
||||
|
||||
|
||||
def install_categorical_data(ctx):
|
||||
ctx = ctx.child()
|
||||
conn = boto3.resource("s3", region_name="us-west-2")
|
||||
|
||||
try:
|
||||
conn.create_bucket(Bucket=TAAR_SIMILARITY_BUCKET)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY).put(
|
||||
Body=json.dumps({"test": "donor_key"})
|
||||
)
|
||||
|
||||
ctx["similarity_donors_pool"] = LazyJSONLoader(
|
||||
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY
|
||||
)
|
||||
|
||||
return ctx
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_does_it_load(test_ctx):
|
||||
ctx = install_categorical_data(test_ctx)
|
||||
|
||||
jdata, status = ctx["similarity_donors_pool"].get()
|
||||
assert jdata["test"] == "donor_key"
|
||||
check_jdata_status(jdata, status)
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_cached_load(test_ctx):
|
||||
ctx = install_categorical_data(test_ctx)
|
||||
loader = ctx["similarity_donors_pool"]
|
||||
jdata, status = loader.get()
|
||||
check_jdata_status(jdata, status)
|
||||
jdata, status = loader.get()
|
||||
assert not status
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_reload_on_expiry(test_ctx):
|
||||
ctx = install_categorical_data(test_ctx)
|
||||
loader = ctx["similarity_donors_pool"]
|
||||
|
||||
jdata, status = loader.get()
|
||||
check_jdata_status(jdata, status)
|
||||
jdata, status = loader.get()
|
||||
assert not status
|
||||
|
||||
# Force expiry time to be 10 seconds ago
|
||||
loader._expiry_time = loader._clock.time() - 10
|
||||
|
||||
jdata, status = loader.get()
|
||||
check_jdata_status(jdata, status)
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_force_expiry(test_ctx):
|
||||
ctx = install_categorical_data(test_ctx)
|
||||
loader = ctx["similarity_donors_pool"]
|
||||
|
||||
jdata, status = loader.get()
|
||||
check_jdata_status(jdata, status)
|
||||
jdata, status = loader.get()
|
||||
assert not status
|
||||
|
||||
# Force expiry time to be 10 seconds ago
|
||||
loader.force_expiry()
|
||||
|
||||
jdata, status = loader.get()
|
||||
check_jdata_status(jdata, status)
|
||||
|
||||
|
||||
def check_jdata_status(jdata, status):
|
||||
assert jdata == {"test": "donor_key"}
|
||||
assert status
|
|
@ -2,9 +2,18 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from moto import mock_s3
import boto3
import mock

import contextlib
import fakeredis
from taar.recommenders.redis_cache import TAARCache
from .noop_fixtures import (
noop_taarcollab_dataload,
noop_taarlite_dataload,
noop_taarsimilarity_dataload,
noop_taarensemble_dataload,
)
import json

@ -14,6 +23,7 @@ from taar.settings import TAAR_LOCALE_KEY, TAAR_LOCALE_BUCKET
from markus import TIMING
from markus.testing import MetricsMock

FAKE_LOCALE_DATA = {
"te-ST": [
["{1e6b8bce-7dc8-481c-9f19-123e41332b72}", 0.1],
|
||||
|
@ -37,32 +47,62 @@ def install_mock_data(ctx):
|
|||
return ctx
|
||||
|
||||
|
||||
@mock_s3
|
||||
@contextlib.contextmanager
|
||||
def mock_locale_data(ctx):
|
||||
with contextlib.ExitStack() as stack:
|
||||
TAARCache._instance = None
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache, "_fetch_locale_data", return_value=FAKE_LOCALE_DATA,
|
||||
)
|
||||
)
|
||||
|
||||
stack = noop_taarlite_dataload(stack)
|
||||
stack = noop_taarcollab_dataload(stack)
|
||||
stack = noop_taarsimilarity_dataload(stack)
|
||||
stack = noop_taarensemble_dataload(stack)
|
||||
|
||||
# Patch fakeredis in
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache,
|
||||
"init_redis_connections",
|
||||
return_value={
|
||||
0: fakeredis.FakeStrictRedis(db=0),
|
||||
1: fakeredis.FakeStrictRedis(db=1),
|
||||
2: fakeredis.FakeStrictRedis(db=2),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
# Initialize redis
|
||||
TAARCache.get_instance(ctx).safe_load_data()
|
||||
yield stack
|
||||
|
||||
|
||||
def test_can_recommend(test_ctx):
|
||||
ctx = install_mock_data(test_ctx)
|
||||
r = LocaleRecommender(ctx)
|
||||
with mock_locale_data(test_ctx):
|
||||
r = LocaleRecommender(test_ctx)
|
||||
|
||||
# Test that we can't recommend if we have not enough client info.
|
||||
assert not r.can_recommend({})
|
||||
assert not r.can_recommend({"locale": []})
|
||||
# Test that we can't recommend if we have not enough client info.
|
||||
assert not r.can_recommend({})
|
||||
assert not r.can_recommend({"locale": []})
|
||||
|
||||
# Check that we can recommend if the user has at least an addon.
|
||||
assert r.can_recommend({"locale": "en"})
|
||||
# Check that we can recommend if the user has at least an addon.
|
||||
assert r.can_recommend({"locale": "en"})
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_can_recommend_no_model(test_ctx):
|
||||
ctx = install_mock_data(test_ctx)
|
||||
r = LocaleRecommender(ctx)
|
||||
with mock_locale_data(test_ctx):
|
||||
r = LocaleRecommender(test_ctx)
|
||||
|
||||
# We should never be able to recommend if something went
|
||||
# wrong with the model.
|
||||
assert not r.can_recommend({})
|
||||
assert not r.can_recommend({"locale": []})
|
||||
assert not r.can_recommend({"locale": "it"})
|
||||
# We should never be able to recommend if something went
|
||||
# wrong with the model.
|
||||
assert not r.can_recommend({})
|
||||
assert not r.can_recommend({"locale": []})
|
||||
assert not r.can_recommend({"locale": "it"})
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_recommendations(test_ctx):
|
||||
"""Test that the locale recommender returns the correct
|
||||
locale dependent addons.
|
||||
|
@ -71,27 +111,26 @@ def test_recommendations(test_ctx):
|
|||
of (GUID, weight).
|
||||
"""
|
||||
with MetricsMock() as mm:
|
||||
ctx = install_mock_data(test_ctx)
|
||||
r = LocaleRecommender(ctx)
|
||||
recommendations = r.recommend({"locale": "en"}, 10)
|
||||
with mock_locale_data(test_ctx):
|
||||
r = LocaleRecommender(test_ctx)
|
||||
|
||||
# Make sure the structure of the recommendations is correct and that we
|
||||
# recommended the right addon.
|
||||
assert isinstance(recommendations, list)
|
||||
assert len(recommendations) == len(FAKE_LOCALE_DATA["en"])
|
||||
recommendations = r.recommend({"locale": "en"}, 10)
|
||||
|
||||
# Make sure that the reported addons are the one from the fake data.
|
||||
for (addon_id, weight), (expected_id, expected_weight) in zip(
|
||||
recommendations, FAKE_LOCALE_DATA["en"]
|
||||
):
|
||||
assert addon_id == expected_id
|
||||
assert weight == expected_weight
|
||||
# Make sure the structure of the recommendations is correct and that we
|
||||
# recommended the right addon.
|
||||
assert isinstance(recommendations, list)
|
||||
assert len(recommendations) == len(FAKE_LOCALE_DATA["en"])
|
||||
|
||||
assert mm.has_record(TIMING, "taar.locale")
|
||||
assert mm.has_record(TIMING, "taar.locale_recommend")
|
||||
# Make sure that the reported addons are the one from the fake data.
|
||||
for (addon_id, weight), (expected_id, expected_weight) in zip(
|
||||
recommendations, FAKE_LOCALE_DATA["en"]
|
||||
):
|
||||
assert addon_id == expected_id
|
||||
assert weight == expected_weight
|
||||
|
||||
assert mm.has_record(TIMING, "taar.locale_recommend")
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_recommender_extra_data(test_ctx):
|
||||
# Test that the recommender uses locale data from the "extra"
|
||||
# section if available.
|
||||
|
@ -109,11 +148,13 @@ def test_recommender_extra_data(test_ctx):
|
|||
assert addon_id == expected_id
|
||||
assert weight == expected_weight
|
||||
|
||||
ctx = install_mock_data(test_ctx)
|
||||
r = LocaleRecommender(ctx)
|
||||
recommendations = r.recommend({}, 10, extra_data={"locale": "en"})
|
||||
validate_recommendations(recommendations, "en")
|
||||
with mock_locale_data(test_ctx):
|
||||
r = LocaleRecommender(test_ctx)
|
||||
recommendations = r.recommend({}, 10, extra_data={"locale": "en"})
|
||||
validate_recommendations(recommendations, "en")
|
||||
|
||||
# Make sure that we favour client data over the extra data.
|
||||
recommendations = r.recommend({"locale": "en"}, 10, extra_data={"locale": "te-ST"})
|
||||
validate_recommendations(recommendations, "en")
|
||||
# Make sure that we favour client data over the extra data.
|
||||
recommendations = r.recommend(
|
||||
{"locale": "en"}, 10, extra_data={"locale": "te-ST"}
|
||||
)
|
||||
validate_recommendations(recommendations, "en")
|
||||
|
|
|
@ -52,10 +52,7 @@ MOCK_DATA = {
"tab_open_count": 46,
"total_uri": 791,
"unique_tlds": 11,
"installed_addons": [
"e10srollout@mozilla.org",
"firefox@getpocket.com",
],
"installed_addons": ["e10srollout@mozilla.org", "firefox@getpocket.com",],
"locale": "it-IT",
},
}
|
||||
|
|
|
@ -2,20 +2,17 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import boto3
import json
from moto import mock_s3
from taar.recommenders import RecommendationManager
from taar.recommenders.base_recommender import AbstractRecommender

from taar.recommenders.ensemble_recommender import (
TAAR_ENSEMBLE_BUCKET,
TAAR_ENSEMBLE_KEY,
from .noop_fixtures import (
noop_taarlocale_dataload,
noop_taarcollab_dataload,
noop_taarsimilarity_dataload,
noop_taarlite_dataload,
)

from .mocks import MockRecommenderFactory
from .test_hybrid_recommender import install_mock_curated_data

import operator
from functools import reduce
|
||||
|
@ -23,6 +20,72 @@ from functools import reduce
|
|||
from markus import TIMING
|
||||
from markus.testing import MetricsMock
|
||||
|
||||
import mock
|
||||
import contextlib
|
||||
import fakeredis
|
||||
from taar.recommenders.redis_cache import TAARCache
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def mock_install_mock_curated_data(ctx):
|
||||
mock_data = []
|
||||
for i in range(20):
|
||||
mock_data.append(str(i) * 16)
|
||||
|
||||
mock_ensemble_weights = {
|
||||
"ensemble_weights": {"collaborative": 1000, "similarity": 100, "locale": 10,}
|
||||
}
|
||||
|
||||
with contextlib.ExitStack() as stack:
|
||||
TAARCache._instance = None
|
||||
|
||||
stack.enter_context(
|
||||
mock.patch.object(TAARCache, "_fetch_whitelist", return_value=mock_data)
|
||||
)
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache,
|
||||
"_fetch_ensemble_weights",
|
||||
return_value=mock_ensemble_weights,
|
||||
)
|
||||
)
|
||||
|
||||
stack = noop_taarlite_dataload(stack)
|
||||
stack = noop_taarcollab_dataload(stack)
|
||||
stack = noop_taarlocale_dataload(stack)
|
||||
stack = noop_taarsimilarity_dataload(stack)
|
||||
|
||||
stack.enter_context(
|
||||
mock.patch.object(TAARCache, "_fetch_whitelist", return_value=mock_data)
|
||||
)
|
||||
|
||||
# Patch fakeredis in
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache,
|
||||
"init_redis_connections",
|
||||
return_value={
|
||||
0: fakeredis.FakeStrictRedis(db=0),
|
||||
1: fakeredis.FakeStrictRedis(db=1),
|
||||
2: fakeredis.FakeStrictRedis(db=2),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
class DefaultMockProfileFetcher:
|
||||
def get(self, client_id):
|
||||
return {"client_id": client_id}
|
||||
|
||||
mock_fetcher = DefaultMockProfileFetcher()
|
||||
|
||||
ctx["profile_fetcher"] = mock_fetcher
|
||||
ctx["recommender_factory"] = MockRecommenderFactory()
|
||||
|
||||
# Initialize redis
|
||||
TAARCache.get_instance(ctx).safe_load_data()
|
||||
|
||||
yield stack
|
||||
|
||||
|
||||
class StubRecommender(AbstractRecommender):
|
||||
""" A shared, stub recommender that can be used for testing.
|
||||
|
@ -39,129 +102,94 @@ class StubRecommender(AbstractRecommender):
|
|||
return self._recommendations
|
||||
|
||||
|
||||
def install_mocks(ctx, mock_fetcher=None):
|
||||
ctx = ctx.child()
|
||||
|
||||
class DefaultMockProfileFetcher:
|
||||
def get(self, client_id):
|
||||
return {"client_id": client_id}
|
||||
|
||||
if mock_fetcher is None:
|
||||
mock_fetcher = DefaultMockProfileFetcher()
|
||||
|
||||
ctx["profile_fetcher"] = mock_fetcher
|
||||
ctx["recommender_factory"] = MockRecommenderFactory()
|
||||
|
||||
DATA = {
|
||||
"ensemble_weights": {"collaborative": 1000, "similarity": 100, "locale": 10,}
|
||||
}
|
||||
|
||||
conn = boto3.resource("s3", region_name="us-west-2")
|
||||
conn.create_bucket(Bucket=TAAR_ENSEMBLE_BUCKET)
|
||||
conn.Object(TAAR_ENSEMBLE_BUCKET, TAAR_ENSEMBLE_KEY).put(Body=json.dumps(DATA))
|
||||
|
||||
return ctx
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_none_profile_returns_empty_list(test_ctx):
|
||||
ctx = install_mocks(test_ctx)
|
||||
with mock_install_mock_curated_data(test_ctx):
|
||||
|
||||
class MockProfileFetcher:
|
||||
def get(self, client_id):
|
||||
return None
|
||||
class MockProfileFetcher:
|
||||
def get(self, client_id):
|
||||
return None
|
||||
|
||||
ctx["profile_fetcher"] = MockProfileFetcher()
|
||||
test_ctx["profile_fetcher"] = MockProfileFetcher()
|
||||
|
||||
rec_manager = RecommendationManager(ctx)
|
||||
assert rec_manager.recommend("random-client-id", 10) == []
|
||||
rec_manager = RecommendationManager(test_ctx)
|
||||
assert rec_manager.recommend("random-client-id", 10) == []
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_simple_recommendation(test_ctx):
|
||||
ctx = install_mocks(test_ctx)
|
||||
with mock_install_mock_curated_data(test_ctx):
|
||||
|
||||
EXPECTED_RESULTS = [
|
||||
("ghi", 3430.0),
|
||||
("def", 3320.0),
|
||||
("ijk", 3200.0),
|
||||
("hij", 3100.0),
|
||||
("lmn", 420.0),
|
||||
("klm", 409.99999999999994),
|
||||
("jkl", 400.0),
|
||||
("abc", 23.0),
|
||||
("fgh", 22.0),
|
||||
("efg", 21.0),
|
||||
]
|
||||
EXPECTED_RESULTS = [
|
||||
("ghi", 3430.0),
|
||||
("def", 3320.0),
|
||||
("ijk", 3200.0),
|
||||
("hij", 3100.0),
|
||||
("lmn", 420.0),
|
||||
("klm", 409.99999999999994),
|
||||
("jkl", 400.0),
|
||||
("abc", 23.0),
|
||||
("fgh", 22.0),
|
||||
("efg", 21.0),
|
||||
]
|
||||
|
||||
with MetricsMock() as mm:
|
||||
manager = RecommendationManager(ctx.child())
|
||||
recommendation_list = manager.recommend("some_ignored_id", 10)
|
||||
with MetricsMock() as mm:
|
||||
manager = RecommendationManager(test_ctx)
|
||||
recommendation_list = manager.recommend("some_ignored_id", 10)
|
||||
|
||||
assert isinstance(recommendation_list, list)
|
||||
assert recommendation_list == EXPECTED_RESULTS
|
||||
assert isinstance(recommendation_list, list)
|
||||
assert recommendation_list == EXPECTED_RESULTS
|
||||
|
||||
assert mm.has_record(TIMING, stat="taar.ensemble")
|
||||
assert mm.has_record(TIMING, stat="taar.profile_recommendation")
|
||||
assert mm.has_record(TIMING, stat="taar.profile_recommendation")
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_fixed_client_id_valid(test_ctx):
|
||||
ctx = install_mocks(test_ctx)
|
||||
ctx = install_mock_curated_data(ctx)
|
||||
|
||||
manager = RecommendationManager(ctx.child())
|
||||
recommendation_list = manager.recommend("111111", 10)
|
||||
|
||||
assert len(recommendation_list) == 10
|
||||
with mock_install_mock_curated_data(test_ctx):
|
||||
manager = RecommendationManager(test_ctx)
|
||||
recommendation_list = manager.recommend("111111", 10)
|
||||
assert len(recommendation_list) == 10
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_fixed_client_id_empty_list(test_ctx):
|
||||
class NoClientFetcher:
|
||||
def get(self, client_id):
|
||||
return None
|
||||
|
||||
ctx = install_mocks(test_ctx, mock_fetcher=NoClientFetcher())
|
||||
with mock_install_mock_curated_data(test_ctx):
|
||||
test_ctx["profile_fetcher"] = NoClientFetcher()
|
||||
|
||||
ctx = install_mock_curated_data(ctx)
|
||||
manager = RecommendationManager(test_ctx)
|
||||
recommendation_list = manager.recommend("not_a_real_client_id", 10)
|
||||
|
||||
manager = RecommendationManager(ctx.child())
|
||||
recommendation_list = manager.recommend("not_a_real_client_id", 10)
|
||||
|
||||
assert len(recommendation_list) == 0
|
||||
assert len(recommendation_list) == 0
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_experimental_randomization(test_ctx):
|
||||
ctx = install_mocks(test_ctx)
|
||||
ctx = install_mock_curated_data(ctx)
|
||||
with mock_install_mock_curated_data(test_ctx):
|
||||
|
||||
manager = RecommendationManager(ctx.child())
|
||||
raw_list = manager.recommend("111111", 10)
|
||||
manager = RecommendationManager(test_ctx)
|
||||
raw_list = manager.recommend("111111", 10)
|
||||
|
||||
# Clobber the experiment probability to be 100% to force a
|
||||
# reordering.
|
||||
ctx["TAAR_EXPERIMENT_PROB"] = 1.0
|
||||
# Clobber the experiment probability to be 100% to force a
|
||||
# reordering.
|
||||
test_ctx["TAAR_EXPERIMENT_PROB"] = 1.0
|
||||
|
||||
manager = RecommendationManager(ctx.child())
|
||||
rand_list = manager.recommend("111111", 10)
|
||||
manager = RecommendationManager(test_ctx)
|
||||
rand_list = manager.recommend("111111", 10)
|
||||
|
||||
"""
|
||||
The two lists should be :
|
||||
"""
|
||||
The two lists should be :
|
||||
|
||||
* different (guid, weight) lists (possibly just order)
|
||||
* same length
|
||||
"""
|
||||
assert (
|
||||
reduce(
|
||||
operator.and_,
|
||||
[
|
||||
(t1[0] == t2[0] and t1[1] == t2[1])
|
||||
for t1, t2 in zip(rand_list, raw_list)
|
||||
],
|
||||
* different (guid, weight) lists (possibly just order)
|
||||
* same length
|
||||
"""
|
||||
assert (
|
||||
reduce(
|
||||
operator.and_,
|
||||
[
|
||||
(t1[0] == t2[0] and t1[1] == t2[1])
|
||||
for t1, t2 in zip(rand_list, raw_list)
|
||||
],
|
||||
)
|
||||
is False
|
||||
)
|
||||
is False
|
||||
)
|
||||
|
||||
assert len(rand_list) == len(raw_list)
|
||||
assert len(rand_list) == len(raw_list)
|
||||
|
|
|
@ -6,12 +6,9 @@ import json
import six
import logging

import numpy as np
import scipy.stats
from taar.recommenders.lazys3 import LazyJSONLoader

import boto3
from moto import mock_s3

from taar.recommenders.similarity_recommender import (
CATEGORICAL_FEATURES,

@ -25,11 +22,24 @@ from .similarity_data import CATEGORICAL_FEATURE_FIXTURE_DATA
from markus import TIMING
from markus.testing import MetricsMock

from taar.settings import (
TAAR_SIMILARITY_BUCKET,
TAAR_SIMILARITY_DONOR_KEY,
TAAR_SIMILARITY_LRCURVES_KEY,
import fakeredis
import mock
import contextlib
from .noop_fixtures import (
noop_taarcollab_dataload,
noop_taarlite_dataload,
noop_taarlocale_dataload,
noop_taarensemble_dataload,
)
from taar.recommenders.redis_cache import TAARCache

def noop_loaders(stack):
stack = noop_taarlocale_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarensemble_dataload(stack)
stack = noop_taarlite_dataload(stack)
return stack

def generate_fake_lr_curves(num_elements, ceiling=10.0):
|
||||
|
@ -68,311 +78,326 @@ def generate_a_fake_taar_client():
|
|||
}
|
||||
|
||||
|
||||
def install_no_data(ctx):
|
||||
ctx = ctx.child()
|
||||
conn = boto3.resource("s3", region_name="us-west-2")
|
||||
@contextlib.contextmanager
|
||||
def mock_install_no_data(ctx):
|
||||
|
||||
conn.create_bucket(Bucket=TAAR_SIMILARITY_BUCKET)
|
||||
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY).put(Body="")
|
||||
with contextlib.ExitStack() as stack:
|
||||
TAARCache._instance = None
|
||||
stack.enter_context(
|
||||
mock.patch.object(TAARCache, "_fetch_similarity_donors", return_value="",)
|
||||
)
|
||||
|
||||
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY).put(Body="")
|
||||
stack.enter_context(
|
||||
mock.patch.object(TAARCache, "_fetch_similarity_lrcurves", return_value="",)
|
||||
)
|
||||
|
||||
ctx["similarity_donors_pool"] = LazyJSONLoader(
|
||||
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY, "similarity_donor",
|
||||
)
|
||||
stack = noop_loaders(stack)
|
||||
|
||||
ctx["similarity_lr_curves"] = LazyJSONLoader(
|
||||
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY, "similarity_curves",
|
||||
)
|
||||
# Patch fakeredis in
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache,
|
||||
"init_redis_connections",
|
||||
return_value={
|
||||
0: fakeredis.FakeStrictRedis(db=0),
|
||||
1: fakeredis.FakeStrictRedis(db=1),
|
||||
2: fakeredis.FakeStrictRedis(db=2),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
return ctx
|
||||
# Initialize redis
|
||||
TAARCache.get_instance(ctx).safe_load_data()
|
||||
yield stack
|
||||
|
||||
|
||||
def install_categorical_data(ctx):
|
||||
ctx = ctx.child()
|
||||
conn = boto3.resource("s3", region_name="us-west-2")
|
||||
@contextlib.contextmanager
|
||||
def mock_install_categorical_data(ctx):
|
||||
|
||||
try:
|
||||
conn.create_bucket(Bucket=TAAR_SIMILARITY_BUCKET)
|
||||
except Exception:
|
||||
pass
|
||||
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY).put(
|
||||
Body=json.dumps(CATEGORICAL_FEATURE_FIXTURE_DATA)
|
||||
)
|
||||
with contextlib.ExitStack() as stack:
|
||||
TAARCache._instance = None
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache,
|
||||
"_fetch_similarity_donors",
|
||||
return_value=CATEGORICAL_FEATURE_FIXTURE_DATA,
|
||||
)
|
||||
)
|
||||
|
||||
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY).put(
|
||||
Body=json.dumps(generate_fake_lr_curves(1000))
|
||||
)
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache,
|
||||
"_fetch_similarity_lrcurves",
|
||||
return_value=generate_fake_lr_curves(1000),
|
||||
)
|
||||
)
|
||||
stack = noop_loaders(stack)
|
||||
|
||||
ctx["similarity_donors_pool"] = LazyJSONLoader(
|
||||
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY, "similarity_donor",
|
||||
)
|
||||
# Patch fakeredis in
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache,
|
||||
"init_redis_connections",
|
||||
return_value={
|
||||
0: fakeredis.FakeStrictRedis(db=0),
|
||||
1: fakeredis.FakeStrictRedis(db=1),
|
||||
2: fakeredis.FakeStrictRedis(db=2),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
ctx["similarity_lr_curves"] = LazyJSONLoader(
|
||||
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY, "similarity_curves",
|
||||
)
|
||||
|
||||
return ctx
|
||||
# Initialize redis
|
||||
TAARCache.get_instance(ctx).safe_load_data()
|
||||
yield stack
|
||||
|
||||
|
||||
def install_continuous_data(ctx):
|
||||
ctx = ctx.child()
|
||||
cts_data = json.dumps(CONTINUOUS_FEATURE_FIXTURE_DATA)
|
||||
lrs_data = json.dumps(generate_fake_lr_curves(1000))
|
||||
@contextlib.contextmanager
|
||||
def mock_install_continuous_data(ctx):
|
||||
cts_data = CONTINUOUS_FEATURE_FIXTURE_DATA
|
||||
lrs_data = generate_fake_lr_curves(1000)
|
||||
|
||||
conn = boto3.resource("s3", region_name="us-west-2")
|
||||
with contextlib.ExitStack() as stack:
|
||||
TAARCache._instance = None
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache, "_fetch_similarity_donors", return_value=cts_data,
|
||||
)
|
||||
)
|
||||
|
||||
try:
|
||||
conn.create_bucket(Bucket=TAAR_SIMILARITY_BUCKET)
|
||||
except Exception:
|
||||
pass
|
||||
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY).put(Body=cts_data)
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache, "_fetch_similarity_lrcurves", return_value=lrs_data,
|
||||
)
|
||||
)
|
||||
stack = noop_loaders(stack)
|
||||
|
||||
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY).put(Body=lrs_data)
|
||||
# Patch fakeredis in
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
TAARCache,
|
||||
"init_redis_connections",
|
||||
return_value={
|
||||
0: fakeredis.FakeStrictRedis(db=0),
|
||||
1: fakeredis.FakeStrictRedis(db=1),
|
||||
2: fakeredis.FakeStrictRedis(db=2),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
ctx["similarity_donors_pool"] = LazyJSONLoader(
|
||||
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY, "similarity_donor",
|
||||
)
|
||||
|
||||
ctx["similarity_lr_curves"] = LazyJSONLoader(
|
||||
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY, "similarity_curves",
|
||||
)
|
||||
|
||||
return ctx
|
||||
# Initialize redis
|
||||
TAARCache.get_instance(ctx).safe_load_data()
|
||||
yield stack
|
||||
|
||||
|
||||
def check_matrix_built(caplog):
|
||||
msg = "Reconstructed matrices for similarity recommender"
|
||||
return sum([msg in str(s) for s in caplog.records]) > 0
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_soft_fail(test_ctx, caplog):
|
||||
# Create a new instance of a SimilarityRecommender.
|
||||
ctx = install_no_data(test_ctx)
|
||||
r = SimilarityRecommender(ctx)
|
||||
with mock_install_no_data(test_ctx):
|
||||
r = SimilarityRecommender(test_ctx)
|
||||
|
||||
# Don't recommend if the source files cannot be found.
|
||||
assert not r.can_recommend({})
|
||||
assert not check_matrix_built(caplog)
|
||||
# Don't recommend if the source files cannot be found.
|
||||
assert not r.can_recommend({})
|
||||
|
||||
|
||||
@mock_s3
def test_can_recommend(test_ctx, caplog):
caplog.set_level(logging.INFO)

# Create a new instance of a SimilarityRecommender.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)

assert check_matrix_built(caplog)
# Test that we can't recommend if we don't have enough client info.
assert not r.can_recommend({})

# Test that we can't recommend if we don't have enough client info.
assert not r.can_recommend({})
# Test that we can recommend for a normal client.
assert r.can_recommend(generate_a_fake_taar_client())

# Test that we can recommend for a normal client.
assert r.can_recommend(generate_a_fake_taar_client())
# Check that we cannot recommend if any required client field is missing.
required_fields = CATEGORICAL_FEATURES + CONTINUOUS_FEATURES

# Check that we cannot recommend if any required client field is missing.
required_fields = CATEGORICAL_FEATURES + CONTINUOUS_FEATURES
for required_field in required_fields:
profile_without_x = generate_a_fake_taar_client()

for required_field in required_fields:
profile_without_x = generate_a_fake_taar_client()
# Set a required field in the client info dict to an empty value.
profile_without_x[required_field] = None
assert not r.can_recommend(profile_without_x)

# Set a required field in the client info dict to an empty value.
profile_without_x[required_field] = None
assert not r.can_recommend(profile_without_x)

# Completely remove (in place) the entire required field from the dict.
del profile_without_x[required_field]
assert not r.can_recommend(profile_without_x)
# Completely remove (in place) the entire required field from the dict.
del profile_without_x[required_field]
assert not r.can_recommend(profile_without_x)


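# The loop above exercises two failure modes for every entry in
# CATEGORICAL_FEATURES + CONTINUOUS_FEATURES: the field present but set to
# None, and the field deleted outright. can_recommend() must reject the
# profile in both cases.
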
@mock_s3
def test_recommendations(test_ctx):
with MetricsMock() as mm:
# Create a new instance of a SimilarityRecommender.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)

recommendation_list = r.recommend(generate_a_fake_taar_client(), 1)
recommendation_list = r.recommend(generate_a_fake_taar_client(), 1)

assert isinstance(recommendation_list, list)
assert len(recommendation_list) == 1
assert isinstance(recommendation_list, list)
assert len(recommendation_list) == 1

recommendation, weight = recommendation_list[0]
recommendation, weight = recommendation_list[0]

# Make sure that the reported addons are the expected ones from the most similar donor.
assert "{test-guid-1}" == recommendation
assert type(weight) == np.float64
# Make sure that the reported addons are the expected ones from the most similar donor.
assert "{test-guid-1}" == recommendation
assert type(weight) == np.float64

assert mm.has_record(TIMING, stat="taar.similarity_donor")
assert mm.has_record(TIMING, stat="taar.similarity_curves")
assert mm.has_record(TIMING, stat="taar.similarity_recommend")
assert mm.has_record(TIMING, stat="taar.similarity_recommend")


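# MetricsMock is assumed to be markus.testing.MetricsMock: has_record() checks
# that a metric of the given record type (TIMING) was emitted with the given
# stat name, i.e. that the donor fetch, the LR-curve fetch and the recommend()
# call were all timed.
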
@mock_s3
def test_recommender_str(test_ctx):
# Tests that the string representation of the recommender is correct.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
assert str(r) == "SimilarityRecommender"


@mock_s3
def test_get_lr(test_ctx):
# Tests that the likelihood ratio values are not empty for extreme values and are realistic.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
assert r.get_lr(0.0001) is not None
assert r.get_lr(10.0) is not None
assert r.get_lr(0.001) > r.get_lr(5.0)
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)
assert r.get_lr(0.0001) is not None
assert r.get_lr(10.0) is not None
assert r.get_lr(0.001) > r.get_lr(5.0)


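# get_lr(dist) is expected to evaluate the likelihood-ratio curve at a given
# distance: the ratio should be defined at both extremes and should shrink as
# the distance grows, which is why get_lr(0.001) must exceed get_lr(5.0).
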
@mock_s3
def test_compute_clients_dist(test_ctx):
# Test the distance function computation.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
test_clients = [
{
"client_id": "test-client-002",
"activeAddons": [],
"geo_city": "sfo-us",
"subsession_length": 1,
"locale": "en-US",
"os": "windows",
"bookmark_count": 1,
"tab_open_count": 1,
"total_uri": 1,
"unique_tlds": 1,
},
{
"client_id": "test-client-003",
"activeAddons": [],
"geo_city": "brasilia-br",
"subsession_length": 1,
"locale": "br-PT",
"os": "windows",
"bookmark_count": 10,
"tab_open_count": 1,
"total_uri": 1,
"unique_tlds": 1,
},
{
"client_id": "test-client-004",
"activeAddons": [],
"geo_city": "brasilia-br",
"subsession_length": 100,
"locale": "br-PT",
"os": "windows",
"bookmark_count": 10,
"tab_open_count": 10,
"total_uri": 100,
"unique_tlds": 10,
},
]
per_client_test = []
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)
test_clients = [
{
"client_id": "test-client-002",
"activeAddons": [],
"geo_city": "sfo-us",
"subsession_length": 1,
"locale": "en-US",
"os": "windows",
"bookmark_count": 1,
"tab_open_count": 1,
"total_uri": 1,
"unique_tlds": 1,
},
{
"client_id": "test-client-003",
"activeAddons": [],
"geo_city": "brasilia-br",
"subsession_length": 1,
"locale": "br-PT",
"os": "windows",
"bookmark_count": 10,
"tab_open_count": 1,
"total_uri": 1,
"unique_tlds": 1,
},
{
"client_id": "test-client-004",
"activeAddons": [],
"geo_city": "brasilia-br",
"subsession_length": 100,
"locale": "br-PT",
"os": "windows",
"bookmark_count": 10,
"tab_open_count": 10,
"total_uri": 100,
"unique_tlds": 10,
},
]
per_client_test = []

# Compute a different set of distances for each test client.
for tc in test_clients:
test_distances = r.compute_clients_dist(tc)
assert len(test_distances) == len(CONTINUOUS_FEATURE_FIXTURE_DATA)
per_client_test.append(test_distances[2][0])
# Compute a different set of distances for each test client.
for tc in test_clients:
test_distances = r.compute_clients_dist(tc)
assert len(test_distances) == len(CONTINUOUS_FEATURE_FIXTURE_DATA)
per_client_test.append(test_distances[2][0])

# Ensure the different clients also had different distances to a specific donor.
assert per_client_test[0] >= per_client_test[1] >= per_client_test[2]
# Ensure the different clients also had different distances to a specific donor.
assert per_client_test[0] >= per_client_test[1] >= per_client_test[2]


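# compute_clients_dist() appears to return one row per donor in the fixture
# (hence the length check against CONTINUOUS_FEATURE_FIXTURE_DATA), with the
# distance in column 0; test_distances[2][0] is the distance to the third
# donor. The three synthetic clients are ordered from least to most similar to
# that donor, so the captured distances must be non-increasing.
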
@mock_s3
def test_distance_functions(test_ctx):
# Tests the similarity functions via expected output when passing modified client data.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
# Tests the similarity functions via expected output when passing
# modified client data.
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)

# Generate a fake client.
test_client = generate_a_fake_taar_client()
recs = r.recommend(test_client, 10)
assert len(recs) > 0
# Generate a fake client.
test_client = generate_a_fake_taar_client()
recs = r.recommend(test_client, 10)
assert len(recs) > 0

# Make it a generally poor match for the donors.
test_client.update({"total_uri": 10, "bookmark_count": 2, "subsession_length": 10})
# Make it a generally poor match for the donors.
test_client.update(
{"total_uri": 10, "bookmark_count": 2, "subsession_length": 10}
)

all_client_values_zero = test_client
# Make all categorical variables non-matching with any donor.
all_client_values_zero.update(
{key: "zero" for key in test_client.keys() if key in CATEGORICAL_FEATURES}
)
recs = r.recommend(all_client_values_zero, 10)
assert len(recs) == 0
all_client_values_zero = test_client
# Make all categorical variables non-matching with any donor.
all_client_values_zero.update(
{key: "zero" for key in test_client.keys() if key in CATEGORICAL_FEATURES}
)
recs = r.recommend(all_client_values_zero, 10)
assert len(recs) == 0

# Make all continuous variables equal to zero.
all_client_values_zero.update(
{key: 0 for key in test_client.keys() if key in CONTINUOUS_FEATURES}
)
recs = r.recommend(all_client_values_zero, 10)
assert len(recs) == 0
# Make all continuous variables equal to zero.
all_client_values_zero.update(
{key: 0 for key in test_client.keys() if key in CONTINUOUS_FEATURES}
)
recs = r.recommend(all_client_values_zero, 10)
assert len(recs) == 0

# Make all categorical variables non-matching with any donor.
all_client_values_high = test_client
all_client_values_high.update(
{
key: "one billion"
for key in test_client.keys()
if key in CATEGORICAL_FEATURES
}
)
recs = r.recommend(all_client_values_high, 10)
assert len(recs) == 0
# Make all categorical variables non-matching with any donor.
all_client_values_high = test_client
all_client_values_high.update(
{
key: "one billion"
for key in test_client.keys()
if key in CATEGORICAL_FEATURES
}
)
recs = r.recommend(all_client_values_high, 10)
assert len(recs) == 0

# Make all continuous variables equal to a very high numerical value.
all_client_values_high.update(
{key: 1e60 for key in test_client.keys() if key in CONTINUOUS_FEATURES}
)
recs = r.recommend(all_client_values_high, 10)
assert len(recs) == 0
# Make all continuous variables equal to a very high numerical value.
all_client_values_high.update(
{key: 1e60 for key in test_client.keys() if key in CONTINUOUS_FEATURES}
)
recs = r.recommend(all_client_values_high, 10)
assert len(recs) == 0

# Test for 0.0 values if j_c is not normalized and j_d is fine.
j_c = 0.0
j_d = 0.42
assert abs(j_c * j_d) == 0.0
assert abs((j_c + 0.01) * j_d) != 0.0
# Test for 0.0 values if j_c is not normalized and j_d is fine.
j_c = 0.0
j_d = 0.42
assert abs(j_c * j_d) == 0.0
assert abs((j_c + 0.01) * j_d) != 0.0


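# j_c and j_d stand in for the categorical and continuous similarity
# components that the recommender presumably multiplies together: if the
# categorical part collapses to 0.0, abs(j_c * j_d) is zero regardless of how
# good the continuous match is, while any non-zero j_c keeps the product
# non-zero.
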
@mock_s3
def test_weights_continuous(test_ctx):
# Create a new instance of a SimilarityRecommender.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)

# In the ensemble method, recommendations should be a sorted list of tuples
# containing [(guid, weight), (guid, weight)... (guid, weight)].
recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)
with open("/tmp/similarity_recommender.json", "w") as fout:
fout.write(json.dumps(recommendation_list))
# In the ensemble method, recommendations should be a sorted list of tuples
# containing [(guid, weight), (guid, weight)... (guid, weight)].
recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)
with open("/tmp/similarity_recommender.json", "w") as fout:
fout.write(json.dumps(recommendation_list))

# Make sure the structure of the recommendations is correct and
# that we recommended the right addons.
# Make sure the structure of the recommendations is correct and
# that we recommended the right addons.

assert len(recommendation_list) == 2
for recommendation, weight in recommendation_list:
assert isinstance(recommendation, six.string_types)
assert isinstance(weight, float)
assert len(recommendation_list) == 2
for recommendation, weight in recommendation_list:
assert isinstance(recommendation, six.string_types)
assert isinstance(weight, float)

# Test that sorting is appropriate.
rec0 = recommendation_list[0]
rec1 = recommendation_list[1]
# Test that sorting is appropriate.
rec0 = recommendation_list[0]
rec1 = recommendation_list[1]

rec0_weight = rec0[1]
rec1_weight = rec1[1]
rec0_weight = rec0[1]
rec1_weight = rec1[1]

# Duplicate presence of test-guid-1 should mean rec0_weight is double
# rec1_weight, and both should be greater than 1.0
# Duplicate presence of test-guid-1 should mean rec0_weight is double
# rec1_weight, and both should be greater than 1.0

assert rec0_weight > rec1_weight > 1.0
assert rec0_weight > rec1_weight > 1.0


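# six.string_types keeps the GUID type check valid on both Python 2 and 3.
# The /tmp/similarity_recommender.json dump looks like a debugging aid; the
# assertions that follow are what the test actually relies on.
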
@mock_s3
def test_weights_categorical(test_ctx):
"""
This should get:

@@ -383,48 +408,24 @@ def test_weights_categorical(test_ctx):

"""
# Create a new instance of a SimilarityRecommender.
cat_ctx = install_categorical_data(test_ctx)
cts_ctx = install_continuous_data(test_ctx)
with mock_install_categorical_data(test_ctx):
r = SimilarityRecommender(test_ctx)

wrapped = cts_ctx.wrap(cat_ctx)
r = SimilarityRecommender(wrapped)
# In the ensemble method, recommendations should be a sorted list of tuples
# containing [(guid, weight), (guid, weight)... (guid, weight)].
recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)

# In the ensemble method, recommendations should be a sorted list of tuples
# containing [(guid, weight), (guid, weight)... (guid, weight)].
recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)
assert len(recommendation_list) == 2
# Make sure the structure of the recommendations is correct and that we recommended the right addons.
for recommendation, weight in recommendation_list:
assert isinstance(recommendation, six.string_types)
assert isinstance(weight, float)

assert len(recommendation_list) == 2
# Make sure the structure of the recommendations is correct and that we recommended the right addons.
for recommendation, weight in recommendation_list:
assert isinstance(recommendation, six.string_types)
assert isinstance(weight, float)
# Test that sorting is appropriate.
rec0 = recommendation_list[0]
rec1 = recommendation_list[1]

# Test that sorting is appropriate.
rec0 = recommendation_list[0]
rec1 = recommendation_list[1]
rec0_weight = rec0[1]
rec1_weight = rec1[1]

rec0_weight = rec0[1]
rec1_weight = rec1[1]

assert rec0_weight > rec1_weight > 0


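# The old test layered two fixture contexts (cts_ctx.wrap(cat_ctx)) to combine
# categorical and continuous donor data; the ported version relies on a single
# mock_install_categorical_data() context manager instead.
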
@mock_s3
def test_recompute_matrices(test_ctx, caplog):
caplog.set_level(logging.INFO)

# Create a new instance of a SimilarityRecommender.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)

# Reloading the donors pool should reconstruct the matrices
caplog.clear()
r._donors_pool.force_expiry()
r.donors_pool
assert check_matrix_built(caplog)

# Reloading the LR curves should reconstruct the matrices
caplog.clear()
r._lr_curves.force_expiry()
r.lr_curves
assert check_matrix_built(caplog)
assert rec0_weight > rec1_weight > 0
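# force_expiry() and the _donors_pool / _lr_curves attributes appear to be
# hooks on the old lazy S3 loaders, so this recompute test only applies to the
# S3-backed code path and has no redis equivalent.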