Mirror of https://github.com/mozilla/taar.git
Ported similarity recommender to use redis
Parent: fa78a731aa
Commit: d594703a1a
@@ -14,6 +14,7 @@ from taar.settings import (
    REDIS_PORT,
)


# TAARLite configuration
from taar.settings import (
    TAARLITE_GUID_COINSTALL_BUCKET,
@@ -23,14 +24,20 @@ from taar.settings import (
    TAARLITE_MUTEX_TTL,
)

# TAARLite configuration
# TAAR configuration
from taar.settings import (
    # Locale
    TAAR_LOCALE_BUCKET,
    TAAR_LOCALE_KEY,
    # Collaborative data
    TAAR_ADDON_MAPPING_BUCKET,
    TAAR_ADDON_MAPPING_KEY,
    TAAR_ITEM_MATRIX_BUCKET,
    TAAR_ITEM_MATRIX_KEY,
    # Similarity data
    TAAR_SIMILARITY_BUCKET,
    TAAR_SIMILARITY_DONOR_KEY,
    TAAR_SIMILARITY_LRCURVES_KEY,
)

from jsoncache.loader import s3_json_loader
@@ -75,6 +82,13 @@ LOCALE_DATA = "taar_locale_data|"
COLLAB_MAPPING_DATA = "taar_collab_mapping|"
COLLAB_ITEM_MATRIX = "taar_collab_item_matrix|"

SIMILARITY_DONORS = "taar_similarity_donors|"
SIMILARITY_LRCURVES = "taar_similarity_lrcurves|"

SIMILARITY_NUM_DONORS = "taar_similarity_num_donors|"
SIMILARITY_CONTINUOUS_FEATURES = "taar_similarity_continuous_features|"
SIMILARITY_CATEGORICAL_FEATURES = "taar_similarity_categorical_features|"


class PrefixStripper:
    def __init__(self, prefix, iterator, cast_to_str=False):
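
A minimal sketch (not code from the commit) of the key-prefix convention these constants follow: every dataset gets a "name|" prefix so unrelated payloads can share one redis database, and a scan over "name|*" recovers just that dataset, which is what PrefixStripper appears to iterate over. fakeredis stands in for a real client here.

import fakeredis  # assumption: in-memory stand-in for the real redis client

r = fakeredis.FakeStrictRedis()
r.set("taar_similarity_donors|", b"[]")      # whole-blob key
r.set("taar_coinstall|guid-1", b"{}")        # per-record keys under one prefix
for key in r.scan_iter(match="taar_coinstall|*"):
    # strip the prefix to recover the record id, as PrefixStripper does
    print(key[len("taar_coinstall|"):])
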
@@ -99,11 +113,27 @@ class AddonsCoinstallCache:
    GUID->GUID co-installation data
    """

    _instance = None

    @classmethod
    def get_instance(cls, ctx):
        if cls._instance is None:
            cls._instance = AddonsCoinstallCache(ctx)
        return cls._instance

    def __init__(self, ctx):
        self._ctx = ctx
        self.logger = self._ctx[IMozLogging].get_logger("taar")

        # Keep an integer handle (or None) on the last known database
        self._last_db = None

        self._similarity_num_donors = 0
        self._similarity_continuous_features = None
        self._similarity_categorical_features = None

        rcon = self.init_redis_connections()

        self._r0 = rcon[0]
        self._r1 = rcon[1]
        self._r2 = rcon[2]
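
A self-contained sketch of the accessor pattern introduced above: the class constructs itself once per process and hands the same instance to every caller, so all recommenders share one set of redis connections. The class name here is hypothetical; note that the test fixtures later in this diff reset `_instance = None` to force a rebuild.

class SingletonCache:
    _instance = None

    @classmethod
    def get_instance(cls, ctx):
        if cls._instance is None:
            cls._instance = SingletonCache(ctx)  # first caller pays the cost
        return cls._instance

    def __init__(self, ctx):
        self._ctx = ctx

shared = SingletonCache.get_instance({})
assert shared is SingletonCache.get_instance({})  # later calls reuse it
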
@@ -276,6 +306,43 @@ class AddonsCoinstallCache:
            return json.loads(tmp.decode("utf8"))
        return None

    def similarity_donors(self):
        """
        Get the taar similarity donors
        """
        tmp = self._db().get(SIMILARITY_DONORS)
        if tmp:
            return json.loads(tmp.decode("utf8"))
        return None

    def similarity_lrcurves(self):
        """
        Get the taar similarity lrcurves
        """
        tmp = self._db().get(SIMILARITY_LRCURVES)
        if tmp:
            return json.loads(tmp.decode("utf8"))
        return None

    def similarity_continuous_features(self):
        """
        precomputed similarity recommender continuous features cache
        """
        return self._similarity_continuous_features

    def similarity_categorical_features(self):
        """
        precomputed similarity recommender categorical features cache
        """
        return self._similarity_categorical_features

    @property
    def similarity_num_donors(self):
        """
        precomputed similarity recommender donor count
        """
        return self._similarity_num_donors

    """

    ################################
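
The new accessors all share one shape: read a blob from the active database, JSON-decode it, and fall back to None. A runnable sketch of that shared shape, using fakeredis as a stand-in; the `get_json` helper is hypothetical, not part of the commit.

import json
import fakeredis

def get_json(db, key):
    # bytes -> utf8 -> JSON, else None, mirroring the accessors above
    tmp = db.get(key)
    if tmp:
        return json.loads(tmp.decode("utf8"))
    return None

db = fakeredis.FakeStrictRedis()
db.set("taar_similarity_donors|", json.dumps([{"geo_city": "sfo-us"}]))
assert get_json(db, "taar_similarity_donors|") == [{"geo_city": "sfo-us"}]
assert get_json(db, "missing-key|") is None
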
@@ -290,13 +357,66 @@ class AddonsCoinstallCache:
        active redis instance
        """
        active_db = self._r0.get(ACTIVE_DB)

        if active_db is not None:
            db = int(active_db.decode("utf8"))

            if db == 1:
                return self._r1
            elif db == 2:
                return self._r2

    def _update_data_callback(self, db):
        """
        Process data that needs updating when new data is loaded
        """
        self._build_similarity_features_caches(db)

    def _build_similarity_features_caches(self, db):
        """
        This function builds two feature cache matrices and sets the
        number of donors (self.similarity_num_donors)

        That's the self.categorical_features and
        self.continuous_features attributes.

        One matrix is for the continuous features and the other is for
        the categorical features. This is needed to speed up the similarity
        recommendation process."""
        from taar.recommenders.similarity_recommender import (
            CONTINUOUS_FEATURES,
            CATEGORICAL_FEATURES,
        )

        tmp = db.get(SIMILARITY_DONORS)
        if tmp is None:
            return
        donors_pool = json.loads(tmp.decode("utf8"))

        self._similarity_num_donors = len(donors_pool)

        # Build a numpy matrix cache for the continuous features.
        continuous_features = np.zeros(
            (self.similarity_num_donors, len(CONTINUOUS_FEATURES))
        )

        for idx, d in enumerate(donors_pool):
            features = [d.get(specified_key) for specified_key in CONTINUOUS_FEATURES]
            continuous_features[idx] = features
        self._similarity_continuous_features = continuous_features

        # Build the cache for categorical features.
        categorical_features = np.zeros(
            (self.similarity_num_donors, len(CATEGORICAL_FEATURES)), dtype="object",
        )
        for idx, d in enumerate(donors_pool):
            features = [d.get(specified_key) for specified_key in CATEGORICAL_FEATURES]
            categorical_features[idx] = np.array([features], dtype="object")

        self._similarity_categorical_features = categorical_features

        self.logger.info("Reconstructed matrices for similarity recommender")

    @property
    def _ident(self):
        """ pid/thread identity """
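
A minimal sketch of the two-database swap that _db() reads from: db 0 holds only the ACTIVE_DB pointer, while db 1 and db 2 alternate between "live" and "being reloaded". A writer rebuilds the idle database, then flips the pointer so readers retarget in one step. The string value of ACTIVE_DB is an assumption here; only the constant name appears in this hunk.

import fakeredis

ACTIVE_DB = "active_db"  # assumed value; the diff only shows the constant name
r0 = fakeredis.FakeStrictRedis(db=0)  # pointer only
r1 = fakeredis.FakeStrictRedis(db=1)
r2 = fakeredis.FakeStrictRedis(db=2)

r0.set(ACTIVE_DB, 1)                      # readers resolve _db() to r1
r2.flushdb()                              # rebuild the idle database offline
r2.set("taar_similarity_donors|", b"[]")
r0.set(ACTIVE_DB, 2)                      # flip: readers now resolve to r2
assert int(r0.get(ACTIVE_DB)) == 2
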
@@ -319,6 +439,22 @@ class AddonsCoinstallCache:
    def _fetch_collaborative_item_matrix(self):
        return s3_json_loader(TAAR_ITEM_MATRIX_BUCKET, TAAR_ITEM_MATRIX_KEY)

    def _fetch_similarity_donors(self):
        return s3_json_loader(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY,)

    def _fetch_similarity_lrcurves(self):
        return s3_json_loader(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY,)

    def _update_similarity_data(self, db):
        """
        Load the TAAR similarity data
        """
        donors = self._fetch_similarity_donors()
        lrcurves = self._fetch_similarity_lrcurves()

        db.set(SIMILARITY_DONORS, json.dumps(donors))
        db.set(SIMILARITY_LRCURVES, json.dumps(lrcurves))

    def _update_collab_data(self, db):
        """
        Load the TAAR collaborative data. This is two parts: an item
@@ -445,9 +581,18 @@ class AddonsCoinstallCache:

        # Clear this database before we do anything with it
        db.flushdb()
        self._update_rank_data(db)

        # Update TAARlite
        self._update_rank_data(db)
        self._update_coinstall_data(db)

        # Update TAAR locale data
        self._update_locale_data(db)

        # Update TAAR collaborative data
        self._update_collab_data(db)

        # Update TAAR similarity data
        self._update_similarity_data(db)

        self._update_data_callback(db)
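
Taken together, the reload path runs against the inactive database: flush first, then the TAARlite, locale, collaborative, and similarity loads, and finally the callback that rebuilds the in-memory numpy matrices from the SIMILARITY_DONORS blob written one step earlier. The list below is only a condensed summary of that order, using the method names from the hunk above; it is not code from the commit.

RELOAD_STEPS = [
    "flushdb",
    "_update_rank_data",        # TAARlite ranking
    "_update_coinstall_data",   # TAARlite coinstall
    "_update_locale_data",      # TAAR locale
    "_update_collab_data",      # TAAR collaborative
    "_update_similarity_data",  # TAAR similarity (donors + lr curves)
    "_update_data_callback",    # rebuild the similarity feature matrices
]
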
@@ -7,13 +7,7 @@ from itertools import groupby
from scipy.spatial import distance
from srgutil.interfaces import IMozLogging
import numpy as np
from .lazys3 import LazyJSONLoader

from taar.settings import (
    TAAR_SIMILARITY_BUCKET,
    TAAR_SIMILARITY_DONOR_KEY,
    TAAR_SIMILARITY_LRCURVES_KEY,
)
from taar.recommenders.redis_cache import AddonsCoinstallCache

import markus
@@ -52,99 +46,29 @@ class SimilarityRecommender(AbstractRecommender):
    def __init__(self, ctx):
        self._ctx = ctx

        if "similarity_donors_pool" in self._ctx:
            self._donors_pool = self._ctx["similarity_donors_pool"]
        else:
            self._donors_pool = LazyJSONLoader(
                self._ctx,
                TAAR_SIMILARITY_BUCKET,
                TAAR_SIMILARITY_DONOR_KEY,
                "similarity_donor",
            )

        if "similarity_lr_curves" in self._ctx:
            self._lr_curves = self._ctx["similarity_lr_curves"]
        else:
            self._lr_curves = LazyJSONLoader(
                self._ctx,
                TAAR_SIMILARITY_BUCKET,
                TAAR_SIMILARITY_LRCURVES_KEY,
                "similarity_curves",
            )
        self._redis_cache = AddonsCoinstallCache.get_instance(self._ctx)

        self.logger = self._ctx[IMozLogging].get_logger("taar")

        self._init_from_ctx()
    @property
    def categorical_features(self):
        return self._redis_cache.similarity_categorical_features()

    @property
    def continuous_features(self):
        return self._redis_cache.similarity_continuous_features()

    @property
    def num_donors(self):
        return self._redis_cache.similarity_num_donors

    @property
    def donors_pool(self):
        result, status = self._donors_pool.get()
        if status:
            # Force a reconstruction of the features cache on new
            # donor pool data
            self._build_features_caches()
        return result
        return self._redis_cache.similarity_donors()

    @property
    def lr_curves(self):
        result, status = self._lr_curves.get()
        if status:
            # Force a reconstruction of the features cache on new
            # curve data
            self._build_features_caches()
        return result

    def _init_from_ctx(self):
        # Download the addon donors list.
        if self.donors_pool is None:
            self.logger.info(
                "Similarity donors pool has not been fetched from S3: {}".format(
                    TAAR_SIMILARITY_DONOR_KEY
                )
            )

        # Download the probability mapping curves from similarity to likelihood of being a good donor.
        if self.lr_curves is None:
            self.logger.error(
                "Similarity LR Curves have not been fetched from S3: {}".format(
                    TAAR_SIMILARITY_LRCURVES_KEY
                )
            )

    def _build_features_caches(self):
        """This function builds two feature cache matrices.

        That's the self.categorical_features and
        self.continuous_features attributes.

        One matrix is for the continuous features and the other is for
        the categorical features. This is needed to speed up the similarity
        recommendation process."""
        _donors_pool = self._donors_pool.get()[0]
        _lr_curves = self._lr_curves.get()[0]

        if _donors_pool is None or _lr_curves is None:
            # We need to have both donors_pool and lr_curves defined
            # to reconstruct the matrices
            return None

        self.num_donors = len(_donors_pool)

        # Build a numpy matrix cache for the continuous features.
        self.continuous_features = np.zeros((self.num_donors, len(CONTINUOUS_FEATURES)))
        for idx, d in enumerate(_donors_pool):
            features = [d.get(specified_key) for specified_key in CONTINUOUS_FEATURES]
            self.continuous_features[idx] = features

        # Build the cache for categorical features.
        self.categorical_features = np.zeros(
            (self.num_donors, len(CATEGORICAL_FEATURES)), dtype="object"
        )
        for idx, d in enumerate(_donors_pool):
            features = [d.get(specified_key) for specified_key in CATEGORICAL_FEATURES]
            self.categorical_features[idx] = np.array([features], dtype="object")

        self.logger.info("Reconstructed matrices for similarity recommender")
        return self._redis_cache.similarity_lrcurves()

    def can_recommend(self, client_data, extra_data={}):
        # We can't recommend if we don't have our data files.
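
The net effect of this hunk is that the recommender no longer owns S3 loaders or rebuilds matrices itself; every heavy artifact resolves through the shared cache. A condensed, hypothetical view of the new data path (class name invented, method names as in the diff):

from taar.recommenders.redis_cache import AddonsCoinstallCache

class SimilaritySketch:
    def __init__(self, ctx):
        self._redis_cache = AddonsCoinstallCache.get_instance(ctx)

    @property
    def donors_pool(self):
        # served from redis instead of a LazyJSONLoader S3 fetch
        return self._redis_cache.similarity_donors()

    @property
    def lr_curves(self):
        return self._redis_cache.similarity_lrcurves()
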
@@ -301,8 +225,6 @@ class SimilarityRecommender(AbstractRecommender):
            recommendations_out = self._recommend(client_data, limit, extra_data)
        except Exception as e:
            recommendations_out = []
            self._donors_pool.force_expiry()
            self._lr_curves.force_expiry()

            metrics.incr("error_similarity", value=1)
            self.logger.exception(
@@ -39,3 +39,13 @@ def noop_taarcollab_dataload(stack):
        )
    )
    return stack


def noop_taarsimilarity_dataload(stack):
    # no-op the taar similarity
    stack.enter_context(
        mock.patch.object(
            AddonsCoinstallCache, "_update_similarity_data", return_value=None
        )
    )
    return stack
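
The noop_* helpers all follow one pattern: push a mock.patch.object onto a shared ExitStack so a recommender's data load becomes a no-op for the duration of a test. A self-contained sketch of the same idea with invented names:

import contextlib
from unittest import mock

class Loader:
    def refresh(self):
        raise RuntimeError("would hit S3")

def noop_refresh(stack, target):
    # patch stays active until the ExitStack unwinds
    stack.enter_context(mock.patch.object(target, "refresh", return_value=None))
    return stack

with contextlib.ExitStack() as stack:
    noop_refresh(stack, Loader)
    assert Loader().refresh() is None  # patched for the whole with-block
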
@@ -20,8 +20,11 @@ from taar.recommenders.collaborative_recommender import positive_hash
from markus import TIMING
from markus.testing import MetricsMock

from .test_localerecommender import noop_taarlite_dataload
from .noop_fixtures import noop_taarlocale_dataload
from .noop_fixtures import (
    noop_taarlocale_dataload,
    noop_taarlite_dataload,
    noop_taarsimilarity_dataload,
)


"""
@@ -32,6 +35,13 @@ the Java hash function.
"""


def noop_other_recommenders(stack):
    stack = noop_taarlocale_dataload(stack)
    stack = noop_taarlite_dataload(stack)
    stack = noop_taarsimilarity_dataload(stack)
    return stack


@contextlib.contextmanager
def mock_install_none_mock_data(ctx):
    """
@@ -39,6 +49,8 @@ def mock_install_none_mock_data(ctx):
    we always get 404 errors.
    """
    with contextlib.ExitStack() as stack:
        AddonsCoinstallCache._instance = None

        stack.enter_context(
            mock.patch.object(
                AddonsCoinstallCache,
@@ -54,8 +66,7 @@ def mock_install_none_mock_data(ctx):
            )
        )

        stack = noop_taarlocale_dataload(stack)
        stack = noop_taarlite_dataload(stack)
        stack = noop_other_recommenders(stack)

        # Patch fakeredis in
        stack.enter_context(
@@ -71,7 +82,7 @@ def mock_install_none_mock_data(ctx):
        )

        # Initialize redis
        AddonsCoinstallCache(ctx).safe_load_data()
        AddonsCoinstallCache.get_instance(ctx).safe_load_data()
        yield stack
@@ -100,6 +111,7 @@ def mock_install_mock_data(ctx):
        fake_mapping[str(java_hash)] = addon

    with contextlib.ExitStack() as stack:
        AddonsCoinstallCache._instance = None
        stack.enter_context(
            mock.patch.object(
                AddonsCoinstallCache,
@@ -115,8 +127,7 @@ def mock_install_mock_data(ctx):
            )
        )

        stack = noop_taarlocale_dataload(stack)
        stack = noop_taarlite_dataload(stack)
        stack = noop_other_recommenders(stack)

        # Patch fakeredis in
        stack.enter_context(
@@ -132,7 +143,7 @@ def mock_install_mock_data(ctx):
        )

        # Initialize redis
        AddonsCoinstallCache(ctx).safe_load_data()
        AddonsCoinstallCache.get_instance(ctx).safe_load_data()
        yield stack
@@ -5,7 +5,11 @@ import pytest
import mock
import contextlib

from .noop_fixtures import noop_taarlocale_dataload, noop_taarcollab_dataload
from .noop_fixtures import (
    noop_taarlocale_dataload,
    noop_taarcollab_dataload,
    noop_taarsimilarity_dataload,
)

from taar.recommenders.guid_based_recommender import GuidBasedRecommender
from taar.recommenders.redis_cache import AddonsCoinstallCache
@@ -87,6 +91,8 @@ RESULTS = {
def mock_coinstall_ranking_context(ctx, mock_coinstall, mock_ranking):

    with contextlib.ExitStack() as stack:
        AddonsCoinstallCache._instance = None

        stack.enter_context(
            mock.patch.object(
                AddonsCoinstallCache, "_fetch_ranking_data", return_value=mock_ranking,
@@ -102,6 +108,7 @@ def mock_coinstall_ranking_context(ctx, mock_coinstall, mock_ranking):

        stack = noop_taarlocale_dataload(stack)
        stack = noop_taarcollab_dataload(stack)
        stack = noop_taarsimilarity_dataload(stack)

        # Patch fakeredis in
        stack.enter_context(
@@ -117,7 +124,7 @@ def mock_coinstall_ranking_context(ctx, mock_coinstall, mock_ranking):
        )

        # Initialize redis
        AddonsCoinstallCache(ctx).safe_load_data()
        AddonsCoinstallCache.get_instance(ctx).safe_load_data()
        yield stack
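
Each test context manager above does the same three things: reset the singleton, swap init_redis_connections for three fakeredis databases, and run safe_load_data() against the fakes. A trimmed, hypothetical version of that recurring setup (fixture name invented):

import contextlib
import fakeredis
import mock
from taar.recommenders.redis_cache import AddonsCoinstallCache

@contextlib.contextmanager
def fake_redis_cache(ctx):
    with contextlib.ExitStack() as stack:
        AddonsCoinstallCache._instance = None  # drop any cached singleton
        stack.enter_context(
            mock.patch.object(
                AddonsCoinstallCache,
                "init_redis_connections",
                return_value={i: fakeredis.FakeStrictRedis(db=i) for i in range(3)},
            )
        )
        AddonsCoinstallCache.get_instance(ctx).safe_load_data()
        yield stack
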
@@ -8,8 +8,11 @@ import mock
import contextlib
import fakeredis
from taar.recommenders.redis_cache import AddonsCoinstallCache
from .noop_fixtures import noop_taarcollab_dataload, noop_taarlite_dataload

from .noop_fixtures import (
    noop_taarcollab_dataload,
    noop_taarlite_dataload,
    noop_taarsimilarity_dataload,
)
import json
@@ -46,6 +49,7 @@ def install_mock_data(ctx):
@contextlib.contextmanager
def mock_locale_data(ctx):
    with contextlib.ExitStack() as stack:
        AddonsCoinstallCache._instance = None
        stack.enter_context(
            mock.patch.object(
                AddonsCoinstallCache,
@@ -56,6 +60,7 @@ def mock_locale_data(ctx):

        stack = noop_taarlite_dataload(stack)
        stack = noop_taarcollab_dataload(stack)
        stack = noop_taarsimilarity_dataload(stack)

        # Patch fakeredis in
        stack.enter_context(
@@ -71,7 +76,7 @@ def mock_locale_data(ctx):
        )

        # Initialize redis
        AddonsCoinstallCache(ctx).safe_load_data()
        AddonsCoinstallCache.get_instance(ctx).safe_load_data()
        yield stack
@@ -6,12 +6,9 @@ import json
import six
import logging


import numpy as np
import scipy.stats
from taar.recommenders.lazys3 import LazyJSONLoader

import boto3
from moto import mock_s3

from taar.recommenders.similarity_recommender import (
    CATEGORICAL_FEATURES,
@@ -25,11 +22,15 @@ from .similarity_data import CATEGORICAL_FEATURE_FIXTURE_DATA
from markus import TIMING
from markus.testing import MetricsMock

from taar.settings import (
    TAAR_SIMILARITY_BUCKET,
    TAAR_SIMILARITY_DONOR_KEY,
    TAAR_SIMILARITY_LRCURVES_KEY,
import fakeredis
import mock
import contextlib
from .noop_fixtures import (
    noop_taarcollab_dataload,
    noop_taarlite_dataload,
    noop_taarlocale_dataload,
)
from taar.recommenders.redis_cache import AddonsCoinstallCache


def generate_fake_lr_curves(num_elements, ceiling=10.0):
@@ -68,311 +69,338 @@ def generate_a_fake_taar_client():
    }


def install_no_data(ctx):
    ctx = ctx.child()
    conn = boto3.resource("s3", region_name="us-west-2")
@contextlib.contextmanager
def mock_install_no_data(ctx):

    conn.create_bucket(Bucket=TAAR_SIMILARITY_BUCKET)
    conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY).put(Body="")
    with contextlib.ExitStack() as stack:
        AddonsCoinstallCache._instance = None
        stack.enter_context(
            mock.patch.object(
                AddonsCoinstallCache, "_fetch_similarity_donors", return_value="",
            )
        )

    conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY).put(Body="")
        stack.enter_context(
            mock.patch.object(
                AddonsCoinstallCache, "_fetch_similarity_lrcurves", return_value="",
            )
        )

    ctx["similarity_donors_pool"] = LazyJSONLoader(
        ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY, "similarity_donor",
    )
        stack = noop_taarlocale_dataload(stack)
        stack = noop_taarcollab_dataload(stack)
        stack = noop_taarlite_dataload(stack)

    ctx["similarity_lr_curves"] = LazyJSONLoader(
        ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY, "similarity_curves",
    )
        # Patch fakeredis in
        stack.enter_context(
            mock.patch.object(
                AddonsCoinstallCache,
                "init_redis_connections",
                return_value={
                    0: fakeredis.FakeStrictRedis(db=0),
                    1: fakeredis.FakeStrictRedis(db=1),
                    2: fakeredis.FakeStrictRedis(db=2),
                },
            )
        )

    return ctx
        # Initialize redis
        AddonsCoinstallCache.get_instance(ctx).safe_load_data()
        yield stack


def install_categorical_data(ctx):
    ctx = ctx.child()
    conn = boto3.resource("s3", region_name="us-west-2")
@contextlib.contextmanager
def mock_install_categorical_data(ctx):

    try:
        conn.create_bucket(Bucket=TAAR_SIMILARITY_BUCKET)
    except Exception:
        pass
    conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY).put(
        Body=json.dumps(CATEGORICAL_FEATURE_FIXTURE_DATA)
    )
    with contextlib.ExitStack() as stack:
        AddonsCoinstallCache._instance = None
        stack.enter_context(
            mock.patch.object(
                AddonsCoinstallCache,
                "_fetch_similarity_donors",
                return_value=CATEGORICAL_FEATURE_FIXTURE_DATA,
            )
        )

    conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY).put(
        Body=json.dumps(generate_fake_lr_curves(1000))
    )
        stack.enter_context(
            mock.patch.object(
                AddonsCoinstallCache,
                "_fetch_similarity_lrcurves",
                return_value=generate_fake_lr_curves(1000),
            )
        )
        stack = noop_taarlocale_dataload(stack)
        stack = noop_taarcollab_dataload(stack)
        stack = noop_taarlite_dataload(stack)

    ctx["similarity_donors_pool"] = LazyJSONLoader(
        ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY, "similarity_donor",
    )
        # Patch fakeredis in
        stack.enter_context(
            mock.patch.object(
                AddonsCoinstallCache,
                "init_redis_connections",
                return_value={
                    0: fakeredis.FakeStrictRedis(db=0),
                    1: fakeredis.FakeStrictRedis(db=1),
                    2: fakeredis.FakeStrictRedis(db=2),
                },
            )
        )

    ctx["similarity_lr_curves"] = LazyJSONLoader(
        ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY, "similarity_curves",
    )

    return ctx
        # Initialize redis
        AddonsCoinstallCache.get_instance(ctx).safe_load_data()
        yield stack


def install_continuous_data(ctx):
    ctx = ctx.child()
    cts_data = json.dumps(CONTINUOUS_FEATURE_FIXTURE_DATA)
    lrs_data = json.dumps(generate_fake_lr_curves(1000))
@contextlib.contextmanager
def mock_install_continuous_data(ctx):
    cts_data = CONTINUOUS_FEATURE_FIXTURE_DATA
    lrs_data = generate_fake_lr_curves(1000)

    conn = boto3.resource("s3", region_name="us-west-2")
    with contextlib.ExitStack() as stack:
        AddonsCoinstallCache._instance = None
        stack.enter_context(
            mock.patch.object(
                AddonsCoinstallCache, "_fetch_similarity_donors", return_value=cts_data,
            )
        )

    try:
        conn.create_bucket(Bucket=TAAR_SIMILARITY_BUCKET)
    except Exception:
        pass
    conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY).put(Body=cts_data)
        stack.enter_context(
            mock.patch.object(
                AddonsCoinstallCache,
                "_fetch_similarity_lrcurves",
                return_value=lrs_data,
            )
        )
        stack = noop_taarlocale_dataload(stack)
        stack = noop_taarcollab_dataload(stack)
        stack = noop_taarlite_dataload(stack)

    conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY).put(Body=lrs_data)
        # Patch fakeredis in
        stack.enter_context(
            mock.patch.object(
                AddonsCoinstallCache,
                "init_redis_connections",
                return_value={
                    0: fakeredis.FakeStrictRedis(db=0),
                    1: fakeredis.FakeStrictRedis(db=1),
                    2: fakeredis.FakeStrictRedis(db=2),
                },
            )
        )

    ctx["similarity_donors_pool"] = LazyJSONLoader(
        ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY, "similarity_donor",
    )

    ctx["similarity_lr_curves"] = LazyJSONLoader(
        ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY, "similarity_curves",
    )

    return ctx
        # Initialize redis
        AddonsCoinstallCache.get_instance(ctx).safe_load_data()
        yield stack
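
A sketch of how the rewritten fixtures are meant to be consumed: the data now comes from the patched _fetch_* methods rather than moto/S3, so a test only needs the context manager plus the shared test_ctx fixture. This is a hypothetical test mirroring the ones below, not part of the commit.

def test_example(test_ctx):
    with mock_install_continuous_data(test_ctx):
        r = SimilarityRecommender(test_ctx)
        assert r.can_recommend(generate_a_fake_taar_client())
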

def check_matrix_built(caplog):
    msg = "Reconstructed matrices for similarity recommender"
    return sum([msg in str(s) for s in caplog.records]) > 0


@mock_s3
def test_soft_fail(test_ctx, caplog):
    # Create a new instance of a SimilarityRecommender.
    ctx = install_no_data(test_ctx)
    r = SimilarityRecommender(ctx)
    with mock_install_no_data(test_ctx):
        r = SimilarityRecommender(test_ctx)

    # Don't recommend if the source files cannot be found.
    assert not r.can_recommend({})
    assert not check_matrix_built(caplog)
        # Don't recommend if the source files cannot be found.
        assert not r.can_recommend({})


@mock_s3
def test_can_recommend(test_ctx, caplog):
    caplog.set_level(logging.INFO)

    # Create a new instance of a SimilarityRecommender.
    ctx = install_continuous_data(test_ctx)
    r = SimilarityRecommender(ctx)
    with mock_install_continuous_data(test_ctx):
        r = SimilarityRecommender(test_ctx)

    assert check_matrix_built(caplog)
        # Test that we can't recommend if we have not enough client info.
        assert not r.can_recommend({})

    # Test that we can't recommend if we have not enough client info.
    assert not r.can_recommend({})
        # Test that we can recommend for a normal client.
        assert r.can_recommend(generate_a_fake_taar_client())

    # Test that we can recommend for a normal client.
    assert r.can_recommend(generate_a_fake_taar_client())
        # Check that we can not recommend if any required client field is missing.
        required_fields = CATEGORICAL_FEATURES + CONTINUOUS_FEATURES

    # Check that we can not recommend if any required client field is missing.
    required_fields = CATEGORICAL_FEATURES + CONTINUOUS_FEATURES
        for required_field in required_fields:
            profile_without_x = generate_a_fake_taar_client()

    for required_field in required_fields:
        profile_without_x = generate_a_fake_taar_client()
            # Make an empty value in a required field in the client info dict.
            profile_without_x[required_field] = None
            assert not r.can_recommend(profile_without_x)

        # Make an empty value in a required field in the client info dict.
        profile_without_x[required_field] = None
        assert not r.can_recommend(profile_without_x)

        # Completely remove (in place) the entire required field from the dict.
        del profile_without_x[required_field]
        assert not r.can_recommend(profile_without_x)
            # Completely remove (in place) the entire required field from the dict.
            del profile_without_x[required_field]
            assert not r.can_recommend(profile_without_x)
@mock_s3
def test_recommendations(test_ctx):
    with MetricsMock() as mm:
        # Create a new instance of a SimilarityRecommender.
        ctx = install_continuous_data(test_ctx)
        r = SimilarityRecommender(ctx)
        with mock_install_continuous_data(test_ctx):
            r = SimilarityRecommender(test_ctx)

        recommendation_list = r.recommend(generate_a_fake_taar_client(), 1)
            recommendation_list = r.recommend(generate_a_fake_taar_client(), 1)

        assert isinstance(recommendation_list, list)
        assert len(recommendation_list) == 1
            assert isinstance(recommendation_list, list)
            assert len(recommendation_list) == 1

        recommendation, weight = recommendation_list[0]
            recommendation, weight = recommendation_list[0]

        # Make sure that the reported addons are the expected ones from the most similar donor.
        assert "{test-guid-1}" == recommendation
        assert type(weight) == np.float64
            # Make sure that the reported addons are the expected ones from the most similar donor.
            assert "{test-guid-1}" == recommendation
            assert type(weight) == np.float64

        assert mm.has_record(TIMING, stat="taar.similarity_donor")
        assert mm.has_record(TIMING, stat="taar.similarity_curves")
        assert mm.has_record(TIMING, stat="taar.similarity_recommend")
            assert mm.has_record(TIMING, stat="taar.similarity_recommend")


@mock_s3
def test_recommender_str(test_ctx):
    # Tests that the string representation of the recommender is correct.
    ctx = install_continuous_data(test_ctx)
    r = SimilarityRecommender(ctx)
    assert str(r) == "SimilarityRecommender"


@mock_s3
def test_get_lr(test_ctx):
    # Tests that the likelihood ratio values are not empty for extreme values and are realistic.
    ctx = install_continuous_data(test_ctx)
    r = SimilarityRecommender(ctx)
    assert r.get_lr(0.0001) is not None
    assert r.get_lr(10.0) is not None
    assert r.get_lr(0.001) > r.get_lr(5.0)
    with mock_install_continuous_data(test_ctx):
        r = SimilarityRecommender(test_ctx)
        assert r.get_lr(0.0001) is not None
        assert r.get_lr(10.0) is not None
        assert r.get_lr(0.001) > r.get_lr(5.0)
@mock_s3
def test_compute_clients_dist(test_ctx):
    # Test the distance function computation.
    ctx = install_continuous_data(test_ctx)
    r = SimilarityRecommender(ctx)
    test_clients = [
        {
            "client_id": "test-client-002",
            "activeAddons": [],
            "geo_city": "sfo-us",
            "subsession_length": 1,
            "locale": "en-US",
            "os": "windows",
            "bookmark_count": 1,
            "tab_open_count": 1,
            "total_uri": 1,
            "unique_tlds": 1,
        },
        {
            "client_id": "test-client-003",
            "activeAddons": [],
            "geo_city": "brasilia-br",
            "subsession_length": 1,
            "locale": "br-PT",
            "os": "windows",
            "bookmark_count": 10,
            "tab_open_count": 1,
            "total_uri": 1,
            "unique_tlds": 1,
        },
        {
            "client_id": "test-client-004",
            "activeAddons": [],
            "geo_city": "brasilia-br",
            "subsession_length": 100,
            "locale": "br-PT",
            "os": "windows",
            "bookmark_count": 10,
            "tab_open_count": 10,
            "total_uri": 100,
            "unique_tlds": 10,
        },
    ]
    per_client_test = []
    with mock_install_continuous_data(test_ctx):
        r = SimilarityRecommender(test_ctx)
        test_clients = [
            {
                "client_id": "test-client-002",
                "activeAddons": [],
                "geo_city": "sfo-us",
                "subsession_length": 1,
                "locale": "en-US",
                "os": "windows",
                "bookmark_count": 1,
                "tab_open_count": 1,
                "total_uri": 1,
                "unique_tlds": 1,
            },
            {
                "client_id": "test-client-003",
                "activeAddons": [],
                "geo_city": "brasilia-br",
                "subsession_length": 1,
                "locale": "br-PT",
                "os": "windows",
                "bookmark_count": 10,
                "tab_open_count": 1,
                "total_uri": 1,
                "unique_tlds": 1,
            },
            {
                "client_id": "test-client-004",
                "activeAddons": [],
                "geo_city": "brasilia-br",
                "subsession_length": 100,
                "locale": "br-PT",
                "os": "windows",
                "bookmark_count": 10,
                "tab_open_count": 10,
                "total_uri": 100,
                "unique_tlds": 10,
            },
        ]
        per_client_test = []

    # Compute a different set of distances for each set of clients.
    for tc in test_clients:
        test_distances = r.compute_clients_dist(tc)
        assert len(test_distances) == len(CONTINUOUS_FEATURE_FIXTURE_DATA)
        per_client_test.append(test_distances[2][0])
        # Compute a different set of distances for each set of clients.
        for tc in test_clients:
            test_distances = r.compute_clients_dist(tc)
            assert len(test_distances) == len(CONTINUOUS_FEATURE_FIXTURE_DATA)
            per_client_test.append(test_distances[2][0])

    # Ensure the different clients also had different distances to a specific donor.
    assert per_client_test[0] >= per_client_test[1] >= per_client_test[2]
        # Ensure the different clients also had different distances to a specific donor.
        assert per_client_test[0] >= per_client_test[1] >= per_client_test[2]
@mock_s3
def test_distance_functions(test_ctx):
    # Tests the similarity functions via expected output when passing modified client data.
    ctx = install_continuous_data(test_ctx)
    r = SimilarityRecommender(ctx)
    # Tests the similarity functions via expected output when passing
    # modified client data.
    with mock_install_continuous_data(test_ctx):
        r = SimilarityRecommender(test_ctx)

    # Generate a fake client.
    test_client = generate_a_fake_taar_client()
    recs = r.recommend(test_client, 10)
    assert len(recs) > 0
        # Generate a fake client.
        test_client = generate_a_fake_taar_client()
        recs = r.recommend(test_client, 10)
        assert len(recs) > 0

    # Make it a generally poor match for the donors.
    test_client.update({"total_uri": 10, "bookmark_count": 2, "subsession_length": 10})
        # Make it a generally poor match for the donors.
        test_client.update(
            {"total_uri": 10, "bookmark_count": 2, "subsession_length": 10}
        )

    all_client_values_zero = test_client
    # Make all categorical variables non-matching with any donor.
    all_client_values_zero.update(
        {key: "zero" for key in test_client.keys() if key in CATEGORICAL_FEATURES}
    )
    recs = r.recommend(all_client_values_zero, 10)
    assert len(recs) == 0
        all_client_values_zero = test_client
        # Make all categorical variables non-matching with any donor.
        all_client_values_zero.update(
            {key: "zero" for key in test_client.keys() if key in CATEGORICAL_FEATURES}
        )
        recs = r.recommend(all_client_values_zero, 10)
        assert len(recs) == 0

    # Make all continuous variables equal to zero.
    all_client_values_zero.update(
        {key: 0 for key in test_client.keys() if key in CONTINUOUS_FEATURES}
    )
    recs = r.recommend(all_client_values_zero, 10)
    assert len(recs) == 0
        # Make all continuous variables equal to zero.
        all_client_values_zero.update(
            {key: 0 for key in test_client.keys() if key in CONTINUOUS_FEATURES}
        )
        recs = r.recommend(all_client_values_zero, 10)
        assert len(recs) == 0

    # Make all categorical variables non-matching with any donor.
    all_client_values_high = test_client
    all_client_values_high.update(
        {
            key: "one billion"
            for key in test_client.keys()
            if key in CATEGORICAL_FEATURES
        }
    )
    recs = r.recommend(all_client_values_high, 10)
    assert len(recs) == 0
        # Make all categorical variables non-matching with any donor.
        all_client_values_high = test_client
        all_client_values_high.update(
            {
                key: "one billion"
                for key in test_client.keys()
                if key in CATEGORICAL_FEATURES
            }
        )
        recs = r.recommend(all_client_values_high, 10)
        assert len(recs) == 0

    # Make all continuous variables equal to a very high numerical value.
    all_client_values_high.update(
        {key: 1e60 for key in test_client.keys() if key in CONTINUOUS_FEATURES}
    )
    recs = r.recommend(all_client_values_high, 10)
    assert len(recs) == 0
        # Make all continuous variables equal to a very high numerical value.
        all_client_values_high.update(
            {key: 1e60 for key in test_client.keys() if key in CONTINUOUS_FEATURES}
        )
        recs = r.recommend(all_client_values_high, 10)
        assert len(recs) == 0

    # Test for 0.0 values if j_c is not normalized and j_d is fine.
    j_c = 0.0
    j_d = 0.42
    assert abs(j_c * j_d) == 0.0
    assert abs((j_c + 0.01) * j_d) != 0.0
        # Test for 0.0 values if j_c is not normalized and j_d is fine.
        j_c = 0.0
        j_d = 0.42
        assert abs(j_c * j_d) == 0.0
        assert abs((j_c + 0.01) * j_d) != 0.0
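
For context on the j_c/j_d assertions above: the similarity recommender imports scipy.spatial.distance, and my reading of these tests is that categorical features are compared with a Hamming distance (j_c) and continuous features with a Canberra distance (j_d), so a fully non-matching categorical profile zeroes the combined score. A minimal, runnable illustration under that assumption:

import numpy as np
from scipy.spatial import distance

donor_cat = np.array(["windows", "en-US"], dtype="object")
client_cat = np.array(["windows", "zero"], dtype="object")
j_c = distance.hamming(donor_cat, client_cat)     # 0.5: half the features disagree
j_d = distance.canberra(np.array([100.0]), np.array([10.0]))
print(j_c, j_d)  # if j_c were 1.0 (no overlap), j_c * j_d style scores collapse to 0
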

@mock_s3
def test_weights_continuous(test_ctx):
    # Create a new instance of a SimilarityRecommender.
    ctx = install_continuous_data(test_ctx)
    r = SimilarityRecommender(ctx)
    with mock_install_continuous_data(test_ctx):
        r = SimilarityRecommender(test_ctx)

    # In the ensemble method recommendations should be a sorted list of tuples
    # containing [(guid, weight), (guid, weight)... (guid, weight)].
    recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)
    with open("/tmp/similarity_recommender.json", "w") as fout:
        fout.write(json.dumps(recommendation_list))
        # In the ensemble method recommendations should be a sorted list of tuples
        # containing [(guid, weight), (guid, weight)... (guid, weight)].
        recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)
        with open("/tmp/similarity_recommender.json", "w") as fout:
            fout.write(json.dumps(recommendation_list))

    # Make sure the structure of the recommendations is correct and
    # that we recommended the right addons.
        # Make sure the structure of the recommendations is correct and
        # that we recommended the right addons.

    assert len(recommendation_list) == 2
    for recommendation, weight in recommendation_list:
        assert isinstance(recommendation, six.string_types)
        assert isinstance(weight, float)
        assert len(recommendation_list) == 2
        for recommendation, weight in recommendation_list:
            assert isinstance(recommendation, six.string_types)
            assert isinstance(weight, float)

    # Test that sorting is appropriate.
    rec0 = recommendation_list[0]
    rec1 = recommendation_list[1]
        # Test that sorting is appropriate.
        rec0 = recommendation_list[0]
        rec1 = recommendation_list[1]

    rec0_weight = rec0[1]
    rec1_weight = rec1[1]
        rec0_weight = rec0[1]
        rec1_weight = rec1[1]

    # Duplicate presence of test-guid-1 should mean rec0_weight is double
    # rec1_weight, and both should be greater than 1.0
        # Duplicate presence of test-guid-1 should mean rec0_weight is double
        # rec1_weight, and both should be greater than 1.0

    assert rec0_weight > rec1_weight > 1.0
        assert rec0_weight > rec1_weight > 1.0


@mock_s3
def test_weights_categorical(test_ctx):
    """
    This should get :
@@ -383,48 +411,24 @@ def test_weights_categorical(test_ctx):

    """
    # Create a new instance of a SimilarityRecommender.
    cat_ctx = install_categorical_data(test_ctx)
    cts_ctx = install_continuous_data(test_ctx)
    with mock_install_categorical_data(test_ctx):
        r = SimilarityRecommender(test_ctx)

    wrapped = cts_ctx.wrap(cat_ctx)
    r = SimilarityRecommender(wrapped)
        # In the ensemble method recommendations should be a sorted list of tuples
        # containing [(guid, weight), (guid, weight)... (guid, weight)].
        recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)

    # In the ensemble method recommendations should be a sorted list of tuples
    # containing [(guid, weight), (guid, weight)... (guid, weight)].
    recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)
        assert len(recommendation_list) == 2
        # Make sure the structure of the recommendations is correct and that we recommended the right addons.
        for recommendation, weight in recommendation_list:
            assert isinstance(recommendation, six.string_types)
            assert isinstance(weight, float)

    assert len(recommendation_list) == 2
    # Make sure the structure of the recommendations is correct and that we recommended the right addons.
    for recommendation, weight in recommendation_list:
        assert isinstance(recommendation, six.string_types)
        assert isinstance(weight, float)
        # Test that sorting is appropriate.
        rec0 = recommendation_list[0]
        rec1 = recommendation_list[1]

    # Test that sorting is appropriate.
    rec0 = recommendation_list[0]
    rec1 = recommendation_list[1]
        rec0_weight = rec0[1]
        rec1_weight = rec1[1]

    rec0_weight = rec0[1]
    rec1_weight = rec1[1]

        assert rec0_weight > rec1_weight > 0


@mock_s3
def test_recompute_matrices(test_ctx, caplog):
    caplog.set_level(logging.INFO)

    # Create a new instance of a SimilarityRecommender.
    ctx = install_continuous_data(test_ctx)
    r = SimilarityRecommender(ctx)

    # Reloading the donors pool should reconstruct the matrices
    caplog.clear()
    r._donors_pool.force_expiry()
    r.donors_pool
    assert check_matrix_built(caplog)

    # Reloading the LR curves should reconstruct the matrices
    caplog.clear()
    r._lr_curves.force_expiry()
    r.lr_curves
    assert check_matrix_built(caplog)
    assert rec0_weight > rec1_weight > 0