Ported similarity recommender to use Redis

Victor Ng 2020-09-01 19:13:48 -04:00
Parent fa78a731aa
Commit d594703a1a
7 changed files with 488 additions and 384 deletions

View file

@@ -14,6 +14,7 @@ from taar.settings import (
REDIS_PORT,
)
# TAARLite configuration
from taar.settings import (
TAARLITE_GUID_COINSTALL_BUCKET,
@@ -23,14 +24,20 @@ from taar.settings import (
TAARLITE_MUTEX_TTL,
)
# TAARLite configuration
# TAAR configuration
from taar.settings import (
# Locale
TAAR_LOCALE_BUCKET,
TAAR_LOCALE_KEY,
# Collaborative data
TAAR_ADDON_MAPPING_BUCKET,
TAAR_ADDON_MAPPING_KEY,
TAAR_ITEM_MATRIX_BUCKET,
TAAR_ITEM_MATRIX_KEY,
# Similarity data
TAAR_SIMILARITY_BUCKET,
TAAR_SIMILARITY_DONOR_KEY,
TAAR_SIMILARITY_LRCURVES_KEY,
)
from jsoncache.loader import s3_json_loader
@@ -75,6 +82,13 @@ LOCALE_DATA = "taar_locale_data|"
COLLAB_MAPPING_DATA = "taar_collab_mapping|"
COLLAB_ITEM_MATRIX = "taar_collab_item_matrix|"
SIMILARITY_DONORS = "taar_similarity_donors|"
SIMILARITY_LRCURVES = "taar_similarity_lrcurves|"
SIMILARITY_NUM_DONORS = "taar_similarity_num_donors|"
SIMILARITY_CONTINUOUS_FEATURES = "taar_similarity_continuous_features|"
SIMILARITY_CATEGORICAL_FEATURES = "taar_similarity_categorical_features|"
class PrefixStripper:
def __init__(self, prefix, iterator, cast_to_str=False):
@@ -99,11 +113,27 @@ class AddonsCoinstallCache:
GUID->GUID co-installation data
"""
_instance = None
@classmethod
def get_instance(cls, ctx):
if cls._instance is None:
cls._instance = AddonsCoinstallCache(ctx)
return cls._instance
def __init__(self, ctx):
self._ctx = ctx
self.logger = self._ctx[IMozLogging].get_logger("taar")
# Keep an integer handle (or None) on the last known database
self._last_db = None
self._similarity_num_donors = 0
self._similarity_continuous_features = None
self._similarity_categorical_features = None
rcon = self.init_redis_connections()
self._r0 = rcon[0]
self._r1 = rcon[1]
self._r2 = rcon[2]
@@ -276,6 +306,43 @@ class AddonsCoinstallCache:
return json.loads(tmp.decode("utf8"))
return None
def similarity_donors(self):
"""
Get the taar similarity donors
"""
tmp = self._db().get(SIMILARITY_DONORS)
if tmp:
return json.loads(tmp.decode("utf8"))
return None
def similarity_lrcurves(self):
"""
Get the taar similarity lrcurves
"""
tmp = self._db().get(SIMILARITY_LRCURVES)
if tmp:
return json.loads(tmp.decode("utf8"))
return None
def similarity_continuous_features(self):
"""
precomputed similarity recommender continuous features cache
"""
return self._similarity_continuous_features
def similarity_categorical_features(self):
"""
precomputed similarity recommender categorical features cache
"""
return self._similarity_categorical_features
@property
def similarity_num_donors(self):
"""
precomputed count of donors in the similarity donor pool
"""
return self._similarity_num_donors
"""
################################
@@ -290,13 +357,66 @@ class AddonsCoinstallCache:
active redis instance
"""
active_db = self._r0.get(ACTIVE_DB)
if active_db is not None:
db = int(active_db.decode("utf8"))
if db == 1:
return self._r1
elif db == 2:
return self._r2
def _update_data_callback(self, db):
"""
Process data that needs updating when new data is loaded
"""
self._build_similarity_features_caches(db)
def _build_similarity_features_caches(self, db):
"""
This function builds two feature cache matrices and sets the
number of donors (self.similarity_num_donors).
The matrices are stored on self._similarity_continuous_features
and self._similarity_categorical_features: one holds the
continuous features, the other the categorical features. They are
precomputed to speed up the similarity recommendation process."""
from taar.recommenders.similarity_recommender import (
CONTINUOUS_FEATURES,
CATEGORICAL_FEATURES,
)
tmp = db.get(SIMILARITY_DONORS)
if tmp is None:
return
donors_pool = json.loads(tmp.decode("utf8"))
self._similarity_num_donors = len(donors_pool)
# Build a numpy matrix cache for the continuous features.
continuous_features = np.zeros(
(self.similarity_num_donors, len(CONTINUOUS_FEATURES))
)
for idx, d in enumerate(donors_pool):
features = [d.get(specified_key) for specified_key in CONTINUOUS_FEATURES]
continuous_features[idx] = features
self._similarity_continuous_features = continuous_features
# Build the cache for categorical features.
categorical_features = np.zeros(
(self.similarity_num_donors, len(CATEGORICAL_FEATURES)), dtype="object",
)
for idx, d in enumerate(donors_pool):
features = [d.get(specified_key) for specified_key in CATEGORICAL_FEATURES]
categorical_features[idx] = np.array([features], dtype="object")
self._similarity_categorical_features = categorical_features
self.logger.info("Reconstructed matrices for similarity recommender")
@property
def _ident(self):
""" pid/thread identity """
@@ -319,6 +439,22 @@ class AddonsCoinstallCache:
def _fetch_collaborative_item_matrix(self):
return s3_json_loader(TAAR_ITEM_MATRIX_BUCKET, TAAR_ITEM_MATRIX_KEY)
def _fetch_similarity_donors(self):
return s3_json_loader(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY,)
def _fetch_similarity_lrcurves(self):
return s3_json_loader(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY,)
def _update_similarity_data(self, db):
"""
Load the TAAR similarity data
"""
donors = self._fetch_similarity_donors()
lrcurves = self._fetch_similarity_lrcurves()
db.set(SIMILARITY_DONORS, json.dumps(donors))
db.set(SIMILARITY_LRCURVES, json.dumps(lrcurves))
def _update_collab_data(self, db):
"""
Load the TAAR collaborative data. This is two parts: an item
@@ -445,9 +581,18 @@ class AddonsCoinstallCache:
# Clear this database before we do anything with it
db.flushdb()
self._update_rank_data(db)
# Update TAARlite
self._update_rank_data(db)
self._update_coinstall_data(db)
# Update TAAR locale data
self._update_locale_data(db)
# Update TAAR collaborative data
self._update_collab_data(db)
# Update TAAR similarity data
self._update_similarity_data(db)
self._update_data_callback(db)
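The load path above fills a freshly flushed database and finishes with `_update_data_callback`, which rebuilds the similarity matrices. The class keeps three Redis handles for this: db 0 for bookkeeping such as the `ACTIVE_DB` pointer, and db 1 / db 2 as alternating payload stores, so readers keep serving the live copy while a load fills the idle one. Below is a minimal sketch of that double-buffered swap against a local Redis; the names `DoubleBufferedCache`, `_inactive_db`, and `load` are hypothetical and not part of this commit:

```python
import json

import redis

ACTIVE_DB = "taar_active_db"


class DoubleBufferedCache:
    """Minimal sketch of the read-from-active / load-into-idle pattern."""

    def __init__(self, host="localhost", port=6379):
        # db 0 is bookkeeping only; db 1 and db 2 alternate as payload stores.
        self._r0 = redis.StrictRedis(host=host, port=port, db=0)
        self._r1 = redis.StrictRedis(host=host, port=port, db=1)
        self._r2 = redis.StrictRedis(host=host, port=port, db=2)

    def _db(self):
        # Readers always follow the ACTIVE_DB pointer stored in db 0.
        active_db = self._r0.get(ACTIVE_DB)
        if active_db is None:
            return None  # nothing has been loaded yet
        return self._r1 if int(active_db.decode("utf8")) == 1 else self._r2

    def _inactive_db(self):
        # Hypothetical helper: the database readers are *not* using.
        return self._r2 if self._db() is self._r1 else self._r1

    def load(self, payload):
        # Fill the idle database, then flip the pointer so readers
        # cut over to the fully loaded copy in one step.
        db = self._inactive_db()
        db.flushdb()
        for key, value in payload.items():
            db.set(key, json.dumps(value))
        self._r0.set(ACTIVE_DB, 1 if db is self._r1 else 2)
```

Because the cutover is a single SET on the pointer key, readers never observe a half-loaded database.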

View file

@@ -7,13 +7,7 @@ from itertools import groupby
from scipy.spatial import distance
from srgutil.interfaces import IMozLogging
import numpy as np
from .lazys3 import LazyJSONLoader
from taar.settings import (
TAAR_SIMILARITY_BUCKET,
TAAR_SIMILARITY_DONOR_KEY,
TAAR_SIMILARITY_LRCURVES_KEY,
)
from taar.recommenders.redis_cache import AddonsCoinstallCache
import markus
@@ -52,99 +46,29 @@ class SimilarityRecommender(AbstractRecommender):
def __init__(self, ctx):
self._ctx = ctx
if "similarity_donors_pool" in self._ctx:
self._donors_pool = self._ctx["similarity_donors_pool"]
else:
self._donors_pool = LazyJSONLoader(
self._ctx,
TAAR_SIMILARITY_BUCKET,
TAAR_SIMILARITY_DONOR_KEY,
"similarity_donor",
)
if "similarity_lr_curves" in self._ctx:
self._lr_curves = self._ctx["similarity_lr_curves"]
else:
self._lr_curves = LazyJSONLoader(
self._ctx,
TAAR_SIMILARITY_BUCKET,
TAAR_SIMILARITY_LRCURVES_KEY,
"similarity_curves",
)
self._redis_cache = AddonsCoinstallCache.get_instance(self._ctx)
self.logger = self._ctx[IMozLogging].get_logger("taar")
self._init_from_ctx()
@property
def categorical_features(self):
return self._redis_cache.similarity_categorical_features()
@property
def continuous_features(self):
return self._redis_cache.similarity_continuous_features()
@property
def num_donors(self):
return self._redis_cache.similarity_num_donors
@property
def donors_pool(self):
result, status = self._donors_pool.get()
if status:
# Force a reconstruction of the features cache on new
# donor pool data
self._build_features_caches()
return result
return self._redis_cache.similarity_donors()
@property
def lr_curves(self):
result, status = self._lr_curves.get()
if status:
# Force a reconstruction of the features cache on new
# curve data
self._build_features_caches()
return result
def _init_from_ctx(self):
# Download the addon donors list.
if self.donors_pool is None:
self.logger.info(
"Similarity donors pool has not been fetched from S3: {}".format(
TAAR_SIMILARITY_DONOR_KEY
)
)
# Download the probability mapping curves from similarity to likelihood of being a good donor.
if self.lr_curves is None:
self.logger.error(
"Similarity LR Curves have not been fetched from S3: {}".format(
TAAR_SIMILARITY_LRCURVES_KEY
)
)
def _build_features_caches(self):
"""This function build two feature cache matrices.
That's the self.categorical_features and
self.continuous_features attributes.
One matrix is for the continuous features and the other is for
the categorical features. This is needed to speed up the similarity
recommendation process."""
_donors_pool = self._donors_pool.get()[0]
_lr_curves = self._lr_curves.get()[0]
if _donors_pool is None or _lr_curves is None:
# We need to have both donors_pool and lr_curves defined
# to reconstruct the matrices
return None
self.num_donors = len(_donors_pool)
# Build a numpy matrix cache for the continuous features.
self.continuous_features = np.zeros((self.num_donors, len(CONTINUOUS_FEATURES)))
for idx, d in enumerate(_donors_pool):
features = [d.get(specified_key) for specified_key in CONTINUOUS_FEATURES]
self.continuous_features[idx] = features
# Build the cache for categorical features.
self.categorical_features = np.zeros(
(self.num_donors, len(CATEGORICAL_FEATURES)), dtype="object"
)
for idx, d in enumerate(_donors_pool):
features = [d.get(specified_key) for specified_key in CATEGORICAL_FEATURES]
self.categorical_features[idx] = np.array([features], dtype="object")
self.logger.info("Reconstructed matrices for similarity recommender")
return self._redis_cache.similarity_lrcurves()
def can_recommend(self, client_data, extra_data={}):
# We can't recommend if we don't have our data files.
@@ -301,8 +225,6 @@ class SimilarityRecommender(AbstractRecommender):
recommendations_out = self._recommend(client_data, limit, extra_data)
except Exception as e:
recommendations_out = []
self._donors_pool.force_expiry()
self._lr_curves.force_expiry()
metrics.incr("error_similarity", value=1)
self.logger.exception(
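With the LazyJSONLoader plumbing removed, the recommender holds no data of its own: donors, curves, and the precomputed matrices all come from the shared Redis-backed cache. A hedged usage sketch, assuming a `ctx` that provides `IMozLogging` (the client profile fields mirror the test fixtures below):

```python
from taar.recommenders.redis_cache import AddonsCoinstallCache
from taar.recommenders.similarity_recommender import SimilarityRecommender

# One cache per process; safe_load_data() populates the idle Redis db
# and flips it live (the tests in this commit do this against fakeredis).
AddonsCoinstallCache.get_instance(ctx).safe_load_data()

r = SimilarityRecommender(ctx)
client = {
    # Illustrative profile; the field set mirrors generate_a_fake_taar_client().
    "client_id": "example-client",
    "activeAddons": [],
    "geo_city": "sfo-us",
    "subsession_length": 1,
    "locale": "en-US",
    "os": "windows",
    "bookmark_count": 1,
    "tab_open_count": 1,
    "total_uri": 1,
    "unique_tlds": 1,
}
if r.can_recommend(client):
    # Returns a weight-sorted list of (guid, weight) tuples.
    recommendations = r.recommend(client, 4)
```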

View file

@@ -39,3 +39,13 @@ def noop_taarcollab_dataload(stack):
)
)
return stack
def noop_taarsimilarity_dataload(stack):
# no-op the taar similarity
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache, "_update_similarity_data", return_value=None
)
)
return stack

View file

@@ -20,8 +20,11 @@ from taar.recommenders.collaborative_recommender import positive_hash
from markus import TIMING
from markus.testing import MetricsMock
from .test_localerecommender import noop_taarlite_dataload
from .noop_fixtures import noop_taarlocale_dataload
from .noop_fixtures import (
noop_taarlocale_dataload,
noop_taarlite_dataload,
noop_taarsimilarity_dataload,
)
"""
@@ -32,6 +35,13 @@ the Java hash function.
"""
def noop_other_recommenders(stack):
stack = noop_taarlocale_dataload(stack)
stack = noop_taarlite_dataload(stack)
stack = noop_taarsimilarity_dataload(stack)
return stack
@contextlib.contextmanager
def mock_install_none_mock_data(ctx):
"""
@@ -39,6 +49,8 @@ def mock_install_none_mock_data(ctx):
we always get 404 errors.
"""
with contextlib.ExitStack() as stack:
AddonsCoinstallCache._instance = None
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
@@ -54,8 +66,7 @@
)
)
stack = noop_taarlocale_dataload(stack)
stack = noop_taarlite_dataload(stack)
stack = noop_other_recommenders(stack)
# Patch fakeredis in
stack.enter_context(
@@ -71,7 +82,7 @@
)
# Initialize redis
AddonsCoinstallCache(ctx).safe_load_data()
AddonsCoinstallCache.get_instance(ctx).safe_load_data()
yield stack
@@ -100,6 +111,7 @@ def mock_install_mock_data(ctx):
fake_mapping[str(java_hash)] = addon
with contextlib.ExitStack() as stack:
AddonsCoinstallCache._instance = None
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
@@ -115,8 +127,7 @@
)
)
stack = noop_taarlocale_dataload(stack)
stack = noop_taarlite_dataload(stack)
stack = noop_other_recommenders(stack)
# Patch fakeredis in
stack.enter_context(
@@ -132,7 +143,7 @@
)
# Initialize redis
AddonsCoinstallCache(ctx).safe_load_data()
AddonsCoinstallCache.get_instance(ctx).safe_load_data()
yield stack

View file

@@ -5,7 +5,11 @@ import pytest
import mock
import contextlib
from .noop_fixtures import noop_taarlocale_dataload, noop_taarcollab_dataload
from .noop_fixtures import (
noop_taarlocale_dataload,
noop_taarcollab_dataload,
noop_taarsimilarity_dataload,
)
from taar.recommenders.guid_based_recommender import GuidBasedRecommender
from taar.recommenders.redis_cache import AddonsCoinstallCache
@@ -87,6 +91,8 @@ RESULTS = {
def mock_coinstall_ranking_context(ctx, mock_coinstall, mock_ranking):
with contextlib.ExitStack() as stack:
AddonsCoinstallCache._instance = None
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache, "_fetch_ranking_data", return_value=mock_ranking,
@@ -102,6 +108,7 @@ def mock_coinstall_ranking_context(ctx, mock_coinstall, mock_ranking):
stack = noop_taarlocale_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarsimilarity_dataload(stack)
# Patch fakeredis in
stack.enter_context(
@@ -117,7 +124,7 @@
)
# Initialize redis
AddonsCoinstallCache(ctx).safe_load_data()
AddonsCoinstallCache.get_instance(ctx).safe_load_data()
yield stack

View file

@@ -8,8 +8,11 @@ import mock
import contextlib
import fakeredis
from taar.recommenders.redis_cache import AddonsCoinstallCache
from .noop_fixtures import noop_taarcollab_dataload, noop_taarlite_dataload
from .noop_fixtures import (
noop_taarcollab_dataload,
noop_taarlite_dataload,
noop_taarsimilarity_dataload,
)
import json
@@ -46,6 +49,7 @@ def install_mock_data(ctx):
@contextlib.contextmanager
def mock_locale_data(ctx):
with contextlib.ExitStack() as stack:
AddonsCoinstallCache._instance = None
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
@@ -56,6 +60,7 @@ def mock_locale_data(ctx):
stack = noop_taarlite_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarsimilarity_dataload(stack)
# Patch fakeredis in
stack.enter_context(
@@ -71,7 +76,7 @@
)
# Initialize redis
AddonsCoinstallCache(ctx).safe_load_data()
AddonsCoinstallCache.get_instance(ctx).safe_load_data()
yield stack

View file

@@ -6,12 +6,9 @@ import json
import six
import logging
import numpy as np
import scipy.stats
from taar.recommenders.lazys3 import LazyJSONLoader
import boto3
from moto import mock_s3
from taar.recommenders.similarity_recommender import (
CATEGORICAL_FEATURES,
@@ -25,11 +22,15 @@ from .similarity_data import CATEGORICAL_FEATURE_FIXTURE_DATA
from markus import TIMING
from markus.testing import MetricsMock
from taar.settings import (
TAAR_SIMILARITY_BUCKET,
TAAR_SIMILARITY_DONOR_KEY,
TAAR_SIMILARITY_LRCURVES_KEY,
import fakeredis
import mock
import contextlib
from .noop_fixtures import (
noop_taarcollab_dataload,
noop_taarlite_dataload,
noop_taarlocale_dataload,
)
from taar.recommenders.redis_cache import AddonsCoinstallCache
def generate_fake_lr_curves(num_elements, ceiling=10.0):
@@ -68,311 +69,338 @@ def generate_a_fake_taar_client():
}
def install_no_data(ctx):
ctx = ctx.child()
conn = boto3.resource("s3", region_name="us-west-2")
@contextlib.contextmanager
def mock_install_no_data(ctx):
conn.create_bucket(Bucket=TAAR_SIMILARITY_BUCKET)
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY).put(Body="")
with contextlib.ExitStack() as stack:
AddonsCoinstallCache._instance = None
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache, "_fetch_similarity_donors", return_value="",
)
)
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY).put(Body="")
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache, "_fetch_similarity_lrcurves", return_value="",
)
)
ctx["similarity_donors_pool"] = LazyJSONLoader(
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY, "similarity_donor",
)
stack = noop_taarlocale_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarlite_dataload(stack)
ctx["similarity_lr_curves"] = LazyJSONLoader(
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY, "similarity_curves",
)
# Patch fakeredis in
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
"init_redis_connections",
return_value={
0: fakeredis.FakeStrictRedis(db=0),
1: fakeredis.FakeStrictRedis(db=1),
2: fakeredis.FakeStrictRedis(db=2),
},
)
)
return ctx
# Initialize redis
AddonsCoinstallCache.get_instance(ctx).safe_load_data()
yield stack
def install_categorical_data(ctx):
ctx = ctx.child()
conn = boto3.resource("s3", region_name="us-west-2")
@contextlib.contextmanager
def mock_install_categorical_data(ctx):
try:
conn.create_bucket(Bucket=TAAR_SIMILARITY_BUCKET)
except Exception:
pass
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY).put(
Body=json.dumps(CATEGORICAL_FEATURE_FIXTURE_DATA)
)
with contextlib.ExitStack() as stack:
AddonsCoinstallCache._instance = None
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
"_fetch_similarity_donors",
return_value=CATEGORICAL_FEATURE_FIXTURE_DATA,
)
)
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY).put(
Body=json.dumps(generate_fake_lr_curves(1000))
)
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
"_fetch_similarity_lrcurves",
return_value=generate_fake_lr_curves(1000),
)
)
stack = noop_taarlocale_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarlite_dataload(stack)
ctx["similarity_donors_pool"] = LazyJSONLoader(
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY, "similarity_donor",
)
# Patch fakeredis in
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
"init_redis_connections",
return_value={
0: fakeredis.FakeStrictRedis(db=0),
1: fakeredis.FakeStrictRedis(db=1),
2: fakeredis.FakeStrictRedis(db=2),
},
)
)
ctx["similarity_lr_curves"] = LazyJSONLoader(
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY, "similarity_curves",
)
return ctx
# Initialize redis
AddonsCoinstallCache.get_instance(ctx).safe_load_data()
yield stack
def install_continuous_data(ctx):
ctx = ctx.child()
cts_data = json.dumps(CONTINUOUS_FEATURE_FIXTURE_DATA)
lrs_data = json.dumps(generate_fake_lr_curves(1000))
@contextlib.contextmanager
def mock_install_continuous_data(ctx):
cts_data = CONTINUOUS_FEATURE_FIXTURE_DATA
lrs_data = generate_fake_lr_curves(1000)
conn = boto3.resource("s3", region_name="us-west-2")
with contextlib.ExitStack() as stack:
AddonsCoinstallCache._instance = None
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache, "_fetch_similarity_donors", return_value=cts_data,
)
)
try:
conn.create_bucket(Bucket=TAAR_SIMILARITY_BUCKET)
except Exception:
pass
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY).put(Body=cts_data)
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
"_fetch_similarity_lrcurves",
return_value=lrs_data,
)
)
stack = noop_taarlocale_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarlite_dataload(stack)
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY).put(Body=lrs_data)
# Patch fakeredis in
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
"init_redis_connections",
return_value={
0: fakeredis.FakeStrictRedis(db=0),
1: fakeredis.FakeStrictRedis(db=1),
2: fakeredis.FakeStrictRedis(db=2),
},
)
)
ctx["similarity_donors_pool"] = LazyJSONLoader(
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY, "similarity_donor",
)
ctx["similarity_lr_curves"] = LazyJSONLoader(
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY, "similarity_curves",
)
return ctx
# Initialize redis
AddonsCoinstallCache.get_instance(ctx).safe_load_data()
yield stack
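The three mock_install_* context managers above repeat the same scaffolding: reset the singleton, patch the two S3 fetchers, no-op the other data loads, patch in fakeredis, and call safe_load_data(). A sketch of a shared helper that could factor this out; `mock_similarity_data` is hypothetical and not part of this commit:

```python
import contextlib

import fakeredis
import mock

from taar.recommenders.redis_cache import AddonsCoinstallCache
from .noop_fixtures import (
    noop_taarcollab_dataload,
    noop_taarlite_dataload,
    noop_taarlocale_dataload,
)


@contextlib.contextmanager
def mock_similarity_data(ctx, donors, lrcurves):
    """Hypothetical shared fixture: patch fetchers + fakeredis, then load."""
    with contextlib.ExitStack() as stack:
        # Each test gets a fresh singleton and a fresh fake Redis.
        AddonsCoinstallCache._instance = None
        for name, value in [
            ("_fetch_similarity_donors", donors),
            ("_fetch_similarity_lrcurves", lrcurves),
        ]:
            stack.enter_context(
                mock.patch.object(AddonsCoinstallCache, name, return_value=value)
            )
        stack = noop_taarlocale_dataload(stack)
        stack = noop_taarcollab_dataload(stack)
        stack = noop_taarlite_dataload(stack)
        # Patch fakeredis in place of the real connections.
        stack.enter_context(
            mock.patch.object(
                AddonsCoinstallCache,
                "init_redis_connections",
                return_value={n: fakeredis.FakeStrictRedis(db=n) for n in (0, 1, 2)},
            )
        )
        AddonsCoinstallCache.get_instance(ctx).safe_load_data()
        yield stack
```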
def check_matrix_built(caplog):
msg = "Reconstructed matrices for similarity recommender"
return sum([msg in str(s) for s in caplog.records]) > 0
@mock_s3
def test_soft_fail(test_ctx, caplog):
# Create a new instance of a SimilarityRecommender.
ctx = install_no_data(test_ctx)
r = SimilarityRecommender(ctx)
with mock_install_no_data(test_ctx):
r = SimilarityRecommender(test_ctx)
# Don't recommend if the source files cannot be found.
assert not r.can_recommend({})
assert not check_matrix_built(caplog)
# Don't recommend if the source files cannot be found.
assert not r.can_recommend({})
@mock_s3
def test_can_recommend(test_ctx, caplog):
caplog.set_level(logging.INFO)
# Create a new instance of a SimilarityRecommender.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)
assert check_matrix_built(caplog)
# Test that we can't recommend if we have not enough client info.
assert not r.can_recommend({})
# Test that we can't recommend if we have not enough client info.
assert not r.can_recommend({})
# Test that we can recommend for a normal client.
assert r.can_recommend(generate_a_fake_taar_client())
# Test that we can recommend for a normal client.
assert r.can_recommend(generate_a_fake_taar_client())
# Check that we can not recommend if any required client field is missing.
required_fields = CATEGORICAL_FEATURES + CONTINUOUS_FEATURES
# Check that we can not recommend if any required client field is missing.
required_fields = CATEGORICAL_FEATURES + CONTINUOUS_FEATURES
for required_field in required_fields:
profile_without_x = generate_a_fake_taar_client()
for required_field in required_fields:
profile_without_x = generate_a_fake_taar_client()
# Make an empty value in a required field in the client info dict.
profile_without_x[required_field] = None
assert not r.can_recommend(profile_without_x)
# Make an empty value in a required field in the client info dict.
profile_without_x[required_field] = None
assert not r.can_recommend(profile_without_x)
# Completely remove (in place) the entire required field from the dict.
del profile_without_x[required_field]
assert not r.can_recommend(profile_without_x)
# Completely remove (in place) the entire required field from the dict.
del profile_without_x[required_field]
assert not r.can_recommend(profile_without_x)
@mock_s3
def test_recommendations(test_ctx):
with MetricsMock() as mm:
# Create a new instance of a SimilarityRecommender.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)
recommendation_list = r.recommend(generate_a_fake_taar_client(), 1)
recommendation_list = r.recommend(generate_a_fake_taar_client(), 1)
assert isinstance(recommendation_list, list)
assert len(recommendation_list) == 1
assert isinstance(recommendation_list, list)
assert len(recommendation_list) == 1
recommendation, weight = recommendation_list[0]
recommendation, weight = recommendation_list[0]
# Make sure that the reported addons are the expected ones from the most similar donor.
assert "{test-guid-1}" == recommendation
assert type(weight) == np.float64
# Make sure that the reported addons are the expected ones from the most similar donor.
assert "{test-guid-1}" == recommendation
assert type(weight) == np.float64
assert mm.has_record(TIMING, stat="taar.similarity_donor")
assert mm.has_record(TIMING, stat="taar.similarity_curves")
assert mm.has_record(TIMING, stat="taar.similarity_recommend")
assert mm.has_record(TIMING, stat="taar.similarity_recommend")
@mock_s3
def test_recommender_str(test_ctx):
# Tests that the string representation of the recommender is correct.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
assert str(r) == "SimilarityRecommender"
@mock_s3
def test_get_lr(test_ctx):
# Tests that the likelihood ratio values are not empty for extreme values and are realistic.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
assert r.get_lr(0.0001) is not None
assert r.get_lr(10.0) is not None
assert r.get_lr(0.001) > r.get_lr(5.0)
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)
assert r.get_lr(0.0001) is not None
assert r.get_lr(10.0) is not None
assert r.get_lr(0.001) > r.get_lr(5.0)
@mock_s3
def test_compute_clients_dist(test_ctx):
# Test the distance function computation.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
test_clients = [
{
"client_id": "test-client-002",
"activeAddons": [],
"geo_city": "sfo-us",
"subsession_length": 1,
"locale": "en-US",
"os": "windows",
"bookmark_count": 1,
"tab_open_count": 1,
"total_uri": 1,
"unique_tlds": 1,
},
{
"client_id": "test-client-003",
"activeAddons": [],
"geo_city": "brasilia-br",
"subsession_length": 1,
"locale": "br-PT",
"os": "windows",
"bookmark_count": 10,
"tab_open_count": 1,
"total_uri": 1,
"unique_tlds": 1,
},
{
"client_id": "test-client-004",
"activeAddons": [],
"geo_city": "brasilia-br",
"subsession_length": 100,
"locale": "br-PT",
"os": "windows",
"bookmark_count": 10,
"tab_open_count": 10,
"total_uri": 100,
"unique_tlds": 10,
},
]
per_client_test = []
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)
test_clients = [
{
"client_id": "test-client-002",
"activeAddons": [],
"geo_city": "sfo-us",
"subsession_length": 1,
"locale": "en-US",
"os": "windows",
"bookmark_count": 1,
"tab_open_count": 1,
"total_uri": 1,
"unique_tlds": 1,
},
{
"client_id": "test-client-003",
"activeAddons": [],
"geo_city": "brasilia-br",
"subsession_length": 1,
"locale": "br-PT",
"os": "windows",
"bookmark_count": 10,
"tab_open_count": 1,
"total_uri": 1,
"unique_tlds": 1,
},
{
"client_id": "test-client-004",
"activeAddons": [],
"geo_city": "brasilia-br",
"subsession_length": 100,
"locale": "br-PT",
"os": "windows",
"bookmark_count": 10,
"tab_open_count": 10,
"total_uri": 100,
"unique_tlds": 10,
},
]
per_client_test = []
# Compute a different set of distances for each set of clients.
for tc in test_clients:
test_distances = r.compute_clients_dist(tc)
assert len(test_distances) == len(CONTINUOUS_FEATURE_FIXTURE_DATA)
per_client_test.append(test_distances[2][0])
# Compute a different set of distances for each set of clients.
for tc in test_clients:
test_distances = r.compute_clients_dist(tc)
assert len(test_distances) == len(CONTINUOUS_FEATURE_FIXTURE_DATA)
per_client_test.append(test_distances[2][0])
# Ensure the different clients also had different distances to a specific donor.
assert per_client_test[0] >= per_client_test[1] >= per_client_test[2]
# Ensure the different clients also had different distances to a specific donor.
assert per_client_test[0] >= per_client_test[1] >= per_client_test[2]
@mock_s3
def test_distance_functions(test_ctx):
# Tests the similarity functions via expected output when passing modified client data.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
# Tests the similarity functions via expected output when passing
# modified client data.
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)
# Generate a fake client.
test_client = generate_a_fake_taar_client()
recs = r.recommend(test_client, 10)
assert len(recs) > 0
# Generate a fake client.
test_client = generate_a_fake_taar_client()
recs = r.recommend(test_client, 10)
assert len(recs) > 0
# Make it a generally poor match for the donors.
test_client.update({"total_uri": 10, "bookmark_count": 2, "subsession_length": 10})
# Make it a generally poor match for the donors.
test_client.update(
{"total_uri": 10, "bookmark_count": 2, "subsession_length": 10}
)
all_client_values_zero = test_client
# Make all categorical variables non-matching with any donor.
all_client_values_zero.update(
{key: "zero" for key in test_client.keys() if key in CATEGORICAL_FEATURES}
)
recs = r.recommend(all_client_values_zero, 10)
assert len(recs) == 0
all_client_values_zero = test_client
# Make all categorical variables non-matching with any donor.
all_client_values_zero.update(
{key: "zero" for key in test_client.keys() if key in CATEGORICAL_FEATURES}
)
recs = r.recommend(all_client_values_zero, 10)
assert len(recs) == 0
# Make all continuous variables equal to zero.
all_client_values_zero.update(
{key: 0 for key in test_client.keys() if key in CONTINUOUS_FEATURES}
)
recs = r.recommend(all_client_values_zero, 10)
assert len(recs) == 0
# Make all continuous variables equal to zero.
all_client_values_zero.update(
{key: 0 for key in test_client.keys() if key in CONTINUOUS_FEATURES}
)
recs = r.recommend(all_client_values_zero, 10)
assert len(recs) == 0
# Make all categorical variables non-matching with any donor.
all_client_values_high = test_client
all_client_values_high.update(
{
key: "one billion"
for key in test_client.keys()
if key in CATEGORICAL_FEATURES
}
)
recs = r.recommend(all_client_values_high, 10)
assert len(recs) == 0
# Make all categorical variables non-matching with any donor.
all_client_values_high = test_client
all_client_values_high.update(
{
key: "one billion"
for key in test_client.keys()
if key in CATEGORICAL_FEATURES
}
)
recs = r.recommend(all_client_values_high, 10)
assert len(recs) == 0
# Make all continuous variables equal to a very high numerical value.
all_client_values_high.update(
{key: 1e60 for key in test_client.keys() if key in CONTINUOUS_FEATURES}
)
recs = r.recommend(all_client_values_high, 10)
assert len(recs) == 0
# Make all continuous variables equal to a very high numerical value.
all_client_values_high.update(
{key: 1e60 for key in test_client.keys() if key in CONTINUOUS_FEATURES}
)
recs = r.recommend(all_client_values_high, 10)
assert len(recs) == 0
# Test for 0.0 values if j_c is not normalized and j_d is fine.
j_c = 0.0
j_d = 0.42
assert abs(j_c * j_d) == 0.0
assert abs((j_c + 0.01) * j_d) != 0.0
# Test for 0.0 values if j_c is not normalized and j_d is fine.
j_c = 0.0
j_d = 0.42
assert abs(j_c * j_d) == 0.0
assert abs((j_c + 0.01) * j_d) != 0.0
@mock_s3
def test_weights_continuous(test_ctx):
# Create a new instance of a SimilarityRecommender.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)
# In the ensemble method recommendations should be a sorted list of tuples
# containing [(guid, weight), (guid, weight)... (guid, weight)].
recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)
with open("/tmp/similarity_recommender.json", "w") as fout:
fout.write(json.dumps(recommendation_list))
# In the ensemble method recommendations should be a sorted list of tuples
# containing [(guid, weight), (guid, weight)... (guid, weight)].
recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)
with open("/tmp/similarity_recommender.json", "w") as fout:
fout.write(json.dumps(recommendation_list))
# Make sure the structure of the recommendations is correct and
# that we recommended the right addons.
# Make sure the structure of the recommendations is correct and
# that we recommended the right addons.
assert len(recommendation_list) == 2
for recommendation, weight in recommendation_list:
assert isinstance(recommendation, six.string_types)
assert isinstance(weight, float)
assert len(recommendation_list) == 2
for recommendation, weight in recommendation_list:
assert isinstance(recommendation, six.string_types)
assert isinstance(weight, float)
# Test that sorting is appropriate.
rec0 = recommendation_list[0]
rec1 = recommendation_list[1]
# Test that sorting is appropriate.
rec0 = recommendation_list[0]
rec1 = recommendation_list[1]
rec0_weight = rec0[1]
rec1_weight = rec1[1]
rec0_weight = rec0[1]
rec1_weight = rec1[1]
# Duplicate presence of test-guid-1 should mean rec0_weight is double
# rec1_weight, and both should be greater than 1.0
# Duplicate presence of test-guid-1 should mean rec0_weight is double
# rec1_weight, and both should be greater than 1.0
assert rec0_weight > rec1_weight > 1.0
assert rec0_weight > rec1_weight > 1.0
@mock_s3
def test_weights_categorical(test_ctx):
"""
This should get:
@@ -383,48 +411,24 @@ def test_weights_categorical(test_ctx):
"""
# Create a new instance of a SimilarityRecommender.
cat_ctx = install_categorical_data(test_ctx)
cts_ctx = install_continuous_data(test_ctx)
with mock_install_categorical_data(test_ctx):
r = SimilarityRecommender(test_ctx)
wrapped = cts_ctx.wrap(cat_ctx)
r = SimilarityRecommender(wrapped)
# In the ensemble method recommendations should be a sorted list of tuples
# containing [(guid, weight), (guid, weight)... (guid, weight)].
recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)
# In the ensemble method recommendations should be a sorted list of tuples
# containing [(guid, weight), (guid, weight)... (guid, weight)].
recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)
assert len(recommendation_list) == 2
# Make sure the structure of the recommendations is correct and that we recommended the right addons.
for recommendation, weight in recommendation_list:
assert isinstance(recommendation, six.string_types)
assert isinstance(weight, float)
assert len(recommendation_list) == 2
# Make sure the structure of the recommendations is correct and that we recommended the right addons.
for recommendation, weight in recommendation_list:
assert isinstance(recommendation, six.string_types)
assert isinstance(weight, float)
# Test that sorting is appropriate.
rec0 = recommendation_list[0]
rec1 = recommendation_list[1]
# Test that sorting is appropriate.
rec0 = recommendation_list[0]
rec1 = recommendation_list[1]
rec0_weight = rec0[1]
rec1_weight = rec1[1]
rec0_weight = rec0[1]
rec1_weight = rec1[1]
assert rec0_weight > rec1_weight > 0
@mock_s3
def test_recompute_matrices(test_ctx, caplog):
caplog.set_level(logging.INFO)
# Create a new instance of a SimilarityRecommender.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
# Reloading the donors pool should reconstruct the matrices
caplog.clear()
r._donors_pool.force_expiry()
r.donors_pool
assert check_matrix_built(caplog)
# Reloading the LR curves should reconstruct the matrices
caplog.clear()
r._lr_curves.force_expiry()
r.lr_curves
assert check_matrix_built(caplog)
assert rec0_weight > rec1_weight > 0