Ported similarity recommender to use Redis

Victor Ng 2020-09-01 19:13:48 -04:00
Parent fa78a731aa
Commit d594703a1a
7 changed files with 488 additions and 384 deletions

View file

@@ -14,6 +14,7 @@ from taar.settings import (
REDIS_PORT,
)
# TAARLite configuration
from taar.settings import (
TAARLITE_GUID_COINSTALL_BUCKET,
@@ -23,14 +24,20 @@ from taar.settings import (
TAARLITE_MUTEX_TTL,
)
# TAARLite configuration
# TAAR configuration
from taar.settings import (
# Locale
TAAR_LOCALE_BUCKET,
TAAR_LOCALE_KEY,
# Collaborative data
TAAR_ADDON_MAPPING_BUCKET,
TAAR_ADDON_MAPPING_KEY,
TAAR_ITEM_MATRIX_BUCKET,
TAAR_ITEM_MATRIX_KEY,
# Similarity data
TAAR_SIMILARITY_BUCKET,
TAAR_SIMILARITY_DONOR_KEY,
TAAR_SIMILARITY_LRCURVES_KEY,
)
from jsoncache.loader import s3_json_loader
@@ -75,6 +82,13 @@ LOCALE_DATA = "taar_locale_data|"
COLLAB_MAPPING_DATA = "taar_collab_mapping|"
COLLAB_ITEM_MATRIX = "taar_collab_item_matrix|"
SIMILARITY_DONORS = "taar_similarity_donors|"
SIMILARITY_LRCURVES = "taar_similarity_lrcurves|"
SIMILARITY_NUM_DONORS = "taar_similarity_num_donors|"
SIMILARITY_CONTINUOUS_FEATURES = "taar_similarity_continuous_features|"
SIMILARITY_CATEGORICAL_FEATURES = "taar_similarity_categorical_features|"
class PrefixStripper:
def __init__(self, prefix, iterator, cast_to_str=False):
@@ -99,11 +113,27 @@ class AddonsCoinstallCache:
GUID->GUID co-installation data
"""
_instance = None
@classmethod
def get_instance(cls, ctx):
if cls._instance is None:
cls._instance = AddonsCoinstallCache(ctx)
return cls._instance
def __init__(self, ctx):
self._ctx = ctx
self.logger = self._ctx[IMozLogging].get_logger("taar")
# Keep an integer handle (or None) on the last known database
self._last_db = None
self._similarity_num_donors = 0
self._similarity_continuous_features = None
self._similarity_categorical_features = None
rcon = self.init_redis_connections()
self._r0 = rcon[0]
self._r1 = rcon[1]
self._r2 = rcon[2]
@@ -276,6 +306,43 @@ class AddonsCoinstallCache:
return json.loads(tmp.decode("utf8"))
return None
def similarity_donors(self):
"""
Get the taar similarity donors
"""
tmp = self._db().get(SIMILARITY_DONORS)
if tmp:
return json.loads(tmp.decode("utf8"))
return None
def similarity_lrcurves(self):
"""
Get the taar similarity lrcurves
"""
tmp = self._db().get(SIMILARITY_LRCURVES)
if tmp:
return json.loads(tmp.decode("utf8"))
return None
def similarity_continuous_features(self):
"""
precomputed similarity recommender continuous features cache
"""
return self._similarity_continuous_features
def similarity_categorical_features(self):
"""
precomputed similarity recommender categorical features cache
"""
return self._similarity_categorical_features
@property
def similarity_num_donors(self):
"""
precomputed count of donors in the similarity donor pool
"""
return self._similarity_num_donors
"""
################################
@@ -290,13 +357,66 @@ class AddonsCoinstallCache:
active redis instance
"""
active_db = self._r0.get(ACTIVE_DB)
if active_db is not None:
db = int(active_db.decode("utf8"))
if db == 1:
return self._r1
elif db == 2:
return self._r2
def _update_data_callback(self, db):
"""
Process data that needs updating when new data is loaded
"""
self._build_similarity_features_caches(db)
def _build_similarity_features_caches(self, db):
"""
This function builds two feature cache matrices and sets the
number of donors (self.similarity_num_donors).
The matrices are stored on self._similarity_continuous_features
and self._similarity_categorical_features: one holds the
continuous features, the other the categorical features. They are
precomputed to speed up the similarity recommendation process."""
from taar.recommenders.similarity_recommender import (
CONTINUOUS_FEATURES,
CATEGORICAL_FEATURES,
)
tmp = db.get(SIMILARITY_DONORS)
if tmp is None:
return
donors_pool = json.loads(tmp.decode("utf8"))
self._similarity_num_donors = len(donors_pool)
# Build a numpy matrix cache for the continuous features.
continuous_features = np.zeros(
(self.similarity_num_donors, len(CONTINUOUS_FEATURES))
)
for idx, d in enumerate(donors_pool):
features = [d.get(specified_key) for specified_key in CONTINUOUS_FEATURES]
continuous_features[idx] = features
self._similarity_continuous_features = continuous_features
# Build the cache for categorical features.
categorical_features = np.zeros(
(self.similarity_num_donors, len(CATEGORICAL_FEATURES)), dtype="object",
)
for idx, d in enumerate(donors_pool):
features = [d.get(specified_key) for specified_key in CATEGORICAL_FEATURES]
categorical_features[idx] = np.array([features], dtype="object")
self._similarity_categorical_features = categorical_features
self.logger.info("Reconstructed matrices for similarity recommender")
@property
def _ident(self):
""" pid/thread identity """
@@ -319,6 +439,22 @@ class AddonsCoinstallCache:
def _fetch_collaborative_item_matrix(self):
return s3_json_loader(TAAR_ITEM_MATRIX_BUCKET, TAAR_ITEM_MATRIX_KEY)
def _fetch_similarity_donors(self):
return s3_json_loader(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY,)
def _fetch_similarity_lrcurves(self):
return s3_json_loader(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY,)
def _update_similarity_data(self, db):
"""
Load the TAAR similarity data
"""
donors = self._fetch_similarity_donors()
lrcurves = self._fetch_similarity_lrcurves()
db.set(SIMILARITY_DONORS, json.dumps(donors))
db.set(SIMILARITY_LRCURVES, json.dumps(lrcurves))
def _update_collab_data(self, db):
"""
Load the TAAR collaborative data. This is two parts: an item
@@ -445,9 +581,18 @@ class AddonsCoinstallCache:
# Clear this database before we do anything with it
db.flushdb()
self._update_rank_data(db)
# Update TAARlite
self._update_rank_data(db)
self._update_coinstall_data(db)
# Update TAAR locale data
self._update_locale_data(db)
# Update TAAR collaborative data
self._update_collab_data(db)
# Update TAAR similarity data
self._update_similarity_data(db)
self._update_data_callback(db)
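The load path above fills a freshly flushed database and finishes with `_update_data_callback`, which rebuilds the similarity matrices. The class keeps three Redis handles for this: db 0 for bookkeeping such as the `ACTIVE_DB` pointer, and db 1 / db 2 as alternating payload stores, so readers keep serving the live copy while a load fills the idle one. Below is a minimal sketch of that double-buffered swap against a local Redis; the names `DoubleBufferedCache`, `_inactive_db`, and `load` are hypothetical and not part of this commit:

```python
import json

import redis

ACTIVE_DB = "taar_active_db"


class DoubleBufferedCache:
    """Minimal sketch of the read-from-active / load-into-idle pattern."""

    def __init__(self, host="localhost", port=6379):
        # db 0 is bookkeeping only; db 1 and db 2 alternate as payload stores.
        self._r0 = redis.StrictRedis(host=host, port=port, db=0)
        self._r1 = redis.StrictRedis(host=host, port=port, db=1)
        self._r2 = redis.StrictRedis(host=host, port=port, db=2)

    def _db(self):
        # Readers always follow the ACTIVE_DB pointer stored in db 0.
        active_db = self._r0.get(ACTIVE_DB)
        if active_db is None:
            return None  # nothing has been loaded yet
        return self._r1 if int(active_db.decode("utf8")) == 1 else self._r2

    def _inactive_db(self):
        # Hypothetical helper: the database readers are *not* using.
        return self._r2 if self._db() is self._r1 else self._r1

    def load(self, payload):
        # Fill the idle database, then flip the pointer so readers
        # cut over to the fully loaded copy in one step.
        db = self._inactive_db()
        db.flushdb()
        for key, value in payload.items():
            db.set(key, json.dumps(value))
        self._r0.set(ACTIVE_DB, 1 if db is self._r1 else 2)
```

Because the cutover is a single SET on the pointer key, readers never observe a half-loaded database.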

View file

@@ -7,13 +7,7 @@ from itertools import groupby
from scipy.spatial import distance
from srgutil.interfaces import IMozLogging
import numpy as np
from .lazys3 import LazyJSONLoader
from taar.settings import (
TAAR_SIMILARITY_BUCKET,
TAAR_SIMILARITY_DONOR_KEY,
TAAR_SIMILARITY_LRCURVES_KEY,
)
from taar.recommenders.redis_cache import AddonsCoinstallCache
import markus
@@ -52,99 +46,29 @@ class SimilarityRecommender(AbstractRecommender):
def __init__(self, ctx):
self._ctx = ctx
if "similarity_donors_pool" in self._ctx:
self._donors_pool = self._ctx["similarity_donors_pool"]
else:
self._donors_pool = LazyJSONLoader(
self._ctx,
TAAR_SIMILARITY_BUCKET,
TAAR_SIMILARITY_DONOR_KEY,
"similarity_donor",
)
if "similarity_lr_curves" in self._ctx:
self._lr_curves = self._ctx["similarity_lr_curves"]
else:
self._lr_curves = LazyJSONLoader(
self._ctx,
TAAR_SIMILARITY_BUCKET,
TAAR_SIMILARITY_LRCURVES_KEY,
"similarity_curves",
)
self._redis_cache = AddonsCoinstallCache.get_instance(self._ctx)
self.logger = self._ctx[IMozLogging].get_logger("taar")
self._init_from_ctx()
@property
def categorical_features(self):
return self._redis_cache.similarity_categorical_features()
@property
def continuous_features(self):
return self._redis_cache.similarity_continuous_features()
@property
def num_donors(self):
return self._redis_cache.similarity_num_donors
@property
def donors_pool(self):
result, status = self._donors_pool.get()
if status:
# Force a reconstruction of the features cache on new
# donor pool data
self._build_features_caches()
return result
return self._redis_cache.similarity_donors()
@property
def lr_curves(self):
result, status = self._lr_curves.get()
if status:
# Force a reconstruction of the features cache on new
# curve data
self._build_features_caches()
return result
def _init_from_ctx(self):
# Download the addon donors list.
if self.donors_pool is None:
self.logger.info(
"Similarity donors pool has not been fetched from S3: {}".format(
TAAR_SIMILARITY_DONOR_KEY
)
)
# Download the probability mapping curves from similarity to likelihood of being a good donor.
if self.lr_curves is None:
self.logger.error(
"Similarity LR Curves have not been fetched from S3: {}".format(
TAAR_SIMILARITY_LRCURVES_KEY
)
)
def _build_features_caches(self):
"""This function build two feature cache matrices.
That's the self.categorical_features and
self.continuous_features attributes.
One matrix is for the continuous features and the other is for
the categorical features. This is needed to speed up the similarity
recommendation process."""
_donors_pool = self._donors_pool.get()[0]
_lr_curves = self._lr_curves.get()[0]
if _donors_pool is None or _lr_curves is None:
# We need to have both donors_pool and lr_curves defined
# to reconstruct the matrices
return None
self.num_donors = len(_donors_pool)
# Build a numpy matrix cache for the continuous features.
self.continuous_features = np.zeros((self.num_donors, len(CONTINUOUS_FEATURES)))
for idx, d in enumerate(_donors_pool):
features = [d.get(specified_key) for specified_key in CONTINUOUS_FEATURES]
self.continuous_features[idx] = features
# Build the cache for categorical features.
self.categorical_features = np.zeros(
(self.num_donors, len(CATEGORICAL_FEATURES)), dtype="object"
)
for idx, d in enumerate(_donors_pool):
features = [d.get(specified_key) for specified_key in CATEGORICAL_FEATURES]
self.categorical_features[idx] = np.array([features], dtype="object")
self.logger.info("Reconstructed matrices for similarity recommender")
return self._redis_cache.similarity_lrcurves()
def can_recommend(self, client_data, extra_data={}):
# We can't recommend if we don't have our data files.
@@ -301,8 +225,6 @@ class SimilarityRecommender(AbstractRecommender):
recommendations_out = self._recommend(client_data, limit, extra_data)
except Exception as e:
recommendations_out = []
self._donors_pool.force_expiry()
self._lr_curves.force_expiry()
metrics.incr("error_similarity", value=1)
self.logger.exception(
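With the LazyJSONLoader plumbing removed, the recommender holds no data of its own: donors, curves, and the precomputed matrices all come from the shared Redis-backed cache. A hedged usage sketch, assuming a `ctx` that provides `IMozLogging` (the client profile fields mirror the test fixtures below):

```python
from taar.recommenders.redis_cache import AddonsCoinstallCache
from taar.recommenders.similarity_recommender import SimilarityRecommender

# One cache per process; safe_load_data() populates the idle Redis db
# and flips it live (the tests in this commit do this against fakeredis).
AddonsCoinstallCache.get_instance(ctx).safe_load_data()

r = SimilarityRecommender(ctx)
client = {
    # Illustrative profile; the field set mirrors generate_a_fake_taar_client().
    "client_id": "example-client",
    "activeAddons": [],
    "geo_city": "sfo-us",
    "subsession_length": 1,
    "locale": "en-US",
    "os": "windows",
    "bookmark_count": 1,
    "tab_open_count": 1,
    "total_uri": 1,
    "unique_tlds": 1,
}
if r.can_recommend(client):
    # Returns a weight-sorted list of (guid, weight) tuples.
    recommendations = r.recommend(client, 4)
```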

View file

@@ -39,3 +39,13 @@ def noop_taarcollab_dataload(stack):
)
)
return stack
def noop_taarsimilarity_dataload(stack):
# no-op the taar similarity
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache, "_update_similarity_data", return_value=None
)
)
return stack

View file

@@ -20,8 +20,11 @@ from taar.recommenders.collaborative_recommender import positive_hash
from markus import TIMING
from markus.testing import MetricsMock
from .test_localerecommender import noop_taarlite_dataload
from .noop_fixtures import noop_taarlocale_dataload
from .noop_fixtures import (
noop_taarlocale_dataload,
noop_taarlite_dataload,
noop_taarsimilarity_dataload,
)
"""
@@ -32,6 +35,13 @@ the Java hash function.
"""
def noop_other_recommenders(stack):
stack = noop_taarlocale_dataload(stack)
stack = noop_taarlite_dataload(stack)
stack = noop_taarsimilarity_dataload(stack)
return stack
@contextlib.contextmanager
def mock_install_none_mock_data(ctx):
"""
@@ -39,6 +49,8 @@ def mock_install_none_mock_data(ctx):
we always get 404 errors.
"""
with contextlib.ExitStack() as stack:
AddonsCoinstallCache._instance = None
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
@@ -54,8 +66,7 @@
)
)
stack = noop_taarlocale_dataload(stack)
stack = noop_taarlite_dataload(stack)
stack = noop_other_recommenders(stack)
# Patch fakeredis in
stack.enter_context(
@@ -71,7 +82,7 @@
)
# Initialize redis
AddonsCoinstallCache(ctx).safe_load_data()
AddonsCoinstallCache.get_instance(ctx).safe_load_data()
yield stack
@@ -100,6 +111,7 @@ def mock_install_mock_data(ctx):
fake_mapping[str(java_hash)] = addon
with contextlib.ExitStack() as stack:
AddonsCoinstallCache._instance = None
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
@@ -115,8 +127,7 @@
)
)
stack = noop_taarlocale_dataload(stack)
stack = noop_taarlite_dataload(stack)
stack = noop_other_recommenders(stack)
# Patch fakeredis in
stack.enter_context(
@@ -132,7 +143,7 @@
)
# Initialize redis
AddonsCoinstallCache(ctx).safe_load_data()
AddonsCoinstallCache.get_instance(ctx).safe_load_data()
yield stack

View file

@@ -5,7 +5,11 @@ import pytest
import mock
import contextlib
from .noop_fixtures import noop_taarlocale_dataload, noop_taarcollab_dataload
from .noop_fixtures import (
noop_taarlocale_dataload,
noop_taarcollab_dataload,
noop_taarsimilarity_dataload,
)
from taar.recommenders.guid_based_recommender import GuidBasedRecommender
from taar.recommenders.redis_cache import AddonsCoinstallCache
@@ -87,6 +91,8 @@ RESULTS = {
def mock_coinstall_ranking_context(ctx, mock_coinstall, mock_ranking):
with contextlib.ExitStack() as stack:
AddonsCoinstallCache._instance = None
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache, "_fetch_ranking_data", return_value=mock_ranking,
@@ -102,6 +108,7 @@ def mock_coinstall_ranking_context(ctx, mock_coinstall, mock_ranking):
stack = noop_taarlocale_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarsimilarity_dataload(stack)
# Patch fakeredis in
stack.enter_context(
@@ -117,7 +124,7 @@
)
# Initialize redis
AddonsCoinstallCache(ctx).safe_load_data()
AddonsCoinstallCache.get_instance(ctx).safe_load_data()
yield stack

View file

@@ -8,8 +8,11 @@ import mock
import contextlib
import fakeredis
from taar.recommenders.redis_cache import AddonsCoinstallCache
from .noop_fixtures import noop_taarcollab_dataload, noop_taarlite_dataload
from .noop_fixtures import (
noop_taarcollab_dataload,
noop_taarlite_dataload,
noop_taarsimilarity_dataload,
)
import json
@@ -46,6 +49,7 @@ def install_mock_data(ctx):
@contextlib.contextmanager
def mock_locale_data(ctx):
with contextlib.ExitStack() as stack:
AddonsCoinstallCache._instance = None
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
@@ -56,6 +60,7 @@ def mock_locale_data(ctx):
stack = noop_taarlite_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarsimilarity_dataload(stack)
# Patch fakeredis in
stack.enter_context(
@@ -71,7 +76,7 @@
)
# Initialize redis
AddonsCoinstallCache(ctx).safe_load_data()
AddonsCoinstallCache.get_instance(ctx).safe_load_data()
yield stack

View file

@@ -6,12 +6,9 @@ import json
import six
import logging
import numpy as np
import scipy.stats
from taar.recommenders.lazys3 import LazyJSONLoader
import boto3
from moto import mock_s3
from taar.recommenders.similarity_recommender import (
CATEGORICAL_FEATURES,
@@ -25,11 +22,15 @@ from .similarity_data import CATEGORICAL_FEATURE_FIXTURE_DATA
from markus import TIMING
from markus.testing import MetricsMock
from taar.settings import (
TAAR_SIMILARITY_BUCKET,
TAAR_SIMILARITY_DONOR_KEY,
TAAR_SIMILARITY_LRCURVES_KEY,
import fakeredis
import mock
import contextlib
from .noop_fixtures import (
noop_taarcollab_dataload,
noop_taarlite_dataload,
noop_taarlocale_dataload,
)
from taar.recommenders.redis_cache import AddonsCoinstallCache
def generate_fake_lr_curves(num_elements, ceiling=10.0):
@@ -68,311 +69,338 @@ def generate_a_fake_taar_client():
}
def install_no_data(ctx):
ctx = ctx.child()
conn = boto3.resource("s3", region_name="us-west-2")
@contextlib.contextmanager
def mock_install_no_data(ctx):
conn.create_bucket(Bucket=TAAR_SIMILARITY_BUCKET)
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY).put(Body="")
with contextlib.ExitStack() as stack:
AddonsCoinstallCache._instance = None
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache, "_fetch_similarity_donors", return_value="",
)
)
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY).put(Body="")
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache, "_fetch_similarity_lrcurves", return_value="",
)
)
ctx["similarity_donors_pool"] = LazyJSONLoader(
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY, "similarity_donor",
)
stack = noop_taarlocale_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarlite_dataload(stack)
ctx["similarity_lr_curves"] = LazyJSONLoader(
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY, "similarity_curves",
)
# Patch fakeredis in
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
"init_redis_connections",
return_value={
0: fakeredis.FakeStrictRedis(db=0),
1: fakeredis.FakeStrictRedis(db=1),
2: fakeredis.FakeStrictRedis(db=2),
},
)
)
return ctx
# Initialize redis
AddonsCoinstallCache.get_instance(ctx).safe_load_data()
yield stack
def install_categorical_data(ctx):
ctx = ctx.child()
conn = boto3.resource("s3", region_name="us-west-2")
@contextlib.contextmanager
def mock_install_categorical_data(ctx):
try:
conn.create_bucket(Bucket=TAAR_SIMILARITY_BUCKET)
except Exception:
pass
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY).put(
Body=json.dumps(CATEGORICAL_FEATURE_FIXTURE_DATA)
)
with contextlib.ExitStack() as stack:
AddonsCoinstallCache._instance = None
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
"_fetch_similarity_donors",
return_value=CATEGORICAL_FEATURE_FIXTURE_DATA,
)
)
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY).put(
Body=json.dumps(generate_fake_lr_curves(1000))
)
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
"_fetch_similarity_lrcurves",
return_value=generate_fake_lr_curves(1000),
)
)
stack = noop_taarlocale_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarlite_dataload(stack)
ctx["similarity_donors_pool"] = LazyJSONLoader(
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY, "similarity_donor",
)
# Patch fakeredis in
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
"init_redis_connections",
return_value={
0: fakeredis.FakeStrictRedis(db=0),
1: fakeredis.FakeStrictRedis(db=1),
2: fakeredis.FakeStrictRedis(db=2),
},
)
)
ctx["similarity_lr_curves"] = LazyJSONLoader(
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY, "similarity_curves",
)
return ctx
# Initialize redis
AddonsCoinstallCache.get_instance(ctx).safe_load_data()
yield stack
def install_continuous_data(ctx):
ctx = ctx.child()
cts_data = json.dumps(CONTINUOUS_FEATURE_FIXTURE_DATA)
lrs_data = json.dumps(generate_fake_lr_curves(1000))
@contextlib.contextmanager
def mock_install_continuous_data(ctx):
cts_data = CONTINUOUS_FEATURE_FIXTURE_DATA
lrs_data = generate_fake_lr_curves(1000)
conn = boto3.resource("s3", region_name="us-west-2")
with contextlib.ExitStack() as stack:
AddonsCoinstallCache._instance = None
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache, "_fetch_similarity_donors", return_value=cts_data,
)
)
try:
conn.create_bucket(Bucket=TAAR_SIMILARITY_BUCKET)
except Exception:
pass
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY).put(Body=cts_data)
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
"_fetch_similarity_lrcurves",
return_value=lrs_data,
)
)
stack = noop_taarlocale_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarlite_dataload(stack)
conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY).put(Body=lrs_data)
# Patch fakeredis in
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
"init_redis_connections",
return_value={
0: fakeredis.FakeStrictRedis(db=0),
1: fakeredis.FakeStrictRedis(db=1),
2: fakeredis.FakeStrictRedis(db=2),
},
)
)
ctx["similarity_donors_pool"] = LazyJSONLoader(
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY, "similarity_donor",
)
ctx["similarity_lr_curves"] = LazyJSONLoader(
ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY, "similarity_curves",
)
return ctx
# Initialize redis
AddonsCoinstallCache.get_instance(ctx).safe_load_data()
yield stack
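The three mock_install_* context managers above repeat the same scaffolding: reset the singleton, patch the two S3 fetchers, no-op the other data loads, patch in fakeredis, and call safe_load_data(). A sketch of a shared helper that could factor this out; `mock_similarity_data` is hypothetical and not part of this commit:

```python
import contextlib

import fakeredis
import mock

from taar.recommenders.redis_cache import AddonsCoinstallCache
from .noop_fixtures import (
    noop_taarcollab_dataload,
    noop_taarlite_dataload,
    noop_taarlocale_dataload,
)


@contextlib.contextmanager
def mock_similarity_data(ctx, donors, lrcurves):
    """Hypothetical shared fixture: patch fetchers + fakeredis, then load."""
    with contextlib.ExitStack() as stack:
        # Each test gets a fresh singleton and a fresh fake Redis.
        AddonsCoinstallCache._instance = None
        for name, value in [
            ("_fetch_similarity_donors", donors),
            ("_fetch_similarity_lrcurves", lrcurves),
        ]:
            stack.enter_context(
                mock.patch.object(AddonsCoinstallCache, name, return_value=value)
            )
        stack = noop_taarlocale_dataload(stack)
        stack = noop_taarcollab_dataload(stack)
        stack = noop_taarlite_dataload(stack)
        # Patch fakeredis in place of the real connections.
        stack.enter_context(
            mock.patch.object(
                AddonsCoinstallCache,
                "init_redis_connections",
                return_value={n: fakeredis.FakeStrictRedis(db=n) for n in (0, 1, 2)},
            )
        )
        AddonsCoinstallCache.get_instance(ctx).safe_load_data()
        yield stack
```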
def check_matrix_built(caplog):
msg = "Reconstructed matrices for similarity recommender"
return sum([msg in str(s) for s in caplog.records]) > 0
@mock_s3
def test_soft_fail(test_ctx, caplog):
# Create a new instance of a SimilarityRecommender.
ctx = install_no_data(test_ctx)
r = SimilarityRecommender(ctx)
with mock_install_no_data(test_ctx):
r = SimilarityRecommender(test_ctx)
# Don't recommend if the source files cannot be found.
assert not r.can_recommend({})
assert not check_matrix_built(caplog)
# Don't recommend if the source files cannot be found.
assert not r.can_recommend({})
@mock_s3
def test_can_recommend(test_ctx, caplog):
caplog.set_level(logging.INFO)
# Create a new instance of a SimilarityRecommender.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)
assert check_matrix_built(caplog)
# Test that we can't recommend if we have not enough client info.
assert not r.can_recommend({})
# Test that we can't recommend if we have not enough client info.
assert not r.can_recommend({})
# Test that we can recommend for a normal client.
assert r.can_recommend(generate_a_fake_taar_client())
# Test that we can recommend for a normal client.
assert r.can_recommend(generate_a_fake_taar_client())
# Check that we can not recommend if any required client field is missing.
required_fields = CATEGORICAL_FEATURES + CONTINUOUS_FEATURES
# Check that we can not recommend if any required client field is missing.
required_fields = CATEGORICAL_FEATURES + CONTINUOUS_FEATURES
for required_field in required_fields:
profile_without_x = generate_a_fake_taar_client()
for required_field in required_fields:
profile_without_x = generate_a_fake_taar_client()
# Make an empty value in a required field in the client info dict.
profile_without_x[required_field] = None
assert not r.can_recommend(profile_without_x)
# Make an empty value in a required field in the client info dict.
profile_without_x[required_field] = None
assert not r.can_recommend(profile_without_x)
# Completely remove (in place) the entire required field from the dict.
del profile_without_x[required_field]
assert not r.can_recommend(profile_without_x)
# Completely remove (in place) the entire required field from the dict.
del profile_without_x[required_field]
assert not r.can_recommend(profile_without_x)
@mock_s3
def test_recommendations(test_ctx):
with MetricsMock() as mm:
# Create a new instance of a SimilarityRecommender.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)
recommendation_list = r.recommend(generate_a_fake_taar_client(), 1)
recommendation_list = r.recommend(generate_a_fake_taar_client(), 1)
assert isinstance(recommendation_list, list)
assert len(recommendation_list) == 1
assert isinstance(recommendation_list, list)
assert len(recommendation_list) == 1
recommendation, weight = recommendation_list[0]
recommendation, weight = recommendation_list[0]
# Make sure that the reported addons are the expected ones from the most similar donor.
assert "{test-guid-1}" == recommendation
assert type(weight) == np.float64
# Make sure that the reported addons are the expected ones from the most similar donor.
assert "{test-guid-1}" == recommendation
assert type(weight) == np.float64
assert mm.has_record(TIMING, stat="taar.similarity_donor")
assert mm.has_record(TIMING, stat="taar.similarity_curves")
assert mm.has_record(TIMING, stat="taar.similarity_recommend")
assert mm.has_record(TIMING, stat="taar.similarity_recommend")
@mock_s3
def test_recommender_str(test_ctx):
# Tests that the string representation of the recommender is correct.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
assert str(r) == "SimilarityRecommender"
@mock_s3
def test_get_lr(test_ctx):
# Tests that the likelihood ratio values are not empty for extreme values and are realistic.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
assert r.get_lr(0.0001) is not None
assert r.get_lr(10.0) is not None
assert r.get_lr(0.001) > r.get_lr(5.0)
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)
assert r.get_lr(0.0001) is not None
assert r.get_lr(10.0) is not None
assert r.get_lr(0.001) > r.get_lr(5.0)
@mock_s3
def test_compute_clients_dist(test_ctx):
# Test the distance function computation.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
test_clients = [
{
"client_id": "test-client-002",
"activeAddons": [],
"geo_city": "sfo-us",
"subsession_length": 1,
"locale": "en-US",
"os": "windows",
"bookmark_count": 1,
"tab_open_count": 1,
"total_uri": 1,
"unique_tlds": 1,
},
{
"client_id": "test-client-003",
"activeAddons": [],
"geo_city": "brasilia-br",
"subsession_length": 1,
"locale": "br-PT",
"os": "windows",
"bookmark_count": 10,
"tab_open_count": 1,
"total_uri": 1,
"unique_tlds": 1,
},
{
"client_id": "test-client-004",
"activeAddons": [],
"geo_city": "brasilia-br",
"subsession_length": 100,
"locale": "br-PT",
"os": "windows",
"bookmark_count": 10,
"tab_open_count": 10,
"total_uri": 100,
"unique_tlds": 10,
},
]
per_client_test = []
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)
test_clients = [
{
"client_id": "test-client-002",
"activeAddons": [],
"geo_city": "sfo-us",
"subsession_length": 1,
"locale": "en-US",
"os": "windows",
"bookmark_count": 1,
"tab_open_count": 1,
"total_uri": 1,
"unique_tlds": 1,
},
{
"client_id": "test-client-003",
"activeAddons": [],
"geo_city": "brasilia-br",
"subsession_length": 1,
"locale": "br-PT",
"os": "windows",
"bookmark_count": 10,
"tab_open_count": 1,
"total_uri": 1,
"unique_tlds": 1,
},
{
"client_id": "test-client-004",
"activeAddons": [],
"geo_city": "brasilia-br",
"subsession_length": 100,
"locale": "br-PT",
"os": "windows",
"bookmark_count": 10,
"tab_open_count": 10,
"total_uri": 100,
"unique_tlds": 10,
},
]
per_client_test = []
# Compute a different set of distances for each set of clients.
for tc in test_clients:
test_distances = r.compute_clients_dist(tc)
assert len(test_distances) == len(CONTINUOUS_FEATURE_FIXTURE_DATA)
per_client_test.append(test_distances[2][0])
# Compute a different set of distances for each set of clients.
for tc in test_clients:
test_distances = r.compute_clients_dist(tc)
assert len(test_distances) == len(CONTINUOUS_FEATURE_FIXTURE_DATA)
per_client_test.append(test_distances[2][0])
# Ensure the different clients also had different distances to a specific donor.
assert per_client_test[0] >= per_client_test[1] >= per_client_test[2]
# Ensure the different clients also had different distances to a specific donor.
assert per_client_test[0] >= per_client_test[1] >= per_client_test[2]
@mock_s3
def test_distance_functions(test_ctx):
# Tests the similarity functions via expected output when passing modified client data.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
# Tests the similarity functions via expected output when passing
# modified client data.
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)
# Generate a fake client.
test_client = generate_a_fake_taar_client()
recs = r.recommend(test_client, 10)
assert len(recs) > 0
# Generate a fake client.
test_client = generate_a_fake_taar_client()
recs = r.recommend(test_client, 10)
assert len(recs) > 0
# Make it a generally poor match for the donors.
test_client.update({"total_uri": 10, "bookmark_count": 2, "subsession_length": 10})
# Make it a generally poor match for the donors.
test_client.update(
{"total_uri": 10, "bookmark_count": 2, "subsession_length": 10}
)
all_client_values_zero = test_client
# Make all categorical variables non-matching with any donor.
all_client_values_zero.update(
{key: "zero" for key in test_client.keys() if key in CATEGORICAL_FEATURES}
)
recs = r.recommend(all_client_values_zero, 10)
assert len(recs) == 0
all_client_values_zero = test_client
# Make all categorical variables non-matching with any donor.
all_client_values_zero.update(
{key: "zero" for key in test_client.keys() if key in CATEGORICAL_FEATURES}
)
recs = r.recommend(all_client_values_zero, 10)
assert len(recs) == 0
# Make all continuous variables equal to zero.
all_client_values_zero.update(
{key: 0 for key in test_client.keys() if key in CONTINUOUS_FEATURES}
)
recs = r.recommend(all_client_values_zero, 10)
assert len(recs) == 0
# Make all continuous variables equal to zero.
all_client_values_zero.update(
{key: 0 for key in test_client.keys() if key in CONTINUOUS_FEATURES}
)
recs = r.recommend(all_client_values_zero, 10)
assert len(recs) == 0
# Make all categorical variables non-matching with any donor.
all_client_values_high = test_client
all_client_values_high.update(
{
key: "one billion"
for key in test_client.keys()
if key in CATEGORICAL_FEATURES
}
)
recs = r.recommend(all_client_values_high, 10)
assert len(recs) == 0
# Make all categorical variables non-matching with any donor.
all_client_values_high = test_client
all_client_values_high.update(
{
key: "one billion"
for key in test_client.keys()
if key in CATEGORICAL_FEATURES
}
)
recs = r.recommend(all_client_values_high, 10)
assert len(recs) == 0
# Make all continuous variables equal to a very high numerical value.
all_client_values_high.update(
{key: 1e60 for key in test_client.keys() if key in CONTINUOUS_FEATURES}
)
recs = r.recommend(all_client_values_high, 10)
assert len(recs) == 0
# Make all continuous variables equal to a very high numerical value.
all_client_values_high.update(
{key: 1e60 for key in test_client.keys() if key in CONTINUOUS_FEATURES}
)
recs = r.recommend(all_client_values_high, 10)
assert len(recs) == 0
# Test for 0.0 values if j_c is not normalized and j_d is fine.
j_c = 0.0
j_d = 0.42
assert abs(j_c * j_d) == 0.0
assert abs((j_c + 0.01) * j_d) != 0.0
# Test for 0.0 values if j_c is not normalized and j_d is fine.
j_c = 0.0
j_d = 0.42
assert abs(j_c * j_d) == 0.0
assert abs((j_c + 0.01) * j_d) != 0.0
@mock_s3
def test_weights_continuous(test_ctx):
# Create a new instance of a SimilarityRecommender.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
with mock_install_continuous_data(test_ctx):
r = SimilarityRecommender(test_ctx)
# In the ensemble method recommendations should be a sorted list of tuples
# containing [(guid, weight), (guid, weight)... (guid, weight)].
recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)
with open("/tmp/similarity_recommender.json", "w") as fout:
fout.write(json.dumps(recommendation_list))
# In the ensemble method recommendations should be a sorted list of tuples
# containing [(guid, weight), (guid, weight)... (guid, weight)].
recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)
with open("/tmp/similarity_recommender.json", "w") as fout:
fout.write(json.dumps(recommendation_list))
# Make sure the structure of the recommendations is correct and
# that we recommended the right addons.
# Make sure the structure of the recommendations is correct and
# that we recommended the right addons.
assert len(recommendation_list) == 2
for recommendation, weight in recommendation_list:
assert isinstance(recommendation, six.string_types)
assert isinstance(weight, float)
assert len(recommendation_list) == 2
for recommendation, weight in recommendation_list:
assert isinstance(recommendation, six.string_types)
assert isinstance(weight, float)
# Test that sorting is appropriate.
rec0 = recommendation_list[0]
rec1 = recommendation_list[1]
# Test that sorting is appropriate.
rec0 = recommendation_list[0]
rec1 = recommendation_list[1]
rec0_weight = rec0[1]
rec1_weight = rec1[1]
rec0_weight = rec0[1]
rec1_weight = rec1[1]
# Duplicate presence of test-guid-1 should mean rec0_weight is double
# rec1_weight, and both should be greater than 1.0
# Duplicate presence of test-guid-1 should mean rec0_weight is double
# rec1_weight, and both should be greater than 1.0
assert rec0_weight > rec1_weight > 1.0
assert rec0_weight > rec1_weight > 1.0
@mock_s3
def test_weights_categorical(test_ctx):
"""
This should get:
@@ -383,48 +411,24 @@ def test_weights_categorical(test_ctx):
"""
# Create a new instance of a SimilarityRecommender.
cat_ctx = install_categorical_data(test_ctx)
cts_ctx = install_continuous_data(test_ctx)
with mock_install_categorical_data(test_ctx):
r = SimilarityRecommender(test_ctx)
wrapped = cts_ctx.wrap(cat_ctx)
r = SimilarityRecommender(wrapped)
# In the ensemble method recommendations should be a sorted list of tuples
# containing [(guid, weight), (guid, weight)... (guid, weight)].
recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)
# In the ensemble method recommendations should be a sorted list of tuples
# containing [(guid, weight), (guid, weight)... (guid, weight)].
recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)
assert len(recommendation_list) == 2
# Make sure the structure of the recommendations is correct and that we recommended the right addons.
for recommendation, weight in recommendation_list:
assert isinstance(recommendation, six.string_types)
assert isinstance(weight, float)
assert len(recommendation_list) == 2
# Make sure the structure of the recommendations is correct and that we recommended the right addons.
for recommendation, weight in recommendation_list:
assert isinstance(recommendation, six.string_types)
assert isinstance(weight, float)
# Test that sorting is appropriate.
rec0 = recommendation_list[0]
rec1 = recommendation_list[1]
# Test that sorting is appropriate.
rec0 = recommendation_list[0]
rec1 = recommendation_list[1]
rec0_weight = rec0[1]
rec1_weight = rec1[1]
rec0_weight = rec0[1]
rec1_weight = rec1[1]
assert rec0_weight > rec1_weight > 0
@mock_s3
def test_recompute_matrices(test_ctx, caplog):
caplog.set_level(logging.INFO)
# Create a new instance of a SimilarityRecommender.
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
# Reloading the donors pool should reconstruct the matrices
caplog.clear()
r._donors_pool.force_expiry()
r.donors_pool
assert check_matrix_built(caplog)
# Reloading the LR curves should reconstruct the matrices
caplog.clear()
r._lr_curves.force_expiry()
r.lr_curves
assert check_matrix_built(caplog)
assert rec0_weight > rec1_weight > 0