Mirror of https://github.com/mozilla/taar.git
more tests updated with better mocking
This commit is contained in:
Parent
24e4b59c64
Commit
9cefd94ceb
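
This commit replaces the tests' hand-rolled mock classes (which stubbed out get_s3_json_content on a 'utils' object) with moto-backed S3 fixtures: each install_* helper creates a fake bucket, uploads fixture JSON, and registers a LazyJSONLoader in the context so the recommender reads the data back through ordinary boto3 calls. A minimal, self-contained sketch of that pattern follows; the bucket and key names are illustrative, not taken from the commit:

import json

import boto3
from moto import mock_s3


@mock_s3
def test_reads_fixture_from_fake_s3():
    # moto intercepts boto3, so no real AWS traffic occurs.
    conn = boto3.resource('s3', region_name='us-west-2')
    conn.create_bucket(Bucket='example-bucket')
    conn.Object('example-bucket', 'example/key.json').put(Body=json.dumps({"ok": True}))

    # Code under test can now read the fixture as if it lived in real S3.
    body = conn.Object('example-bucket', 'example/key.json').get()['Body'].read()
    assert json.loads(body.decode('utf-8')) == {"ok": True}
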
@@ -7,6 +7,7 @@ from itertools import groupby
from scipy.spatial import distance
from srgutil.interfaces import IMozLogging
import numpy as np
from .lazys3 import LazyJSONLoader

FLOOR_DISTANCE_ADJUSTMENT = 0.001

@@ -14,6 +15,7 @@ CATEGORICAL_FEATURES = ["geo_city", "locale", "os"]
CONTINUOUS_FEATURES = ["subsession_length", "bookmark_count", "tab_open_count", "total_uri", "unique_tlds"]

S3_BUCKET = 'telemetry-parquet'

DONOR_LIST_KEY = 'taar/similarity/donors.json'
LR_CURVES_SIMILARITY_TO_PROBABILITY = 'taar/similarity/lr_curves.json'

@@ -38,19 +40,35 @@ class SimilarityRecommender(AbstractRecommender):

    def __init__(self, ctx):
        self._ctx = ctx

        if 'similarity_donors_pool' in self._ctx:
            self._donors_pool = self._ctx['similarity_donors_pool']
        else:
            self._donors_pool = LazyJSONLoader(S3_BUCKET, DONOR_LIST_KEY)

        if 'similarity_lr_curves' in self._ctx:
            self._lr_curves = self._ctx['similarity_lr_curves']
        else:
            self._lr_curves = LazyJSONLoader(S3_BUCKET, LR_CURVES_SIMILARITY_TO_PROBABILITY)

        self.logger = self._ctx[IMozLogging].get_logger('taar')

        self._init_from_ctx()

    @property
    def donors_pool(self):
        return self._donors_pool.get()[0]

    @property
    def lr_curves(self):
        return self._lr_curves.get()[0]

    def _init_from_ctx(self):
        # Download the addon donors list.
        cache = self._ctx['cache']
        self.donors_pool = cache.get_s3_json_content(S3_BUCKET, DONOR_LIST_KEY)
        if self.donors_pool is None:
            self.logger.error("Cannot download the donor list: {}".format(DONOR_LIST_KEY))

        # Download the probability mapping curves from similarity to likelihood of being a good donor.
        self.lr_curves = cache.get_s3_json_content(S3_BUCKET, LR_CURVES_SIMILARITY_TO_PROBABILITY)
        if self.lr_curves is None:
            self.logger.error("Cannot download the lr curves: {}".format(LR_CURVES_SIMILARITY_TO_PROBABILITY))
        self.build_features_caches()

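The donors_pool and lr_curves properties above index [0] into whatever LazyJSONLoader.get() returns. Assuming that contract is a (parsed_json, was_refreshed) tuple with the payload cached between calls, a hypothetical stand-in could look like the sketch below; this is an assumption for illustration, not the project's actual taar.recommenders.lazys3 implementation:

import json

import boto3


class LazyJSONLoaderSketch:
    """Hypothetical stand-in: lazily fetch and cache one JSON document from S3."""

    def __init__(self, s3_bucket, s3_key):
        self._bucket = s3_bucket
        self._key = s3_key
        self._cached = None

    def get(self):
        # Return (parsed_json, was_refreshed); only hit S3 on a cold cache.
        if self._cached is not None:
            return self._cached, False
        obj = boto3.resource('s3').Object(self._bucket, self._key)
        self._cached = json.loads(obj.get()['Body'].read().decode('utf-8'))
        return self._cached, True
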
@@ -39,7 +39,6 @@ def install_none_mock_data(ctx):
                                                        ITEM_MATRIX_CONFIG[0],
                                                        ITEM_MATRIX_CONFIG[1])


    # Don't reuse connections with moto. badness happens
    conn = boto3.resource('s3', region_name='us-west-2')
    conn.create_bucket(Bucket=ADDON_MAPPING_CONFIG[0])

@@ -55,7 +54,6 @@ def install_mock_data(ctx):
    Overload the 'real' addon model and mapping URLs responses so that
    we always use the fixture data at the top of this test module.
    """
    conn = boto3.resource('s3', region_name='us-west-2')

    addon_space = [{"id": "addon1.id", "name": "addon1.name", "isWebextension": True},
                   {"id": "addon2.id", "name": "addon2.name", "isWebextension": True},
@@ -63,9 +61,6 @@ def install_mock_data(ctx):
                   {"id": "addon4.id", "name": "addon4.name", "isWebextension": True},
                   {"id": "addon5.id", "name": "addon5.name", "isWebextension": True}]

    conn.create_bucket(Bucket=ITEM_MATRIX_CONFIG[0])
    conn.create_bucket(Bucket=ADDON_MAPPING_CONFIG[0])

    fake_addon_matrix = []
    for i, addon in enumerate(addon_space):
        row = {"id": positive_hash(addon['id']), "features": [0, 0.2, 0.0, 0.1, 0.15]}
@@ -77,7 +72,12 @@ def install_mock_data(ctx):
        java_hash = positive_hash(addon['id'])
        fake_mapping[str(java_hash)] = addon

    conn = boto3.resource('s3', region_name='us-west-2')
    conn.create_bucket(Bucket=ITEM_MATRIX_CONFIG[0])
    conn.Object(ITEM_MATRIX_CONFIG[0], ITEM_MATRIX_CONFIG[1]).put(Body=json.dumps(fake_addon_matrix))

    conn = boto3.resource('s3', region_name='us-west-2')
    conn.create_bucket(Bucket=ADDON_MAPPING_CONFIG[0])
    conn.Object(ADDON_MAPPING_CONFIG[0], ADDON_MAPPING_CONFIG[1]).put(Body=json.dumps(fake_mapping))

    ctx['collaborative_addon_mapping'] = LazyJSONLoader(ctx,

@@ -5,12 +5,14 @@
import json
import six

import pytest
import numpy as np
import scipy.stats
from taar.recommenders.lazys3 import LazyJSONLoader

from taar.cache import JSONCache, Clock
import boto3
from moto import mock_s3

from taar.recommenders.similarity_recommender import S3_BUCKET
from taar.recommenders.similarity_recommender import \
    CATEGORICAL_FEATURES, CONTINUOUS_FEATURES, DONOR_LIST_KEY, LR_CURVES_SIMILARITY_TO_PROBABILITY, \
    SimilarityRecommender
@@ -55,68 +57,83 @@ def generate_a_fake_taar_client():
    }


class MockNoDataUtils:
    def get_s3_json_content(self, *args, **kwargs):
        return None
def install_no_data(ctx):
    ctx = ctx.child()
    conn = boto3.resource('s3', region_name='us-west-2')

    conn.create_bucket(Bucket=S3_BUCKET)
    conn.Object(S3_BUCKET, DONOR_LIST_KEY).put(Body="")

    conn.Object(S3_BUCKET, LR_CURVES_SIMILARITY_TO_PROBABILITY).put(Body="")

    ctx['similarity_donors_pool'] = LazyJSONLoader(ctx,
                                                   S3_BUCKET,
                                                   DONOR_LIST_KEY)

    ctx['similarity_lr_curves'] = LazyJSONLoader(ctx,
                                                 S3_BUCKET,
                                                 LR_CURVES_SIMILARITY_TO_PROBABILITY)

    return ctx


class MockCategoricalData:
def install_categorical_data(ctx):
    ctx = ctx.child()
    conn = boto3.resource('s3', region_name='us-west-2')

    cat_data = json.loads(json.dumps(CATEGORICAL_FEATURE_FIXTURE_DATA))
    lrs_data = json.loads(json.dumps(generate_fake_lr_curves(1000)))
    conn.create_bucket(Bucket=S3_BUCKET)
    conn.Object(S3_BUCKET, DONOR_LIST_KEY).put(Body=json.dumps(CATEGORICAL_FEATURE_FIXTURE_DATA))

    def get_s3_json_content(self, bucket, key):
        if key == DONOR_LIST_KEY:
            return self.cat_data
        if key == LR_CURVES_SIMILARITY_TO_PROBABILITY:
            return self.lrs_data
    conn.Object(S3_BUCKET, LR_CURVES_SIMILARITY_TO_PROBABILITY).put(Body=json.dumps(generate_fake_lr_curves(1000)))

    ctx['similarity_donors_pool'] = LazyJSONLoader(ctx,
                                                   S3_BUCKET,
                                                   DONOR_LIST_KEY)

    ctx['similarity_lr_curves'] = LazyJSONLoader(ctx,
                                                 S3_BUCKET,
                                                 LR_CURVES_SIMILARITY_TO_PROBABILITY)

    return ctx


class MockContinuousData:
def install_continuous_data(ctx):
    ctx = ctx.child()
    cts_data = json.dumps(CONTINUOUS_FEATURE_FIXTURE_DATA)
    lrs_data = json.dumps(generate_fake_lr_curves(1000))

    cts_data = json.loads(json.dumps(CONTINUOUS_FEATURE_FIXTURE_DATA))
    lrs_data = json.loads(json.dumps(generate_fake_lr_curves(1000)))
    conn = boto3.resource('s3', region_name='us-west-2')

    def get_s3_json_content(self, bucket, key):
        if key == DONOR_LIST_KEY:
            return self.cts_data
        if key == LR_CURVES_SIMILARITY_TO_PROBABILITY:
            return self.lrs_data


@pytest.fixture
def cat_test_ctx(test_ctx):
    ctx = test_ctx
    ctx['utils'] = MockCategoricalData()
    ctx['clock'] = Clock()
    ctx['cache'] = JSONCache(ctx)
    return ctx.child()


@pytest.fixture
def cts_test_ctx(test_ctx):
    ctx = test_ctx
    ctx['utils'] = MockContinuousData()
    ctx['clock'] = Clock()
    ctx['cache'] = JSONCache(ctx)
    return ctx.child()
    conn.create_bucket(Bucket=S3_BUCKET)
    conn.Object(S3_BUCKET, DONOR_LIST_KEY).put(Body=cts_data)

    conn.Object(S3_BUCKET, LR_CURVES_SIMILARITY_TO_PROBABILITY).put(Body=lrs_data)

    ctx['similarity_donors_pool'] = LazyJSONLoader(ctx,
                                                   S3_BUCKET,
                                                   DONOR_LIST_KEY)

    ctx['similarity_lr_curves'] = LazyJSONLoader(ctx,
                                                 S3_BUCKET,
                                                 LR_CURVES_SIMILARITY_TO_PROBABILITY)

    return ctx


@mock_s3
def test_soft_fail(test_ctx):
    # Create a new instance of a SimilarityRecommender.
    ctx = test_ctx
    ctx['utils'] = MockNoDataUtils()
    ctx['clock'] = Clock()
    ctx['cache'] = JSONCache(ctx)
    ctx = install_no_data(test_ctx)
    r = SimilarityRecommender(ctx)

    # Don't recommend if the source files cannot be found.
    assert not r.can_recommend({})


def test_can_recommend(cts_test_ctx):
@mock_s3
def test_can_recommend(test_ctx):
    # Create a new instance of a SimilarityRecommender.
    ctx = cts_test_ctx
    ctx = install_continuous_data(test_ctx)
    r = SimilarityRecommender(ctx)

    # Test that we can't recommend if we have not enough client info.
@@ -140,13 +157,12 @@ def test_can_recommend(cts_test_ctx):
    assert not r.can_recommend(profile_without_x)


def test_recommendations(cts_test_ctx):
@mock_s3
def test_recommendations(test_ctx):
    # Create a new instance of a SimilarityRecommender.
    ctx = cts_test_ctx
    ctx = install_continuous_data(test_ctx)
    r = SimilarityRecommender(ctx)

    # TODO: clobber the SimilarityRecommender::lr_curves

    recommendation_list = r.recommend(generate_a_fake_taar_client(), 1)

    assert isinstance(recommendation_list, list)
@@ -159,25 +175,28 @@ def test_recommendations(cts_test_ctx):
        assert type(weight) == np.float64


def test_recommender_str(cts_test_ctx):
@mock_s3
def test_recommender_str(test_ctx):
    # Tests that the string representation of the recommender is correct.
    ctx = cts_test_ctx
    ctx = install_continuous_data(test_ctx)
    r = SimilarityRecommender(ctx)
    assert str(r) == "SimilarityRecommender"


def test_get_lr(cts_test_ctx):
@mock_s3
def test_get_lr(test_ctx):
    # Tests that the likelihood ratio values are not empty for extreme values and are realistic.
    ctx = cts_test_ctx
    ctx = install_continuous_data(test_ctx)
    r = SimilarityRecommender(ctx)
    assert r.get_lr(0.0001) is not None
    assert r.get_lr(10.0) is not None
    assert r.get_lr(0.001) > r.get_lr(5.0)


def test_compute_clients_dist(cts_test_ctx):
@mock_s3
def test_compute_clients_dist(test_ctx):
    # Test the distance function computation.
    ctx = cts_test_ctx
    ctx = install_continuous_data(test_ctx)
    r = SimilarityRecommender(ctx)
    test_clients = [
        {
@@ -229,9 +248,10 @@ def test_compute_clients_dist(cts_test_ctx):
    assert per_client_test[0] >= per_client_test[1] >= per_client_test[2]


def test_distance_functions(cts_test_ctx):
@mock_s3
def test_distance_functions(test_ctx):
    # Tests the similarity functions via expected output when passing modified client data.
    ctx = cts_test_ctx
    ctx = install_continuous_data(test_ctx)
    r = SimilarityRecommender(ctx)

    # Generate a fake client.
@@ -271,9 +291,10 @@ def test_distance_functions(cts_test_ctx):
    assert abs((j_c + 0.01) * j_d) != 0.0


def test_weights_continuous(cts_test_ctx):
@mock_s3
def test_weights_continuous(test_ctx):
    # Create a new instance of a SimilarityRecommender.
    ctx = cts_test_ctx
    ctx = install_continuous_data(test_ctx)
    r = SimilarityRecommender(ctx)

    # In the ensemble method recommendations should be a sorted list of tuples
@@ -303,7 +324,8 @@ def test_weights_continuous(cts_test_ctx):
    assert rec0_weight > rec1_weight > 1.0


def test_weights_categorical(cat_test_ctx, cts_test_ctx):
@mock_s3
def test_weights_categorical(test_ctx):
    '''
    This should get :
    ["{test-guid-1}", "{test-guid-2}", "{test-guid-3}", "{test-guid-4}"],
@@ -313,9 +335,10 @@ def test_weights_categorical(cat_test_ctx, cts_test_ctx):

    '''
    # Create a new instance of a SimilarityRecommender.
    ctx = cat_test_ctx
    ctx2 = cts_test_ctx
    wrapped = ctx2.wrap(ctx)
    cat_ctx = install_categorical_data(test_ctx)
    cts_ctx = install_continuous_data(test_ctx)

    wrapped = cts_ctx.wrap(cat_ctx)
    r = SimilarityRecommender(wrapped)

    # In the ensemble method recommendations should be a sorted list of tuples

@@ -1,13 +0,0 @@
from taar.recommenders import utils


def test_fetch_json():
    """ Just test a URL that we know will fail """
    jdata = utils.fetch_json("http://127.0.0.1:9001/some-nonexistant-url-foo.json")
    assert jdata is None


def test_get_s3_json_content():
    """ Just test an S3 bucket and key that doesn't exist """
    jdata = utils.get_s3_json_content("taar_not_my_bucket", "this/is/not/a/valid/path")
    assert jdata is None