more tests updated with better mocking

This commit is contained in:
Victor Ng 2018-08-07 22:08:42 -04:00
Родитель 24e4b59c64
Коммит 9cefd94ceb
4 изменённых файлов: 112 добавлений и 84 удалений

Просмотреть файл

@ -7,6 +7,7 @@ from itertools import groupby
from scipy.spatial import distance
from srgutil.interfaces import IMozLogging
import numpy as np
from .lazys3 import LazyJSONLoader
FLOOR_DISTANCE_ADJUSTMENT = 0.001
@ -14,6 +15,7 @@ CATEGORICAL_FEATURES = ["geo_city", "locale", "os"]
CONTINUOUS_FEATURES = ["subsession_length", "bookmark_count", "tab_open_count", "total_uri", "unique_tlds"]
S3_BUCKET = 'telemetry-parquet'
DONOR_LIST_KEY = 'taar/similarity/donors.json'
LR_CURVES_SIMILARITY_TO_PROBABILITY = 'taar/similarity/lr_curves.json'
@ -38,19 +40,35 @@ class SimilarityRecommender(AbstractRecommender):
def __init__(self, ctx):
self._ctx = ctx
if 'similarity_donors_pool' in self._ctx:
self._donors_pool = self._ctx['similarity_donors_pool']
else:
self._donors_pool = LazyJSONLoader(S3_BUCKET, DONOR_LIST_KEY)
if 'similarity_lr_curves' in self._ctx:
self._lr_curves = self._ctx['similarity_lr_curves']
else:
self._lr_curves = LazyJSONLoader(S3_BUCKET, LR_CURVES_SIMILARITY_TO_PROBABILITY)
self.logger = self._ctx[IMozLogging].get_logger('taar')
self._init_from_ctx()
@property
def donors_pool(self):
return self._donors_pool.get()[0]
@property
def lr_curves(self):
return self._lr_curves.get()[0]
def _init_from_ctx(self):
# Download the addon donors list.
cache = self._ctx['cache']
self.donors_pool = cache.get_s3_json_content(S3_BUCKET, DONOR_LIST_KEY)
if self.donors_pool is None:
self.logger.error("Cannot download the donor list: {}".format(DONOR_LIST_KEY))
# Download the probability mapping curves from similarity to likelihood of being a good donor.
self.lr_curves = cache.get_s3_json_content(S3_BUCKET, LR_CURVES_SIMILARITY_TO_PROBABILITY)
if self.lr_curves is None:
self.logger.error("Cannot download the lr curves: {}".format(LR_CURVES_SIMILARITY_TO_PROBABILITY))
self.build_features_caches()

Просмотреть файл

@ -39,7 +39,6 @@ def install_none_mock_data(ctx):
ITEM_MATRIX_CONFIG[0],
ITEM_MATRIX_CONFIG[1])
# Don't reuse connections with moto. badness happens
conn = boto3.resource('s3', region_name='us-west-2')
conn.create_bucket(Bucket=ADDON_MAPPING_CONFIG[0])
@ -55,7 +54,6 @@ def install_mock_data(ctx):
Overload the 'real' addon model and mapping URLs responses so that
we always the fixture data at the top of this test module.
"""
conn = boto3.resource('s3', region_name='us-west-2')
addon_space = [{"id": "addon1.id", "name": "addon1.name", "isWebextension": True},
{"id": "addon2.id", "name": "addon2.name", "isWebextension": True},
@ -63,9 +61,6 @@ def install_mock_data(ctx):
{"id": "addon4.id", "name": "addon4.name", "isWebextension": True},
{"id": "addon5.id", "name": "addon5.name", "isWebextension": True}]
conn.create_bucket(Bucket=ITEM_MATRIX_CONFIG[0])
conn.create_bucket(Bucket=ADDON_MAPPING_CONFIG[0])
fake_addon_matrix = []
for i, addon in enumerate(addon_space):
row = {"id": positive_hash(addon['id']), "features": [0, 0.2, 0.0, 0.1, 0.15]}
@ -77,7 +72,12 @@ def install_mock_data(ctx):
java_hash = positive_hash(addon['id'])
fake_mapping[str(java_hash)] = addon
conn = boto3.resource('s3', region_name='us-west-2')
conn.create_bucket(Bucket=ITEM_MATRIX_CONFIG[0])
conn.Object(ITEM_MATRIX_CONFIG[0], ITEM_MATRIX_CONFIG[1]).put(Body=json.dumps(fake_addon_matrix))
conn = boto3.resource('s3', region_name='us-west-2')
conn.create_bucket(Bucket=ADDON_MAPPING_CONFIG[0])
conn.Object(ADDON_MAPPING_CONFIG[0], ADDON_MAPPING_CONFIG[1]).put(Body=json.dumps(fake_mapping))
ctx['collaborative_addon_mapping'] = LazyJSONLoader(ctx,

Просмотреть файл

@ -5,12 +5,14 @@
import json
import six
import pytest
import numpy as np
import scipy.stats
from taar.recommenders.lazys3 import LazyJSONLoader
from taar.cache import JSONCache, Clock
import boto3
from moto import mock_s3
from taar.recommenders.similarity_recommender import S3_BUCKET
from taar.recommenders.similarity_recommender import \
CATEGORICAL_FEATURES, CONTINUOUS_FEATURES, DONOR_LIST_KEY, LR_CURVES_SIMILARITY_TO_PROBABILITY, \
SimilarityRecommender
@ -55,68 +57,83 @@ def generate_a_fake_taar_client():
}
class MockNoDataUtils:
def get_s3_json_content(self, *args, **kwargs):
return None
def install_no_data(ctx):
ctx = ctx.child()
conn = boto3.resource('s3', region_name='us-west-2')
conn.create_bucket(Bucket=S3_BUCKET)
conn.Object(S3_BUCKET, DONOR_LIST_KEY).put(Body="")
conn.Object(S3_BUCKET, LR_CURVES_SIMILARITY_TO_PROBABILITY).put(Body="")
ctx['similarity_donors_pool'] = LazyJSONLoader(ctx,
S3_BUCKET,
DONOR_LIST_KEY)
ctx['similarity_lr_curves'] = LazyJSONLoader(ctx,
S3_BUCKET,
LR_CURVES_SIMILARITY_TO_PROBABILITY)
return ctx
class MockCategoricalData:
def install_categorical_data(ctx):
ctx = ctx.child()
conn = boto3.resource('s3', region_name='us-west-2')
cat_data = json.loads(json.dumps(CATEGORICAL_FEATURE_FIXTURE_DATA))
lrs_data = json.loads(json.dumps(generate_fake_lr_curves(1000)))
conn.create_bucket(Bucket=S3_BUCKET)
conn.Object(S3_BUCKET, DONOR_LIST_KEY).put(Body=json.dumps(CATEGORICAL_FEATURE_FIXTURE_DATA))
def get_s3_json_content(self, bucket, key):
if key == DONOR_LIST_KEY:
return self.cat_data
if key == LR_CURVES_SIMILARITY_TO_PROBABILITY:
return self.lrs_data
conn.Object(S3_BUCKET, LR_CURVES_SIMILARITY_TO_PROBABILITY).put(Body=json.dumps(generate_fake_lr_curves(1000)))
ctx['similarity_donors_pool'] = LazyJSONLoader(ctx,
S3_BUCKET,
DONOR_LIST_KEY)
ctx['similarity_lr_curves'] = LazyJSONLoader(ctx,
S3_BUCKET,
LR_CURVES_SIMILARITY_TO_PROBABILITY)
return ctx
class MockContinuousData:
def install_continuous_data(ctx):
ctx = ctx.child()
cts_data = json.dumps(CONTINUOUS_FEATURE_FIXTURE_DATA)
lrs_data = json.dumps(generate_fake_lr_curves(1000))
cts_data = json.loads(json.dumps(CONTINUOUS_FEATURE_FIXTURE_DATA))
lrs_data = json.loads(json.dumps(generate_fake_lr_curves(1000)))
conn = boto3.resource('s3', region_name='us-west-2')
def get_s3_json_content(self, bucket, key):
if key == DONOR_LIST_KEY:
return self.cts_data
if key == LR_CURVES_SIMILARITY_TO_PROBABILITY:
return self.lrs_data
@pytest.fixture
def cat_test_ctx(test_ctx):
ctx = test_ctx
ctx['utils'] = MockCategoricalData()
ctx['clock'] = Clock()
ctx['cache'] = JSONCache(ctx)
return ctx.child()
@pytest.fixture
def cts_test_ctx(test_ctx):
ctx = test_ctx
ctx['utils'] = MockContinuousData()
ctx['clock'] = Clock()
ctx['cache'] = JSONCache(ctx)
return ctx.child()
conn.create_bucket(Bucket=S3_BUCKET)
conn.Object(S3_BUCKET, DONOR_LIST_KEY).put(Body=cts_data)
conn.Object(S3_BUCKET, LR_CURVES_SIMILARITY_TO_PROBABILITY).put(Body=lrs_data)
ctx['similarity_donors_pool'] = LazyJSONLoader(ctx,
S3_BUCKET,
DONOR_LIST_KEY)
ctx['similarity_lr_curves'] = LazyJSONLoader(ctx,
S3_BUCKET,
LR_CURVES_SIMILARITY_TO_PROBABILITY)
return ctx
@mock_s3
def test_soft_fail(test_ctx):
# Create a new instance of a SimilarityRecommender.
ctx = test_ctx
ctx['utils'] = MockNoDataUtils()
ctx['clock'] = Clock()
ctx['cache'] = JSONCache(ctx)
ctx = install_no_data(test_ctx)
r = SimilarityRecommender(ctx)
# Don't recommend if the source files cannot be found.
assert not r.can_recommend({})
def test_can_recommend(cts_test_ctx):
@mock_s3
def test_can_recommend(test_ctx):
# Create a new instance of a SimilarityRecommender.
ctx = cts_test_ctx
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
# Test that we can't recommend if we have not enough client info.
@ -140,13 +157,12 @@ def test_can_recommend(cts_test_ctx):
assert not r.can_recommend(profile_without_x)
def test_recommendations(cts_test_ctx):
@mock_s3
def test_recommendations(test_ctx):
# Create a new instance of a SimilarityRecommender.
ctx = cts_test_ctx
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
# TODO: clobber the SimilarityRecommender::lr_curves
recommendation_list = r.recommend(generate_a_fake_taar_client(), 1)
assert isinstance(recommendation_list, list)
@ -159,25 +175,28 @@ def test_recommendations(cts_test_ctx):
assert type(weight) == np.float64
def test_recommender_str(cts_test_ctx):
@mock_s3
def test_recommender_str(test_ctx):
# Tests that the string representation of the recommender is correct.
ctx = cts_test_ctx
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
assert str(r) == "SimilarityRecommender"
def test_get_lr(cts_test_ctx):
@mock_s3
def test_get_lr(test_ctx):
# Tests that the likelihood ratio values are not empty for extreme values and are realistic.
ctx = cts_test_ctx
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
assert r.get_lr(0.0001) is not None
assert r.get_lr(10.0) is not None
assert r.get_lr(0.001) > r.get_lr(5.0)
def test_compute_clients_dist(cts_test_ctx):
@mock_s3
def test_compute_clients_dist(test_ctx):
# Test the distance function computation.
ctx = cts_test_ctx
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
test_clients = [
{
@ -229,9 +248,10 @@ def test_compute_clients_dist(cts_test_ctx):
assert per_client_test[0] >= per_client_test[1] >= per_client_test[2]
def test_distance_functions(cts_test_ctx):
@mock_s3
def test_distance_functions(test_ctx):
# Tests the similarity functions via expected output when passing modified client data.
ctx = cts_test_ctx
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
# Generate a fake client.
@ -271,9 +291,10 @@ def test_distance_functions(cts_test_ctx):
assert abs((j_c + 0.01) * j_d) != 0.0
def test_weights_continuous(cts_test_ctx):
@mock_s3
def test_weights_continuous(test_ctx):
# Create a new instance of a SimilarityRecommender.
ctx = cts_test_ctx
ctx = install_continuous_data(test_ctx)
r = SimilarityRecommender(ctx)
# In the ensemble method recommendations should be a sorted list of tuples
@ -303,7 +324,8 @@ def test_weights_continuous(cts_test_ctx):
assert rec0_weight > rec1_weight > 1.0
def test_weights_categorical(cat_test_ctx, cts_test_ctx):
@mock_s3
def test_weights_categorical(test_ctx):
'''
This should get :
["{test-guid-1}", "{test-guid-2}", "{test-guid-3}", "{test-guid-4}"],
@ -313,9 +335,10 @@ def test_weights_categorical(cat_test_ctx, cts_test_ctx):
'''
# Create a new instance of a SimilarityRecommender.
ctx = cat_test_ctx
ctx2 = cts_test_ctx
wrapped = ctx2.wrap(ctx)
cat_ctx = install_categorical_data(test_ctx)
cts_ctx = install_continuous_data(test_ctx)
wrapped = cts_ctx.wrap(cat_ctx)
r = SimilarityRecommender(wrapped)
# In the ensemble method recommendations should be a sorted list of tuples

Просмотреть файл

@ -1,13 +0,0 @@
from taar.recommenders import utils
def test_fetch_json():
""" Just test a URL that we know will fail """
jdata = utils.fetch_json("http://127.0.0.1:9001/some-nonexistant-url-foo.json")
assert jdata is None
def test_get_s3_json_content():
""" Just test an S3 bucket and key that doesn't exist """
jdata = utils.get_s3_json_content("taar_not_my_bucket", "this/is/not/a/valid/path")
assert jdata is None