Ported ensemble and recommendation manager to use redis

This commit is contained in:
Victor Ng 2020-09-01 20:09:30 -04:00
Родитель d594703a1a
Коммит 6b3bb48429
20 изменённых файлов: 466 добавлений и 331 удалений

Просмотреть файл

@ -23,7 +23,7 @@ def main(reset, load, info):
return
ctx = default_context()
cache = AddonsCoinstallCache(ctx)
cache = AddonsCoinstallCache.get_instance(ctx)
if reset:
if cache.reset():
print("Successfully flushed db0 bookkeeping database.")

Просмотреть файл

@ -1,4 +1,4 @@
from .profile_fetcher import ProfileFetcher # noqa
from .profile_fetcher import ProfileFetcher # noqa
import pkg_resources
__version__ = pkg_resources.require("mozilla-taar3")[0].version

Просмотреть файл

@ -20,8 +20,7 @@ PLUGIN = config("TAAR_API_PLUGIN", default=None)
sentry_sdk.init(
dsn=config("SENTRY_DSN", ''),
integrations=[FlaskIntegration()],
dsn=config("SENTRY_DSN", ""), integrations=[FlaskIntegration()],
)
# There should only be a single registered app for the taar-api

Просмотреть файл

@ -132,9 +132,7 @@ class ProfileFetcher:
"locale": profile_data.get("locale", ""),
"os": profile_data.get("os", ""),
"installed_addons": addon_ids,
"disabled_addons_ids": profile_data.get(
"disabled_addons_ids", []
),
"disabled_addons_ids": profile_data.get("disabled_addons_ids", []),
"bookmark_count": profile_data.get("places_bookmarks_count", 0),
"tab_open_count": profile_data.get(
"scalar_parent_browser_engagement_tab_open_event_count", 0

Просмотреть файл

@ -40,7 +40,7 @@ class CollaborativeRecommender(AbstractRecommender):
self.logger = self._ctx[IMozLogging].get_logger("taar")
self._redis_cache = AddonsCoinstallCache(self._ctx)
self._redis_cache = AddonsCoinstallCache.get_instance(self._ctx)
self.model = None

Просмотреть файл

@ -5,16 +5,9 @@
from srgutil.interfaces import IMozLogging
import itertools
from .base_recommender import AbstractRecommender
from .lazys3 import LazyJSONLoader
from taar.settings import (
TAAR_WHITELIST_BUCKET,
TAAR_WHITELIST_KEY,
TAAR_ENSEMBLE_BUCKET,
TAAR_ENSEMBLE_KEY,
)
from taar.utils import hasher
from taar.recommenders.redis_cache import AddonsCoinstallCache
import markus
@ -27,18 +20,6 @@ def is_test_client(client_id):
return len(set(client_id.replace("-", ""))) == 1
class WeightCache:
def __init__(self, ctx):
self._ctx = ctx
self._weights = LazyJSONLoader(
self._ctx, TAAR_ENSEMBLE_BUCKET, TAAR_ENSEMBLE_KEY, "ensemble"
)
def getWeights(self):
return self._weights.get()[0]["ensemble_weights"]
class EnsembleRecommender(AbstractRecommender):
"""
The EnsembleRecommender is a collection of recommenders where the
@ -50,12 +31,17 @@ class EnsembleRecommender(AbstractRecommender):
def __init__(self, ctx):
self.RECOMMENDER_KEYS = ["collaborative", "similarity", "locale"]
self._ctx = ctx
self._redis_cache = AddonsCoinstallCache.get_instance(self._ctx)
self.logger = self._ctx[IMozLogging].get_logger("taar.ensemble")
assert "recommender_factory" in self._ctx
self._init_from_ctx()
def getWeights(self):
return self._redis_cache.ensemble_weights()
def _init_from_ctx(self):
# Copy the map of the recommenders
self._recommender_map = {}
@ -64,11 +50,6 @@ class EnsembleRecommender(AbstractRecommender):
for rkey in self.RECOMMENDER_KEYS:
self._recommender_map[rkey] = recommender_factory.create(rkey)
self._whitelist_data = LazyJSONLoader(
self._ctx, TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY, "whitelist"
)
self._weight_cache = WeightCache(self._ctx.child())
self.logger.info("EnsembleRecommender initialized")
def can_recommend(self, client_data, extra_data={}):
@ -88,7 +69,7 @@ class EnsembleRecommender(AbstractRecommender):
client_id = client_data.get("client_id", "no-client-id")
if is_test_client(client_id):
whitelist = self._whitelist_data.get()[0]
whitelist = self._redis_cache.whitelist_data()
samples = whitelist[:limit]
self.logger.info("Test ID detected [{}]".format(client_id))
@ -102,7 +83,6 @@ class EnsembleRecommender(AbstractRecommender):
results = self._recommend(client_data, limit, extra_data)
except Exception as e:
results = []
self._weight_cache._weights.force_expiry()
self.logger.exception(
"Ensemble recommender crashed for {}".format(client_id), e
)
@ -130,7 +110,7 @@ class EnsembleRecommender(AbstractRecommender):
extended_limit = limit + len(preinstalled_addon_ids)
flattened_results = []
ensemble_weights = self._weight_cache.getWeights()
ensemble_weights = self._redis_cache.ensemble_weights()
for rkey in self.RECOMMENDER_KEYS:
recommender = self._recommender_map[rkey]

Просмотреть файл

@ -76,7 +76,7 @@ class GuidBasedRecommender:
self._ctx = ctx
self.logger = self._ctx[IMozLogging].get_logger("taarlite")
self._redis_cache = AddonsCoinstallCache(self._ctx)
self._redis_cache = AddonsCoinstallCache.get_instance(self._ctx)
self.logger.info("GUIDBasedRecommender is initialized")
def cache_ready(self):

Просмотреть файл

@ -28,7 +28,7 @@ class LocaleRecommender(AbstractRecommender):
self.logger = self._ctx[IMozLogging].get_logger("taar")
self._redis_cache = AddonsCoinstallCache(self._ctx)
self._redis_cache = AddonsCoinstallCache.get_instance(self._ctx)
# DONE removed
@property

Просмотреть файл

@ -8,13 +8,9 @@ from taar.recommenders.ensemble_recommender import (
)
from taar.recommenders.randomizer import in_experiment, reorder_guids
from srgutil.interfaces import IMozLogging
from .lazys3 import LazyJSONLoader
from taar.recommenders.redis_cache import AddonsCoinstallCache
from taar.settings import (
TAAR_WHITELIST_BUCKET,
TAAR_WHITELIST_KEY,
TAAR_EXPERIMENT_PROB,
)
from taar.settings import TAAR_EXPERIMENT_PROB
import markus
@ -61,9 +57,7 @@ class RecommendationManager:
# The whitelist data is only used for test client IDs
self._whitelist_data = LazyJSONLoader(
self._ctx, TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY, "whitelist"
)
self._redis_cache = AddonsCoinstallCache.get_instance(self._ctx)
self._experiment_prob = ctx.get("TAAR_EXPERIMENT_PROB", TAAR_EXPERIMENT_PROB)
@ -98,7 +92,7 @@ class RecommendationManager:
# Fetch back all possible whitelisted addons for this
# client
extra_data["guid_randomization"] = True
whitelist = self._whitelist_data.get()[0]
whitelist = self._redis_cache.whitelist_data()
results = self._ensemble_recommender.recommend(
client_info, len(whitelist), extra_data
)

Просмотреть файл

@ -38,6 +38,12 @@ from taar.settings import (
TAAR_SIMILARITY_BUCKET,
TAAR_SIMILARITY_DONOR_KEY,
TAAR_SIMILARITY_LRCURVES_KEY,
# Ensemble data
TAAR_ENSEMBLE_BUCKET,
TAAR_ENSEMBLE_KEY,
# Whitelist data
TAAR_WHITELIST_BUCKET,
TAAR_WHITELIST_KEY,
)
from jsoncache.loader import s3_json_loader
@ -82,13 +88,22 @@ LOCALE_DATA = "taar_locale_data|"
COLLAB_MAPPING_DATA = "taar_collab_mapping|"
COLLAB_ITEM_MATRIX = "taar_collab_item_matrix|"
# TAAR: similarity data
SIMILARITY_DONORS = "taar_similarity_donors|"
SIMILARITY_LRCURVES = "taar_similarity_lrcurves|"
# TAAR: similarity preprocessed data
SIMILARITY_NUM_DONORS = "taar_similarity_num_donors|"
SIMILARITY_CONTINUOUS_FEATURES = "taar_similarity_continuous_features|"
SIMILARITY_CATEGORICAL_FEATURES = "taar_similarity_categorical_features|"
# TAAR: ensemble weights
ENSEMBLE_WEIGHTS = "taar_ensemble_weights|"
# TAAR: whitelist data
WHITELIST_DATA = "taar_whitelist_data|"
class PrefixStripper:
def __init__(self, prefix, iterator, cast_to_str=False):
@ -118,10 +133,18 @@ class AddonsCoinstallCache:
@classmethod
def get_instance(cls, ctx):
if cls._instance is None:
cls._instance = AddonsCoinstallCache(ctx)
cls._instance = AddonsCoinstallCache(ctx, i_didnt_read_the_docs=False)
return cls._instance
def __init__(self, ctx):
def __init__(self, ctx, i_didnt_read_the_docs=True):
"""
Don't call this directly - use get_instance instead
"""
if i_didnt_read_the_docs:
raise RuntimeError(
"You cannot call this method directly - use get_instance"
)
self._ctx = ctx
self.logger = self._ctx[IMozLogging].get_logger("taar")
@ -343,6 +366,18 @@ class AddonsCoinstallCache:
"""
return self._similarity_num_donors
def ensemble_weights(self):
    """
    Return the ensemble weights held under the ENSEMBLE_WEIGHTS key.

    The value is read from the database handle returned by
    `self._db()` and decoded from JSON. None is returned when the
    key is absent or its stored value is empty.
    """
    raw_weights = self._db().get(ENSEMBLE_WEIGHTS)
    if not raw_weights:
        return None
    return json.loads(raw_weights)
def whitelist_data(self):
    """
    Return the whitelist held under the WHITELIST_DATA key.

    The value is read from the database handle returned by
    `self._db()` and decoded from JSON. None is returned when the
    key is absent or its stored value is empty.
    """
    raw_whitelist = self._db().get(WHITELIST_DATA)
    if not raw_whitelist:
        return None
    return json.loads(raw_whitelist)
"""
################################
@ -445,6 +480,28 @@ class AddonsCoinstallCache:
def _fetch_similarity_lrcurves(self):
return s3_json_loader(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY,)
def _fetch_ensemble_weights(self):
    """
    Load the ensemble document through `s3_json_loader`, using the
    configured ensemble bucket and key.
    """
    bucket, key = TAAR_ENSEMBLE_BUCKET, TAAR_ENSEMBLE_KEY
    return s3_json_loader(bucket, key)
def _fetch_whitelist(self):
    """
    Load the whitelist document through `s3_json_loader`, using the
    configured whitelist bucket and key.
    """
    bucket, key = TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY
    return s3_json_loader(bucket, key)
def _update_whitelist_data(self, db):
    """
    Refresh the TAAR whitelist stored in `db`.

    The whitelist is fetched with `_fetch_whitelist` and written,
    JSON encoded, under the WHITELIST_DATA key. Nothing is written
    when the fetch returns an empty or falsy result.
    """
    whitelist = self._fetch_whitelist()
    if not whitelist:
        return
    db.set(WHITELIST_DATA, json.dumps(whitelist))
def _update_ensemble_data(self, db):
    """
    Refresh the TAAR ensemble weights stored in `db`.

    The ensemble document is fetched with `_fetch_ensemble_weights`;
    only its "ensemble_weights" entry is written, JSON encoded, under
    the ENSEMBLE_WEIGHTS key. Nothing is written when the fetch
    returns an empty or falsy result.
    """
    payload = self._fetch_ensemble_weights()
    if not payload:
        return
    weights = payload["ensemble_weights"]
    db.set(ENSEMBLE_WEIGHTS, json.dumps(weights))
def _update_similarity_data(self, db):
"""
Load the TAAR similarity data
@ -595,4 +652,11 @@ class AddonsCoinstallCache:
# Update TAAR similarity data
self._update_similarity_data(db)
# Update TAAR ensemble data
self._update_ensemble_data(db)
# Update TAAR whitelist data
self._update_whitelist_data(db)
# Run all callback functions to preprocess model data
self._update_data_callback(db)

Просмотреть файл

@ -1,6 +1,6 @@
import re
RE_PLATFORM = re.compile('(linux|windows|macintosh|android|fxios).*firefox')
RE_PLATFORM = re.compile("(linux|windows|macintosh|android|fxios).*firefox")
LINUX = 1
WINDOWS = 2
@ -8,11 +8,13 @@ MACINTOSH = 3
ANDROID = 4
FXIOS = 5
OSNAME_TO_ID = {'linux': LINUX,
'windows': WINDOWS,
'macintosh': MACINTOSH,
'android': ANDROID,
'fxios': FXIOS}
OSNAME_TO_ID = {
"linux": LINUX,
"windows": WINDOWS,
"macintosh": MACINTOSH,
"android": ANDROID,
"fxios": FXIOS,
}
def parse_ua(user_agent):

Просмотреть файл

@ -49,3 +49,18 @@ def noop_taarsimilarity_dataload(stack):
)
)
return stack
def noop_taarensemble_dataload(stack):
    """
    Disable the ensemble and whitelist loaders of AddonsCoinstallCache.

    Both `_update_ensemble_data` and `_update_whitelist_data` are
    patched to return None. The patches are entered on `stack`, so
    they last as long as the stack does. The same stack is returned
    to allow chaining.
    """
    for loader_name in ("_update_ensemble_data", "_update_whitelist_data"):
        patcher = mock.patch.object(
            AddonsCoinstallCache, loader_name, return_value=None
        )
        stack.enter_context(patcher)
    return stack

Просмотреть файл

@ -7,7 +7,12 @@
CONTINUOUS_FEATURE_FIXTURE_DATA = [
{
"active_addons": ["{test-guid-1}", "{test-guid-2}", "{test-guid-3}", "{test-guid-4}"],
"active_addons": [
"{test-guid-1}",
"{test-guid-2}",
"{test-guid-3}",
"{test-guid-4}",
],
"geo_city": "brasilia-br",
"subsession_length": 4911,
"locale": "br-PT",
@ -15,10 +20,15 @@ CONTINUOUS_FEATURE_FIXTURE_DATA = [
"bookmark_count": 7,
"tab_open_count": 4,
"total_uri": 190,
"unique_tlds": 21
"unique_tlds": 21,
},
{
"active_addons": ["{test-guid-5}", "{test-guid-6}", "{test-guid-1}", "{test-guid-8}"],
"active_addons": [
"{test-guid-5}",
"{test-guid-6}",
"{test-guid-1}",
"{test-guid-8}",
],
"geo_city": "brasilia-br",
"subsession_length": 4911,
"locale": "br-PT",
@ -26,10 +36,15 @@ CONTINUOUS_FEATURE_FIXTURE_DATA = [
"bookmark_count": 7,
"tab_open_count": 4,
"total_uri": 200,
"unique_tlds": 21
"unique_tlds": 21,
},
{
"active_addons": ["{test-guid-9}", "{test-guid-10}", "{test-guid-11}", "{test-guid-12}"],
"active_addons": [
"{test-guid-9}",
"{test-guid-10}",
"{test-guid-11}",
"{test-guid-12}",
],
"geo_city": "brasilia-br",
"subsession_length": 4911,
"locale": "br-PT",
@ -37,7 +52,7 @@ CONTINUOUS_FEATURE_FIXTURE_DATA = [
"bookmark_count": 7,
"tab_open_count": 4,
"total_uri": 222,
"unique_tlds": 21
"unique_tlds": 21,
},
{
"active_addons": ["{test-guid-13}", "{test-guid-14}"],
@ -48,8 +63,8 @@ CONTINUOUS_FEATURE_FIXTURE_DATA = [
"bookmark_count": 7,
"tab_open_count": 4,
"total_uri": 210,
"unique_tlds": 21
}
"unique_tlds": 21,
},
]
# Match the fixture taar client, but vary the geo_city to test only
@ -60,7 +75,12 @@ CONTINUOUS_FEATURE_FIXTURE_DATA = [
CATEGORICAL_FEATURE_FIXTURE_DATA = [
{
"active_addons": ["{test-guid-1}", "{test-guid-2}", "{test-guid-3}", "{test-guid-4}"],
"active_addons": [
"{test-guid-1}",
"{test-guid-2}",
"{test-guid-3}",
"{test-guid-4}",
],
"geo_city": "brasilia-br",
"subsession_length": 4911,
"locale": "br-PT",
@ -68,11 +88,16 @@ CATEGORICAL_FEATURE_FIXTURE_DATA = [
"bookmark_count": 7,
"tab_open_count": 4,
"total_uri": 222,
"unique_tlds": 21
"unique_tlds": 21,
},
{
# "{test-guid-1}" appears in duplicate here.
"active_addons": ["{test-guid-5}", "{test-guid-6}", "{test-guid-1}", "{test-guid-8}"],
"active_addons": [
"{test-guid-5}",
"{test-guid-6}",
"{test-guid-1}",
"{test-guid-8}",
],
"geo_city": "toronto-ca",
"subsession_length": 4911,
"locale": "br-PT",
@ -80,10 +105,15 @@ CATEGORICAL_FEATURE_FIXTURE_DATA = [
"bookmark_count": 7,
"tab_open_count": 4,
"total_uri": 222,
"unique_tlds": 21
"unique_tlds": 21,
},
{
"active_addons": ["{test-guid-9}", "{test-guid-10}", "{test-guid-11}", "{test-guid-12}"],
"active_addons": [
"{test-guid-9}",
"{test-guid-10}",
"{test-guid-11}",
"{test-guid-12}",
],
"geo_city": "brasilia-br",
"subsession_length": 4911,
"locale": "br-PT",
@ -91,7 +121,7 @@ CATEGORICAL_FEATURE_FIXTURE_DATA = [
"bookmark_count": 7,
"tab_open_count": 4,
"total_uri": 222,
"unique_tlds": 21
"unique_tlds": 21,
},
{
"active_addons": ["{test-guid-13}", "{test-guid-1}"],
@ -102,6 +132,6 @@ CATEGORICAL_FEATURE_FIXTURE_DATA = [
"bookmark_count": 7,
"tab_open_count": 4,
"total_uri": 222,
"unique_tlds": 21
}
"unique_tlds": 21,
},
]

Просмотреть файл

@ -23,6 +23,7 @@ from markus.testing import MetricsMock
from .noop_fixtures import (
noop_taarlocale_dataload,
noop_taarlite_dataload,
noop_taarensemble_dataload,
noop_taarsimilarity_dataload,
)
@ -39,6 +40,7 @@ def noop_other_recommenders(stack):
stack = noop_taarlocale_dataload(stack)
stack = noop_taarlite_dataload(stack)
stack = noop_taarsimilarity_dataload(stack)
stack = noop_taarensemble_dataload(stack)
return stack

Просмотреть файл

@ -2,19 +2,17 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from taar.recommenders.ensemble_recommender import (
WeightCache,
EnsembleRecommender,
from taar.recommenders.ensemble_recommender import EnsembleRecommender
import mock
import contextlib
import fakeredis
from taar.recommenders.redis_cache import AddonsCoinstallCache
from .noop_fixtures import (
noop_taarlocale_dataload,
noop_taarcollab_dataload,
noop_taarlite_dataload,
noop_taarsimilarity_dataload,
)
from taar.settings import (
TAAR_ENSEMBLE_BUCKET,
TAAR_ENSEMBLE_KEY,
TAAR_WHITELIST_BUCKET,
TAAR_WHITELIST_KEY,
)
from moto import mock_s3
import boto3
import json
from .mocks import MockRecommenderFactory
from markus import TIMING
@ -23,146 +21,179 @@ from markus.testing import MetricsMock
EXPECTED = {"collaborative": 1000, "similarity": 100, "locale": 10}
def install_mock_ensemble_data(ctx):
def noop_loaders(stack):
    """
    Apply the locale, collaborative, taarlite and similarity no-op
    data-load fixtures to `stack`, in that order, and return the
    resulting stack.
    """
    for install_noop in (
        noop_taarlocale_dataload,
        noop_taarcollab_dataload,
        noop_taarlite_dataload,
        noop_taarsimilarity_dataload,
    ):
        stack = install_noop(stack)
    return stack
@contextlib.contextmanager
def mock_install_mock_ensemble_data(ctx):
DATA = {"ensemble_weights": EXPECTED}
conn = boto3.resource("s3", region_name="us-west-2")
conn.create_bucket(Bucket=TAAR_ENSEMBLE_BUCKET)
conn.Object(TAAR_ENSEMBLE_BUCKET, TAAR_ENSEMBLE_KEY).put(Body=json.dumps(DATA))
WHITELIST_DATA = [
"2.0@disconnect.me",
"@contain-facebook",
"@testpilot-containers",
"CookieAutoDelete@kennydo.com",
"FirefoxColor@mozilla.com",
"adblockultimate@adblockultimate.net",
"addon@darkreader.org",
"adguardadblocker@adguard.com",
"adnauseam@rednoise.org",
"clearcache@michel.de.almeida",
"copyplaintext@eros.man",
"default-bookmark-folder@gustiaux.com",
"enhancerforyoutube@maximerf.addons.mozilla.org",
"extension@one-tab.com",
"extension@tabliss.io",
"firefox-addon@myki.co",
"firefox@ghostery.com",
"forecastfox@s3_fix_version",
"forget-me-not@lusito.info",
"foxyproxy@eric.h.jung",
"foxytab@eros.man",
"gmailnoads@mywebber.com",
]
conn.create_bucket(Bucket=TAAR_WHITELIST_BUCKET)
conn.Object(TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY).put(
Body=json.dumps(
[
"2.0@disconnect.me",
"@contain-facebook",
"@testpilot-containers",
"CookieAutoDelete@kennydo.com",
"FirefoxColor@mozilla.com",
"adblockultimate@adblockultimate.net",
"addon@darkreader.org",
"adguardadblocker@adguard.com",
"adnauseam@rednoise.org",
"clearcache@michel.de.almeida",
"copyplaintext@eros.man",
"default-bookmark-folder@gustiaux.com",
"enhancerforyoutube@maximerf.addons.mozilla.org",
"extension@one-tab.com",
"extension@tabliss.io",
"firefox-addon@myki.co",
"firefox@ghostery.com",
"forecastfox@s3_fix_version",
"forget-me-not@lusito.info",
"foxyproxy@eric.h.jung",
"foxytab@eros.man",
"gmailnoads@mywebber.com",
]
with contextlib.ExitStack() as stack:
AddonsCoinstallCache._instance = None
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache, "_fetch_ensemble_weights", return_value=DATA,
)
)
)
return ctx
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache, "_fetch_whitelist", return_value=WHITELIST_DATA,
)
)
stack = noop_loaders(stack)
# Patch fakeredis in
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
"init_redis_connections",
return_value={
0: fakeredis.FakeStrictRedis(db=0),
1: fakeredis.FakeStrictRedis(db=1),
2: fakeredis.FakeStrictRedis(db=2),
},
)
)
# Initialize redis
AddonsCoinstallCache.get_instance(ctx).safe_load_data()
yield stack
@mock_s3
def test_weight_cache(test_ctx):
ctx = install_mock_ensemble_data(test_ctx)
wc = WeightCache(ctx)
actual = wc.getWeights()
assert EXPECTED == actual
@mock_s3
def test_recommendations(test_ctx):
with MetricsMock() as mm:
ctx = install_mock_ensemble_data(test_ctx)
EXPECTED_RESULTS = [
("ghi", 3430.0),
("def", 3320.0),
("ijk", 3200.0),
("hij", 3100.0),
("lmn", 420.0),
]
with mock_install_mock_ensemble_data(test_ctx):
factory = MockRecommenderFactory()
ctx["recommender_factory"] = factory
test_ctx["recommender_factory"] = factory
ctx["recommender_map"] = {
test_ctx["recommender_map"] = {
"collaborative": factory.create("collaborative"),
"similarity": factory.create("similarity"),
"locale": factory.create("locale"),
}
r = EnsembleRecommender(ctx.child())
client = {"client_id": "12345"} # Anything will work here
r = EnsembleRecommender(test_ctx)
actual = r.getWeights()
assert EXPECTED == actual
def test_recommendations(test_ctx):
    """
    The ensemble recommender returns the expected weighted suggestions
    for an ordinary client and records its timing metric.
    """
    expected_results = [
        ("ghi", 3430.0),
        ("def", 3320.0),
        ("ijk", 3200.0),
        ("hij", 3100.0),
        ("lmn", 420.0),
    ]
    recommender_names = ("collaborative", "similarity", "locale")

    with MetricsMock() as mm, mock_install_mock_ensemble_data(test_ctx):
        factory = MockRecommenderFactory()
        test_ctx["recommender_factory"] = factory
        test_ctx["recommender_map"] = {
            name: factory.create(name) for name in recommender_names
        }

        recommender = EnsembleRecommender(test_ctx)
        # Any ordinary (non-test) client id works here
        client = {"client_id": "12345"}
        recommendation_list = recommender.recommend(client, 5)

        assert isinstance(recommendation_list, list)
        assert recommendation_list == expected_results
        assert mm.has_record(TIMING, "taar.ensemble_recommend")
def test_preinstalled_guids(test_ctx):
    """
    Addons the client already has installed must not show up in the
    ensemble recommender's suggestions.
    """
    expected_results = [
        ("ghi", 3430.0),
        ("ijk", 3200.0),
        ("lmn", 420.0),
        ("klm", 409.99999999999994),
        ("abc", 23.0),
    ]
    recommender_names = ("collaborative", "similarity", "locale")

    with mock_install_mock_ensemble_data(test_ctx):
        factory = MockRecommenderFactory()
        test_ctx["recommender_factory"] = factory
        test_ctx["recommender_map"] = {
            name: factory.create(name) for name in recommender_names
        }

        recommender = EnsembleRecommender(test_ctx)

        # 'hij' should be excluded from the suggestions list.
        # The other two addon GUIDs, 'def' and 'jkl', will never be
        # recommended anyway and should have no impact on results.
        installed = ["def", "hij", "jkl"]
        client = {"client_id": "12345", "installed_addons": installed}
        recommendation_list = recommender.recommend(client, 5)
        print(recommendation_list)

        assert isinstance(recommendation_list, list)
        assert recommendation_list == expected_results
def test_mock_client_ids(test_ctx):
    """
    A test client id is answered from the whitelist rather than from
    the real recommenders.
    """
    expected_results = [
        ("2.0@disconnect.me", 0.17),
        ("@contain-facebook", 0.25),
        ("@testpilot-containers", 0.72),
        ("CookieAutoDelete@kennydo.com", 0.37),
        ("FirefoxColor@mozilla.com", 0.32),
    ]
    recommender_names = ("collaborative", "similarity", "locale")

    with mock_install_mock_ensemble_data(test_ctx):
        factory = MockRecommenderFactory()
        test_ctx["recommender_factory"] = factory
        test_ctx["recommender_map"] = {
            name: factory.create(name) for name in recommender_names
        }

        recommender = EnsembleRecommender(test_ctx)

        # A client id made of a single repeated character is treated
        # as a test client, so the first `limit` whitelist entries
        # are expected back.
        client = {"client_id": "11111"}
        recommendation_list = recommender.recommend(client, 5)

        assert isinstance(recommendation_list, list)
        assert recommendation_list == expected_results
assert mm.has_record(TIMING, "taar.ensemble")
assert mm.has_record(TIMING, "taar.ensemble_recommend")
@mock_s3
def test_preinstalled_guids(test_ctx):
ctx = install_mock_ensemble_data(test_ctx)
EXPECTED_RESULTS = [
("ghi", 3430.0),
("ijk", 3200.0),
("lmn", 420.0),
("klm", 409.99999999999994),
("abc", 23.0),
]
factory = MockRecommenderFactory()
ctx["recommender_factory"] = factory
ctx["recommender_map"] = {
"collaborative": factory.create("collaborative"),
"similarity": factory.create("similarity"),
"locale": factory.create("locale"),
}
r = EnsembleRecommender(ctx.child())
# 'hij' should be excluded from the suggestions list
# The other two addon GUIDs 'def' and 'jkl' will never be
# recommended anyway and should have no impact on results
client = {"client_id": "12345", "installed_addons": ["def", "hij", "jkl"]}
recommendation_list = r.recommend(client, 5)
print(recommendation_list)
assert isinstance(recommendation_list, list)
assert recommendation_list == EXPECTED_RESULTS
@mock_s3
def test_mock_client_ids(test_ctx):
ctx = install_mock_ensemble_data(test_ctx)
EXPECTED_RESULTS = [
("2.0@disconnect.me", 0.17),
("@contain-facebook", 0.25),
("@testpilot-containers", 0.72),
("CookieAutoDelete@kennydo.com", 0.37),
("FirefoxColor@mozilla.com", 0.32),
]
factory = MockRecommenderFactory()
ctx["recommender_factory"] = factory
ctx["recommender_map"] = {
"collaborative": factory.create("collaborative"),
"similarity": factory.create("similarity"),
"locale": factory.create("locale"),
}
r = EnsembleRecommender(ctx.child())
# 'hij' should be excluded from the suggestions list
# The other two addon GUIDs 'def' and 'jkl' will never be
# recommended anyway and should have no impact on results
client = {"client_id": "11111"}
recommendation_list = r.recommend(client, 5)
assert isinstance(recommendation_list, list)
assert recommendation_list == EXPECTED_RESULTS

Просмотреть файл

@ -9,6 +9,7 @@ from .noop_fixtures import (
noop_taarlocale_dataload,
noop_taarcollab_dataload,
noop_taarsimilarity_dataload,
noop_taarensemble_dataload,
)
from taar.recommenders.guid_based_recommender import GuidBasedRecommender
@ -109,6 +110,7 @@ def mock_coinstall_ranking_context(ctx, mock_coinstall, mock_ranking):
stack = noop_taarlocale_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarsimilarity_dataload(stack)
stack = noop_taarensemble_dataload(stack)
# Patch fakeredis in
stack.enter_context(

Просмотреть файл

@ -12,6 +12,7 @@ from .noop_fixtures import (
noop_taarcollab_dataload,
noop_taarlite_dataload,
noop_taarsimilarity_dataload,
noop_taarensemble_dataload,
)
import json
@ -61,6 +62,7 @@ def mock_locale_data(ctx):
stack = noop_taarlite_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarsimilarity_dataload(stack)
stack = noop_taarensemble_dataload(stack)
# Patch fakeredis in
stack.enter_context(

Просмотреть файл

@ -52,10 +52,7 @@ MOCK_DATA = {
"tab_open_count": 46,
"total_uri": 791,
"unique_tlds": 11,
"installed_addons": [
"e10srollout@mozilla.org",
"firefox@getpocket.com",
],
"installed_addons": ["e10srollout@mozilla.org", "firefox@getpocket.com",],
"locale": "it-IT",
},
}

Просмотреть файл

@ -2,19 +2,16 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
import boto3
import json
from moto import mock_s3
from taar.recommenders import RecommendationManager
from taar.recommenders.base_recommender import AbstractRecommender
from taar.recommenders.ensemble_recommender import (
TAAR_ENSEMBLE_BUCKET,
TAAR_ENSEMBLE_KEY,
from .noop_fixtures import (
noop_taarlocale_dataload,
noop_taarcollab_dataload,
noop_taarsimilarity_dataload,
noop_taarlite_dataload,
)
from taar.settings import TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY
from .mocks import MockRecommenderFactory
import operator
@ -23,21 +20,75 @@ from functools import reduce
from markus import TIMING
from markus.testing import MetricsMock
import mock
import contextlib
import fakeredis
from taar.recommenders.redis_cache import AddonsCoinstallCache
def install_mock_curated_data(ctx):
@contextlib.contextmanager
def mock_install_mock_curated_data(ctx):
mock_data = []
for i in range(20):
mock_data.append(str(i) * 16)
ctx = ctx.child()
conn = boto3.resource("s3", region_name="us-west-2")
mock_ensemble_weights = {
"ensemble_weights": {"collaborative": 1000, "similarity": 100, "locale": 10,}
}
conn.create_bucket(Bucket=TAAR_WHITELIST_BUCKET)
conn.Object(TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY).put(
Body=json.dumps(mock_data)
)
with contextlib.ExitStack() as stack:
AddonsCoinstallCache._instance = None
return ctx
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache, "_fetch_whitelist", return_value=mock_data
)
)
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
"_fetch_ensemble_weights",
return_value=mock_ensemble_weights,
)
)
stack = noop_taarlite_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarlocale_dataload(stack)
stack = noop_taarsimilarity_dataload(stack)
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache, "_fetch_whitelist", return_value=mock_data
)
)
# Patch fakeredis in
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
"init_redis_connections",
return_value={
0: fakeredis.FakeStrictRedis(db=0),
1: fakeredis.FakeStrictRedis(db=1),
2: fakeredis.FakeStrictRedis(db=2),
},
)
)
class DefaultMockProfileFetcher:
def get(self, client_id):
return {"client_id": client_id}
mock_fetcher = DefaultMockProfileFetcher()
ctx["profile_fetcher"] = mock_fetcher
ctx["recommender_factory"] = MockRecommenderFactory()
# Initialize redis
AddonsCoinstallCache.get_instance(ctx).safe_load_data()
yield stack
class StubRecommender(AbstractRecommender):
@ -55,129 +106,94 @@ class StubRecommender(AbstractRecommender):
return self._recommendations
def install_mocks(ctx, mock_fetcher=None):
ctx = ctx.child()
class DefaultMockProfileFetcher:
def get(self, client_id):
return {"client_id": client_id}
if mock_fetcher is None:
mock_fetcher = DefaultMockProfileFetcher()
ctx["profile_fetcher"] = mock_fetcher
ctx["recommender_factory"] = MockRecommenderFactory()
DATA = {
"ensemble_weights": {"collaborative": 1000, "similarity": 100, "locale": 10,}
}
conn = boto3.resource("s3", region_name="us-west-2")
conn.create_bucket(Bucket=TAAR_ENSEMBLE_BUCKET)
conn.Object(TAAR_ENSEMBLE_BUCKET, TAAR_ENSEMBLE_KEY).put(Body=json.dumps(DATA))
return ctx
@mock_s3
def test_none_profile_returns_empty_list(test_ctx):
ctx = install_mocks(test_ctx)
with mock_install_mock_curated_data(test_ctx):
class MockProfileFetcher:
def get(self, client_id):
return None
class MockProfileFetcher:
def get(self, client_id):
return None
ctx["profile_fetcher"] = MockProfileFetcher()
test_ctx["profile_fetcher"] = MockProfileFetcher()
rec_manager = RecommendationManager(ctx)
assert rec_manager.recommend("random-client-id", 10) == []
rec_manager = RecommendationManager(test_ctx)
assert rec_manager.recommend("random-client-id", 10) == []
@mock_s3
def test_simple_recommendation(test_ctx):
ctx = install_mocks(test_ctx)
with mock_install_mock_curated_data(test_ctx):
EXPECTED_RESULTS = [
("ghi", 3430.0),
("def", 3320.0),
("ijk", 3200.0),
("hij", 3100.0),
("lmn", 420.0),
("klm", 409.99999999999994),
("jkl", 400.0),
("abc", 23.0),
("fgh", 22.0),
("efg", 21.0),
]
EXPECTED_RESULTS = [
("ghi", 3430.0),
("def", 3320.0),
("ijk", 3200.0),
("hij", 3100.0),
("lmn", 420.0),
("klm", 409.99999999999994),
("jkl", 400.0),
("abc", 23.0),
("fgh", 22.0),
("efg", 21.0),
]
with MetricsMock() as mm:
manager = RecommendationManager(ctx.child())
recommendation_list = manager.recommend("some_ignored_id", 10)
with MetricsMock() as mm:
manager = RecommendationManager(test_ctx)
recommendation_list = manager.recommend("some_ignored_id", 10)
assert isinstance(recommendation_list, list)
assert recommendation_list == EXPECTED_RESULTS
assert isinstance(recommendation_list, list)
assert recommendation_list == EXPECTED_RESULTS
assert mm.has_record(TIMING, stat="taar.ensemble")
assert mm.has_record(TIMING, stat="taar.profile_recommendation")
assert mm.has_record(TIMING, stat="taar.profile_recommendation")
@mock_s3
def test_fixed_client_id_valid(test_ctx):
ctx = install_mocks(test_ctx)
ctx = install_mock_curated_data(ctx)
manager = RecommendationManager(ctx.child())
recommendation_list = manager.recommend("111111", 10)
assert len(recommendation_list) == 10
with mock_install_mock_curated_data(test_ctx):
manager = RecommendationManager(test_ctx)
recommendation_list = manager.recommend("111111", 10)
assert len(recommendation_list) == 10
@mock_s3
def test_fixed_client_id_empty_list(test_ctx):
class NoClientFetcher:
def get(self, client_id):
return None
ctx = install_mocks(test_ctx, mock_fetcher=NoClientFetcher())
with mock_install_mock_curated_data(test_ctx):
test_ctx["profile_fetcher"] = NoClientFetcher()
ctx = install_mock_curated_data(ctx)
manager = RecommendationManager(test_ctx)
recommendation_list = manager.recommend("not_a_real_client_id", 10)
manager = RecommendationManager(ctx.child())
recommendation_list = manager.recommend("not_a_real_client_id", 10)
assert len(recommendation_list) == 0
assert len(recommendation_list) == 0
@mock_s3
def test_experimental_randomization(test_ctx):
ctx = install_mocks(test_ctx)
ctx = install_mock_curated_data(ctx)
with mock_install_mock_curated_data(test_ctx):
manager = RecommendationManager(ctx.child())
raw_list = manager.recommend("111111", 10)
manager = RecommendationManager(test_ctx)
raw_list = manager.recommend("111111", 10)
# Clobber the experiment probability to be 100% to force a
# reordering.
ctx["TAAR_EXPERIMENT_PROB"] = 1.0
# Clobber the experiment probability to be 100% to force a
# reordering.
test_ctx["TAAR_EXPERIMENT_PROB"] = 1.0
manager = RecommendationManager(ctx.child())
rand_list = manager.recommend("111111", 10)
manager = RecommendationManager(test_ctx)
rand_list = manager.recommend("111111", 10)
"""
The two lists should be :
"""
The two lists should be :
* different (guid, weight) lists (possibly just order)
* same length
"""
assert (
reduce(
operator.and_,
[
(t1[0] == t2[0] and t1[1] == t2[1])
for t1, t2 in zip(rand_list, raw_list)
],
* different (guid, weight) lists (possibly just order)
* same length
"""
assert (
reduce(
operator.and_,
[
(t1[0] == t2[0] and t1[1] == t2[1])
for t1, t2 in zip(rand_list, raw_list)
],
)
is False
)
is False
)
assert len(rand_list) == len(raw_list)
assert len(rand_list) == len(raw_list)

Просмотреть файл

@ -29,10 +29,19 @@ from .noop_fixtures import (
noop_taarcollab_dataload,
noop_taarlite_dataload,
noop_taarlocale_dataload,
noop_taarensemble_dataload,
)
from taar.recommenders.redis_cache import AddonsCoinstallCache
def noop_loaders(stack):
    """
    Apply the locale, collaborative, ensemble and taarlite no-op
    data-load fixtures to `stack`, in that order, and return the
    resulting stack.
    """
    for install_noop in (
        noop_taarlocale_dataload,
        noop_taarcollab_dataload,
        noop_taarensemble_dataload,
        noop_taarlite_dataload,
    ):
        stack = install_noop(stack)
    return stack
def generate_fake_lr_curves(num_elements, ceiling=10.0):
"""
Generate a mock likelihood ratio (LR) curve that can be used for
@ -86,9 +95,7 @@ def mock_install_no_data(ctx):
)
)
stack = noop_taarlocale_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarlite_dataload(stack)
stack = noop_loaders(stack)
# Patch fakeredis in
stack.enter_context(
@ -128,9 +135,7 @@ def mock_install_categorical_data(ctx):
return_value=generate_fake_lr_curves(1000),
)
)
stack = noop_taarlocale_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarlite_dataload(stack)
stack = noop_loaders(stack)
# Patch fakeredis in
stack.enter_context(
@ -170,9 +175,7 @@ def mock_install_continuous_data(ctx):
return_value=lrs_data,
)
)
stack = noop_taarlocale_dataload(stack)
stack = noop_taarcollab_dataload(stack)
stack = noop_taarlite_dataload(stack)
stack = noop_loaders(stack)
# Patch fakeredis in
stack.enter_context(