From fa78a731aa8a2b3f1c97b818d568d4f2a2de0251 Mon Sep 17 00:00:00 2001 From: Victor Ng Date: Tue, 1 Sep 2020 19:12:13 -0400 Subject: [PATCH] Dropped hybrid recommender --- taar/recommenders/hybrid_recommender.py | 178 ------------------------ tests/test_hybrid_recommender.py | 138 ------------------ tests/test_recommendation_manager.py | 18 ++- 3 files changed, 17 insertions(+), 317 deletions(-) delete mode 100644 taar/recommenders/hybrid_recommender.py delete mode 100644 tests/test_hybrid_recommender.py diff --git a/taar/recommenders/hybrid_recommender.py b/taar/recommenders/hybrid_recommender.py deleted file mode 100644 index 7911953..0000000 --- a/taar/recommenders/hybrid_recommender.py +++ /dev/null @@ -1,178 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -from .base_recommender import AbstractRecommender -from .lazys3 import LazyJSONLoader -from srgutil.interfaces import IMozLogging -import operator as op -import random - -from taar.settings import TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY - -import markus - -metrics = markus.get_metrics("taar") - - -class CuratedWhitelistCache: - """ - This fetches the curated whitelist from S3. - """ - - def __init__(self, ctx): - self._ctx = ctx - self._data = LazyJSONLoader( - self._ctx, TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY, "whitelist", - ) - - def get_whitelist(self): - return self._data.get()[0] - - def get_randomized_guid_sample(self, item_count): - """ Fetch a subset of randomzied GUIDs from the whitelist """ - dataset = self.get_whitelist() - random.shuffle(dataset) - return dataset[:item_count] - - -class CuratedRecommender(AbstractRecommender): - """ - The curated recommender just delegates to the whitelist - that is provided by the AMO team. - - This recommender simply provides a randomized sample of - pre-approved addons for recommendation. It does not use any other - external data to generate recommendations, nor does it use any - information from the Firefox agent. - """ - - def __init__(self, ctx): - self._ctx = ctx - - self.logger = self._ctx[IMozLogging].get_logger("taar.curated") - self._curated_wl = CuratedWhitelistCache(self._ctx) - - def can_recommend(self, client_data, extra_data={}): - """The Curated recommender will always be able to recommend - something""" - self.logger.info("Curated can_recommend: {}".format(True)) - return True - - @metrics.timer_decorator("hybrid_recommend") - def recommend(self, client_data, limit, extra_data={}): - """ - Curated recommendations are just random selections - from the whitelist and we explicitly set the weighting to 1.0 - """ - guids = self._curated_wl.get_randomized_guid_sample(limit) - - results = [(guid, 1.0) for guid in guids] - - log_data = (client_data["client_id"], str(guids)) - self.logger.info( - "Curated recommendations client_id: [%s], guids: [%s]" % log_data - ) - return results - - -class HybridRecommender(AbstractRecommender): - """ - The EnsembleRecommender is a collection of recommenders where the - results from each recommendation is amplified or dampened by a - factor. The aggregate results are combines and used to recommend - addons for users. - """ - - def __init__(self, ctx): - self._ctx = ctx - - self.logger = self._ctx[IMozLogging].get_logger("taar") - - self._ensemble_recommender = self._ctx["ensemble_recommender"] - self._curated_recommender = CuratedRecommender(self._ctx.child()) - - def can_recommend(self, client_data, extra_data={}): - """The ensemble recommender is always going to be - available if at least one recommender is available""" - ensemble_recommend = self._ensemble_recommender.can_recommend( - client_data, extra_data - ) - curated_recommend = self._curated_recommender.can_recommend( - client_data, extra_data - ) - result = ensemble_recommend and curated_recommend - self.logger.info("Hybrid can_recommend: {}".format(result)) - return result - - def recommend(self, client_data, limit, extra_data={}): - """ - Hybrid recommendations simply select half recommendations from - the ensemble recommender, and half from the curated one. - - Duplicate recommendations are accomodated by rank ordering - by weight. - """ - - preinstalled_addon_ids = client_data.get("installed_addons", []) - - # Compute an extended limit by adding the length of - # the list of any preinstalled addons. - extended_limit = limit + len(preinstalled_addon_ids) - - ensemble_suggestions = self._ensemble_recommender.recommend( - client_data, extended_limit, extra_data - ) - curated_suggestions = self._curated_recommender.recommend( - client_data, extended_limit, extra_data - ) - - # Generate a set of results from each of the composite - # recommenders. We select one item from each recommender - # sequentially so that we do not bias one recommender over the - # other. - merged_results = set() - - while ( - len(merged_results) < limit - and len(ensemble_suggestions) > 0 - and len(curated_suggestions) > 0 - ): - - r1 = ensemble_suggestions.pop() - if r1[0] not in [temp[0] for temp in merged_results]: - merged_results.add(r1) - - # Terminate early if we have an odd number for the limit - if not ( - len(merged_results) < limit - and len(ensemble_suggestions) > 0 - and len(curated_suggestions) > 0 - ): - break - - r2 = curated_suggestions.pop() - if r2[0] not in [temp[0] for temp in merged_results]: - merged_results.add(r2) - - if len(merged_results) < limit: - msg = ( - "Defaulting to empty results. Insufficient recommendations found for client: %s" - % client_data["client_id"] - ) - self.logger.info(msg) - return [] - - sorted_results = sorted( - list(merged_results), key=op.itemgetter(1), reverse=True - ) - - log_data = ( - client_data["client_id"], - str([r[0] for r in sorted_results]), - ) - - self.logger.info( - "Hybrid recommendations client_id: [%s], guids: [%s]" % log_data - ) - return sorted_results diff --git a/tests/test_hybrid_recommender.py b/tests/test_hybrid_recommender.py deleted file mode 100644 index 5a3e81c..0000000 --- a/tests/test_hybrid_recommender.py +++ /dev/null @@ -1,138 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -""" -Test cases for the TAAR Hybrid recommender -""" - -import pytest - -from taar.recommenders.hybrid_recommender import CuratedRecommender -from taar.recommenders.hybrid_recommender import HybridRecommender -from taar.recommenders.ensemble_recommender import EnsembleRecommender - -from taar.settings import TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY - -# from taar.recommenders.hybrid_recommender import ENSEMBLE_WEIGHTS -from .test_ensemblerecommender import install_mock_ensemble_data -from .mocks import MockRecommenderFactory - -import json -from moto import mock_s3 -import boto3 - -from markus import TIMING -from markus.testing import MetricsMock - - -def install_no_curated_data(ctx): - ctx = ctx.child() - conn = boto3.resource("s3", region_name="us-west-2") - - conn.create_bucket(Bucket=TAAR_WHITELIST_BUCKET) - conn.Object(TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY).put(Body="") - - return ctx - - -def install_mock_curated_data(ctx): - mock_data = [] - for i in range(20): - mock_data.append(str(i) * 16) - - ctx = ctx.child() - conn = boto3.resource("s3", region_name="us-west-2") - - conn.create_bucket(Bucket=TAAR_WHITELIST_BUCKET) - conn.Object(TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY).put( - Body=json.dumps(mock_data) - ) - - return ctx - - -def install_ensemble_fixtures(ctx): - ctx = install_mock_ensemble_data(ctx) - - factory = MockRecommenderFactory() - ctx["recommender_factory"] = factory - - ctx["recommender_map"] = { - "collaborative": factory.create("collaborative"), - "similarity": factory.create("similarity"), - "locale": factory.create("locale"), - } - ctx["ensemble_recommender"] = EnsembleRecommender(ctx.child()) - return ctx - - -@mock_s3 -def test_curated_can_recommend(test_ctx): - ctx = install_no_curated_data(test_ctx) - r = CuratedRecommender(ctx) - - # CuratedRecommender will always recommend something no matter - # what - assert r.can_recommend({}) - assert r.can_recommend({"installed_addons": []}) - - -@mock_s3 -def test_curated_recommendations(test_ctx): - with MetricsMock() as mm: - ctx = install_mock_curated_data(test_ctx) - r = CuratedRecommender(ctx) - - # CuratedRecommender will always recommend something no matter - # what - - for LIMIT in range(1, 5): - guid_list = r.recommend({"client_id": "000000"}, limit=LIMIT) - # The curated recommendations should always return with some kind - # of recommendations - assert len(guid_list) == LIMIT - - assert mm.has_record(TIMING, "taar.whitelist") - assert mm.has_record(TIMING, "taar.hybrid_recommend") - - -@pytest.mark.skip(reason="this test seems to break sporadically") -@mock_s3 -def test_hybrid_recommendations(test_ctx): - # verify that the recommendations mix the curated and - # ensemble results - ctx = install_mock_curated_data(test_ctx) - ctx = install_ensemble_fixtures(ctx) - - r = HybridRecommender(ctx) - - # Test that we can generate lists of results - for LIMIT in range(4, 8): - guid_list = r.recommend({"client_id": "000000"}, limit=LIMIT) - # The curated recommendations should always return with some kind - # of recommendations - assert len(guid_list) == LIMIT - - -@pytest.mark.skip(reason="this test seems to break sporadically") -@mock_s3 -def test_stable_hybrid_results(test_ctx): - # verify that the recommendations mix the curated and - # ensemble results - ctx = install_mock_curated_data(test_ctx) - ctx = install_ensemble_fixtures(ctx) - - r = HybridRecommender(ctx) - # Test that the results are actually mixed - guid_list = r.recommend({"client_id": "000000"}, limit=4) - - assert len(guid_list) == 4 - - # A mixed list will have two recommendations with weight = 1.0 - # (curated) and 2 with exactly weight < 1.0 from the ensemble list - - assert guid_list[0][1] == 1.0 - assert guid_list[1][1] == 1.0 - assert guid_list[2][1] < 1.0 - assert guid_list[3][1] < 1.0 diff --git a/tests/test_recommendation_manager.py b/tests/test_recommendation_manager.py index e55e2a0..e9daad3 100644 --- a/tests/test_recommendation_manager.py +++ b/tests/test_recommendation_manager.py @@ -13,9 +13,9 @@ from taar.recommenders.ensemble_recommender import ( TAAR_ENSEMBLE_KEY, ) +from taar.settings import TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY from .mocks import MockRecommenderFactory -from .test_hybrid_recommender import install_mock_curated_data import operator from functools import reduce @@ -24,6 +24,22 @@ from markus import TIMING from markus.testing import MetricsMock +def install_mock_curated_data(ctx): + mock_data = [] + for i in range(20): + mock_data.append(str(i) * 16) + + ctx = ctx.child() + conn = boto3.resource("s3", region_name="us-west-2") + + conn.create_bucket(Bucket=TAAR_WHITELIST_BUCKET) + conn.Object(TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY).put( + Body=json.dumps(mock_data) + ) + + return ctx + + class StubRecommender(AbstractRecommender): """ A shared, stub recommender that can be used for testing. """