This commit is contained in:
Victor Ng 2020-09-01 19:12:13 -04:00
Родитель daab43c980
Коммит fa78a731aa
3 изменённых файлов: 17 добавлений и 317 удалений

Просмотреть файл

@ -1,178 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from .base_recommender import AbstractRecommender
from .lazys3 import LazyJSONLoader
from srgutil.interfaces import IMozLogging
import operator as op
import random
from taar.settings import TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY
import markus
metrics = markus.get_metrics("taar")
class CuratedWhitelistCache:
    """
    Lazily fetch the curated addon whitelist from S3.

    The whitelist is read through a ``LazyJSONLoader`` bound to
    ``TAAR_WHITELIST_BUCKET`` / ``TAAR_WHITELIST_KEY``.
    """

    def __init__(self, ctx):
        """Keep the context and set up the loader for the whitelist."""
        self._ctx = ctx
        self._data = LazyJSONLoader(
            self._ctx, TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY, "whitelist",
        )

    def get_whitelist(self):
        """Return the first element of the loader's ``get()`` result."""
        return self._data.get()[0]

    def get_randomized_guid_sample(self, item_count):
        """
        Fetch a randomized subset of GUIDs from the whitelist.

        :param item_count: maximum number of GUIDs to return; it is
            applied as a slice bound, so fewer items come back when the
            whitelist is shorter than ``item_count``.
        :return: a new list of GUIDs in random order.
        """
        # Shuffle a private copy.  random.shuffle() works in place, and
        # the object handed back by the loader may be shared between
        # calls (NOTE(review): presumably cached by LazyJSONLoader), so
        # shuffling it directly would reorder the whitelist for every
        # other consumer.
        shuffled = list(self.get_whitelist())
        random.shuffle(shuffled)
        return shuffled[:item_count]
class CuratedRecommender(AbstractRecommender):
    """
    Recommender backed solely by the curated whitelist.

    Every recommendation is a random pick from the whitelist wrapped
    by ``CuratedWhitelistCache``.  Neither ``extra_data`` nor any part
    of ``client_data`` other than ``client_id`` (used for logging) is
    consulted.
    """

    def __init__(self, ctx):
        """Bind the logger and the whitelist cache to ``ctx``."""
        self._ctx = ctx
        self.logger = self._ctx[IMozLogging].get_logger("taar.curated")
        self._curated_wl = CuratedWhitelistCache(self._ctx)

    def can_recommend(self, client_data, extra_data={}):
        """Always report that a recommendation is possible."""
        verdict = True
        self.logger.info("Curated can_recommend: {}".format(verdict))
        return verdict

    @metrics.timer_decorator("hybrid_recommend")
    def recommend(self, client_data, limit, extra_data={}):
        """
        Return up to ``limit`` random whitelist GUIDs as
        ``(guid, weight)`` tuples, each with a fixed weight of 1.0.
        """
        sampled_guids = self._curated_wl.get_randomized_guid_sample(limit)

        weighted = []
        for guid in sampled_guids:
            weighted.append((guid, 1.0))

        client_id = client_data["client_id"]
        self.logger.info(
            "Curated recommendations client_id: [%s], guids: [%s]"
            % (client_id, str(sampled_guids))
        )
        return weighted
class HybridRecommender(AbstractRecommender):
    """
    Blend the ensemble recommender with the curated whitelist
    recommender.

    Suggestions are drawn alternately from each recommender,
    de-duplicated by GUID, and returned ordered by descending weight.
    """

    def __init__(self, ctx):
        """
        Look up the ensemble recommender in ``ctx`` and build a curated
        recommender on a child context.
        """
        self._ctx = ctx
        self.logger = self._ctx[IMozLogging].get_logger("taar")
        self._ensemble_recommender = self._ctx["ensemble_recommender"]
        self._curated_recommender = CuratedRecommender(self._ctx.child())

    def can_recommend(self, client_data, extra_data={}):
        """
        Return True only when both the ensemble and the curated
        recommender report that they can recommend.
        """
        ensemble_recommend = self._ensemble_recommender.can_recommend(
            client_data, extra_data
        )
        curated_recommend = self._curated_recommender.can_recommend(
            client_data, extra_data
        )
        result = ensemble_recommend and curated_recommend
        self.logger.info("Hybrid can_recommend: {}".format(result))
        return result

    @staticmethod
    def _interleave_unique(ensemble_suggestions, curated_suggestions, limit):
        """
        Alternate draws from the two suggestion lists, skipping GUIDs
        that were already taken, until ``limit`` items are collected or
        either list runs out.

        Items are taken from the end of each list.  The inputs are
        copied first so the lists owned by the sub-recommenders are
        left untouched.
        """
        ensemble_stack = list(ensemble_suggestions)
        curated_stack = list(curated_suggestions)
        merged = []
        # Track taken GUIDs in a set so each duplicate check is O(1).
        seen_guids = set()

        def can_draw():
            return (
                len(merged) < limit
                and len(ensemble_stack) > 0
                and len(curated_stack) > 0
            )

        def draw(stack):
            candidate = stack.pop()
            if candidate[0] not in seen_guids:
                seen_guids.add(candidate[0])
                merged.append(candidate)

        while can_draw():
            draw(ensemble_stack)
            # Stop between the two draws when the limit was just
            # reached (odd limit) or a source is exhausted.
            if not can_draw():
                break
            draw(curated_stack)
        return merged

    def recommend(self, client_data, limit, extra_data={}):
        """
        Build a mixed list of ensemble and curated recommendations.

        :param client_data: dict carrying at least ``client_id``;
            ``installed_addons`` is optional.
        :param limit: exact number of recommendations wanted.
        :param extra_data: passed through to both sub-recommenders.
        :return: a list of ``limit`` ``(guid, weight)`` items sorted by
            descending weight (ties keep their draw order), or an empty
            list when fewer than ``limit`` unique items could be
            collected.
        """
        preinstalled_addon_ids = client_data.get("installed_addons", [])

        # Ask each recommender for extra items: one more per addon that
        # is already installed.
        extended_limit = limit + len(preinstalled_addon_ids)

        ensemble_suggestions = self._ensemble_recommender.recommend(
            client_data, extended_limit, extra_data
        )
        curated_suggestions = self._curated_recommender.recommend(
            client_data, extended_limit, extra_data
        )

        # Take one item from each recommender in turn so that neither
        # source is favoured over the other.
        merged_results = self._interleave_unique(
            ensemble_suggestions, curated_suggestions, limit
        )

        if len(merged_results) < limit:
            msg = (
                "Defaulting to empty results. Insufficient recommendations found for client: %s"
                % client_data["client_id"]
            )
            self.logger.info(msg)
            return []

        # sorted() is stable, and merged_results is a list rather than
        # a set, so equal-weight items come back in a repeatable order.
        sorted_results = sorted(
            merged_results, key=op.itemgetter(1), reverse=True
        )

        log_data = (
            client_data["client_id"],
            str([r[0] for r in sorted_results]),
        )
        self.logger.info(
            "Hybrid recommendations client_id: [%s], guids: [%s]" % log_data
        )
        return sorted_results

Просмотреть файл

@ -1,138 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Test cases for the TAAR Hybrid recommender
"""
import pytest
from taar.recommenders.hybrid_recommender import CuratedRecommender
from taar.recommenders.hybrid_recommender import HybridRecommender
from taar.recommenders.ensemble_recommender import EnsembleRecommender
from taar.settings import TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY
# from taar.recommenders.hybrid_recommender import ENSEMBLE_WEIGHTS
from .test_ensemblerecommender import install_mock_ensemble_data
from .mocks import MockRecommenderFactory
import json
from moto import mock_s3
import boto3
from markus import TIMING
from markus.testing import MetricsMock
def install_no_curated_data(ctx):
    """
    Create the whitelist bucket and store an empty body under the
    whitelist key, then return a child of ``ctx``.
    """
    child_ctx = ctx.child()
    s3 = boto3.resource("s3", region_name="us-west-2")
    s3.create_bucket(Bucket=TAAR_WHITELIST_BUCKET)
    whitelist_object = s3.Object(TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY)
    whitelist_object.put(Body="")
    return child_ctx
def install_mock_curated_data(ctx):
    """
    Store a JSON list of 20 synthetic GUID strings under the whitelist
    key and return a child of ``ctx``.

    Each GUID is the decimal form of 0..19 repeated 16 times.
    """
    guids = [str(number) * 16 for number in range(20)]

    child_ctx = ctx.child()
    s3 = boto3.resource("s3", region_name="us-west-2")
    s3.create_bucket(Bucket=TAAR_WHITELIST_BUCKET)
    whitelist_object = s3.Object(TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY)
    whitelist_object.put(Body=json.dumps(guids))
    return child_ctx
def install_ensemble_fixtures(ctx):
    """
    Install the mock ensemble data, register mock collaborative,
    similarity and locale recommenders, and attach an
    ``EnsembleRecommender`` built on a child context.
    """
    fixture_ctx = install_mock_ensemble_data(ctx)

    factory = MockRecommenderFactory()
    fixture_ctx["recommender_factory"] = factory

    recommender_names = ("collaborative", "similarity", "locale")
    fixture_ctx["recommender_map"] = {
        name: factory.create(name) for name in recommender_names
    }

    fixture_ctx["ensemble_recommender"] = EnsembleRecommender(
        fixture_ctx.child()
    )
    return fixture_ctx
@mock_s3
def test_curated_can_recommend(test_ctx):
    """The curated recommender reports it can recommend for any input."""
    recommender = CuratedRecommender(install_no_curated_data(test_ctx))

    # Neither an empty payload nor an empty addon list changes the
    # answer.
    for client_payload in ({}, {"installed_addons": []}):
        assert recommender.can_recommend(client_payload)
@mock_s3
def test_curated_recommendations(test_ctx):
    """
    Check that the curated recommender returns exactly ``LIMIT`` items
    for limits 1 through 4, and that timing records named
    "taar.whitelist" and "taar.hybrid_recommend" were captured.
    """
    with MetricsMock() as mm:
        ctx = install_mock_curated_data(test_ctx)
        r = CuratedRecommender(ctx)
        # CuratedRecommender will always recommend something no matter
        # what
        for LIMIT in range(1, 5):
            guid_list = r.recommend({"client_id": "000000"}, limit=LIMIT)
            # The curated recommendations should always return with some kind
            # of recommendations
            assert len(guid_list) == LIMIT
        # Both timers must have been recorded while the mock was active.
        assert mm.has_record(TIMING, "taar.whitelist")
        assert mm.has_record(TIMING, "taar.hybrid_recommend")
@pytest.mark.skip(reason="this test seems to break sporadically")
@mock_s3
def test_hybrid_recommendations(test_ctx):
    """
    The hybrid recommender should return exactly as many items as
    requested for limits 4 through 7.
    """
    # Install both the curated whitelist and the ensemble fixtures so
    # the hybrid recommender has two sources to mix.
    hybrid_ctx = install_ensemble_fixtures(install_mock_curated_data(test_ctx))
    recommender = HybridRecommender(hybrid_ctx)

    for LIMIT in range(4, 8):
        suggestions = recommender.recommend({"client_id": "000000"}, limit=LIMIT)
        # A full list of LIMIT items is expected every time.
        assert len(suggestions) == LIMIT
@pytest.mark.skip(reason="this test seems to break sporadically")
@mock_s3
def test_stable_hybrid_results(test_ctx):
    """
    With a limit of 4, the hybrid result should hold two curated items
    (weight exactly 1.0) followed by two ensemble items (weight below
    1.0).
    """
    hybrid_ctx = install_ensemble_fixtures(install_mock_curated_data(test_ctx))
    recommender = HybridRecommender(hybrid_ctx)

    suggestions = recommender.recommend({"client_id": "000000"}, limit=4)
    assert len(suggestions) == 4

    # Results are sorted by descending weight, so the curated picks
    # come first and the ensemble picks last.
    weights = [item[1] for item in suggestions]
    assert weights[0] == 1.0
    assert weights[1] == 1.0
    assert weights[2] < 1.0
    assert weights[3] < 1.0

Просмотреть файл

@ -13,9 +13,9 @@ from taar.recommenders.ensemble_recommender import (
TAAR_ENSEMBLE_KEY,
)
from taar.settings import TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY
from .mocks import MockRecommenderFactory
from .test_hybrid_recommender import install_mock_curated_data
import operator
from functools import reduce
@ -24,6 +24,22 @@ from markus import TIMING
from markus.testing import MetricsMock
def install_mock_curated_data(ctx):
    """
    Write a JSON list of 20 synthetic GUIDs to the whitelist location
    in S3 and return a child of ``ctx``.

    GUID ``i`` is ``str(i)`` repeated 16 times, for ``i`` in 0..19.
    """
    whitelist_guids = [str(index) * 16 for index in range(20)]

    derived_ctx = ctx.child()
    s3_resource = boto3.resource("s3", region_name="us-west-2")
    s3_resource.create_bucket(Bucket=TAAR_WHITELIST_BUCKET)
    target = s3_resource.Object(TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY)
    target.put(Body=json.dumps(whitelist_guids))
    return derived_ctx
class StubRecommender(AbstractRecommender):
""" A shared, stub recommender that can be used for testing.
"""