зеркало из https://github.com/mozilla/taar.git
Dropped hybrid recommender
This commit is contained in:
Родитель
daab43c980
Коммит
fa78a731aa
|
@ -1,178 +0,0 @@
|
|||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
from .base_recommender import AbstractRecommender
|
||||
from .lazys3 import LazyJSONLoader
|
||||
from srgutil.interfaces import IMozLogging
|
||||
import operator as op
|
||||
import random
|
||||
|
||||
from taar.settings import TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY
|
||||
|
||||
import markus
|
||||
|
||||
metrics = markus.get_metrics("taar")
|
||||
|
||||
|
||||
class CuratedWhitelistCache:
    """
    Fetch the curated addon whitelist from S3.

    The whitelist is read through a LazyJSONLoader bound to
    TAAR_WHITELIST_BUCKET / TAAR_WHITELIST_KEY.
    """

    def __init__(self, ctx):
        """Keep the context and bind the whitelist loader to it."""
        self._ctx = ctx
        self._data = LazyJSONLoader(
            self._ctx, TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY, "whitelist",
        )

    def get_whitelist(self):
        """Return the whitelist payload (element 0 of the loader's result)."""
        return self._data.get()[0]

    def get_randomized_guid_sample(self, item_count):
        """Fetch a subset of randomized GUIDs from the whitelist.

        Returns a new list holding at most ``item_count`` whitelist
        entries in random order.  The whitelist itself is left untouched.
        """
        # Shuffle a private copy.  random.shuffle() works in place, and the
        # object handed back by the loader is presumably reused across
        # calls, so shuffling it directly would reorder the whitelist for
        # every other reader.
        shuffled = list(self.get_whitelist())
        random.shuffle(shuffled)
        return shuffled[:item_count]
|
||||
|
||||
|
||||
class CuratedRecommender(AbstractRecommender):
    """
    The curated recommender just delegates to the whitelist
    that is provided by the AMO team.

    This recommender simply provides a randomized sample of
    pre-approved addons for recommendation. It does not use any other
    external data to generate recommendations, nor does it use any
    information from the Firefox agent.
    """

    def __init__(self, ctx):
        """Set up the logger and the whitelist cache from the context."""
        self._ctx = ctx

        self.logger = self._ctx[IMozLogging].get_logger("taar.curated")
        self._curated_wl = CuratedWhitelistCache(self._ctx)

    def can_recommend(self, client_data, extra_data=None):
        """The Curated recommender will always be able to recommend
        something.

        Both arguments are accepted for interface compatibility and are
        not inspected.  Always returns True.
        """
        self.logger.info("Curated can_recommend: True")
        return True

    # NOTE(review): the timer key is "hybrid_recommend" even though this is
    # the curated recommender.  It is kept unchanged because renaming a
    # metric changes the data that gets emitted.
    @metrics.timer_decorator("hybrid_recommend")
    def recommend(self, client_data, limit, extra_data=None):
        """
        Curated recommendations are just random selections
        from the whitelist and we explicitly set the weighting to 1.0

        ``client_data`` must carry a "client_id" key (used for logging
        only); ``extra_data`` is ignored.  Returns a list of
        ``(guid, 1.0)`` tuples, one per GUID sampled from the whitelist
        with ``limit`` as the requested sample size.
        """
        guids = self._curated_wl.get_randomized_guid_sample(limit)

        results = [(guid, 1.0) for guid in guids]

        log_data = (client_data["client_id"], str(guids))
        self.logger.info(
            "Curated recommendations client_id: [%s], guids: [%s]" % log_data
        )
        return results
|
||||
|
||||
|
||||
class HybridRecommender(AbstractRecommender):
    """
    The HybridRecommender blends the suggestions of the ensemble
    recommender with suggestions from the curated whitelist.

    Candidates are taken alternately from each of the two recommenders
    and the combined results are rank ordered by weight.
    """

    def __init__(self, ctx):
        """Look up the ensemble recommender in the context and build a
        curated recommender on a child context."""
        self._ctx = ctx

        self.logger = self._ctx[IMozLogging].get_logger("taar")

        self._ensemble_recommender = self._ctx["ensemble_recommender"]
        self._curated_recommender = CuratedRecommender(self._ctx.child())

    def can_recommend(self, client_data, extra_data=None):
        """The hybrid recommender is available only when both the
        ensemble recommender and the curated recommender report that
        they can recommend."""
        extra_data = {} if extra_data is None else extra_data

        ensemble_recommend = self._ensemble_recommender.can_recommend(
            client_data, extra_data
        )
        curated_recommend = self._curated_recommender.can_recommend(
            client_data, extra_data
        )
        result = ensemble_recommend and curated_recommend
        self.logger.info("Hybrid can_recommend: {}".format(result))
        return result

    @staticmethod
    def _interleave(ensemble_suggestions, curated_suggestions, limit):
        """Alternate between the two candidate lists, ensemble first.

        Records whose GUID (element 0) was already taken are skipped.
        Merging stops as soon as ``limit`` unique records have been
        collected or either candidate list runs out.  Returns the
        collected records in the order they were taken.
        """
        # Work on copies so the lists owned by the sub-recommenders are
        # never mutated.
        # NOTE(review): candidates are consumed from the END of each list.
        # If the ensemble list is ordered best-first this picks its
        # lowest-weighted entries -- confirm that this is intended.
        ensemble_queue = list(ensemble_suggestions)
        curated_queue = list(curated_suggestions)

        # A list keeps insertion order, so the final (stable) sort orders
        # equal-weight records deterministically; the set gives O(1)
        # duplicate detection.
        merged = []
        seen_guids = set()

        def unfinished():
            return (
                len(merged) < limit
                and len(ensemble_queue) > 0
                and len(curated_queue) > 0
            )

        def take_from(queue):
            candidate = queue.pop()
            if candidate[0] not in seen_guids:
                seen_guids.add(candidate[0])
                merged.append(candidate)

        while unfinished():
            take_from(ensemble_queue)

            # Terminate early if we have an odd number for the limit
            if not unfinished():
                break

            take_from(curated_queue)

        return merged

    def recommend(self, client_data, limit, extra_data=None):
        """
        Hybrid recommendations simply select half recommendations from
        the ensemble recommender, and half from the curated one.

        Duplicate recommendations are accommodated by rank ordering
        by weight.

        ``client_data`` must carry a "client_id" key (used for logging)
        and may carry "installed_addons".  Returns the merged records
        sorted by weight (element 1), highest first, or an empty list
        when fewer than ``limit`` unique records could be collected.
        """
        extra_data = {} if extra_data is None else extra_data

        preinstalled_addon_ids = client_data.get("installed_addons", [])

        # Compute an extended limit by adding the length of
        # the list of any preinstalled addons.
        # NOTE(review): the preinstalled addons are not filtered out in
        # this method; only their count is used.
        extended_limit = limit + len(preinstalled_addon_ids)

        ensemble_suggestions = self._ensemble_recommender.recommend(
            client_data, extended_limit, extra_data
        )
        curated_suggestions = self._curated_recommender.recommend(
            client_data, extended_limit, extra_data
        )

        # Generate a set of results from each of the composite
        # recommenders. We select one item from each recommender
        # sequentially so that we do not bias one recommender over the
        # other.
        merged_results = self._interleave(
            ensemble_suggestions, curated_suggestions, limit
        )

        if len(merged_results) < limit:
            msg = (
                "Defaulting to empty results. Insufficient recommendations found for client: %s"
                % client_data["client_id"]
            )
            self.logger.info(msg)
            return []

        sorted_results = sorted(
            merged_results, key=op.itemgetter(1), reverse=True
        )

        log_data = (
            client_data["client_id"],
            str([r[0] for r in sorted_results]),
        )

        self.logger.info(
            "Hybrid recommendations client_id: [%s], guids: [%s]" % log_data
        )
        return sorted_results
|
|
@ -1,138 +0,0 @@
|
|||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
"""
|
||||
Test cases for the TAAR Hybrid recommender
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from taar.recommenders.hybrid_recommender import CuratedRecommender
|
||||
from taar.recommenders.hybrid_recommender import HybridRecommender
|
||||
from taar.recommenders.ensemble_recommender import EnsembleRecommender
|
||||
|
||||
from taar.settings import TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY
|
||||
|
||||
# from taar.recommenders.hybrid_recommender import ENSEMBLE_WEIGHTS
|
||||
from .test_ensemblerecommender import install_mock_ensemble_data
|
||||
from .mocks import MockRecommenderFactory
|
||||
|
||||
import json
|
||||
from moto import mock_s3
|
||||
import boto3
|
||||
|
||||
from markus import TIMING
|
||||
from markus.testing import MetricsMock
|
||||
|
||||
|
||||
def install_no_curated_data(ctx):
    """Store an empty whitelist object in S3 and return a child context.

    The whitelist bucket is created first, then an object with an empty
    body is written under the whitelist key.
    """
    child_ctx = ctx.child()
    s3 = boto3.resource("s3", region_name="us-west-2")

    s3.create_bucket(Bucket=TAAR_WHITELIST_BUCKET)

    # An empty body stands in for "no curated data".
    whitelist_object = s3.Object(TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY)
    whitelist_object.put(Body="")

    return child_ctx
|
||||
|
||||
|
||||
def install_mock_curated_data(ctx):
    """Store a 20-entry mock whitelist in S3 and return a child context.

    The whitelist bucket is created first, then the mock entries are
    written as a JSON list under the whitelist key.
    """
    # Each entry is the decimal text of its index repeated 16 times.
    fake_guids = [str(index) * 16 for index in range(20)]

    child_ctx = ctx.child()
    s3 = boto3.resource("s3", region_name="us-west-2")

    s3.create_bucket(Bucket=TAAR_WHITELIST_BUCKET)

    whitelist_object = s3.Object(TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY)
    whitelist_object.put(Body=json.dumps(fake_guids))

    return child_ctx
|
||||
|
||||
|
||||
def install_ensemble_fixtures(ctx):
    """Populate a context with everything the ensemble recommender needs.

    Installs the mock ensemble data, registers a MockRecommenderFactory
    and a map of the recommenders it creates, then stores an
    EnsembleRecommender built on a child context.  Returns the populated
    context.
    """
    populated_ctx = install_mock_ensemble_data(ctx)

    mock_factory = MockRecommenderFactory()
    populated_ctx["recommender_factory"] = mock_factory

    # Created in the same order as the names are listed.
    recommender_names = ("collaborative", "similarity", "locale")
    populated_ctx["recommender_map"] = {
        name: mock_factory.create(name) for name in recommender_names
    }

    populated_ctx["ensemble_recommender"] = EnsembleRecommender(
        populated_ctx.child()
    )
    return populated_ctx
|
||||
|
||||
|
||||
@mock_s3
def test_curated_can_recommend(test_ctx):
    """can_recommend is truthy even when the stored whitelist is empty."""
    recommender = CuratedRecommender(install_no_curated_data(test_ctx))

    # CuratedRecommender will always recommend something no matter
    # what
    for client_data in ({}, {"installed_addons": []}):
        assert recommender.can_recommend(client_data)
|
||||
|
||||
|
||||
@mock_s3
def test_curated_recommendations(test_ctx):
    """recommend() returns exactly ``limit`` items and records timings."""
    with MetricsMock() as metrics_mock:
        recommender = CuratedRecommender(install_mock_curated_data(test_ctx))
        client_data = {"client_id": "000000"}

        for limit in range(1, 5):
            recommendations = recommender.recommend(client_data, limit=limit)
            # The curated recommender should always come back with the
            # full number of requested recommendations.
            assert len(recommendations) == limit

        # Timers for the whitelist load and for recommend() itself.
        for timer_key in ("taar.whitelist", "taar.hybrid_recommend"):
            assert metrics_mock.has_record(TIMING, timer_key)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="this test seems to break sporadically")
@mock_s3
def test_hybrid_recommendations(test_ctx):
    """The hybrid recommender returns exactly ``limit`` results."""
    # Install both the curated whitelist and the ensemble fixtures so
    # that the recommendations can mix the two sources.
    hybrid_ctx = install_ensemble_fixtures(install_mock_curated_data(test_ctx))
    recommender = HybridRecommender(hybrid_ctx)
    client_data = {"client_id": "000000"}

    # Test that we can generate lists of results
    for limit in range(4, 8):
        recommendations = recommender.recommend(client_data, limit=limit)
        assert len(recommendations) == limit
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="this test seems to break sporadically")
@mock_s3
def test_stable_hybrid_results(test_ctx):
    """A limit of 4 yields two curated and two ensemble results."""
    hybrid_ctx = install_ensemble_fixtures(install_mock_curated_data(test_ctx))
    recommender = HybridRecommender(hybrid_ctx)

    # Test that the results are actually mixed
    recommendations = recommender.recommend({"client_id": "000000"}, limit=4)
    assert len(recommendations) == 4

    # A mixed list has two recommendations with weight == 1.0 (curated)
    # followed by two with weight < 1.0 from the ensemble list.
    weights = [entry[1] for entry in recommendations]
    assert weights[0] == 1.0
    assert weights[1] == 1.0
    assert weights[2] < 1.0
    assert weights[3] < 1.0
|
|
@ -13,9 +13,9 @@ from taar.recommenders.ensemble_recommender import (
|
|||
TAAR_ENSEMBLE_KEY,
|
||||
)
|
||||
|
||||
from taar.settings import TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY
|
||||
|
||||
from .mocks import MockRecommenderFactory
|
||||
from .test_hybrid_recommender import install_mock_curated_data
|
||||
|
||||
import operator
|
||||
from functools import reduce
|
||||
|
@ -24,6 +24,22 @@ from markus import TIMING
|
|||
from markus.testing import MetricsMock
|
||||
|
||||
|
||||
def install_mock_curated_data(ctx):
    """Store a 20-entry mock whitelist in S3 and return a child context.

    The whitelist bucket is created first, then the mock entries are
    written as a JSON list under the whitelist key.
    """
    # Each entry is the decimal text of its index repeated 16 times.
    fake_guids = [str(index) * 16 for index in range(20)]

    child_ctx = ctx.child()
    s3 = boto3.resource("s3", region_name="us-west-2")

    s3.create_bucket(Bucket=TAAR_WHITELIST_BUCKET)

    whitelist_object = s3.Object(TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY)
    whitelist_object.put(Body=json.dumps(fake_guids))

    return child_ctx
|
||||
|
||||
|
||||
class StubRecommender(AbstractRecommender):
|
||||
""" A shared, stub recommender that can be used for testing.
|
||||
"""
|
||||
|
|
Загрузка…
Ссылка в новой задаче