зеркало из https://github.com/mozilla/taar.git
hybrid is sorted out (finally)
This commit is contained in:
Родитель
88b1ef6649
Коммит
6b95af983b
|
@ -14,15 +14,14 @@ chain.
|
|||
"""
|
||||
|
||||
# Alias the srgutil imports with a leading underscore to prevent messy name collisions
|
||||
from srgutil.context import Context as _Context
|
||||
from srgutil.context import default_context as _default_context
|
||||
|
||||
|
||||
def default_context():
|
||||
ctx = _Context()
|
||||
ctx = _default_context()
|
||||
from taar.recommenders import CollaborativeRecommender
|
||||
from taar.recommenders import SimilarityRecommender
|
||||
from taar.recommenders import LocaleRecommender
|
||||
from taar.cache import Clock
|
||||
|
||||
# Note that the EnsembleRecommender is *not* in this map as it
|
||||
# needs to ensure that the recommender_map key is installed in the
|
||||
|
@ -31,5 +30,4 @@ def default_context():
|
|||
'similarity': lambda: SimilarityRecommender(ctx.child()),
|
||||
'locale': lambda: LocaleRecommender(ctx.child())}
|
||||
|
||||
ctx['clock'] = Clock()
|
||||
return ctx
|
||||
|
|
|
@ -6,6 +6,7 @@ from .base_recommender import AbstractRecommender
|
|||
from .lazys3 import LazyJSONLoader
|
||||
from srgutil.interfaces import IMozLogging
|
||||
import random
|
||||
import operator as op
|
||||
|
||||
S3_BUCKET = 'telemetry-parquet'
|
||||
|
||||
|
@ -114,8 +115,6 @@ class HybridRecommender(AbstractRecommender):
|
|||
# the list of any preinstalled addons.
|
||||
extended_limit = limit + len(preinstalled_addon_ids)
|
||||
|
||||
ensemble_weights = self._weight_cache.getWeights()
|
||||
|
||||
ensemble_suggestions = self._ensemble_recommender.recommend(client_data,
|
||||
extended_limit,
|
||||
extra_data)
|
||||
|
@ -128,23 +127,34 @@ class HybridRecommender(AbstractRecommender):
|
|||
# sequentially so that we do not bias one recommender over the
|
||||
# other.
|
||||
merged_results = set()
|
||||
|
||||
while len(merged_results) < limit and len(ensemble_suggestions) > 0 and len(curated_suggestions) > 0:
|
||||
|
||||
r1 = ensemble_suggestions.pop()
|
||||
if r1[0] not in [temp[0] for temp in merged_results]:
|
||||
merged_results.add(r1)
|
||||
|
||||
# Terminate early if the limit is already met (e.g. an odd limit) or either suggestion list is exhausted
|
||||
if not (len(merged_results) < limit and
|
||||
len(ensemble_suggestions) > 0 and
|
||||
len(curated_suggestions) > 0):
|
||||
break
|
||||
|
||||
r2 = curated_suggestions.pop()
|
||||
if r2[0] not in [temp[0] for temp in merged_results]:
|
||||
merged_results.add(r1)
|
||||
merged_results.add(r2)
|
||||
|
||||
if len(merged_results) < limit:
|
||||
msg = "Insufficient recommendations found for client: %s" % client_data['client_id']
|
||||
self.logger.info(msg)
|
||||
return []
|
||||
|
||||
sorted_results = sorted(list(merged_results),
|
||||
key=op.itemgetter(1),
|
||||
reverse=True)
|
||||
|
||||
log_data = (client_data['client_id'],
|
||||
str(ensemble_weights),
|
||||
str([r[0] for r in merged_results]))
|
||||
self.logger.info("client_id: [%s], ensemble_weight: [%s], guids: [%s]" % log_data)
|
||||
return list(merged_results)
|
||||
str([r[0] for r in sorted_results]))
|
||||
|
||||
self.logger.info("client_id: [%s], guids: [%s]" % log_data)
|
||||
return sorted_results
|
||||
|
|
|
@ -14,7 +14,7 @@ EXPECTED = {'collaborative': 1000,
|
|||
'locale': 10}
|
||||
|
||||
|
||||
def install_mock_data(ctx):
|
||||
def install_mock_ensemble_data(ctx):
|
||||
DATA = {'ensemble_weights': EXPECTED}
|
||||
|
||||
S3_BUCKET = 'telemetry-parquet'
|
||||
|
@ -33,7 +33,7 @@ def install_mock_data(ctx):
|
|||
|
||||
@mock_s3
|
||||
def test_weight_cache(test_ctx):
|
||||
ctx = install_mock_data(test_ctx)
|
||||
ctx = install_mock_ensemble_data(test_ctx)
|
||||
wc = WeightCache(ctx)
|
||||
actual = wc.getWeights()
|
||||
assert EXPECTED == actual
|
||||
|
@ -41,7 +41,7 @@ def test_weight_cache(test_ctx):
|
|||
|
||||
@mock_s3
|
||||
def test_recommendations(test_ctx):
|
||||
ctx = install_mock_data(test_ctx)
|
||||
ctx = install_mock_ensemble_data(test_ctx)
|
||||
|
||||
EXPECTED_RESULTS = [('ghi', 3430.0),
|
||||
('def', 3320.0),
|
||||
|
@ -65,7 +65,7 @@ def test_recommendations(test_ctx):
|
|||
|
||||
@mock_s3
|
||||
def test_preinstalled_guids(test_ctx):
|
||||
ctx = install_mock_data(test_ctx)
|
||||
ctx = install_mock_ensemble_data(test_ctx)
|
||||
|
||||
EXPECTED_RESULTS = [('ghi', 3430.0),
|
||||
('ijk', 3200.0),
|
||||
|
|
|
@ -7,18 +7,20 @@ Test cases for the TAAR Hybrid recommender
|
|||
"""
|
||||
|
||||
from taar.recommenders.hybrid_recommender import CuratedRecommender
|
||||
from taar.recommenders.hybrid_recommender import HybridRecommender
|
||||
from taar.recommenders.ensemble_recommender import EnsembleRecommender
|
||||
|
||||
from taar.recommenders.hybrid_recommender import S3_BUCKET
|
||||
from taar.recommenders.hybrid_recommender import CURATED_WHITELIST
|
||||
# from taar.recommenders.hybrid_recommender import ENSEMBLE_WEIGHTS
|
||||
from taar.recommenders.lazys3 import LazyJSONLoader
|
||||
from .test_ensemblerecommender import install_mock_ensemble_data
|
||||
from .mocks import MockRecommenderFactory
|
||||
|
||||
import json
|
||||
from moto import mock_s3
|
||||
import boto3
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def install_no_curated_data(ctx):
|
||||
ctx = ctx.child()
|
||||
|
@ -52,6 +54,19 @@ def install_mock_curated_data(ctx):
|
|||
return ctx
|
||||
|
||||
|
||||
def install_ensemble_fixtures(ctx):
|
||||
ctx = install_mock_ensemble_data(ctx)
|
||||
|
||||
factory = MockRecommenderFactory()
|
||||
ctx['recommender_factory'] = factory
|
||||
|
||||
ctx['recommender_map'] = {'collaborative': factory.create('collaborative'),
|
||||
'similarity': factory.create('similarity'),
|
||||
'locale': factory.create('locale')}
|
||||
ctx['ensemble_recommender'] = EnsembleRecommender(ctx.child())
|
||||
return ctx
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_curated_can_recommend(test_ctx):
|
||||
ctx = install_no_curated_data(test_ctx)
|
||||
|
@ -78,6 +93,29 @@ def test_curated_recommendations(test_ctx):
|
|||
assert len(guid_list) == LIMIT
|
||||
|
||||
|
||||
@pytest.mark.skip("TODO")
|
||||
@mock_s3
|
||||
def test_hybrid_recommendations(test_ctx):
|
||||
pass
|
||||
# verify that the recommendations mix the curated and
|
||||
# ensemble results
|
||||
ctx = install_mock_curated_data(test_ctx)
|
||||
ctx = install_ensemble_fixtures(ctx)
|
||||
|
||||
r = HybridRecommender(ctx)
|
||||
|
||||
# Test that we can generate lists of results
|
||||
for LIMIT in range(4, 8):
|
||||
guid_list = r.recommend({'client_id': '000000'}, limit = LIMIT)
|
||||
# The hybrid recommender should always return with some kind
|
||||
# of recommendations
|
||||
assert len(guid_list) == LIMIT
|
||||
|
||||
# Test that the results are actually mixed
|
||||
guid_list = r.recommend({'client_id': '000000'}, limit = 4)
|
||||
|
||||
# A mixed list will have two recommendations with weight > 1.0
|
||||
# (ensemble) and two with exactly weight 1.0 from the curated list
|
||||
|
||||
assert guid_list[0][1] > 1.0
|
||||
assert guid_list[1][1] > 1.0
|
||||
assert guid_list[2][1] == 1.0
|
||||
assert guid_list[3][1] == 1.0
|
||||
|
|
|
@ -9,10 +9,10 @@ from taar.profile_fetcher import ProfileFetcher
|
|||
from taar.recommenders import RecommendationManager
|
||||
from taar.recommenders.lazys3 import LazyJSONLoader
|
||||
from taar.schema import INTERVENTION_A
|
||||
from taar.schema import INTERVENTION_B
|
||||
from taar.recommenders.base_recommender import AbstractRecommender
|
||||
from .mocks import MockProfileController, MockRecommenderFactory
|
||||
|
||||
import pytest
|
||||
from .test_hybrid_recommender import install_mock_curated_data
|
||||
|
||||
|
||||
class StubRecommender(AbstractRecommender):
|
||||
|
@ -62,13 +62,31 @@ def test_none_profile_returns_empty_list(test_ctx):
|
|||
assert rec_manager.recommend("random-client-id", 10) == []
|
||||
|
||||
|
||||
@pytest.mark.skip("InterventionB isn't implemented yet")
|
||||
@mock_s3
|
||||
def test_intervention_b():
|
||||
def test_intervention_b(test_ctx):
|
||||
"""The recommendation manager is currently very naive and just
|
||||
selects the first recommender which returns 'True' to
|
||||
can_recommend()."""
|
||||
|
||||
ctx = install_mocks(test_ctx)
|
||||
ctx = install_mock_curated_data(ctx)
|
||||
|
||||
factory = MockRecommenderFactory()
|
||||
|
||||
class MockProfileFetcher:
|
||||
def get(self, client_id):
|
||||
return {'client_id': client_id}
|
||||
|
||||
ctx['recommender_factory'] = factory
|
||||
ctx['profile_fetcher'] = MockProfileFetcher()
|
||||
manager = RecommendationManager(ctx.child())
|
||||
recommendation_list = manager.recommend('some_ignored_id',
|
||||
4,
|
||||
extra_data={'branch': INTERVENTION_B})
|
||||
|
||||
assert isinstance(recommendation_list, list)
|
||||
assert len(recommendation_list) == 4
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_recommendations_via_manager(test_ctx):
|
||||
|
|
Загрузка…
Ссылка в новой задаче