hybrid is sorted out (finally)

This commit is contained in:
Victor Ng 2018-08-08 11:29:04 -04:00
Родитель 88b1ef6649
Коммит 6b95af983b
5 изменённых файлов: 87 добавлений и 23 удалений

Просмотреть файл

@ -14,15 +14,14 @@ chain.
"""
# Clobber the Context name to prevent messy name collisions
from srgutil.context import Context as _Context
from srgutil.context import default_context as _default_context
def default_context():
ctx = _Context()
ctx = _default_context()
from taar.recommenders import CollaborativeRecommender
from taar.recommenders import SimilarityRecommender
from taar.recommenders import LocaleRecommender
from taar.cache import Clock
# Note that the EnsembleRecommender is *not* in this map as it
# needs to ensure that the recommender_map key is installed in the
@ -31,5 +30,4 @@ def default_context():
'similarity': lambda: SimilarityRecommender(ctx.child()),
'locale': lambda: LocaleRecommender(ctx.child())}
ctx['clock'] = Clock()
return ctx

Просмотреть файл

@ -6,6 +6,7 @@ from .base_recommender import AbstractRecommender
from .lazys3 import LazyJSONLoader
from srgutil.interfaces import IMozLogging
import random
import operator as op
S3_BUCKET = 'telemetry-parquet'
@ -114,8 +115,6 @@ class HybridRecommender(AbstractRecommender):
# the list of any preinstalled addons.
extended_limit = limit + len(preinstalled_addon_ids)
ensemble_weights = self._weight_cache.getWeights()
ensemble_suggestions = self._ensemble_recommender.recommend(client_data,
extended_limit,
extra_data)
@ -128,23 +127,34 @@ class HybridRecommender(AbstractRecommender):
# sequentially so that we do not bias one recommender over the
# other.
merged_results = set()
while len(merged_results) < limit and len(ensemble_suggestions) > 0 and len(curated_suggestions) > 0:
r1 = ensemble_suggestions.pop()
if r1[0] not in [temp[0] for temp in merged_results]:
merged_results.add(r1)
# Terminate early if we have an odd number for the limit
if not (len(merged_results) < limit and
len(ensemble_suggestions) > 0 and
len(curated_suggestions) > 0):
break
r2 = curated_suggestions.pop()
if r2[0] not in [temp[0] for temp in merged_results]:
merged_results.add(r1)
merged_results.add(r2)
if len(merged_results) < limit:
msg = "Insufficient recommendations found for client: %s" % client_data['client_id']
self.logger.info(msg)
return []
sorted_results = sorted(list(merged_results),
key=op.itemgetter(1),
reverse=True)
log_data = (client_data['client_id'],
str(ensemble_weights),
str([r[0] for r in merged_results]))
self.logger.info("client_id: [%s], ensemble_weight: [%s], guids: [%s]" % log_data)
return list(merged_results)
str([r[0] for r in sorted_results]))
self.logger.info("client_id: [%s], guids: [%s]" % log_data)
return sorted_results

Просмотреть файл

@ -14,7 +14,7 @@ EXPECTED = {'collaborative': 1000,
'locale': 10}
def install_mock_data(ctx):
def install_mock_ensemble_data(ctx):
DATA = {'ensemble_weights': EXPECTED}
S3_BUCKET = 'telemetry-parquet'
@ -33,7 +33,7 @@ def install_mock_data(ctx):
@mock_s3
def test_weight_cache(test_ctx):
ctx = install_mock_data(test_ctx)
ctx = install_mock_ensemble_data(test_ctx)
wc = WeightCache(ctx)
actual = wc.getWeights()
assert EXPECTED == actual
@ -41,7 +41,7 @@ def test_weight_cache(test_ctx):
@mock_s3
def test_recommendations(test_ctx):
ctx = install_mock_data(test_ctx)
ctx = install_mock_ensemble_data(test_ctx)
EXPECTED_RESULTS = [('ghi', 3430.0),
('def', 3320.0),
@ -65,7 +65,7 @@ def test_recommendations(test_ctx):
@mock_s3
def test_preinstalled_guids(test_ctx):
ctx = install_mock_data(test_ctx)
ctx = install_mock_ensemble_data(test_ctx)
EXPECTED_RESULTS = [('ghi', 3430.0),
('ijk', 3200.0),

Просмотреть файл

@ -7,18 +7,20 @@ Test cases for the TAAR Hybrid recommender
"""
from taar.recommenders.hybrid_recommender import CuratedRecommender
from taar.recommenders.hybrid_recommender import HybridRecommender
from taar.recommenders.ensemble_recommender import EnsembleRecommender
from taar.recommenders.hybrid_recommender import S3_BUCKET
from taar.recommenders.hybrid_recommender import CURATED_WHITELIST
# from taar.recommenders.hybrid_recommender import ENSEMBLE_WEIGHTS
from taar.recommenders.lazys3 import LazyJSONLoader
from .test_ensemblerecommender import install_mock_ensemble_data
from .mocks import MockRecommenderFactory
import json
from moto import mock_s3
import boto3
import pytest
def install_no_curated_data(ctx):
ctx = ctx.child()
@ -52,6 +54,19 @@ def install_mock_curated_data(ctx):
return ctx
def install_ensemble_fixtures(ctx):
    """Populate a context with the fixtures an ensemble recommender needs.

    The mock ensemble data is installed first, then a
    MockRecommenderFactory is registered under 'recommender_factory'
    and used to build the 'recommender_map' entries.  Finally an
    EnsembleRecommender constructed from a child of the context is
    stored under 'ensemble_recommender'.

    Returns the context produced by install_mock_ensemble_data with
    the additional keys set on it.
    """
    ctx = install_mock_ensemble_data(ctx)

    mock_factory = MockRecommenderFactory()
    ctx['recommender_factory'] = mock_factory

    # One mock recommender per ensemble member, created in a fixed order.
    member_names = ('collaborative', 'similarity', 'locale')
    ctx['recommender_map'] = {name: mock_factory.create(name)
                              for name in member_names}

    # The ensemble recommender is built from a child context, created
    # only after the factory and map have been set on the parent.
    ctx['ensemble_recommender'] = EnsembleRecommender(ctx.child())
    return ctx
@mock_s3
def test_curated_can_recommend(test_ctx):
ctx = install_no_curated_data(test_ctx)
@ -78,6 +93,29 @@ def test_curated_recommendations(test_ctx):
assert len(guid_list) == LIMIT
@pytest.mark.skip("TODO")
@mock_s3
def test_hybrid_recommendations(test_ctx):
pass
# verify that the recommendations mix the curated and
# ensemble results
ctx = install_mock_curated_data(test_ctx)
ctx = install_ensemble_fixtures(ctx)
r = HybridRecommender(ctx)
# Test that we can generate lists of results
for LIMIT in range(4, 8):
guid_list = r.recommend({'client_id': '000000'}, limit = LIMIT)
# The curated recommendations should always return with some kind
# of recommendations
assert len(guid_list) == LIMIT
# Test that the results are actually mixed
guid_list = r.recommend({'client_id': '000000'}, limit = 4)
# A mixed list will have two recommendations with weight > 1.0
# (ensemble) and 2 with exactly weight 1.0 from the curated list
assert guid_list[0][1] > 1.0
assert guid_list[1][1] > 1.0
assert guid_list[2][1] == 1.0
assert guid_list[3][1] == 1.0

Просмотреть файл

@ -9,10 +9,10 @@ from taar.profile_fetcher import ProfileFetcher
from taar.recommenders import RecommendationManager
from taar.recommenders.lazys3 import LazyJSONLoader
from taar.schema import INTERVENTION_A
from taar.schema import INTERVENTION_B
from taar.recommenders.base_recommender import AbstractRecommender
from .mocks import MockProfileController, MockRecommenderFactory
import pytest
from .test_hybrid_recommender import install_mock_curated_data
class StubRecommender(AbstractRecommender):
@ -62,13 +62,31 @@ def test_none_profile_returns_empty_list(test_ctx):
assert rec_manager.recommend("random-client-id", 10) == []
@pytest.mark.skip("InterventionB isn't implemented yet")
@mock_s3
def test_intervention_b():
def test_intervention_b(test_ctx):
"""The recommendation manager is currently very naive and just
selects the first recommender which returns 'True' to
can_recommend()."""
ctx = install_mocks(test_ctx)
ctx = install_mock_curated_data(ctx)
factory = MockRecommenderFactory()
class MockProfileFetcher:
def get(self, client_id):
return {'client_id': client_id}
ctx['recommender_factory'] = factory
ctx['profile_fetcher'] = MockProfileFetcher()
manager = RecommendationManager(ctx.child())
recommendation_list = manager.recommend('some_ignored_id',
4,
extra_data={'branch': INTERVENTION_B})
assert isinstance(recommendation_list, list)
assert len(recommendation_list) == 4
@mock_s3
def test_recommendations_via_manager(test_ctx):