Updated recommenders to use the GUID-only AMO curated whitelist

This commit is contained in:
Victor Ng 2018-08-16 23:19:06 -04:00
Родитель 1d33c2ca34
Коммит 1aab10ad06
3 изменённых файлов: 5 добавлений и 15 удалений

Просмотреть файл

@ -11,19 +11,12 @@ import operator as op
S3_BUCKET = 'telemetry-parquet'
ENSEMBLE_WEIGHTS = 'taar/ensemble/ensemble_weight.json'
CURATED_WHITELIST = 'telemetry-ml/addon_recommender/top_200_whitelist.json'
CURATED_WHITELIST = 'telemetry-ml/addon_recommender/only_guids_top_200.json'
class CuratedWhitelistCache:
"""
This fetches the curated whitelist from S3.
A sample of the whitelist below :
[{'GUID': guid_string,
'Extension': extension_name,
'Copy (final)': english_description},
]
"""
def __init__(self, ctx):
self._ctx = ctx
@ -39,8 +32,7 @@ class CuratedWhitelistCache:
""" Fetch a subset of randomzied GUIDs from the whitelist """
dataset = self.get_whitelist()
random.shuffle(dataset)
samples = dataset[:item_count]
return [s['GUID'] for s in samples]
return dataset[:item_count]
class CuratedRecommender(AbstractRecommender):

Просмотреть файл

@ -138,7 +138,7 @@ class RecommendationManager:
# The whitelist data is only used for test client IDs
WHITELIST_S3_BUCKET = 'telemetry-parquet'
WHITELIST_S3_KEY = 'telemetry-ml/addon_recommender/top_200_whitelist.json'
WHITELIST_S3_KEY = 'telemetry-ml/addon_recommender/only_guids_top_200.json'
self._whitelist_data = LazyJSONLoader(self._ctx, WHITELIST_S3_BUCKET, WHITELIST_S3_KEY)
@schema_validate(RecommendationManagerQuerySchema)
@ -165,7 +165,7 @@ class RecommendationManager:
random.shuffle(data)
samples = data[:limit]
self.logger.info("Test ID detected [{}]".format(client_id))
return [(s['GUID'], 1.1) for s in samples]
return [(s, 1.1) for s in samples]
if client_id in EMPTY_TEST_CLIENT_IDS:
self.logger.info("Empty Test ID detected [{}]".format(client_id))

Просмотреть файл

@ -38,9 +38,7 @@ def install_no_curated_data(ctx):
def install_mock_curated_data(ctx):
mock_data = []
for i in range(20):
mock_data.append({'GUID': str(i) * 16,
'Extension': 'WebExt %d' % i,
'Copy (final)': 'Copy for %d' % i})
mock_data.append(str(i) * 16)
ctx = ctx.child()
conn = boto3.resource('s3', region_name='us-west-2')