зеркало из https://github.com/mozilla/taar.git
Updated recommenders to use the GUID-only AMO curated whitelist
This commit is contained in:
Родитель
1d33c2ca34
Коммит
1aab10ad06
|
@ -11,19 +11,12 @@ import operator as op
|
|||
S3_BUCKET = 'telemetry-parquet'
|
||||
|
||||
ENSEMBLE_WEIGHTS = 'taar/ensemble/ensemble_weight.json'
|
||||
CURATED_WHITELIST = 'telemetry-ml/addon_recommender/top_200_whitelist.json'
|
||||
CURATED_WHITELIST = 'telemetry-ml/addon_recommender/only_guids_top_200.json'
|
||||
|
||||
|
||||
class CuratedWhitelistCache:
|
||||
"""
|
||||
This fetches the curated whitelist from S3.
|
||||
|
||||
A sample of the whitelist below :
|
||||
|
||||
[{'GUID': guid_string,
|
||||
'Extension': extension_name,
|
||||
'Copy (final)': english_description},
|
||||
]
|
||||
"""
|
||||
def __init__(self, ctx):
|
||||
self._ctx = ctx
|
||||
|
@ -39,8 +32,7 @@ class CuratedWhitelistCache:
|
|||
""" Fetch a subset of randomized GUIDs from the whitelist """
|
||||
dataset = self.get_whitelist()
|
||||
random.shuffle(dataset)
|
||||
samples = dataset[:item_count]
|
||||
return [s['GUID'] for s in samples]
|
||||
return dataset[:item_count]
|
||||
|
||||
|
||||
class CuratedRecommender(AbstractRecommender):
|
||||
|
|
|
@ -138,7 +138,7 @@ class RecommendationManager:
|
|||
|
||||
# The whitelist data is only used for test client IDs
|
||||
WHITELIST_S3_BUCKET = 'telemetry-parquet'
|
||||
WHITELIST_S3_KEY = 'telemetry-ml/addon_recommender/top_200_whitelist.json'
|
||||
WHITELIST_S3_KEY = 'telemetry-ml/addon_recommender/only_guids_top_200.json'
|
||||
self._whitelist_data = LazyJSONLoader(self._ctx, WHITELIST_S3_BUCKET, WHITELIST_S3_KEY)
|
||||
|
||||
@schema_validate(RecommendationManagerQuerySchema)
|
||||
|
@ -165,7 +165,7 @@ class RecommendationManager:
|
|||
random.shuffle(data)
|
||||
samples = data[:limit]
|
||||
self.logger.info("Test ID detected [{}]".format(client_id))
|
||||
return [(s['GUID'], 1.1) for s in samples]
|
||||
return [(s, 1.1) for s in samples]
|
||||
|
||||
if client_id in EMPTY_TEST_CLIENT_IDS:
|
||||
self.logger.info("Empty Test ID detected [{}]".format(client_id))
|
||||
|
|
|
@ -38,9 +38,7 @@ def install_no_curated_data(ctx):
|
|||
def install_mock_curated_data(ctx):
|
||||
mock_data = []
|
||||
for i in range(20):
|
||||
mock_data.append({'GUID': str(i) * 16,
|
||||
'Extension': 'WebExt %d' % i,
|
||||
'Copy (final)': 'Copy for %d' % i})
|
||||
mock_data.append(str(i) * 16)
|
||||
|
||||
ctx = ctx.child()
|
||||
conn = boto3.resource('s3', region_name='us-west-2')
|
||||
|
|
Загрузка…
Ссылка в новой задаче