Migrated taar locale recommender to use redis

This commit is contained in:
Victor Ng 2020-09-01 10:20:07 -04:00
Родитель 9773053739
Коммит 77eef1db83
4 изменённых файлов: 181 добавлений и 87 удалений

Просмотреть файл

@ -2,14 +2,13 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from srgutil.interfaces import IMozLogging
from .base_recommender import AbstractRecommender
from .lazys3 import LazyJSONLoader
from taar.settings import TAAR_LOCALE_BUCKET, TAAR_LOCALE_KEY
import markus
from srgutil.interfaces import IMozLogging
from .base_recommender import AbstractRecommender
from taar.recommenders.redis_cache import AddonsCoinstallCache
metrics = markus.get_metrics("taar")
@ -29,27 +28,12 @@ class LocaleRecommender(AbstractRecommender):
self.logger = self._ctx[IMozLogging].get_logger("taar")
self._top_addons_per_locale = LazyJSONLoader(
self._ctx, TAAR_LOCALE_BUCKET, TAAR_LOCALE_KEY, "locale"
)
self._init_from_ctx()
self._redis_cache = AddonsCoinstallCache(self._ctx)
# DONE removed
@property
def top_addons_per_locale(self):
def presort_locale(data):
result = {}
for locale, guid_list in data.items():
result[locale] = sorted(guid_list, key=lambda x: x[1], reverse=True)
return result
return self._top_addons_per_locale.get(transform=presort_locale)[0]
def _init_from_ctx(self):
if self.top_addons_per_locale is None:
self.logger.error(
"Cannot download the top per locale file {}".format(TAAR_LOCALE_KEY)
)
return self._redis_cache.top_addons_per_locale()
def can_recommend(self, client_data, extra_data={}):
# We can't recommend if we don't have our data files.

Просмотреть файл

@ -8,39 +8,63 @@ import threading
import redis
import numpy as np
from srgutil.interfaces import IMozLogging
from taar.settings import (
REDIS_HOST,
REDIS_PORT,
)
# TAARLite configuration
from taar.settings import (
TAARLITE_GUID_COINSTALL_BUCKET,
TAARLITE_GUID_COINSTALL_KEY,
TAARLITE_GUID_RANKING_KEY,
TAARLITE_TTL,
TAARLITE_TRUNCATE,
TAARLITE_MUTEX_TTL,
)
# TAAR locale configuration
from taar.settings import TAAR_LOCALE_BUCKET, TAAR_LOCALE_KEY
from jsoncache.loader import s3_json_loader
# This marks which of the redis databases is currently
# active for read
ACTIVE_DB = "active_db"
# This is a mutex to block multiple writers from redis
UPDATE_CHECK = "update_mutex|"
# taarlite guid guid coinstallation matrix
COINSTALL_PREFIX = "coinstall|"
# taarlite guid guid coinstallation matrix filtered by
# minimum installation thresholds
FILTERED_COINSTALL_PREFIX = "filtered_coinstall|"
# taarlite ranking data
RANKING_PREFIX = "ranking|"
# taarlite minimum installation threshold
MIN_INSTALLS_PREFIX = "min_installs|"
# This is a map is guid->sum of coinstall counts
# taarlite map of guid->(sum of coinstall counts)
NORMDATA_COUNT_MAP_PREFIX = "normdata_count_map_prefix|"
# Capture the number of times a GUID shows up per row
# taarlite number of times a GUID shows up per row
# of coinstallation data.
NORMDATA_ROWCOUNT_PREFIX = "normdata_rowcount_prefix|"
# taarlite row normalization data
NORMDATA_GUID_ROW_NORM_PREFIX = "normdata_guid_row_norm_prefix|"
# TAAR: Locale data
LOCALE_DATA = "taar_locale_data|"
class PrefixStripper:
def __init__(self, prefix, iterator, cast_to_str=False):
self._prefix = prefix
@ -64,12 +88,10 @@ class AddonsCoinstallCache:
GUID->GUID co-installation data
"""
def __init__(self, ctx, ttl=TAARLITE_TTL):
def __init__(self, ctx):
self._ctx = ctx
self.logger = self._ctx[IMozLogging].get_logger("taar")
self._ttl = ttl
rcon = self.init_redis_connections()
self._r0 = rcon[0]
self._r1 = rcon[1]
@ -136,9 +158,6 @@ class AddonsCoinstallCache:
self._r0.delete(UPDATE_CHECK)
self.logger.info("UPDATE_CHECK field is cleared")
def fetch_ranking_data(self):
return s3_json_loader(TAARLITE_GUID_COINSTALL_BUCKET, TAARLITE_GUID_RANKING_KEY)
def guid_maps_count_map(self, guid, default=None):
tmp = self._db().get(NORMDATA_COUNT_MAP_PREFIX + guid)
if tmp:
@ -167,11 +186,6 @@ class AddonsCoinstallCache:
return 0
return float(result.decode("utf8"))
def fetch_coinstall_data(self):
return s3_json_loader(
TAARLITE_GUID_COINSTALL_BUCKET, TAARLITE_GUID_COINSTALL_KEY
)
def get_filtered_coinstall(self, guid, default=None):
tmp = self._db().get(FILTERED_COINSTALL_PREFIX + guid)
if tmp:
@ -224,7 +238,19 @@ class AddonsCoinstallCache:
# Any value in ACTIVE_DB indicates that data is live
return self._r0.get(ACTIVE_DB) is not None
# Private methods below
def top_addons_per_locale(self):
    """
    Return the TAAR locale data held in redis.

    The value stored under the LOCALE_DATA key of the database returned
    by ``self._db()`` is decoded as UTF-8 and parsed as JSON.  ``None``
    is returned when the key is absent or holds an empty value.
    """
    raw = self._db().get(LOCALE_DATA)
    if not raw:
        return None
    return json.loads(raw.decode("utf8"))
"""
################################
Private methods below
"""
def _db(self):
"""
@ -244,9 +270,34 @@ class AddonsCoinstallCache:
""" pid/thread identity """
return f"{os.getpid()}_{threading.get_ident()}"
def _update_coinstall_data(self, db):
def _fetch_coinstall_data(self):
    """
    Fetch the TAAR Lite GUID GUID coinstallation data with s3_json_loader.
    """
    bucket = TAARLITE_GUID_COINSTALL_BUCKET
    key = TAARLITE_GUID_COINSTALL_KEY
    return s3_json_loader(bucket, key)
data = self.fetch_coinstall_data()
def _fetch_ranking_data(self):
    """
    Fetch the TAAR Lite ranking data with s3_json_loader.
    """
    # The ranking key is read from the same bucket as the coinstall data.
    bucket = TAARLITE_GUID_COINSTALL_BUCKET
    key = TAARLITE_GUID_RANKING_KEY
    return s3_json_loader(bucket, key)
def _fetch_locale_data(self):
    """
    Fetch the TAAR locale data with s3_json_loader.
    """
    bucket = TAAR_LOCALE_BUCKET
    key = TAAR_LOCALE_KEY
    return s3_json_loader(bucket, key)
def _update_locale_data(self, db):
    """
    Load the TAAR locale data

    The mapping returned by ``_fetch_locale_data`` is rebuilt with every
    locale's list sorted by the second element of each entry, largest
    first.  The result is written to `db` as a JSON string under the
    LOCALE_DATA key.
    """
    locale_data = self._fetch_locale_data()
    presorted = {
        locale: sorted(entries, key=lambda entry: entry[1], reverse=True)
        for locale, entries in locale_data.items()
    }
    db.set(LOCALE_DATA, json.dumps(presorted))
def _update_coinstall_data(self, db):
"""
Load the TAAR Lite GUID GUID coinstallation data
"""
data = self._fetch_coinstall_data()
items = data.items()
len_items = len(items)
@ -302,7 +353,7 @@ class AddonsCoinstallCache:
def _update_rank_data(self, db):
data = self.fetch_ranking_data()
data = self._fetch_ranking_data()
items = data.items()
len_items = len(items)
@ -332,8 +383,6 @@ class AddonsCoinstallCache:
self.logger.info("Completed precomputing normalized data")
# TODO: should this autoexpire to help indicate that no fresh
# data has loaded? Maybe N * update TTL time?
self._r0.set(ACTIVE_DB, next_active_db)
self.logger.info(f"Active DB is set to {next_active_db}")
@ -347,3 +396,4 @@ class AddonsCoinstallCache:
db.flushdb()
self._update_rank_data(db)
self._update_coinstall_data(db)
self._update_locale_data(db)

Просмотреть файл

@ -81,23 +81,35 @@ RESULTS = {
}
def noop_taarlocale_dataload(stack):
    """
    Replace AddonsCoinstallCache._update_locale_data with a mock that
    returns None.

    The patch is entered on `stack`, so it stays in place until that
    stack is closed.  The same stack object is returned.
    """
    # no-op the TAAR locale data load
    locale_patch = mock.patch.object(
        AddonsCoinstallCache, "_update_locale_data", return_value=None
    )
    stack.enter_context(locale_patch)
    return stack
@contextlib.contextmanager
def mock_coinstall_ranking_context(ctx, mock_coinstall, mock_ranking):
with contextlib.ExitStack() as stack:
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache, "fetch_ranking_data", return_value=mock_ranking,
AddonsCoinstallCache, "_fetch_ranking_data", return_value=mock_ranking,
)
)
stack.enter_context(
mock.patch.object(
AddonsCoinstallCache,
"fetch_coinstall_data",
"_fetch_coinstall_data",
return_value=mock_coinstall,
)
)
stack = noop_taarlocale_dataload(stack)
# Patch fakeredis in
stack.enter_context(
mock.patch.object(

Просмотреть файл

@ -2,8 +2,12 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from moto import mock_s3
import boto3
import mock
import contextlib
import fakeredis
from taar.recommenders.redis_cache import AddonsCoinstallCache
import json
@ -14,6 +18,7 @@ from taar.settings import TAAR_LOCALE_KEY, TAAR_LOCALE_BUCKET
from markus import TIMING
from markus.testing import MetricsMock
FAKE_LOCALE_DATA = {
"te-ST": [
["{1e6b8bce-7dc8-481c-9f19-123e41332b72}", 0.1],
@ -37,32 +42,74 @@ def install_mock_data(ctx):
return ctx
@mock_s3
def noop_taarlite_dataload(stack):
    """
    Replace the TAAR Lite loaders on AddonsCoinstallCache with mocks that
    return None.

    Both patches are entered on `stack`, so they stay in place until that
    stack is closed.  The same stack object is returned.
    """
    # no-op the taarlite rankdata first, then the taarlite guidguid data
    for method_name in ("_update_rank_data", "_update_coinstall_data"):
        stack.enter_context(
            mock.patch.object(AddonsCoinstallCache, method_name, return_value=None)
        )
    return stack
@contextlib.contextmanager
def mock_locale_data(ctx):
    """
    Context manager that loads FAKE_LOCALE_DATA through AddonsCoinstallCache
    backed by fakeredis.

    While active, ``_fetch_locale_data`` returns FAKE_LOCALE_DATA, the
    TAAR Lite loaders are no-ops, and ``init_redis_connections`` returns
    three fakeredis connections keyed 0, 1 and 2.  ``safe_load_data`` is
    run once on a cache built from `ctx` before control is yielded.

    Yields the ExitStack holding the patches.
    """
    with contextlib.ExitStack() as stack:
        fetch_patch = mock.patch.object(
            AddonsCoinstallCache,
            "_fetch_locale_data",
            return_value=FAKE_LOCALE_DATA,
        )
        stack.enter_context(fetch_patch)

        stack = noop_taarlite_dataload(stack)

        # Patch fakeredis in
        fake_connections = {
            db_index: fakeredis.FakeStrictRedis(db=db_index)
            for db_index in range(3)
        }
        redis_patch = mock.patch.object(
            AddonsCoinstallCache,
            "init_redis_connections",
            return_value=fake_connections,
        )
        stack.enter_context(redis_patch)

        # Initialize redis
        cache = AddonsCoinstallCache(ctx)
        cache.safe_load_data()
        yield stack
def test_can_recommend(test_ctx):
ctx = install_mock_data(test_ctx)
r = LocaleRecommender(ctx)
with mock_locale_data(test_ctx):
r = LocaleRecommender(test_ctx)
# Test that we can't recommend if we have not enough client info.
assert not r.can_recommend({})
assert not r.can_recommend({"locale": []})
# Test that we can't recommend if we have not enough client info.
assert not r.can_recommend({})
assert not r.can_recommend({"locale": []})
# Check that we can recommend if the user has at least an addon.
assert r.can_recommend({"locale": "en"})
# Check that we can recommend if the user has at least an addon.
assert r.can_recommend({"locale": "en"})
@mock_s3
def test_can_recommend_no_model(test_ctx):
ctx = install_mock_data(test_ctx)
r = LocaleRecommender(ctx)
with mock_locale_data(test_ctx):
r = LocaleRecommender(test_ctx)
# We should never be able to recommend if something went
# wrong with the model.
assert not r.can_recommend({})
assert not r.can_recommend({"locale": []})
assert not r.can_recommend({"locale": "it"})
# We should never be able to recommend if something went
# wrong with the model.
assert not r.can_recommend({})
assert not r.can_recommend({"locale": []})
assert not r.can_recommend({"locale": "it"})
@mock_s3
def test_recommendations(test_ctx):
"""Test that the locale recommender returns the correct
locale dependent addons.
@ -71,27 +118,26 @@ def test_recommendations(test_ctx):
of (GUID, weight).
"""
with MetricsMock() as mm:
ctx = install_mock_data(test_ctx)
r = LocaleRecommender(ctx)
recommendations = r.recommend({"locale": "en"}, 10)
with mock_locale_data(test_ctx):
r = LocaleRecommender(test_ctx)
# Make sure the structure of the recommendations is correct and that we
# recommended the right addon.
assert isinstance(recommendations, list)
assert len(recommendations) == len(FAKE_LOCALE_DATA["en"])
recommendations = r.recommend({"locale": "en"}, 10)
# Make sure that the reported addons are the one from the fake data.
for (addon_id, weight), (expected_id, expected_weight) in zip(
recommendations, FAKE_LOCALE_DATA["en"]
):
assert addon_id == expected_id
assert weight == expected_weight
# Make sure the structure of the recommendations is correct and that we
# recommended the right addon.
assert isinstance(recommendations, list)
assert len(recommendations) == len(FAKE_LOCALE_DATA["en"])
assert mm.has_record(TIMING, "taar.locale")
assert mm.has_record(TIMING, "taar.locale_recommend")
# Make sure that the reported addons are the one from the fake data.
for (addon_id, weight), (expected_id, expected_weight) in zip(
recommendations, FAKE_LOCALE_DATA["en"]
):
assert addon_id == expected_id
assert weight == expected_weight
assert mm.has_record(TIMING, "taar.locale_recommend")
@mock_s3
def test_recommender_extra_data(test_ctx):
# Test that the recommender uses locale data from the "extra"
# section if available.
@ -109,11 +155,13 @@ def test_recommender_extra_data(test_ctx):
assert addon_id == expected_id
assert weight == expected_weight
ctx = install_mock_data(test_ctx)
r = LocaleRecommender(ctx)
recommendations = r.recommend({}, 10, extra_data={"locale": "en"})
validate_recommendations(recommendations, "en")
with mock_locale_data(test_ctx):
r = LocaleRecommender(test_ctx)
recommendations = r.recommend({}, 10, extra_data={"locale": "en"})
validate_recommendations(recommendations, "en")
# Make sure that we favour client data over the extra data.
recommendations = r.recommend({"locale": "en"}, 10, extra_data={"locale": "te-ST"})
validate_recommendations(recommendations, "en")
# Make sure that we favour client data over the extra data.
recommendations = r.recommend(
{"locale": "en"}, 10, extra_data={"locale": "te-ST"}
)
validate_recommendations(recommendations, "en")