зеркало из https://github.com/mozilla/taar.git
Migrated taar locale recommender to use redis
This commit is contained in:
Родитель
9773053739
Коммит
77eef1db83
|
@ -2,14 +2,13 @@
|
|||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
from srgutil.interfaces import IMozLogging
|
||||
from .base_recommender import AbstractRecommender
|
||||
from .lazys3 import LazyJSONLoader
|
||||
|
||||
from taar.settings import TAAR_LOCALE_BUCKET, TAAR_LOCALE_KEY
|
||||
|
||||
import markus
|
||||
|
||||
from srgutil.interfaces import IMozLogging
|
||||
|
||||
from .base_recommender import AbstractRecommender
|
||||
from taar.recommenders.redis_cache import AddonsCoinstallCache
|
||||
|
||||
metrics = markus.get_metrics("taar")
|
||||
|
||||
|
||||
|
@ -29,27 +28,12 @@ class LocaleRecommender(AbstractRecommender):
|
|||
|
||||
self.logger = self._ctx[IMozLogging].get_logger("taar")
|
||||
|
||||
self._top_addons_per_locale = LazyJSONLoader(
|
||||
self._ctx, TAAR_LOCALE_BUCKET, TAAR_LOCALE_KEY, "locale"
|
||||
)
|
||||
|
||||
self._init_from_ctx()
|
||||
self._redis_cache = AddonsCoinstallCache(self._ctx)
|
||||
|
||||
# DONE removed
|
||||
@property
|
||||
def top_addons_per_locale(self):
|
||||
def presort_locale(data):
|
||||
result = {}
|
||||
for locale, guid_list in data.items():
|
||||
result[locale] = sorted(guid_list, key=lambda x: x[1], reverse=True)
|
||||
return result
|
||||
|
||||
return self._top_addons_per_locale.get(transform=presort_locale)[0]
|
||||
|
||||
def _init_from_ctx(self):
|
||||
if self.top_addons_per_locale is None:
|
||||
self.logger.error(
|
||||
"Cannot download the top per locale file {}".format(TAAR_LOCALE_KEY)
|
||||
)
|
||||
return self._redis_cache.top_addons_per_locale()
|
||||
|
||||
def can_recommend(self, client_data, extra_data={}):
|
||||
# We can't recommend if we don't have our data files.
|
||||
|
|
|
@ -8,39 +8,63 @@ import threading
|
|||
import redis
|
||||
import numpy as np
|
||||
from srgutil.interfaces import IMozLogging
|
||||
|
||||
from taar.settings import (
|
||||
REDIS_HOST,
|
||||
REDIS_PORT,
|
||||
)
|
||||
|
||||
# TAARLite configuration
|
||||
from taar.settings import (
|
||||
TAARLITE_GUID_COINSTALL_BUCKET,
|
||||
TAARLITE_GUID_COINSTALL_KEY,
|
||||
TAARLITE_GUID_RANKING_KEY,
|
||||
TAARLITE_TTL,
|
||||
TAARLITE_TRUNCATE,
|
||||
TAARLITE_MUTEX_TTL,
|
||||
)
|
||||
|
||||
# TAAR locale configuration
|
||||
from taar.settings import TAAR_LOCALE_BUCKET, TAAR_LOCALE_KEY
|
||||
|
||||
from jsoncache.loader import s3_json_loader
|
||||
|
||||
|
||||
# This marks which of the redis databases is currently
|
||||
# active for read
|
||||
ACTIVE_DB = "active_db"
|
||||
|
||||
# This is a mutex to block multiple writers from redis
|
||||
UPDATE_CHECK = "update_mutex|"
|
||||
|
||||
|
||||
# taarlite guid guid coinstallation matrix
|
||||
COINSTALL_PREFIX = "coinstall|"
|
||||
|
||||
# taarlite guid guid coinstallation matrix filtered by
|
||||
# minimum installation thresholds
|
||||
FILTERED_COINSTALL_PREFIX = "filtered_coinstall|"
|
||||
|
||||
# taarlite ranking data
|
||||
RANKING_PREFIX = "ranking|"
|
||||
|
||||
# taarlite minimum installation threshold
|
||||
MIN_INSTALLS_PREFIX = "min_installs|"
|
||||
|
||||
# This is a map of guid->sum of coinstall counts
|
||||
# taarlite map of guid->(sum of coinstall counts)
|
||||
NORMDATA_COUNT_MAP_PREFIX = "normdata_count_map_prefix|"
|
||||
|
||||
# Capture the number of times a GUID shows up per row
|
||||
# taarlite number of times a GUID shows up per row
|
||||
# of coinstallation data.
|
||||
NORMDATA_ROWCOUNT_PREFIX = "normdata_rowcount_prefix|"
|
||||
|
||||
# taarlite row normalization data
|
||||
NORMDATA_GUID_ROW_NORM_PREFIX = "normdata_guid_row_norm_prefix|"
|
||||
|
||||
|
||||
# TAAR: Locale data
|
||||
LOCALE_DATA = "taar_locale_data|"
|
||||
|
||||
|
||||
class PrefixStripper:
|
||||
def __init__(self, prefix, iterator, cast_to_str=False):
|
||||
self._prefix = prefix
|
||||
|
@ -64,12 +88,10 @@ class AddonsCoinstallCache:
|
|||
GUID->GUID co-installation data
|
||||
"""
|
||||
|
||||
def __init__(self, ctx, ttl=TAARLITE_TTL):
|
||||
def __init__(self, ctx):
|
||||
self._ctx = ctx
|
||||
self.logger = self._ctx[IMozLogging].get_logger("taar")
|
||||
|
||||
self._ttl = ttl
|
||||
|
||||
rcon = self.init_redis_connections()
|
||||
self._r0 = rcon[0]
|
||||
self._r1 = rcon[1]
|
||||
|
@ -136,9 +158,6 @@ class AddonsCoinstallCache:
|
|||
self._r0.delete(UPDATE_CHECK)
|
||||
self.logger.info("UPDATE_CHECK field is cleared")
|
||||
|
||||
def fetch_ranking_data(self):
|
||||
return s3_json_loader(TAARLITE_GUID_COINSTALL_BUCKET, TAARLITE_GUID_RANKING_KEY)
|
||||
|
||||
def guid_maps_count_map(self, guid, default=None):
|
||||
tmp = self._db().get(NORMDATA_COUNT_MAP_PREFIX + guid)
|
||||
if tmp:
|
||||
|
@ -167,11 +186,6 @@ class AddonsCoinstallCache:
|
|||
return 0
|
||||
return float(result.decode("utf8"))
|
||||
|
||||
def fetch_coinstall_data(self):
|
||||
return s3_json_loader(
|
||||
TAARLITE_GUID_COINSTALL_BUCKET, TAARLITE_GUID_COINSTALL_KEY
|
||||
)
|
||||
|
||||
def get_filtered_coinstall(self, guid, default=None):
|
||||
tmp = self._db().get(FILTERED_COINSTALL_PREFIX + guid)
|
||||
if tmp:
|
||||
|
@ -224,7 +238,19 @@ class AddonsCoinstallCache:
|
|||
# Any value in ACTIVE_DB indicates that data is live
|
||||
return self._r0.get(ACTIVE_DB) is not None
|
||||
|
||||
# Private methods below
|
||||
def top_addons_per_locale(self):
    """
    Return the TAAR locale data stored in redis.

    Reads the value under the LOCALE_DATA key from the database handle
    returned by `self._db()`, decodes it as UTF-8 and parses it as JSON.

    Returns None when the key is missing or holds an empty value.
    """
    payload = self._db().get(LOCALE_DATA)
    if not payload:
        return None
    return json.loads(payload.decode("utf8"))
|
||||
|
||||
"""
|
||||
|
||||
################################
|
||||
|
||||
Private methods below
|
||||
|
||||
"""
|
||||
|
||||
def _db(self):
|
||||
"""
|
||||
|
@ -244,9 +270,34 @@ class AddonsCoinstallCache:
|
|||
""" pid/thread identity """
|
||||
return f"{os.getpid()}_{threading.get_ident()}"
|
||||
|
||||
def _update_coinstall_data(self, db):
|
||||
def _fetch_coinstall_data(self):
|
||||
return s3_json_loader(
|
||||
TAARLITE_GUID_COINSTALL_BUCKET, TAARLITE_GUID_COINSTALL_KEY
|
||||
)
|
||||
|
||||
data = self.fetch_coinstall_data()
|
||||
def _fetch_ranking_data(self):
|
||||
return s3_json_loader(TAARLITE_GUID_COINSTALL_BUCKET, TAARLITE_GUID_RANKING_KEY)
|
||||
|
||||
def _fetch_locale_data(self):
|
||||
return s3_json_loader(TAAR_LOCALE_BUCKET, TAAR_LOCALE_KEY)
|
||||
|
||||
def _update_locale_data(self, db):
    """
    Load the TAAR locale data into `db`.

    The mapping returned by `_fetch_locale_data()` is re-built so that each
    locale's list is sorted by the second element of every entry, highest
    first. The result is serialized as JSON and stored under LOCALE_DATA.
    """
    fetched = self._fetch_locale_data()
    ranked = {
        locale: sorted(entries, key=lambda entry: entry[1], reverse=True)
        for locale, entries in fetched.items()
    }

    db.set(LOCALE_DATA, json.dumps(ranked))
|
||||
|
||||
def _update_coinstall_data(self, db):
|
||||
"""
|
||||
Load the TAAR Lite GUID GUID coinstallation data
|
||||
"""
|
||||
|
||||
data = self._fetch_coinstall_data()
|
||||
|
||||
items = data.items()
|
||||
len_items = len(items)
|
||||
|
@ -302,7 +353,7 @@ class AddonsCoinstallCache:
|
|||
|
||||
def _update_rank_data(self, db):
|
||||
|
||||
data = self.fetch_ranking_data()
|
||||
data = self._fetch_ranking_data()
|
||||
|
||||
items = data.items()
|
||||
len_items = len(items)
|
||||
|
@ -332,8 +383,6 @@ class AddonsCoinstallCache:
|
|||
|
||||
self.logger.info("Completed precomputing normalized data")
|
||||
|
||||
# TODO: should this autoexpire to help indicate that no fresh
|
||||
# data has loaded? Maybe N * update TTL time?
|
||||
self._r0.set(ACTIVE_DB, next_active_db)
|
||||
self.logger.info(f"Active DB is set to {next_active_db}")
|
||||
|
||||
|
@ -347,3 +396,4 @@ class AddonsCoinstallCache:
|
|||
db.flushdb()
|
||||
self._update_rank_data(db)
|
||||
self._update_coinstall_data(db)
|
||||
self._update_locale_data(db)
|
||||
|
|
|
@ -81,23 +81,35 @@ RESULTS = {
|
|||
}
|
||||
|
||||
|
||||
def noop_taarlocale_dataload(stack):
    """
    Turn the TAAR locale data load into a no-op for the lifetime of `stack`.

    Enters a patch on `stack` that replaces
    `AddonsCoinstallCache._update_locale_data` with a mock returning None,
    then hands the same stack back to the caller.
    """
    locale_patch = mock.patch.object(
        AddonsCoinstallCache, "_update_locale_data", return_value=None
    )
    stack.enter_context(locale_patch)
    return stack
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def mock_coinstall_ranking_context(ctx, mock_coinstall, mock_ranking):
|
||||
|
||||
with contextlib.ExitStack() as stack:
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
AddonsCoinstallCache, "fetch_ranking_data", return_value=mock_ranking,
|
||||
AddonsCoinstallCache, "_fetch_ranking_data", return_value=mock_ranking,
|
||||
)
|
||||
)
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
AddonsCoinstallCache,
|
||||
"fetch_coinstall_data",
|
||||
"_fetch_coinstall_data",
|
||||
return_value=mock_coinstall,
|
||||
)
|
||||
)
|
||||
|
||||
stack = noop_taarlocale_dataload(stack)
|
||||
|
||||
# Patch fakeredis in
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
|
|
|
@ -2,8 +2,12 @@
|
|||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
from moto import mock_s3
|
||||
import boto3
|
||||
import mock
|
||||
|
||||
import contextlib
|
||||
import fakeredis
|
||||
from taar.recommenders.redis_cache import AddonsCoinstallCache
|
||||
|
||||
import json
|
||||
|
||||
|
@ -14,6 +18,7 @@ from taar.settings import TAAR_LOCALE_KEY, TAAR_LOCALE_BUCKET
|
|||
from markus import TIMING
|
||||
from markus.testing import MetricsMock
|
||||
|
||||
|
||||
FAKE_LOCALE_DATA = {
|
||||
"te-ST": [
|
||||
["{1e6b8bce-7dc8-481c-9f19-123e41332b72}", 0.1],
|
||||
|
@ -37,32 +42,74 @@ def install_mock_data(ctx):
|
|||
return ctx
|
||||
|
||||
|
||||
@mock_s3
|
||||
def noop_taarlite_dataload(stack):
    """
    Turn the TAARLite data loads into no-ops for the lifetime of `stack`.

    Enters two patches on `stack`, each replacing an `AddonsCoinstallCache`
    update method with a mock returning None, and returns the same stack.
    """
    # Rank data is patched first, then the guid-guid coinstall data.
    for method_name in ("_update_rank_data", "_update_coinstall_data"):
        stack.enter_context(
            mock.patch.object(AddonsCoinstallCache, method_name, return_value=None)
        )
    return stack
|
||||
|
||||
|
||||
@contextlib.contextmanager
def mock_locale_data(ctx):
    """
    Context manager that serves FAKE_LOCALE_DATA through a fake redis.

    While active, `AddonsCoinstallCache._fetch_locale_data` returns
    FAKE_LOCALE_DATA, the TAARLite loaders are no-ops, and
    `init_redis_connections` returns three fakeredis instances keyed 0-2.
    `safe_load_data()` is called once on a cache built from `ctx` before
    the underlying ExitStack is yielded.
    """
    with contextlib.ExitStack() as stack:
        fetch_patch = mock.patch.object(
            AddonsCoinstallCache,
            "_fetch_locale_data",
            return_value=FAKE_LOCALE_DATA,
        )
        stack.enter_context(fetch_patch)

        stack = noop_taarlite_dataload(stack)

        # Patch fakeredis in: one fake connection per redis database index.
        fake_connections = {
            db_index: fakeredis.FakeStrictRedis(db=db_index) for db_index in range(3)
        }
        redis_patch = mock.patch.object(
            AddonsCoinstallCache,
            "init_redis_connections",
            return_value=fake_connections,
        )
        stack.enter_context(redis_patch)

        # Initialize redis
        AddonsCoinstallCache(ctx).safe_load_data()
        yield stack
|
||||
|
||||
|
||||
def test_can_recommend(test_ctx):
|
||||
ctx = install_mock_data(test_ctx)
|
||||
r = LocaleRecommender(ctx)
|
||||
with mock_locale_data(test_ctx):
|
||||
r = LocaleRecommender(test_ctx)
|
||||
|
||||
# Test that we can't recommend if we have not enough client info.
|
||||
assert not r.can_recommend({})
|
||||
assert not r.can_recommend({"locale": []})
|
||||
# Test that we can't recommend if we have not enough client info.
|
||||
assert not r.can_recommend({})
|
||||
assert not r.can_recommend({"locale": []})
|
||||
|
||||
# Check that we can recommend if the user has at least an addon.
|
||||
assert r.can_recommend({"locale": "en"})
|
||||
# Check that we can recommend if the user has at least an addon.
|
||||
assert r.can_recommend({"locale": "en"})
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_can_recommend_no_model(test_ctx):
|
||||
ctx = install_mock_data(test_ctx)
|
||||
r = LocaleRecommender(ctx)
|
||||
with mock_locale_data(test_ctx):
|
||||
r = LocaleRecommender(test_ctx)
|
||||
|
||||
# We should never be able to recommend if something went
|
||||
# wrong with the model.
|
||||
assert not r.can_recommend({})
|
||||
assert not r.can_recommend({"locale": []})
|
||||
assert not r.can_recommend({"locale": "it"})
|
||||
# We should never be able to recommend if something went
|
||||
# wrong with the model.
|
||||
assert not r.can_recommend({})
|
||||
assert not r.can_recommend({"locale": []})
|
||||
assert not r.can_recommend({"locale": "it"})
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_recommendations(test_ctx):
|
||||
"""Test that the locale recommender returns the correct
|
||||
locale dependent addons.
|
||||
|
@ -71,27 +118,26 @@ def test_recommendations(test_ctx):
|
|||
of (GUID, weight).
|
||||
"""
|
||||
with MetricsMock() as mm:
|
||||
ctx = install_mock_data(test_ctx)
|
||||
r = LocaleRecommender(ctx)
|
||||
recommendations = r.recommend({"locale": "en"}, 10)
|
||||
with mock_locale_data(test_ctx):
|
||||
r = LocaleRecommender(test_ctx)
|
||||
|
||||
# Make sure the structure of the recommendations is correct and that we
|
||||
# recommended the right addon.
|
||||
assert isinstance(recommendations, list)
|
||||
assert len(recommendations) == len(FAKE_LOCALE_DATA["en"])
|
||||
recommendations = r.recommend({"locale": "en"}, 10)
|
||||
|
||||
# Make sure that the reported addons are the one from the fake data.
|
||||
for (addon_id, weight), (expected_id, expected_weight) in zip(
|
||||
recommendations, FAKE_LOCALE_DATA["en"]
|
||||
):
|
||||
assert addon_id == expected_id
|
||||
assert weight == expected_weight
|
||||
# Make sure the structure of the recommendations is correct and that we
|
||||
# recommended the right addon.
|
||||
assert isinstance(recommendations, list)
|
||||
assert len(recommendations) == len(FAKE_LOCALE_DATA["en"])
|
||||
|
||||
assert mm.has_record(TIMING, "taar.locale")
|
||||
assert mm.has_record(TIMING, "taar.locale_recommend")
|
||||
# Make sure that the reported addons are the one from the fake data.
|
||||
for (addon_id, weight), (expected_id, expected_weight) in zip(
|
||||
recommendations, FAKE_LOCALE_DATA["en"]
|
||||
):
|
||||
assert addon_id == expected_id
|
||||
assert weight == expected_weight
|
||||
|
||||
assert mm.has_record(TIMING, "taar.locale_recommend")
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_recommender_extra_data(test_ctx):
|
||||
# Test that the recommender uses locale data from the "extra"
|
||||
# section if available.
|
||||
|
@ -109,11 +155,13 @@ def test_recommender_extra_data(test_ctx):
|
|||
assert addon_id == expected_id
|
||||
assert weight == expected_weight
|
||||
|
||||
ctx = install_mock_data(test_ctx)
|
||||
r = LocaleRecommender(ctx)
|
||||
recommendations = r.recommend({}, 10, extra_data={"locale": "en"})
|
||||
validate_recommendations(recommendations, "en")
|
||||
with mock_locale_data(test_ctx):
|
||||
r = LocaleRecommender(test_ctx)
|
||||
recommendations = r.recommend({}, 10, extra_data={"locale": "en"})
|
||||
validate_recommendations(recommendations, "en")
|
||||
|
||||
# Make sure that we favour client data over the extra data.
|
||||
recommendations = r.recommend({"locale": "en"}, 10, extra_data={"locale": "te-ST"})
|
||||
validate_recommendations(recommendations, "en")
|
||||
# Make sure that we favour client data over the extra data.
|
||||
recommendations = r.recommend(
|
||||
{"locale": "en"}, 10, extra_data={"locale": "te-ST"}
|
||||
)
|
||||
validate_recommendations(recommendations, "en")
|
||||
|
|
Загрузка…
Ссылка в новой задаче