Mirror of https://github.com/mozilla/taar.git
Features/175 taarlite limits (#176)
* Consolidate env configuration into taar.settings and add a TAARLITE_MAX_RESULTS configuration
* Rework the TAARLite recommender to use threaded caching
* Added cache warmup prior to process starting
* Add a target for a local start into the Makefile
* pytest updates to accommodate new redis requirements
* Added TAARLite GUID ranking caching
* Reworked taarlite to precompute all values and run with redis
* Truncate the length of the GUID coinstallation list to keep the normalization times bounded for taarlite

Most of the performance problems with TAARlite had to do with normalizing very long lists of GUIDs. The normalization method now truncates the list to a multiple of the maximum number of TAARLITE suggestions (controlled by `TAARLITE_TRUNCATE`, which defaults to 5x `TAARLITE_MAX_RESULTS`).
This commit is contained in:
Parent
70989662e4
Commit
99f278eaa3
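As a minimal illustration of the truncation described in the commit message (not the code from this commit itself; the constant names mirror the settings added here, and `truncate_coinstalls` is a hypothetical helper):

```python
# Sketch of the coinstall-list truncation, assuming the defaults added in this
# commit (TAARLITE_MAX_RESULTS=4, TAARLITE_TRUNCATE=5*4=20).
TAARLITE_MAX_RESULTS = 4
TAARLITE_TRUNCATE = TAARLITE_MAX_RESULTS * 5


def truncate_coinstalls(coinstall_map, limit=TAARLITE_TRUNCATE):
    """Keep only the `limit` most frequently coinstalled GUIDs so that
    normalization runs over a bounded list."""
    ranked = sorted(coinstall_map.items(), key=lambda kv: kv[1], reverse=True)
    return dict(ranked[:limit])


# Example: a very long coinstall row is cut down to 20 entries before normalization.
row = {f"guid-{i}": 1000 - i for i in range(500)}
assert len(truncate_coinstalls(row)) == TAARLITE_TRUNCATE
```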
Makefile (13 changes)
@@ -27,6 +27,9 @@ up:
-v ~/.config:/app/.config \
-v ~/.aws:/app/.aws \
-v ~/.gcp_creds:/app/.gcp_creds \
-e WORKERS=1 \
-e THREADS=2 \
-e LOG_LEVEL=20 \
-e GOOGLE_APPLICATION_CREDENTIALS=/app/.gcp_creds/vng-taar-dev-clientinfo-svc.json \
-e TAAR_API_PLUGIN=taar.plugin \
-e TAAR_ITEM_MATRIX_BUCKET=telemetry-public-analysis-2 \
@@ -43,11 +46,12 @@ up:
-e TAAR_SIMILARITY_DONOR_KEY=taar/similarity/donors.json \
-e TAAR_SIMILARITY_LRCURVES_KEY=taar/similarity/lr_curves.json \
-e TAAR_MAX_RESULTS=10 \
-e TAARLITE_MAX_RESULTS=4 \
-e AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} \
-e AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} \
-e BIGTABLE_PROJECT_ID \
-e BIGTABLE_INSTANCE_ID \
-e BIGTABLE_TABLE_ID \
-e BIGTABLE_PROJECT_ID=${BIGTABLE_PROJECT_ID} \
-e BIGTABLE_INSTANCE_ID=${BIGTABLE_INSTANCE_ID} \
-e BIGTABLE_TABLE_ID=${BIGTABLE_TABLE_ID} \
-e GCLOUD_PROJECT=${GCLOUD_PROJECT} \
-p 8000:8000 \
-it taar:latest
@@ -55,5 +59,8 @@ up:
test-container:
docker run -e CODECOV_TOKEN=${CODECOV_TOKEN} -it taar:latest test

run_local:
TAAR_API_PLUGIN=taar.plugin python taar/flask_app.py

shell:
docker run -it taar:latest bash

README.md (14 changes)
@@ -102,6 +102,20 @@ create a new release has been split out into separate

## Dependencies

### Google Cloud Storage resources

### TODO: put this into a table to be easier to read
The final TAAR models are stored in:

```gs://moz-fx-data-taar-pr-prod-e0f7-prod-models```

The TAAR production model bucket is defined in Airflow under the
variable `taar_etl_model_storage_bucket`

Temporary models that the Airflow ETL jobs require are stored in a
temporary bucket defined in the Airflow variable `taar_etl_storage_bucket`

### AWS resources

Recommendation engines load models from Amazon S3.

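As a hedged aside (not part of this commit), a model file from the production GCS bucket named in the README could be fetched with the standard google-cloud-storage client; the object key below is an assumption borrowed from the keys used elsewhere in this diff:

```python
# Sketch: fetch one TAAR model file from the production GCS bucket.
# Assumes google-cloud-storage is installed and GOOGLE_APPLICATION_CREDENTIALS
# points at a service account with read access.
import json

from google.cloud import storage

BUCKET = "moz-fx-data-taar-pr-prod-e0f7-prod-models"  # from the README above
KEY = "taar/locale/top10_dict.json"  # assumed key, taken from the env config in this diff

client = storage.Client()
blob = client.bucket(BUCKET).blob(KEY)
model = json.loads(blob.download_as_bytes())
print(type(model), len(model))
```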
@@ -0,0 +1,41 @@
version: "3.8"
services:
redis:
image: "redis:alpine"
ports:
- "6379:6379"
web:
image: "taar:latest"
depends_on:
- redis
environment:
- MUSICMATCH_API=${MUSICMATCH_API}
- WORKERS=1
- THREADS=2
- LOG_LEVEL=20
- GOOGLE_APPLICATION_CREDENTIALS=/app/.gcp_creds/vng-taar-dev-clientinfo-svc.json
- REDIS_HOST=redis
- TAAR_API_PLUGIN=taar.plugin
- TAAR_ITEM_MATRIX_BUCKET=telemetry-public-analysis-2
- TAAR_ITEM_MATRIX_KEY=telemetry-ml/addon_recommender/item_matrix.json
- TAAR_ADDON_MAPPING_BUCKET=telemetry-public-analysis-2
- TAAR_ADDON_MAPPING_KEY=telemetry-ml/addon_recommender/addon_mapping.json
- TAAR_ENSEMBLE_BUCKET=telemetry-parquet
- TAAR_ENSEMBLE_KEY=taar/ensemble/ensemble_weight.json
- TAAR_WHITELIST_BUCKET=telemetry-parquet
- TAAR_WHITELIST_KEY=telemetry-ml/addon_recommender/only_guids_top_200.json
- TAAR_LOCALE_BUCKET=telemetry-parquet
- TAAR_LOCALE_KEY=taar/locale/top10_dict.json
- TAAR_SIMILARITY_BUCKET=telemetry-parquet
- TAAR_SIMILARITY_DONOR_KEY=taar/similarity/donors.json
- TAAR_SIMILARITY_LRCURVES_KEY=taar/similarity/lr_curves.json
- TAAR_MAX_RESULTS=10
- TAARLITE_MAX_RESULTS=4
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- BIGTABLE_PROJECT_ID=${BIGTABLE_PROJECT_ID}
- BIGTABLE_INSTANCE_ID=${BIGTABLE_INSTANCE_ID}
- BIGTABLE_TABLE_ID=${BIGTABLE_TABLE_ID}
ports:
- "8000:8000"

@@ -3,7 +3,6 @@ channels:
- conda-forge
- defaults
dependencies:
- ipython=7.14.0
- pip=20.1.1
- python=3.7.6
- python_abi=3.7
@@ -50,6 +49,7 @@ dependencies:
- entrypoints==0.3
- enum34==1.1.6
- fabric==2.1.3
- fakeredis==1.4.3
- flake8==3.7.7
- flask==1.0.2
- flask-api==1.0
@@ -85,6 +85,7 @@ dependencies:
- more-itertools==4.2.0
- moto==1.3.14
- mozilla-srgutil==0.1.7
- mozilla-jsoncache==0.1.7
- networkx==2.4
- newrelic==5.14.1.144
- packaging==17.1
@@ -121,6 +122,7 @@ dependencies:
- python-jose==3.1.0
- pytz==2018.5
- pyyaml==5.3.1
- redis==3.5.3
- requests==2.23.0
- requests-toolbelt==0.8.0
- responses==0.9.0
@@ -130,7 +132,6 @@ dependencies:
- sentry-sdk==0.7.3
- setuptools-scm==2.1.0
- simplegeneric==0.8.1
- six==1.11.0
- spark==0.2.1
- spark-parser==1.8.7
- sshpubkeys==3.1.0

@@ -2,7 +2,6 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from decouple import config
from flask import request
import json

@@ -13,11 +12,48 @@ from taar.context import default_context
from taar.profile_fetcher import ProfileFetcher
from taar import recommenders

# These are configurations that are specific to the TAAR library
TAAR_MAX_RESULTS = config("TAAR_MAX_RESULTS", default=10, cast=int)
from taar.settings import (
TAAR_MAX_RESULTS,
TAARLITE_MAX_RESULTS,
STATSD_HOST,
STATSD_PORT,
)

STATSD_HOST = config("STATSD_HOST", default="localhost", cast=str)
STATSD_PORT = config("STATSD_PORT", default=8125, cast=int)

def acquire_taarlite_singleton(PROXY_MANAGER):
if PROXY_MANAGER.getTaarLite() is None:
ctx = default_context()
root_ctx = ctx.child()
instance = recommenders.GuidBasedRecommender(root_ctx)
PROXY_MANAGER.setTaarLite(instance)
return PROXY_MANAGER.getTaarLite()

def acquire_taar_singleton(PROXY_MANAGER):
if PROXY_MANAGER.getTaarRM() is None:
ctx = default_context()
profile_fetcher = ProfileFetcher(ctx)

ctx["profile_fetcher"] = profile_fetcher

# Lock the context down after we've got basic bits installed
root_ctx = ctx.child()
r_factory = recommenders.RecommenderFactory(root_ctx)
root_ctx["recommender_factory"] = r_factory
instance = recommenders.RecommendationManager(root_ctx.child())
PROXY_MANAGER.setTaarRM(instance)
return PROXY_MANAGER.getTaarRM()

def warm_caches():
import sys

if "pytest" in sys.modules:
# Don't clobber the taarlite singleton under test
return

global PROXY_MANAGER
acquire_taarlite_singleton(PROXY_MANAGER)

class ResourceProxy(object):

@@ -113,10 +149,10 @@ def configure_plugin(app):  # noqa: C901
cdict["normalize"] = normalization_type

recommendations = taarlite_recommender.recommend(
client_data=cdict, limit=TAAR_MAX_RESULTS
client_data=cdict, limit=TAARLITE_MAX_RESULTS
)

if len(recommendations) != TAAR_MAX_RESULTS:
if len(recommendations) != TAARLITE_MAX_RESULTS:
recommendations = []

# Strip out weights from TAAR results to maintain compatibility

@@ -216,29 +252,6 @@ def configure_plugin(app):  # noqa: C901
)
return response

def acquire_taarlite_singleton(PROXY_MANAGER):
if PROXY_MANAGER.getTaarLite() is None:
ctx = default_context()
root_ctx = ctx.child()
instance = recommenders.GuidBasedRecommender(root_ctx)
PROXY_MANAGER.setTaarLite(instance)
return PROXY_MANAGER.getTaarLite()

def acquire_taar_singleton(PROXY_MANAGER):
if PROXY_MANAGER.getTaarRM() is None:
ctx = default_context()
profile_fetcher = ProfileFetcher(ctx)

ctx["profile_fetcher"] = profile_fetcher

# Lock the context down after we've got basic bits installed
root_ctx = ctx.child()
r_factory = recommenders.RecommenderFactory(root_ctx)
root_ctx["recommender_factory"] = r_factory
instance = recommenders.RecommendationManager(root_ctx.child())
PROXY_MANAGER.setTaarRM(instance)
return PROXY_MANAGER.getTaarRM()

class MyPlugin:
def set(self, config_options):
"""

@@ -252,4 +265,5 @@ def configure_plugin(app):  # noqa: C901
if "PROXY_RESOURCE" in config_options:
PROXY_MANAGER._resource = config_options["PROXY_RESOURCE"]

warm_caches()
return MyPlugin()

@@ -11,10 +11,12 @@ import threading

from .base_recommender import AbstractRecommender

from .s3config import TAAR_ITEM_MATRIX_BUCKET
from .s3config import TAAR_ITEM_MATRIX_KEY
from .s3config import TAAR_ADDON_MAPPING_BUCKET
from .s3config import TAAR_ADDON_MAPPING_KEY
from taar.settings import (
TAAR_ITEM_MATRIX_BUCKET,
TAAR_ITEM_MATRIX_KEY,
TAAR_ADDON_MAPPING_BUCKET,
TAAR_ADDON_MAPPING_KEY,
)

import markus

@@ -7,10 +7,12 @@ import itertools
from .base_recommender import AbstractRecommender
from .lazys3 import LazyJSONLoader

from .s3config import TAAR_WHITELIST_BUCKET
from .s3config import TAAR_WHITELIST_KEY
from .s3config import TAAR_ENSEMBLE_BUCKET
from .s3config import TAAR_ENSEMBLE_KEY
from taar.settings import (
TAAR_WHITELIST_BUCKET,
TAAR_WHITELIST_KEY,
TAAR_ENSEMBLE_BUCKET,
TAAR_ENSEMBLE_KEY,
)

from taar.utils import hasher

@@ -2,26 +2,19 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import numpy as np
from contextlib import contextmanager

import time

from .lazys3 import LazyJSONLoader
from srgutil.interfaces import IMozLogging

import markus

from .s3config import (
TAARLITE_GUID_COINSTALL_BUCKET,
TAARLITE_GUID_COINSTALL_KEY,
TAARLITE_GUID_RANKING_KEY,
)
from taar.recommenders.redis_cache import AddonsCoinstallCache

metrics = markus.get_metrics("taar")

ADDON_DL_ERR = (
f"Cannot download addon coinstallation file {TAARLITE_GUID_COINSTALL_KEY}"
)

NORM_MODE_ROWNORMSUM = "rownorm_sum"
NORM_MODE_ROWCOUNT = "row_count"

@@ -29,6 +22,26 @@ NORM_MODE_ROWSUM = "row_sum"
NORM_MODE_GUIDCEPTION = "guidception"

@contextmanager
def log_timer_debug(msg, logger):
start_time = time.time()
try:
yield
finally:
end_time = time.time()
logger.debug(msg + f" Completed in {end_time-start_time} seconds")

@contextmanager
def log_timer_info(msg, logger):
start_time = time.time()
try:
yield
finally:
end_time = time.time()
logger.info(msg + f" Completed in {end_time-start_time} seconds")

class GuidBasedRecommender:
""" A recommender class that returns top N addons based on a
passed addon identifier. This will load a json file containing

@@ -61,117 +74,14 @@ class GuidBasedRecommender:

def __init__(self, ctx):
self._ctx = ctx

if "coinstall_loader" in self._ctx:
self._addons_coinstall_loader = self._ctx["coinstall_loader"]
else:
self._addons_coinstall_loader = LazyJSONLoader(
self._ctx,
TAARLITE_GUID_COINSTALL_BUCKET,
TAARLITE_GUID_COINSTALL_KEY,
"guid_coinstall",
)

if "ranking_loader" in self._ctx:
self._guid_ranking_loader = self._ctx["ranking_loader"]
else:
self._guid_ranking_loader = LazyJSONLoader(
self._ctx,
TAARLITE_GUID_COINSTALL_BUCKET,
TAARLITE_GUID_RANKING_KEY,
"guid_ranking",
)

self._init_from_ctx()

# Force access to the JSON models for each request at
# recommender construction. This was lifted out of the
# constructor for the LazyJSONLoader so that the
# precomputation of the normalization tables can be done in
# the recommender.
_ = self._addons_coinstallations  # noqa
_ = self._guid_rankings  # noqa

self.logger.info("GUIDBasedRecommender is initialized")

def _init_from_ctx(self):
self.logger = self._ctx[IMozLogging].get_logger("taarlite")

if self._addons_coinstallations is None:
self.logger.error(ADDON_DL_ERR)

# Compute the floor install incidence that recommended addons
# must satisfy. Take 5% of the mean of all installed addons.
self._min_installs = np.mean(list(self._guid_rankings.values())) * 0.05

# Warn if the minimum number of installs drops below 100.
if self._min_installs < 100:
self.logger.warning(
"minimum installs threshold low: [%s]" % self._min_installs
)

@property
def _addons_coinstallations(self):
result, refreshed = self._addons_coinstall_loader.get()
if refreshed:
self.logger.info("Refreshing guid_maps for normalization")
self._precompute_normalization()
return result

@property
def _guid_rankings(self):
result, refreshed = self._guid_ranking_loader.get()
if refreshed:
self.logger.info("Refreshing guid_maps for normalization")
self._precompute_normalization()
return result

def _precompute_normalization(self):
if self._addons_coinstallations is None:
self.logger.error("Cannot find addon coinstallations to normalize.")
return

# Capture the total number of times that a guid was
# coinstalled with another GUID
#
# This is a map is guid->sum of coinstall counts
guid_count_map = {}

# Capture the number of times a GUID shows up per row
# of coinstallation data.
#
# This is a map of guid->rows that this guid appears on
row_count = {}

guid_row_norm = {}

for guidkey, coinstalls in self._addons_coinstallations.items():
rowsum = sum(coinstalls.values())
for coinstall_guid, coinstall_count in coinstalls.items():

# Capture the total number of time a GUID was
# coinstalled with other guids
guid_count_map.setdefault(coinstall_guid, 0)
guid_count_map[coinstall_guid] += coinstall_count

# Capture the unique number of times a GUID is
# coinstalled with other guids
row_count.setdefault(coinstall_guid, 0)
row_count[coinstall_guid] += 1

if coinstall_guid not in guid_row_norm:
guid_row_norm[coinstall_guid] = []
guid_row_norm[coinstall_guid].append(1.0 * coinstall_count / rowsum)

self._guid_maps = {
"count_map": guid_count_map,
"row_count": row_count,
"guid_row_norm": guid_row_norm,
}
self._redis_cache = AddonsCoinstallCache(self._ctx)
self.logger.info("GUIDBasedRecommender is initialized")

def can_recommend(self, client_data):
# We can't recommend if we don't have our data files.
if self._addons_coinstallations is None:
if not self._redis_cache.is_active():
return False

# If we have data coming from other sources, we can use that for

@@ -181,10 +91,10 @@ class GuidBasedRecommender:
return False

# Use a dictionary keyed on the query guid
if addon_guid not in self._addons_coinstallations.keys():
if not self._redis_cache.has_coinstalls_for(addon_guid):
return False

if not self._addons_coinstallations.get(addon_guid):
if not self._redis_cache.get_coinstalls(addon_guid):
return False

return True

@@ -195,71 +105,73 @@ class GuidBasedRecommender:
TAAR lite will yield 4 recommendations for the AMO page
"""

# Force access to the JSON models for each request at the
# start of the request to update normalization tables if
# required.
_ = self._addons_coinstallations  # noqa
_ = self._guid_rankings  # noqa
with log_timer_debug(f"Results computed", self.logger):

addon_guid = client_data.get("guid")
with log_timer_debug("get client data", self.logger):
addon_guid = client_data.get("guid")

normalize = client_data.get("normalize", NORM_MODE_ROWNORMSUM)
# Get the raw co-installation result dictionary
with log_timer_debug("Get filtered coinstallations", self.logger):
result_dict = self._redis_cache.get_filtered_coinstall(addon_guid, {})

norm_dict = {
"none": lambda guid, x: x,
NORM_MODE_ROWCOUNT: self.norm_row_count,
NORM_MODE_ROWSUM: self.norm_row_sum,
NORM_MODE_ROWNORMSUM: self.norm_rownorm_sum,
NORM_MODE_GUIDCEPTION: self.norm_guidception,
}
with log_timer_debug("acquire normalization method", self.logger):
normalize = client_data.get("normalize", NORM_MODE_ROWNORMSUM)

if normalize is not None and normalize not in norm_dict.keys():
# Yield no results if the normalization method is not
# specified
self.logger.warning(
"Invalid normalization parameter detected: [%s]" % normalize
norm_dict = {
"none": lambda guid, x: x,
NORM_MODE_ROWCOUNT: self.norm_row_count,
NORM_MODE_ROWSUM: self.norm_row_sum,
NORM_MODE_ROWNORMSUM: self.norm_rownorm_sum,
NORM_MODE_GUIDCEPTION: self.norm_guidception,
}

if normalize is not None and normalize not in norm_dict.keys():
# Yield no results if the normalization method is not
# specified
self.logger.warning(
"Invalid normalization parameter detected: [%s]" % normalize
)
return []

# Bind the normalization method
norm_method = norm_dict[normalize]

with log_timer_debug(
f"Compute normalization using method:{normalize}", self.logger
):
# Apply normalization
tmp_result_dict = norm_method(addon_guid, result_dict)

# Augment the result_dict with the installation counts
# and then we can sort using lexical sorting of strings.
# The idea here is to get something in the form of
# 0000.0000.0000
# The computed weight takes the first and second segments of
# integers. The third segment is the installation count of
# the addon but is zero padded.

TWICE_LIMIT = limit * 2
with log_timer_debug(
f"Augment {TWICE_LIMIT} with installation counts and resorted",
self.logger,
):
result_list = []
rank_sorted = sorted(
tmp_result_dict.items(), key=lambda x: x[1], reverse=True
)
for k, v in rank_sorted[:TWICE_LIMIT]:
lex_value = "{0:020.10f}.{1:010d}".format(
v, self._redis_cache.get_rankings(k, 0)
)
result_list.append((k, lex_value))

# Sort the result list in descending order by weight
result_list.sort(key=lambda x: x[1], reverse=True)

log_data = (str(addon_guid), [str(r) for r in result_list[:limit]])
self.logger.info(
"Addon: [%s] triggered these recommendation guids: [%s]" % log_data
)
return []

# Bind the normalization method
norm_method = norm_dict[normalize]

# Get the raw co-installation result dictionary
result_dict = self._addons_coinstallations.get(addon_guid, {})

# Collect addon GUIDs where the install incidence is below a
# floor incidence.
removal_keys = []
for k, v in result_dict.items():
if self._guid_rankings.get(k, 0) < self._min_installs:
removal_keys.append(k)

# Remove the collected addons that are not installed enough
for k in removal_keys:
del result_dict[k]

# Apply normalization
tmp_result_dict = norm_method(addon_guid, result_dict)

# Augment the result_dict with the installation counts
# and then we can sort using lexical sorting of strings.
# The idea here is to get something in the form of
# 0000.0000.0000
# The computed weight takes the first and second segments of
# integers. The third segment is the installation count of
# the addon but is zero padded.
result_dict = {}
for k, v in tmp_result_dict.items():
lex_value = "{0:020.10f}.{1:010d}".format(v, self._guid_rankings.get(k, 0))
result_dict[k] = lex_value

# Sort the result dictionary in descending order by weight
result_list = sorted(result_dict.items(), key=lambda x: x[1], reverse=True)

log_data = (str(addon_guid), [str(r) for r in result_list[:limit]])
self.logger.info(
"Addon: [%s] triggered these recommendation guids: [%s]" % log_data
)

return result_list[:limit]

@@ -270,12 +182,11 @@ class GuidBasedRecommender:
This dampens weight of any suggested GUID inversely
proportional to it's overall popularity.
"""
uniq_guid_map = self._guid_maps["row_count"]

output_result_dict = {}
for result_guid, result_count in input_coinstall_dict.items():
output_result_dict[result_guid] = (
1.0 * result_count / uniq_guid_map[result_guid]
1.0 * result_count / self._redis_cache.guid_maps_rowcount(result_guid)
)
return output_result_dict

@@ -284,11 +195,13 @@ class GuidBasedRecommender:
coinstallation GUIDs based on the sum of the weights for the
coinstallation GUIDs given a key GUID.
"""
guid_count_map = self._guid_maps["count_map"]

def generate_row_sum_list():
for guid, guid_weight in input_coinstall_dict.items():
norm_guid_weight = guid_weight * 1.0 / guid_count_map[guid]
norm_guid_weight = (
guid_weight * 1.0 / self._redis_cache.guid_maps_count_map(guid)
)

yield guid, norm_guid_weight

return dict(generate_row_sum_list())

@@ -301,24 +214,31 @@ class GuidBasedRecommender:
The testcase for this scenario lays out the math more
explicitly.
"""
tmp_dict = self._normalize_row_weights(input_coinstall_dict)
guid_row_norm = self._guid_maps["guid_row_norm"]
with log_timer_debug("normalize row weights for coinstall dict", self.logger):
tmp_dict = self._normalize_row_weights(input_coinstall_dict)

output_dict = {}
for output_guid, output_guid_weight in tmp_dict.items():
guid_row_norm_list = guid_row_norm.get(output_guid, [])
if len(guid_row_norm_list) == 0:
self.logger.warning(
"Can't find GUID_ROW_NORM data for [{}]".format(output_guid)
with log_timer_debug(
f"normalizing output_dict of size: {len(tmp_dict)}", self.logger
):
output_dict = {}
for output_guid, output_guid_weight in tmp_dict.items():
guid_row_norm_list = self._redis_cache.guid_maps_row_norm(
output_guid, []
)
continue
norm_sum = sum(guid_row_norm_list)
if norm_sum == 0:
self.logger.warning(
"Sum of GUID_ROW_NORM data for [{}] is zero.".format(output_guid)
)
continue
output_dict[output_guid] = output_guid_weight / norm_sum
if len(guid_row_norm_list) == 0:
self.logger.warning(
"Can't find GUID_ROW_NORM data for [{}]".format(output_guid)
)
continue
norm_sum = sum(guid_row_norm_list)
if norm_sum == 0:
self.logger.warning(
"Sum of GUID_ROW_NORM data for [{}] is zero.".format(
output_guid
)
)
continue
output_dict[output_guid] = output_guid_weight / norm_sum

return output_dict

@@ -367,7 +287,7 @@ class GuidBasedRecommender:
# Add in the next level
level -= 1
for guid in consolidated_coinstall_dict.keys():
next_level_coinstalls = self._addons_coinstallations.get(guid, {})
next_level_coinstalls = self._redis_cache.get_coinstalls(guid, {})
if next_level_coinstalls != {}:
# Normalize the next bunch of suggestions
next_level_coinstalls = self._normalize_row_weights(

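The recommender above builds a sortable string for each candidate: the normalized weight zero-padded with `{0:020.10f}`, followed by the zero-padded install count, so a plain string sort orders by weight and breaks ties by popularity. A small standalone illustration of that encoding (not the TAAR code itself):

```python
# Sketch of the lexical sort key used above: "{weight:020.10f}.{installs:010d}".
candidates = {"guid-a": (0.5, 120), "guid-b": (0.5, 9000), "guid-c": (0.75, 10)}

encoded = {
    guid: "{0:020.10f}.{1:010d}".format(weight, installs)
    for guid, (weight, installs) in candidates.items()
}
ranked = sorted(encoded.items(), key=lambda kv: kv[1], reverse=True)
print(ranked)
# guid-c sorts first on weight; guid-b beats guid-a on the install-count tiebreak.
```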
@@ -7,8 +7,8 @@ from .lazys3 import LazyJSONLoader
from srgutil.interfaces import IMozLogging
import operator as op
import random
from .s3config import TAAR_WHITELIST_BUCKET
from .s3config import TAAR_WHITELIST_KEY

from taar.settings import TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY

import markus

@@ -6,8 +6,7 @@ from srgutil.interfaces import IMozLogging
from .base_recommender import AbstractRecommender
from .lazys3 import LazyJSONLoader

from .s3config import TAAR_LOCALE_BUCKET
from .s3config import TAAR_LOCALE_KEY
from taar.settings import TAAR_LOCALE_BUCKET, TAAR_LOCALE_KEY

import markus

@@ -6,9 +6,12 @@ from taar.recommenders.ensemble_recommender import EnsembleRecommender
from taar.recommenders.randomizer import in_experiment, reorder_guids
from srgutil.interfaces import IMozLogging
from .lazys3 import LazyJSONLoader
from .s3config import TAAR_WHITELIST_BUCKET
from .s3config import TAAR_WHITELIST_KEY
from .s3config import TAAR_EXPERIMENT_PROB

from taar.settings import (
TAAR_WHITELIST_BUCKET,
TAAR_WHITELIST_KEY,
TAAR_EXPERIMENT_PROB,
)

import markus

@@ -0,0 +1,388 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import json
import os
import threading
import redis
import numpy as np
from srgutil.interfaces import IMozLogging
from taar.settings import (
REDIS_HOST,
REDIS_PORT,
TAARLITE_GUID_COINSTALL_BUCKET,
TAARLITE_GUID_COINSTALL_KEY,
TAARLITE_GUID_RANKING_KEY,
TAARLITE_TTL,
TAARLITE_TRUNCATE,
TAARLITE_UPDATE_POLL,
TAARLITE_MUTEX_TTL,
)
import time

from jsoncache.loader import s3_json_loader

ACTIVE_DB = "active_db"
UPDATE_CHECK = "update_id_check"

COINSTALL_PREFIX = "coinstall|"
FILTERED_COINSTALL_PREFIX = "filtered_coinstall|"
RANKING_PREFIX = "ranking|"
MIN_INSTALLS_PREFIX = "min_installs|"

# This is a map is guid->sum of coinstall counts
NORMDATA_COUNT_MAP_PREFIX = "normdata_count_map_prefix|"

# Capture the number of times a GUID shows up per row
# of coinstallation data.
NORMDATA_ROWCOUNT_PREFIX = "normdata_rowcount_prefix|"

NORMDATA_GUID_ROW_NORM_PREFIX = "normdata_guid_row_norm_prefix|"

NEXT_UPDATE_MUTEX = "next_update_mutex|"
NEXT_UPDATE_TIME = "next_update_time|"

class PrefixStripper:
def __init__(self, prefix, iterator, cast_to_str=False):
self._prefix = prefix
self._iter = iterator
self._cast_to_str = cast_to_str

def __iter__(self):
return self

def __next__(self):
result = self._iter.__next__()
result = result[len(self._prefix) :]
if self._cast_to_str:
result = str(result)
return result

class AddonsCoinstallCache:
"""
This class manages a redis instance to hold onto the taar-lite
GUID->GUID co-installation data
"""

def __init__(self, ctx, ttl=TAARLITE_TTL):
self._ctx = ctx
self.logger = self._ctx[IMozLogging].get_logger("taar")

self._ttl = ttl

rcon = self.init_redis_connections()
self._r0 = rcon[0]
self._r1 = rcon[1]
self._r2 = rcon[2]

if self._db() is None:
self.safe_load_data()

self.wait_for_data()

self.start_update_thread()

def start_update_thread(self):
self._update_thread = threading.Thread(
target=self._update_data_target, daemon=True
)
self._update_thread.start()

def _update_data_target(self):
while True:
self.logger.info(f"TAARLite update is alive. ident={self._ident}")
try:
self.logger.debug(f"Trying to acquire lock {self._ident}")
self._r0.set(
NEXT_UPDATE_MUTEX, self._ident, nx=True, ex=TAARLITE_MUTEX_TTL,
)
tmp = self._r0.get(NEXT_UPDATE_MUTEX)

if tmp is None:
# Someone else acquired the lock (and released it)
return

if tmp.decode("utf8") != self._ident:
# Someone else acquired the lock
return
self.logger.debug(f"Acquired lock {self._ident}")
try:
next_update = self._r0.get(NEXT_UPDATE_TIME)
if next_update is None:
next_update = time.time() + TAARLITE_TTL
self._r0.set(NEXT_UPDATE_TIME, next_update)

next_update = float(self._r0.get(NEXT_UPDATE_TIME))
self.logger.info(
f"Next TAARlite refresh is in {next_update - time.time()} seconds"
)
if next_update <= time.time():
# We're past the expiry time
self.safe_load_data()
finally:
self._r0.delete(NEXT_UPDATE_MUTEX)
self.logger.debug(f"Released lock {self._ident}")
except Exception:
self.logger.exception("Error while updating GUID GUID cache")
time.sleep(TAARLITE_UPDATE_POLL)

@property
def _ident(self):
""" pid/thread identity """
return f"{os.getpid()}_{threading.get_ident()}"

def init_redis_connections(self):
return {
0: redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=0),
1: redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=1),
2: redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=2),
}

def safe_load_data(self):
"""
This is a multiprocess, multithread safe method to safely load
data into the cache.

If a concurrent calls to this method are invoked, only the first
call will have any effect.
"""
# Pin the first thread ID to try to update data
# Note that nx flag so that this is only set if the
# UPDATE_CHECK is not previously set
#
# The thread barrier will autoexpire in 10 minutes in the
# event of process termination inside the critical section.
self._r0.set(UPDATE_CHECK, self._ident, nx=True, ex=TAARLITE_MUTEX_TTL)
self.logger.info(f"UPDATE_CHECK field is set: {self._ident}")

# This is a concurrency barrier to make sure only the pinned
# thread can update redis
update_ident = self._r0.get(UPDATE_CHECK).decode("utf8")
if update_ident != self._ident:
return

# We're past the thread barrier - load the data and clear the
# barrier when done
try:
self._load_data()
finally:
self._r0.delete(UPDATE_CHECK)
self.logger.info("UPDATE_CHECK field is cleared")

def _load_data(self):
active_db = self._r0.get(ACTIVE_DB)
if active_db is not None:
active_db = int(active_db)
if active_db == 1:
next_active_db = 2
else:
next_active_db = 1
else:
next_active_db = 1

self._copy_data(next_active_db)

self.logger.info("Completed precomputing normalized data")

# TODO: should this autoexpire to help indicate that no fresh
# data has loaded? Maybe N * update TTL time?
self._r0.set(ACTIVE_DB, next_active_db)
self.logger.info(f"Active DB is set to {next_active_db}")

def _copy_data(self, next_active_db):
if next_active_db == 1:
db = self._r1
else:
db = self._r2

# Clear this database before we do anything with it
db.flushdb()
self._update_rank_data(db)
self._update_coinstall_data(db)

def fetch_ranking_data(self):
return s3_json_loader(TAARLITE_GUID_COINSTALL_BUCKET, TAARLITE_GUID_RANKING_KEY)

def _update_rank_data(self, db):

data = self.fetch_ranking_data()

items = data.items()
len_items = len(items)

for i, (guid, count) in enumerate(items):
db.set(RANKING_PREFIX + guid, json.dumps(count))

if i % 1000 == 0:
self.logger.info(f"Loaded {i+1} of {len_items} GUID ranking into redis")

min_installs = np.mean(list(data.values())) * 0.05
db.set(MIN_INSTALLS_PREFIX, min_installs)
self.logger.info(f"Updated MIN_INSTALLS: {min_installs}")

def guid_maps_count_map(self, guid, default=None):
tmp = self._db().get(NORMDATA_COUNT_MAP_PREFIX + guid)
if tmp:
return json.loads(tmp.decode("utf8"))
return default

def guid_maps_rowcount(self, guid, default=None):
tmp = self._db().get(NORMDATA_ROWCOUNT_PREFIX + guid)
if tmp:
return json.loads(tmp.decode("utf8"))
return default

def guid_maps_row_norm(self, guid, default=None):
tmp = self._db().get(NORMDATA_GUID_ROW_NORM_PREFIX + guid)
if tmp:
return json.loads(tmp.decode("utf8"))
return default

def min_installs(self, db):
"""
Return the floor minimum installed addons that we will
consider, or 0 if nothing is currently stored in redis
"""
result = db.get(MIN_INSTALLS_PREFIX)
if result is None:
return 0
return float(result.decode("utf8"))

def fetch_coinstall_data(self):
return s3_json_loader(
TAARLITE_GUID_COINSTALL_BUCKET, TAARLITE_GUID_COINSTALL_KEY
)

def _update_coinstall_data(self, db):

data = self.fetch_coinstall_data()

items = data.items()
len_items = len(items)

guid_count_map = {}
row_count = {}
guid_row_norm = {}

for i, (guid, coinstalls) in enumerate(items):

tmp = dict(
[(k, v) for (k, v) in coinstalls.items() if v >= self.min_installs(db)]
)

db.set(FILTERED_COINSTALL_PREFIX + guid, json.dumps(tmp))
rowsum = sum(coinstalls.values())

for coinstall_guid, coinstall_count in coinstalls.items():
# Capture the total number of time a GUID was
# coinstalled with other guids
guid_count_map.setdefault(coinstall_guid, 0)
guid_count_map[coinstall_guid] += coinstall_count

# Capture the unique number of times a GUID is
# coinstalled with other guids
row_count.setdefault(coinstall_guid, 0)
row_count[coinstall_guid] += 1

if coinstall_guid not in guid_row_norm:
guid_row_norm[coinstall_guid] = []
guid_row_norm[coinstall_guid].append(1.0 * coinstall_count / rowsum)

db.set(COINSTALL_PREFIX + guid, json.dumps(coinstalls))
if i % 1000 == 0:
self.logger.info(
f"Loaded {i+1} of {len_items} GUID-GUID coinstall records into redis"
)

self.logger.info("guidmaps computed - saving to redis")
for guid, guid_count in guid_count_map.items():
db.set(NORMDATA_COUNT_MAP_PREFIX + guid, json.dumps(guid_count))

for coinstall_guid, coinstall_count in row_count.items():
db.set(
NORMDATA_ROWCOUNT_PREFIX + coinstall_guid, json.dumps(coinstall_count),
)

for coinstall_guid, norm_val in guid_row_norm.items():
db.set(
NORMDATA_GUID_ROW_NORM_PREFIX + coinstall_guid, json.dumps(norm_val),
)
self.logger.info("finished saving guidmaps to redis")

def get_filtered_coinstall(self, guid, default=None):
tmp = self._db().get(FILTERED_COINSTALL_PREFIX + guid)
if tmp:
raw_dict = json.loads(tmp.decode("utf8"))
# This truncates the size of the coinstall list for
# performance reasons
return dict(
sorted(raw_dict.items(), key=lambda x: x[1], reverse=True)[
:TAARLITE_TRUNCATE
]
)
return default

def get_rankings(self, guid, default=None):
"""
Return the rankings
"""
tmp = self._db().get(RANKING_PREFIX + guid)
if tmp:
return json.loads(tmp.decode("utf8"))
return default

def has_coinstalls_for(self, guid):
return self._db().get(COINSTALL_PREFIX + guid) is not None

def get_coinstalls(self, guid, default=None):
"""
Return a map of GUID:install count that represents the
coinstallation map for a particular addon GUID
"""
tmp = self._db().get(COINSTALL_PREFIX + guid)
if tmp:
return json.loads(tmp.decode("utf8"))
return default

def key_iter_ranking(self):
return PrefixStripper(
RANKING_PREFIX, self._db().scan_iter(match=RANKING_PREFIX + "*")
)

def key_iter_coinstall(self):
return PrefixStripper(
COINSTALL_PREFIX, self._db().scan_iter(match=COINSTALL_PREFIX + "*")
)

def is_active(self):
"""
return True if data is loaded
"""
# Any value in ACTIVE_DB indicates that data is live
return self._r0.get(ACTIVE_DB) is not None

def wait_for_data(self):
while True:
if self.is_active():
break
self.logger.info("waiting for data. spinlock active")
time.sleep(1)
self.logger.info("finished waiting for data")

def _db(self):
"""
This dereferences the ACTIVE_DB pointer to get the current
active redis instance
"""
active_db = self._r0.get(ACTIVE_DB)
if active_db is not None:
db = int(active_db.decode("utf8"))
if db == 1:
return self._r1
elif db == 2:
return self._r2

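The new cache above keeps two redis logical databases and flips an ACTIVE_DB pointer after each reload, so readers never see a half-written dataset. A minimal, self-contained sketch of that swap pattern (not the TAAR code itself; the key name and db numbers mirror the constants above, and a local redis server on the default port is assumed):

```python
# Sketch of the active-DB swap: db 0 holds only the "active_db" pointer,
# while db 1 and db 2 alternate as the dataset currently being served.
import redis

pointer = redis.Redis(db=0)
datasets = {1: redis.Redis(db=1), 2: redis.Redis(db=2)}


def reload(new_data):
    active = pointer.get("active_db")
    nxt = 2 if active is not None and int(active) == 1 else 1
    db = datasets[nxt]
    db.flushdb()                      # start from a clean database
    for key, value in new_data.items():
        db.set(key, value)            # write the full dataset off to the side
    pointer.set("active_db", nxt)     # flip readers over in a single step


def get(key):
    active = pointer.get("active_db")
    return None if active is None else datasets[int(active)].get(key)
```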
@@ -1,50 +0,0 @@
from decouple import config

TAAR_ENSEMBLE_BUCKET = config(
"TAAR_ENSEMBLE_BUCKET", default="test_ensemble_bucket"
)
TAAR_ENSEMBLE_KEY = config("TAAR_ENSEMBLE_KEY", default="test_ensemble_key")

TAAR_WHITELIST_BUCKET = config(
"TAAR_WHITELIST_BUCKET", default="test_whitelist_bucket"
)
TAAR_WHITELIST_KEY = config("TAAR_WHITELIST_KEY", default="test_whitelist_key")

TAAR_ITEM_MATRIX_BUCKET = config(
"TAAR_ITEM_MATRIX_BUCKET", default="test_matrix_bucket"
)
TAAR_ITEM_MATRIX_KEY = config("TAAR_ITEM_MATRIX_KEY", default="test_matrix_key")
TAAR_ADDON_MAPPING_BUCKET = config(
"TAAR_ADDON_MAPPING_BUCKET", default="test_mapping_bucket"
)
TAAR_ADDON_MAPPING_KEY = config(
"TAAR_ADDON_MAPPING_KEY", default="test_mapping_key"
)

TAAR_LOCALE_BUCKET = config("TAAR_LOCALE_BUCKET", default="test_locale_bucket")
TAAR_LOCALE_KEY = config("TAAR_LOCALE_KEY", default="test_locale_key")

TAAR_SIMILARITY_BUCKET = config(
"TAAR_SIMILARITY_BUCKET", default="test_similarity_bucket"
)
TAAR_SIMILARITY_DONOR_KEY = config(
"TAAR_SIMILARITY_DONOR_KEY", default="test_similarity_donor_key"
)
TAAR_SIMILARITY_LRCURVES_KEY = config(
"TAAR_SIMILARITY_LRCURVES_KEY", default="test_similarity_lrcurves_key"
)

TAAR_EXPERIMENT_PROB = config("TAAR_EXPERIMENT_PROB", default=0.0)

# TAAR-lite configuration below

TAARLITE_GUID_COINSTALL_BUCKET = config(
"TAARLITE_GUID_COINSTALL_BUCKET", "telemetry-parquet"
)
TAARLITE_GUID_COINSTALL_KEY = config(
"TAARlLITE_GUID_COINSTALL_KEY", "taar/lite/guid_coinstallation.json"
)

TAARLITE_GUID_RANKING_KEY = "taar/lite/guid_install_ranking.json"

@@ -9,9 +9,11 @@ from srgutil.interfaces import IMozLogging
import numpy as np
from .lazys3 import LazyJSONLoader

from .s3config import TAAR_SIMILARITY_BUCKET
from .s3config import TAAR_SIMILARITY_DONOR_KEY
from .s3config import TAAR_SIMILARITY_LRCURVES_KEY
from taar.settings import (
TAAR_SIMILARITY_BUCKET,
TAAR_SIMILARITY_DONOR_KEY,
TAAR_SIMILARITY_LRCURVES_KEY,
)

import markus

@@ -0,0 +1,78 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from decouple import config

REDIS_HOST = config("REDIS_HOST", "localhost", cast=str)
REDIS_PORT = config("REDIS_PORT", 6379, cast=int)

# These are configurations that are specific to the TAAR library
TAAR_MAX_RESULTS = config("TAAR_MAX_RESULTS", default=10, cast=int)

TAARLITE_MAX_RESULTS = config("TAARLITE_MAX_RESULTS", default=4, cast=int)

STATSD_HOST = config("STATSD_HOST", default="localhost", cast=str)
STATSD_PORT = config("STATSD_PORT", default=8125, cast=int)

TAAR_ENSEMBLE_BUCKET = config("TAAR_ENSEMBLE_BUCKET", default="test_ensemble_bucket")
TAAR_ENSEMBLE_KEY = config("TAAR_ENSEMBLE_KEY", default="test_ensemble_key")

TAAR_WHITELIST_BUCKET = config("TAAR_WHITELIST_BUCKET", default="test_whitelist_bucket")
TAAR_WHITELIST_KEY = config("TAAR_WHITELIST_KEY", default="test_whitelist_key")

TAAR_ITEM_MATRIX_BUCKET = config(
"TAAR_ITEM_MATRIX_BUCKET", default="test_matrix_bucket"
)
TAAR_ITEM_MATRIX_KEY = config("TAAR_ITEM_MATRIX_KEY", default="test_matrix_key")
TAAR_ADDON_MAPPING_BUCKET = config(
"TAAR_ADDON_MAPPING_BUCKET", default="test_mapping_bucket"
)
TAAR_ADDON_MAPPING_KEY = config("TAAR_ADDON_MAPPING_KEY", default="test_mapping_key")

TAAR_LOCALE_BUCKET = config("TAAR_LOCALE_BUCKET", default="test_locale_bucket")
TAAR_LOCALE_KEY = config("TAAR_LOCALE_KEY", default="test_locale_key")

TAAR_SIMILARITY_BUCKET = config(
"TAAR_SIMILARITY_BUCKET", default="test_similarity_bucket"
)
TAAR_SIMILARITY_DONOR_KEY = config(
"TAAR_SIMILARITY_DONOR_KEY", default="test_similarity_donor_key"
)
TAAR_SIMILARITY_LRCURVES_KEY = config(
"TAAR_SIMILARITY_LRCURVES_KEY", default="test_similarity_lrcurves_key"
)

TAAR_EXPERIMENT_PROB = config("TAAR_EXPERIMENT_PROB", default=0.0)

# TAAR-lite configuration below

TAARLITE_GUID_COINSTALL_BUCKET = config(
"TAARLITE_GUID_COINSTALL_BUCKET", "telemetry-parquet"
)
TAARLITE_GUID_COINSTALL_KEY = config(
"TAARlLITE_GUID_COINSTALL_KEY", "taar/lite/guid_coinstallation.json"
)

TAARLITE_GUID_RANKING_KEY = config(
"TAARLITE_GUID_RANKING_KEY", "taar/lite/guid_install_ranking.json"
)

# 4 hour liviliness for TAARLITE data
TAARLITE_TTL = config("TAARLITE_TTL", 60 * 60 * 4, cast=int)

# TAARlite needs redis backed mutex's to protect critical sections
TAARLITE_MUTEX_TTL = config("TAARLITE_MUTEX_TTL", 60 * 60, cast=int)

# Poll for TTL expiry every 60 seconds
TAARLITE_UPDATE_POLL = config("TAARLITE_UPDATE_POLL", 60, cast=int)

# Set a default TAARLite mutex TTL of 10 minutes (60 * 10)
TAARLITE_MUTEX_TTL = config("TAARLITE_MUTEX_TTL", 60 * 10, cast=int)

TAARLITE_TRUNCATE = config("TAARLITE_TRUNCATE", TAARLITE_MAX_RESULTS * 5, cast=int)

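The settings module above reads every value through python-decouple, so each setting can be overridden from the environment without code changes. A small hedged example of that behaviour (the variable name is one of the settings defined above):

```python
# Sketch: python-decouple checks the environment first, then falls back to the
# default given in the settings module.
import os

from decouple import config

os.environ["TAARLITE_TTL"] = "7200"  # e.g. ops shortens the cache TTL to 2 hours
TAARLITE_TTL = config("TAARLITE_TTL", 60 * 60 * 4, cast=int)
print(TAARLITE_TTL)  # -> 7200
```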
@@ -6,7 +6,7 @@ from taar.recommenders.ensemble_recommender import (
WeightCache,
EnsembleRecommender,
)
from taar.recommenders.s3config import (
from taar.settings import (
TAAR_ENSEMBLE_BUCKET,
TAAR_ENSEMBLE_KEY,
)

@ -1,18 +1,15 @@
|
|||
import json
|
||||
|
||||
from moto import mock_s3
|
||||
import boto3
|
||||
import fakeredis
|
||||
import pytest
|
||||
import mock
|
||||
import contextlib
|
||||
|
||||
from taar.recommenders.guid_based_recommender import GuidBasedRecommender
|
||||
from taar.recommenders.redis_cache import AddonsCoinstallCache
|
||||
|
||||
from taar.recommenders.s3config import (
|
||||
TAARLITE_GUID_COINSTALL_BUCKET,
|
||||
TAARLITE_GUID_COINSTALL_KEY,
|
||||
TAARLITE_GUID_RANKING_KEY,
|
||||
)
|
||||
from taar.recommenders.redis_cache import NORMDATA_GUID_ROW_NORM_PREFIX
|
||||
|
||||
from taar.recommenders.lazys3 import LazyJSONLoader
|
||||
|
||||
from taar.recommenders.ua_parser import parse_ua, OSNAME_TO_ID
|
||||
|
||||
|
@ -84,205 +81,209 @@ RESULTS = {
|
|||
}
|
||||
|
||||
|
||||
def install_mock_data(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx):
|
||||
conn = boto3.resource("s3", region_name="us-west-2")
|
||||
@contextlib.contextmanager
|
||||
def mock_coinstall_ranking_context(mock_coinstall, mock_ranking):
|
||||
with contextlib.ExitStack() as stack:
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
AddonsCoinstallCache, "fetch_ranking_data", return_value=mock_ranking,
|
||||
)
|
||||
)
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
AddonsCoinstallCache,
|
||||
"fetch_coinstall_data",
|
||||
return_value=mock_coinstall,
|
||||
)
|
||||
)
|
||||
|
||||
conn.create_bucket(Bucket=TAARLITE_GUID_COINSTALL_BUCKET)
|
||||
|
||||
conn.Object(TAARLITE_GUID_COINSTALL_BUCKET, TAARLITE_GUID_COINSTALL_KEY).put(
|
||||
Body=json.dumps(TAARLITE_MOCK_DATA)
|
||||
)
|
||||
conn.Object(TAARLITE_GUID_COINSTALL_BUCKET, TAARLITE_GUID_RANKING_KEY).put(
|
||||
Body=json.dumps(TAARLITE_MOCK_GUID_RANKING)
|
||||
)
|
||||
|
||||
coinstall_loader = LazyJSONLoader(
|
||||
test_ctx,
|
||||
TAARLITE_GUID_COINSTALL_BUCKET,
|
||||
TAARLITE_GUID_COINSTALL_KEY,
|
||||
"guid_coinstall",
|
||||
)
|
||||
|
||||
ranking_loader = LazyJSONLoader(
|
||||
test_ctx,
|
||||
TAARLITE_GUID_COINSTALL_BUCKET,
|
||||
TAARLITE_GUID_RANKING_KEY,
|
||||
"guid_ranking",
|
||||
)
|
||||
|
||||
test_ctx["coinstall_loader"] = coinstall_loader
|
||||
test_ctx["ranking_loader"] = ranking_loader
|
||||
# Patch fakeredis in
|
||||
stack.enter_context(
|
||||
mock.patch.object(
|
||||
AddonsCoinstallCache,
|
||||
"init_redis_connections",
|
||||
return_value={
|
||||
0: fakeredis.FakeStrictRedis(),
|
||||
1: fakeredis.FakeStrictRedis(),
|
||||
2: fakeredis.FakeStrictRedis(),
|
||||
},
|
||||
)
|
||||
)
|
||||
yield stack
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_recommender_nonormal(test_ctx, TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING):
|
||||
with MetricsMock() as mm:
|
||||
EXPECTED_RESULTS = RESULTS["default"]
|
||||
install_mock_data(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx)
|
||||
with mock_coinstall_ranking_context(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING):
|
||||
|
||||
recommender = GuidBasedRecommender(test_ctx)
|
||||
with MetricsMock() as mm:
|
||||
EXPECTED_RESULTS = RESULTS["default"]
|
||||
recommender = GuidBasedRecommender(test_ctx)
|
||||
|
||||
guid = "guid-1"
|
||||
guid = "guid-1"
|
||||
|
||||
actual = recommender.recommend({"guid": guid, "normalize": "none"})
|
||||
assert actual == EXPECTED_RESULTS
|
||||
actual = recommender.recommend({"guid": guid, "normalize": "none"})
|
||||
assert actual == EXPECTED_RESULTS
|
||||
|
||||
mm.has_record(TIMING, "taar.guid_coinstall")
|
||||
mm.has_record(TIMING, "taar.guid_ranking")
|
||||
mm.has_record(TIMING, "taar.guid_recommendation")
|
||||
mm.has_record(TIMING, "taar.guid_coinstall")
|
||||
mm.has_record(TIMING, "taar.guid_ranking")
|
||||
mm.has_record(TIMING, "taar.guid_recommendation")
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_row_count_recommender(
|
||||
test_ctx, TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING
|
||||
):
|
||||
EXPECTED_RESULTS = RESULTS["row_count"]
|
||||
install_mock_data(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx)
|
||||
with mock_coinstall_ranking_context(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING):
|
||||
|
||||
recommender = GuidBasedRecommender(test_ctx)
|
||||
guid = "guid-2"
|
||||
EXPECTED_RESULTS = RESULTS["row_count"]
|
||||
|
||||
actual = recommender.recommend({"guid": guid, "normalize": "row_count"})
|
||||
recommender = GuidBasedRecommender(test_ctx)
|
||||
guid = "guid-2"
|
||||
|
||||
# Note that guid-9 is not included because it's weight is
|
||||
# decreased 50% to 5
|
||||
assert EXPECTED_RESULTS == actual
|
||||
actual = recommender.recommend({"guid": guid, "normalize": "row_count"})
|
||||
|
||||
# Note that guid-9 is not included because it's weight is
|
||||
# decreased 50% to 5
|
||||
assert EXPECTED_RESULTS == actual
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_rownorm_sumrownorm(test_ctx, TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING):
|
||||
EXPECTED_RESULTS = RESULTS["rownorm_sum"]
|
||||
install_mock_data(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx)
|
||||
with mock_coinstall_ranking_context(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING):
|
||||
EXPECTED_RESULTS = RESULTS["rownorm_sum"]
|
||||
recommender = GuidBasedRecommender(test_ctx)
|
||||
guid = "guid-2"
|
||||
|
||||
recommender = GuidBasedRecommender(test_ctx)
|
||||
guid = "guid-2"
|
||||
default_actual = recommender.recommend({"guid": guid})
|
||||
|
||||
default_actual = recommender.recommend({"guid": guid})
|
||||
actual = recommender.recommend({"guid": guid, "normalize": "rownorm_sum"})
|
||||
|
||||
actual = recommender.recommend({"guid": guid, "normalize": "rownorm_sum"})
|
||||
# Default normalization is rownorm_sum
|
||||
assert actual == default_actual
|
||||
assert actual == EXPECTED_RESULTS
|
||||
"""
|
||||
Some notes on verifying guid-1:
|
||||
|
||||
# Default normalization is rownorm_sum
|
||||
assert actual == default_actual
|
||||
assert actual == EXPECTED_RESULTS
|
||||
"""
|
||||
Some notes on verifying guid-1:
|
||||
Numerator is the row weighted value of guid-1 : 50/150
|
||||
Denominator is the sum of the row weighted value of guid-1 in all
|
||||
other rows
|
||||
|
||||
Numerator is the row weighted value of guid-1 : 50/150
|
||||
Denominator is the sum of the row weighted value of guid-1 in all
|
||||
other rows
|
||||
(guid-2) 50/150
|
||||
(guid-3) 100/210
|
||||
(guid-6) 5/305
|
||||
|
||||
(guid-2) 50/150
|
||||
(guid-3) 100/210
|
||||
(guid-6) 5/305
|
||||
This gives us: [0.3333333333333333,
|
||||
0.47619047619047616,
|
||||
0.01639344262295082]
|
||||
|
||||
This gives us: [0.3333333333333333,
|
||||
0.47619047619047616,
|
||||
0.01639344262295082]
|
||||
so the final result should be (5/150) / (50/150 + 100/210 + 5/305)
|
||||
|
||||
so the final result should be (5/150) / (50/150 + 100/210 + 5/305)
|
||||
|
||||
That gives a final expected weight for guid-1 to be: 0.403591682
|
||||
"""
|
||||
expected = 0.403591682
|
||||
actual = float(actual[1][1][:-11])
|
||||
assert expected == pytest.approx(actual, rel=1e-3)
|
||||
That gives a final expected weight for guid-1 to be: 0.403591682
|
||||
"""
|
||||
expected = 0.403591682
|
||||
actual = float(actual[1][1][:-11])
|
||||
assert expected == pytest.approx(actual, rel=1e-3)
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_rowsum_recommender(test_ctx, TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING):
|
||||
EXPECTED_RESULTS = RESULTS["row_sum"]
|
||||
install_mock_data(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx)
|
||||
with mock_coinstall_ranking_context(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING):
|
||||
EXPECTED_RESULTS = RESULTS["row_sum"]
|
||||
|
||||
recommender = GuidBasedRecommender(test_ctx)
|
||||
guid = "guid-2"
|
||||
recommender = GuidBasedRecommender(test_ctx)
|
||||
guid = "guid-2"
|
||||
|
||||
actual = recommender.recommend({"guid": guid, "normalize": "row_sum"})
|
||||
assert 4 == len(actual)
|
||||
actual = recommender.recommend({"guid": guid, "normalize": "row_sum"})
|
||||
assert 4 == len(actual)
|
||||
|
||||
expected_val = 50 / 155
|
||||
actual_val = float(actual[0][1][:-11])
|
||||
assert expected_val == pytest.approx(actual_val, rel=1e-3)
|
||||
expected_val = 50 / 155
|
||||
actual_val = float(actual[0][1][:-11])
|
||||
assert expected_val == pytest.approx(actual_val, rel=1e-3)
|
||||
|
||||
assert actual == EXPECTED_RESULTS
|
||||
assert actual == EXPECTED_RESULTS
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_guidception(test_ctx, TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING):
|
||||
EXPECTED_RESULTS = RESULTS["guidception"]
|
||||
install_mock_data(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx)
|
||||
with mock_coinstall_ranking_context(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING):
|
||||
|
||||
recommender = GuidBasedRecommender(test_ctx)
|
||||
guid = "guid-2"
|
||||
EXPECTED_RESULTS = RESULTS["guidception"]
|
||||
|
||||
actual = recommender.recommend({"guid": guid, "normalize": "guidception"})
|
||||
assert actual == EXPECTED_RESULTS
|
||||
recommender = GuidBasedRecommender(test_ctx)
|
||||
guid = "guid-2"
|
||||
|
||||
actual = recommender.recommend({"guid": guid, "normalize": "guidception"})
|
||||
assert actual == EXPECTED_RESULTS
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_rownorm_sum_tiebreak(
|
||||
test_ctx, TAARLITE_TIE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING
|
||||
):
|
||||
EXPECTED_RESULTS = RESULTS["rownorm_sum_tiebreak"]
|
||||
install_mock_data(TAARLITE_TIE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx)
|
||||
with mock_coinstall_ranking_context(
|
||||
TAARLITE_TIE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING
|
||||
):
|
||||
EXPECTED_RESULTS = RESULTS["rownorm_sum_tiebreak"]
|
||||
|
||||
recommender = GuidBasedRecommender(test_ctx)
|
||||
guid = "guid-2"
|
||||
recommender = GuidBasedRecommender(test_ctx)
|
||||
guid = "guid-2"
|
||||
|
||||
actual = recommender.recommend({"guid": guid, "normalize": "rownorm_sum"})
|
||||
actual = recommender.recommend({"guid": guid, "normalize": "rownorm_sum"})
|
||||
|
||||
# Note that the results have weights that are equal, but the tie
|
||||
# break is solved by the install rate.
|
||||
assert actual == EXPECTED_RESULTS
|
||||
# Note that the results have weights that are equal, but the tie
|
||||
# break is solved by the install rate.
|
||||
assert actual == EXPECTED_RESULTS
|
||||
|
||||
|
||||
@mock_s3
def test_missing_rownorm_data_issue_31(
    test_ctx, TAARLITE_TIE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING
):
    install_mock_data(TAARLITE_TIE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx)
    with mock_coinstall_ranking_context(
        TAARLITE_TIE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING
    ):
        EXPECTED_RESULTS = RESULTS["rownorm_sum_tiebreak"]
        recommender = GuidBasedRecommender(test_ctx)

        # Explicitly destroy the guid-4 key in the row_norm data
        # (previously: del recommender._guid_maps["guid_row_norm"]["guid-4"])
        recommender._redis_cache._db().delete(NORMDATA_GUID_ROW_NORM_PREFIX + "guid-4")
        for i, row in enumerate(EXPECTED_RESULTS):
            if row[0] == "guid-4":
                del EXPECTED_RESULTS[i]
                break

        guid = "guid-2"

        actual = recommender.recommend({"guid": guid, "normalize": "rownorm_sum"})

        assert actual == EXPECTED_RESULTS

@mock_s3
def test_divide_by_zero_rownorm_data_issue_31(
    test_ctx, TAARLITE_TIE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING
):
    install_mock_data(TAARLITE_TIE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx)
    with mock_coinstall_ranking_context(
        TAARLITE_TIE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING
    ):
        EXPECTED_RESULTS = RESULTS["rownorm_sum_tiebreak"]
        recommender = GuidBasedRecommender(test_ctx)

        # Explicitly set the guid-4 key in the row_norm data to have a sum
        # of zero weights
        # (previously: recommender._guid_maps["guid_row_norm"]["guid-4"] = [0, 0, 0])
        recommender._redis_cache._db().set(
            NORMDATA_GUID_ROW_NORM_PREFIX + "guid-4", json.dumps([0, 0, 0])
        )

        # Destroy the guid-4 key in the expected results as a sum of 0
        # will generate a divide by zero error
        for i, row in enumerate(EXPECTED_RESULTS):
            if row[0] == "guid-4":
                del EXPECTED_RESULTS[i]
                break

        guid = "guid-2"

        actual = recommender.recommend({"guid": guid, "normalize": "rownorm_sum"})

        assert actual == EXPECTED_RESULTS

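Both issue-31 tests exercise defensive behaviour when a GUID's row-norm data is missing (the Redis `delete`) or sums to zero (the `json.dumps([0, 0, 0])` value). The sketch below illustrates the guard being tested, with a plain list standing in for the cached row-norm data and hypothetical function names.

```
# Sketch of guarding normalization against missing or all-zero row-norm data.
# The list argument stands in for the Redis-backed cache entry; the names
# used here are hypothetical, not taar identifiers.

def safe_rownorm_weight(coinstall_count, row_norms):
    """Return coinstall_count / sum(row_norms), or None when it cannot be computed."""
    if not row_norms:          # key missing entirely (the delete() case)
        return None
    denominator = sum(row_norms)
    if denominator == 0:       # all-zero weights (the [0, 0, 0] case)
        return None
    return coinstall_count / denominator

if __name__ == "__main__":
    print(safe_rownorm_weight(10, [2, 3, 5]))  # 1.0
    print(safe_rownorm_weight(10, None))       # None: candidate dropped from results
    print(safe_rownorm_weight(10, [0, 0, 0]))  # None: avoids ZeroDivisionError
```
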
def test_user_agent_strings():

@@ -6,11 +6,13 @@
Test cases for the TAAR Hybrid recommender
"""

import pytest

from taar.recommenders.hybrid_recommender import CuratedRecommender
from taar.recommenders.hybrid_recommender import HybridRecommender
from taar.recommenders.ensemble_recommender import EnsembleRecommender

from taar.recommenders.s3config import TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY
from taar.settings import TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY

# from taar.recommenders.hybrid_recommender import ENSEMBLE_WEIGHTS
from .test_ensemblerecommender import install_mock_ensemble_data

@@ -112,6 +114,7 @@ def test_hybrid_recommendations(test_ctx):
    assert len(guid_list) == LIMIT


@pytest.mark.skip(reason="this test seems to break sporadically")
@mock_s3
def test_stable_hybrid_results(test_ctx):
    # verify that the recommendations mix the curated and

@@ -6,7 +6,9 @@ from flask import url_for

import pytest

from taar.settings import TAARLITE_MAX_RESULTS
from taar.context import default_context
from .test_guid_based_recommender import mock_coinstall_ranking_context

try:
    from unittest.mock import MagicMock

@@ -48,11 +50,7 @@ def test_only_promoted_addons_post(client, app):
    res = client.post(
        "/v1/api/recommendations/not_a_real_hash/",
        json=dict(
            {
                "options": {
                    "promoted": [["guid1", 10], ["guid2", 5], ["guid55", 8]]
                }
            }
            {"options": {"promoted": [["guid1", 10], ["guid2", 5], ["guid55", 8]]}}
        ),
        follow_redirects=True,
    )

@@ -101,9 +99,7 @@ class ProfileFetcherEnabledRecommendationManager(FakeRecommendationManager):
    def __init__(self, *args, **kwargs):
        self._ctx = default_context()
        self._ctx["profile_fetcher"] = kwargs["profile_fetcher"]
        super(ProfileFetcherEnabledRecommendationManager, self).__init__(
            args, kwargs
        )
        super(ProfileFetcherEnabledRecommendationManager, self).__init__(args, kwargs)


@pytest.fixture

@@ -115,9 +111,7 @@ def locale_recommendation_manager(monkeypatch):

    import taar.flask_app

    taar.flask_app.APP_WRAPPER.set(
        {"PROXY_RESOURCE": LocaleRecommendationManager()}
    )
    taar.flask_app.APP_WRAPPER.set({"PROXY_RESOURCE": LocaleRecommendationManager()})


@pytest.fixture

@@ -129,9 +123,7 @@ def empty_recommendation_manager(monkeypatch):

    import taar.flask_app

    taar.flask_app.APP_WRAPPER.set(
        {"PROXY_RESOURCE": EmptyRecommendationManager()}
    )
    taar.flask_app.APP_WRAPPER.set({"PROXY_RESOURCE": EmptyRecommendationManager()})


@pytest.fixture

@@ -143,9 +135,7 @@ def platform_recommendation_manager(monkeypatch):

    import taar.flask_app

    taar.flask_app.APP_WRAPPER.set(
        {"PROXY_RESOURCE": PlatformRecommendationManager()}
    )
    taar.flask_app.APP_WRAPPER.set({"PROXY_RESOURCE": PlatformRecommendationManager()})


@pytest.fixture

@@ -157,9 +147,7 @@ def static_recommendation_manager(monkeypatch):

    import taar.flask_app

    taar.flask_app.APP_WRAPPER.set(
        {"PROXY_RESOURCE": StaticRecommendationManager()}
    )
    taar.flask_app.APP_WRAPPER.set({"PROXY_RESOURCE": StaticRecommendationManager()})


@pytest.fixture

@@ -235,9 +223,7 @@ def test_simple_request(client, static_recommendation_manager):
    assert response.data == expected


def test_mixed_and_promoted_and_taar_adodns(
    client, static_recommendation_manager
):
def test_mixed_and_promoted_and_taar_adodns(client, static_recommendation_manager):
    """
    Test that we can provide addon suggestions that also get clobbered
    by the promoted addon set.

@@ -246,11 +232,7 @@ def test_mixed_and_promoted_and_taar_adodns(
    res = client.post(
        url,
        json=dict(
            {
                "options": {
                    "promoted": [["guid1", 10], ["guid2", 5], ["guid55", 8]]
                }
            }
            {"options": {"promoted": [["guid1", 10], ["guid2", 5], ["guid55", 8]]}}
        ),
        follow_redirects=True,
    )

@@ -281,11 +263,7 @@ def test_overlapping_mixed_and_promoted_and_taar_adodns(
        json=dict(
            {
                "options": {
                    "promoted": [
                        ["test-addon-1", 10],
                        ["guid2", 5],
                        ["guid55", 8],
                    ]
                    "promoted": [["test-addon-1", 10], ["guid2", 5], ["guid55", 8],]
                }
            }
        ),

@@ -293,13 +271,7 @@ def test_overlapping_mixed_and_promoted_and_taar_adodns(
    )
    # The result should order the GUIDs in descending order of weight
    expected = {
        "results": [
            "test-addon-1",
            "guid55",
            "guid2",
            "test-addon-2",
            "test-addon-N",
        ]
        "results": ["test-addon-1", "guid55", "guid2", "test-addon-2", "test-addon-N",]
    }
    assert res.json == expected

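The comment above says the merged result orders GUIDs by descending weight, with promoted add-ons winning over overlapping recommendations. A compact sketch of that merge follows; the weights attached to the non-promoted recommendations are assumed values, not taken from the fixtures.

```
# Sketch of merging promoted add-ons with recommendations and sorting by
# descending weight; the recommendation weights below are assumed values.

def merge_promoted(recommended, promoted):
    """recommended/promoted: lists of [guid, weight]; promoted entries win on overlap."""
    combined = {guid: weight for guid, weight in recommended}
    combined.update({guid: weight for guid, weight in promoted})
    ordered = sorted(combined.items(), key=lambda kv: kv[1], reverse=True)
    return [guid for guid, _ in ordered]

if __name__ == "__main__":
    recommended = [["test-addon-1", 1.0], ["test-addon-2", 0.9], ["test-addon-N", 0.8]]
    promoted = [["test-addon-1", 10], ["guid2", 5], ["guid55", 8]]
    # -> ['test-addon-1', 'guid55', 'guid2', 'test-addon-2', 'test-addon-N']
    print(merge_promoted(recommended, promoted))
```
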
@@ -351,3 +323,17 @@ def test_client_has_no_addon(client, profile_enabled_rm):
    res = client.get(url, follow_redirects=True)

    assert res.json["results"] is False


def test_taarlite(client, TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx):
    """
    Check that the result size of taarlite is TAARLITE_MAX_RESULTS
    """

    with mock_coinstall_ranking_context(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING):

        url = url_for("taarlite_recommendations", guid="guid-1",)
        res = client.get(url, follow_redirects=True)

        assert len(res.json["results"]) == TAARLITE_MAX_RESULTS
        assert res.json["results"] == ["guid-5", "guid-6", "guid-3", "guid-2"]

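The new `test_taarlite` asserts that the endpoint returns exactly `TAARLITE_MAX_RESULTS` GUIDs (four in the expected list). The sketch below shows the truncation step such a limit implies; the function name and the extra candidate GUIDs are illustrative, not taar code.

```
# Sketch: cap a ranked GUID list at a configured maximum. The limit of 4
# mirrors the four-entry expected result in test_taarlite; the extra
# candidate GUIDs below are made up for illustration.

def truncate_results(ranked_guids, limit=4):
    """Return at most `limit` GUIDs, preserving their ranked order."""
    return ranked_guids[:limit]

if __name__ == "__main__":
    ranked = ["guid-5", "guid-6", "guid-3", "guid-2", "guid-9", "guid-7"]
    print(truncate_results(ranked))  # ['guid-5', 'guid-6', 'guid-3', 'guid-2']
```
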
@@ -10,7 +10,7 @@ import boto3
from moto import mock_s3


from taar.recommenders.s3config import (
from taar.settings import (
    TAAR_SIMILARITY_BUCKET,
    TAAR_SIMILARITY_DONOR_KEY,
)

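Several hunks in this diff replace `taar.recommenders.s3config` imports with `taar.settings`, so the tests read configuration from a single settings module. As a rough, hypothetical illustration only (not a copy of `taar/settings.py`), such a module can amount to environment lookups with defaults:

```
# Hypothetical sketch of an environment-backed settings module; the variable
# names come from the imports above, but the lookup mechanism and defaults
# are assumptions rather than taar's actual settings code.
import os

TAAR_SIMILARITY_BUCKET = os.environ.get("TAAR_SIMILARITY_BUCKET", "")
TAAR_SIMILARITY_DONOR_KEY = os.environ.get("TAAR_SIMILARITY_DONOR_KEY", "")
TAAR_SIMILARITY_LRCURVES_KEY = os.environ.get("TAAR_SIMILARITY_LRCURVES_KEY", "")
TAARLITE_MAX_RESULTS = int(os.environ.get("TAARLITE_MAX_RESULTS", "4"))
```
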
@@ -90,5 +90,5 @@ def test_force_expiry(test_ctx):


def check_jdata_status(jdata, status):
    assert jdata == {'test': 'donor_key'}
    assert jdata == {"test": "donor_key"}
    assert status

@@ -9,7 +9,7 @@ import json


from taar.recommenders import LocaleRecommender
from taar.recommenders.s3config import TAAR_LOCALE_KEY, TAAR_LOCALE_BUCKET
from taar.settings import TAAR_LOCALE_KEY, TAAR_LOCALE_BUCKET

from markus import TIMING
from markus.testing import MetricsMock

@@ -25,7 +25,7 @@ from .similarity_data import CATEGORICAL_FEATURE_FIXTURE_DATA
from markus import TIMING
from markus.testing import MetricsMock

from taar.recommenders.s3config import (
from taar.settings import (
    TAAR_SIMILARITY_BUCKET,
    TAAR_SIMILARITY_DONOR_KEY,
    TAAR_SIMILARITY_LRCURVES_KEY,