Mirror of https://github.com/mozilla/taar.git

Merge taar-lite into main TAAR repository (#173)

* Merge taar-lite into taar
* Enable API endpoint for TAARlite
* Bump version number and add an amended TAARLITE-README.md
* Added curl instruction for invoking TAARlite
* flake8/black fixes
* Add markus[datadog] for statsd metrics
* Add markus statsd metrics all over TAAR

This commit is contained in:
Parent: 3891419bcb
Commit: f88d31b599
.flake8 (2 lines changed)

@@ -1,5 +1,5 @@
[flake8]
-ignore = W503,W504,E203
+ignore = W503,W504,E203,E231
exclude = .git,__pycache__
max-complexity = 10
max-line-length = 120
@@ -0,0 +1,10 @@
repos:
  - repo: https://github.com/ambv/black
    rev: stable
    hooks:
      - id: black
        language_version: python3.7
  - repo: https://gitlab.com/pycqa/flake8
    rev: 3.7.9
    hooks:
      - id: flake8
@@ -256,6 +256,13 @@ curl.
curl https://user:pass@stage.taar.nonprod.dataops.mozgcp.net/v1/api/recommendations/<hashed_telemetry_id>
```
+
+Requests for a TAAR-lite recommendation can be made using curl as
+well:
+
+```
+curl https://user:pass@stage.taar.nonprod.dataops.mozgcp.net/taarlite/api/v1/addon_recommendations/<addon_guid>/
+```

## A note on cdist optimization.
cdist can speed up distance computation by a factor of 10 for the computations we're doing.
We can use it without problems on the canberra distance calculation.
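As a minimal illustration of the optimization described above (the matrix and feature sizes here are invented for the example), the canberra computation reduces to a single vectorized `cdist` call, the same pattern the SimilarityRecommender uses later in this diff:

```python
import numpy as np
from scipy.spatial import distance

donors = np.random.rand(10_000, 20)       # cached continuous donor features
client = np.array([np.random.rand(20)])   # one client's feature vector

# One cdist call computes all 10,000 canberra distances at once,
# replacing a Python-level loop over the donor rows.
dists = distance.cdist(donors, client, "canberra")
print(dists.shape)  # (10000, 1)
```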
@@ -0,0 +1,33 @@
# Taar-lite

The TAAR-lite service has been merged into the main TAAR repository
now.

TAAR-lite exposes a GUID-GUID recommender that recommends addons based
on the co-installation rate of each accept-listed addon with other
accept-listed addons.


#### ETL workflow AMO guid-guid TAAR-lite

* [taar_amodump.py](https://github.com/mozilla/taar_gcp_etl/blob/master/taar_etl/taar_amodump.py)
  * Scheduled to run daily
  * Collects all listed addons by calling the [AMO public API](https://addons.mozilla.org/api/v3/addons/search/) endpoint
  * Applies a filter returning only Firefox Web Browser Extensions
  * Writes __extended_addons_database.json__
* [taar_amowhitelist.py](https://github.com/mozilla/taar_gcp_etl/blob/master/taar_etl/taar_amowhitelist.py)
  * Scheduled to run daily, dependent on successful completion of [taar_amodump.py](https://github.com/mozilla/taar_gcp_etl/blob/master/taar_etl/taar_amodump.py)
  * Filters the addons contained in __extended_addons_database.json__
    * removes legacy addons
    * removes Web Extensions with a rating < 3.0
    * removes Web Extensions uploaded less than 60 days ago
    * removes [Firefox Pioneer](https://addons.mozilla.org/en-GB/firefox/addon/firefox-pioneer/?src=search)
  * Writes __whitelist_addons_database.json__
* [taar_lite_guidguid.py](https://github.com/mozilla/taar_gcp_etl/blob/master/taar_etl/taar_lite_guidguid.py)
  * Computes the coinstallation rate of each whitelisted addon with other whitelisted addons for a sample of Firefox clients (a toy sketch of this computation follows below)
  * Removes rare combinations of coinstallations
  * writes __guid_coinstallation.json__
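The following toy sketch (hypothetical code, not the actual ETL job) shows the shape of the guid-guid computation described in the last step above: counting how often pairs of addons are installed together, then dropping rare combinations. The client lists and cutoff value are illustrative only.

```python
from collections import defaultdict
from itertools import permutations

# Hypothetical per-client installed-addon lists
clients = [
    ["guid-1", "guid-2", "guid-3"],
    ["guid-1", "guid-2"],
    ["guid-2", "guid-3"],
]

coinstall = defaultdict(lambda: defaultdict(int))
for addons in clients:
    # Count every ordered pair of distinct addons installed together
    for a, b in permutations(addons, 2):
        coinstall[a][b] += 1

# Drop rare combinations (illustrative threshold)
MIN_COUNT = 2
guid_coinstallation = {
    a: {b: n for b, n in row.items() if n >= MIN_COUNT}
    for a, row in coinstall.items()
}
print(guid_coinstallation["guid-1"])  # {'guid-2': 2}
```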
## Build and run tests

The main TAAR build and test instructions are applicable as this is
now a unified codebase.
Binary file not shown.
After: Width | Height | Size: 92 KiB
@@ -80,6 +80,7 @@ dependencies:
  - jsonschema==3.2.0
  - junit-xml==1.9
  - mccabe==0.6.1
+  - markus[datadog]==2.2.0
  - mock==2.0.0
  - more-itertools==4.2.0
  - moto==1.3.14
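The `markus[datadog]` dependency added here backs the metrics calls introduced throughout this commit. As a minimal sketch of the pattern the recommenders below follow (the metric names in this sketch are illustrative, not from the diff):

```python
import markus

metrics = markus.get_metrics("taar")

@metrics.timer_decorator("example_recommend")  # time each call
def recommend():
    metrics.incr("example_counter", value=1)   # count an event
```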
setup.py (2 lines changed)

@@ -3,7 +3,7 @@ from setuptools import find_packages, setup
setup(
    name="mozilla-taar3",
    use_scm_version=False,
-    version="0.5.1",
+    version="0.6.0",
    setup_requires=["setuptools_scm", "pytest-runner"],
    tests_require=["pytest"],
    include_package_data=True,
@@ -1,3 +1,7 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
"""
A Context is a customizable namespace.

@@ -26,8 +30,10 @@ def default_context():
    # Note that the EnsembleRecommender is *not* in this map as it
    # needs to ensure that the recommender_map key is installed in the
    # context
-    ctx['recommender_factory_map'] = {'collaborative': lambda: CollaborativeRecommender(ctx.child()),
-                                      'similarity': lambda: SimilarityRecommender(ctx.child()),
-                                      'locale': lambda: LocaleRecommender(ctx.child())}
+    ctx["recommender_factory_map"] = {
+        "collaborative": lambda: CollaborativeRecommender(ctx.child()),
+        "similarity": lambda: SimilarityRecommender(ctx.child()),
+        "locale": lambda: LocaleRecommender(ctx.child()),
+    }

    return ctx
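A short, hypothetical usage sketch of the factory map built above: each value is a zero-argument lambda, so a recommender is only constructed when its map entry is invoked.

```python
ctx = default_context()

# Nothing is constructed yet; this is just a dict of thunks.
factory_map = ctx["recommender_factory_map"]

# Calling the thunk builds a LocaleRecommender against a child context.
locale_recommender = factory_map["locale"]()
```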
@@ -6,6 +6,8 @@ from decouple import config
from flask import request
import json

+import markus
+
# TAAR specific libraries
from taar.context import default_context
from taar.profile_fetcher import ProfileFetcher
@@ -14,17 +16,27 @@ from taar import recommenders
# These are configurations that are specific to the TAAR library
TAAR_MAX_RESULTS = config("TAAR_MAX_RESULTS", default=10, cast=int)

+STATSD_HOST = config("STATSD_HOST", default="localhost", cast=str)
+STATSD_PORT = config("STATSD_PORT", default=8125, cast=int)
+

class ResourceProxy(object):
    def __init__(self):
        self._resource = None
+        self._taarlite_resource = None

-    def setResource(self, rsrc):
+    def setTaarRM(self, rsrc):
        self._resource = rsrc

-    def getResource(self):
+    def getTaarRM(self):
        return self._resource

+    def setTaarLite(self, rsrc):
+        self._taarlite_resource = rsrc
+
+    def getTaarLite(self):
+        return self._taarlite_resource
+

PROXY_MANAGER = ResourceProxy()
@@ -73,14 +85,56 @@ def configure_plugin(app):  # noqa: C901
    flask given a particular library.
    """

+    markus.configure(
+        backends=[
+            {
+                # Log metrics to local instance of statsd
+                # server. Use DatadogMetrics client
+                "class": "markus.backends.datadog.DatadogMetrics",
+                "options": {
+                    "statsd_host": STATSD_HOST,
+                    "statsd_port": STATSD_PORT,
+                    "statsd_namespace": "",
+                },
+            }
+        ]
+    )
+
+    @app.route("/taarlite/api/v1/addon_recommendations/<string:guid>/")
+    def taarlite_recommendations(guid):
+        """Return a list of recommendations provided a telemetry client_id."""
+        # Use the module global PROXY_MANAGER
+        global PROXY_MANAGER
+        taarlite_recommender = acquire_taarlite_singleton(PROXY_MANAGER)
+
+        cdict = {"guid": guid}
+        normalization_type = request.args.get("normalize", None)
+        if normalization_type is not None:
+            cdict["normalize"] = normalization_type
+
+        recommendations = taarlite_recommender.recommend(
+            client_data=cdict, limit=TAAR_MAX_RESULTS
+        )
+
+        if len(recommendations) != TAAR_MAX_RESULTS:
+            recommendations = []
+
+        # Strip out weights from TAAR results to maintain compatibility
+        # with TAAR 1.0
+        jdata = {"results": [x[0] for x in recommendations]}
+
+        response = app.response_class(
+            response=json.dumps(jdata), status=200, mimetype="application/json"
+        )
+        return response
+
    @app.route(
-        "/v1/api/client_has_addon/<hashed_client_id>/<addon_id>/",
-        methods=["GET"],
+        "/v1/api/client_has_addon/<hashed_client_id>/<addon_id>/", methods=["GET"],
    )
    def client_has_addon(hashed_client_id, addon_id):
        # Use the module global PROXY_MANAGER
        global PROXY_MANAGER
-        recommendation_manager = check_proxy_manager(PROXY_MANAGER)
+        recommendation_manager = acquire_taar_singleton(PROXY_MANAGER)
        pf = recommendation_manager._ctx["profile_fetcher"]

        client_meta = pf.get(hashed_client_id)
@@ -89,23 +143,17 @@ def configure_plugin(app):  # noqa: C901
            # clientId
            result = {"results": False, "error": "No client found"}
            response = app.response_class(
-                response=json.dumps(result),
-                status=200,
-                mimetype="application/json",
+                response=json.dumps(result), status=200, mimetype="application/json",
            )
            return response

-        result = {
-            "results": addon_id in client_meta.get("installed_addons", [])
-        }
+        result = {"results": addon_id in client_meta.get("installed_addons", [])}
        response = app.response_class(
            response=json.dumps(result), status=200, mimetype="application/json"
        )
        return response

-    @app.route(
-        "/v1/api/recommendations/<hashed_client_id>/", methods=["GET", "POST"]
-    )
+    @app.route("/v1/api/recommendations/<hashed_client_id>/", methods=["GET", "POST"])
    def recommendations(hashed_client_id):
        """Return a list of recommendations provided a telemetry client_id."""
        # Use the module global PROXY_MANAGER
@@ -137,9 +185,7 @@ def configure_plugin(app):  # noqa: C901
                jdata["results"] = []
                jdata["error"] = "Invalid JSON in POST: {}".format(e)
                return app.response_class(
-                    response=json.dumps(
-                        jdata, status=400, mimetype="application/json"
-                    )
+                    response=json.dumps(jdata, status=400, mimetype="application/json")
                )

        # Coerce the uuid.UUID type into a string
@@ -153,7 +199,7 @@ def configure_plugin(app):  # noqa: C901
        if platform is not None:
            extra_data["platform"] = platform

-        recommendation_manager = check_proxy_manager(PROXY_MANAGER)
+        recommendation_manager = acquire_taar_singleton(PROXY_MANAGER)
        recommendations = recommendation_manager.recommend(
            client_id=client_id, limit=TAAR_MAX_RESULTS, extra_data=extra_data
        )
@@ -170,8 +216,16 @@ def configure_plugin(app):  # noqa: C901
        )
        return response

-    def check_proxy_manager(PROXY_MANAGER):
-        if PROXY_MANAGER.getResource() is None:
+    def acquire_taarlite_singleton(PROXY_MANAGER):
+        if PROXY_MANAGER.getTaarLite() is None:
+            ctx = default_context()
+            root_ctx = ctx.child()
+            instance = recommenders.GuidBasedRecommender(root_ctx)
+            PROXY_MANAGER.setTaarLite(instance)
+        return PROXY_MANAGER.getTaarLite()
+
+    def acquire_taar_singleton(PROXY_MANAGER):
+        if PROXY_MANAGER.getTaarRM() is None:
            ctx = default_context()
            profile_fetcher = ProfileFetcher(ctx)
@@ -182,8 +236,8 @@ def configure_plugin(app):  # noqa: C901
            r_factory = recommenders.RecommenderFactory(root_ctx)
            root_ctx["recommender_factory"] = r_factory
            instance = recommenders.RecommendationManager(root_ctx.child())
-            PROXY_MANAGER.setResource(instance)
-        return PROXY_MANAGER.getResource()
+            PROXY_MANAGER.setTaarRM(instance)
+        return PROXY_MANAGER.getTaarRM()


class MyPlugin:
    def set(self, config_options):
@@ -1,15 +1,15 @@
from .collaborative_recommender import CollaborativeRecommender
+from .guid_based_recommender import GuidBasedRecommender
from .locale_recommender import LocaleRecommender
-from .similarity_recommender import SimilarityRecommender
from .recommendation_manager import RecommendationManager, RecommenderFactory
from .fixtures import hasher  # noqa
+from .similarity_recommender import SimilarityRecommender


__all__ = [
    "CollaborativeRecommender",
+    "GuidBasedRecommender",
    "LocaleRecommender",
    "SimilarityRecommender",
    "RecommendationManager",
    "RecommenderFactory",
    "hasher",
]
@@ -16,6 +16,10 @@ from .s3config import TAAR_ITEM_MATRIX_KEY
from .s3config import TAAR_ADDON_MAPPING_BUCKET
from .s3config import TAAR_ADDON_MAPPING_KEY

+import markus
+
+metrics = markus.get_metrics("taar")
+

def synchronized(wrapped):
    """ Synchronization decorator. """

@@ -55,11 +59,14 @@ class CollaborativeRecommender(AbstractRecommender):
        self._lock = threading.RLock()

        self._addon_mapping = LazyJSONLoader(
-            self._ctx, TAAR_ADDON_MAPPING_BUCKET, TAAR_ADDON_MAPPING_KEY
+            self._ctx,
+            TAAR_ADDON_MAPPING_BUCKET,
+            TAAR_ADDON_MAPPING_KEY,
+            "addon_mapping",
        )

        self._raw_item_matrix = LazyJSONLoader(
-            self._ctx, TAAR_ITEM_MATRIX_BUCKET, TAAR_ITEM_MATRIX_KEY
+            self._ctx, TAAR_ITEM_MATRIX_BUCKET, TAAR_ITEM_MATRIX_KEY, "item_matrix",
        )

        self.logger = self._ctx[IMozLogging].get_logger("taar")

@@ -166,6 +173,7 @@ class CollaborativeRecommender(AbstractRecommender):
        recommendations = [(s[0], s[1]) for s in sorted_dists[:limit]]
        return recommendations

+    @metrics.timer_decorator("collaborative_recommend")
    def recommend(self, client_data, limit, extra_data={}):
        # Addons identifiers are stored as positive hash values within the model.
        with self._lock:

@@ -177,6 +185,7 @@ class CollaborativeRecommender(AbstractRecommender):
            self._addon_mapping.force_expiry()
            self._raw_item_matrix.force_expiry()

+            metrics.incr("error_collaborative", value=1)
            self.logger.exception(
                "Collaborative recommender crashed for {}".format(
                    client_data.get("client_id", "no-client-id")

@@ -184,7 +193,10 @@ class CollaborativeRecommender(AbstractRecommender):
                e,
            )

-        log_data = (client_data["client_id"], str([r[0] for r in recommendations]))
+        log_data = (
+            client_data["client_id"],
+            str([r[0] for r in recommendations]),
+        )
        self.logger.info(
            "collaborative_recommender_triggered, "
            "client_id: [%s], "
@@ -12,10 +12,16 @@ from .s3config import TAAR_WHITELIST_KEY
from .s3config import TAAR_ENSEMBLE_BUCKET
from .s3config import TAAR_ENSEMBLE_KEY

-from .fixtures import hasher
+from taar.utils import hasher

+import markus
+
+metrics = markus.get_metrics("taar")
+

def is_test_client(client_id):
    """ any client_id where the GUID is composed of a single digit
    (repeating) is a test id """
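    # Illustrative examples (added comments, not in the original source):
    #   "11111111-1111-1111-1111-111111111111" -> True (test client)
    #   "8a2bfcf3-9b3e-4cde-ae16-71ab9b9a9bfa" -> False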
    return len(set(client_id.replace("-", ""))) == 1


@@ -24,7 +30,7 @@ class WeightCache:
        self._ctx = ctx

        self._weights = LazyJSONLoader(
-            self._ctx, TAAR_ENSEMBLE_BUCKET, TAAR_ENSEMBLE_KEY
+            self._ctx, TAAR_ENSEMBLE_BUCKET, TAAR_ENSEMBLE_KEY, "ensemble"
        )

    def getWeights(self):

@@ -57,7 +63,7 @@ class EnsembleRecommender(AbstractRecommender):
            self._recommender_map[rkey] = recommender_factory.create(rkey)

        self._whitelist_data = LazyJSONLoader(
-            self._ctx, TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY
+            self._ctx, TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY, "whitelist"
        )

        self._weight_cache = WeightCache(self._ctx.child())

@@ -75,6 +81,7 @@ class EnsembleRecommender(AbstractRecommender):
        self.logger.info("Ensemble can_recommend: {}".format(result))
        return result

+    @metrics.timer_decorator("ensemble_recommend")
    def recommend(self, client_data, limit, extra_data={}):
        client_id = client_data.get("client_id", "no-client-id")

@@ -89,6 +96,7 @@ class EnsembleRecommender(AbstractRecommender):
            results = list(zip(samples, p))
        else:
            try:
+                metrics.incr("error_ensemble", value=1)
                results = self._recommend(client_data, limit, extra_data)
            except Exception as e:
                results = []

@@ -140,9 +148,7 @@ class EnsembleRecommender(AbstractRecommender):

        # group by the guid, sum up the weights for recurring GUID
        # suggestions across all recommenders
-        guid_grouper = itertools.groupby(
-            flattened_results, lambda item: item[0]
-        )
+        guid_grouper = itertools.groupby(flattened_results, lambda item: item[0])

        ensemble_suggestions = []
        for (guid, guid_group) in guid_grouper:
@@ -0,0 +1,386 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import numpy as np

from .lazys3 import LazyJSONLoader
from srgutil.interfaces import IMozLogging

import markus


from .s3config import (
    TAARLITE_GUID_COINSTALL_BUCKET,
    TAARLITE_GUID_COINSTALL_KEY,
    TAARLITE_GUID_RANKING_KEY,
)


metrics = markus.get_metrics("taar")

ADDON_DL_ERR = (
    f"Cannot download addon coinstallation file {TAARLITE_GUID_COINSTALL_KEY}"
)

NORM_MODE_ROWNORMSUM = "rownorm_sum"
NORM_MODE_ROWCOUNT = "row_count"
NORM_MODE_ROWSUM = "row_sum"
NORM_MODE_GUIDCEPTION = "guidception"


class GuidBasedRecommender:
    """ A recommender class that returns top N addons based on a
    passed addon identifier. This will load a json file containing
    updated top n addons coinstalled with the addon passed as an input
    parameter, based on a periodically updated addon-addon
    coinstallation frequency table generated from Longitudinal
    Telemetry data. This recommender will drive recommendations
    surfaced on addons.mozilla.org


    We store the JSON data for the GUID coinstallation in memory. This
    consumes ~ 15.8MB of heap.

    In [10]: from pympler import asizeof

    In [11]: jdata = json.load(open('guid_coinstallation.json'))

    In [12]: asizeof.asizeof(jdata)
    Out[12]: 15784672

    Each of the data normalization dictionaries is also stored in
    memory.
    """

    _addons_coinstallations = None
    _guid_maps = {}

    # Define recursion levels for guid-ception
    RECURSION_LEVELS = 3

    def __init__(self, ctx):
        self._ctx = ctx

        if "coinstall_loader" in self._ctx:
            self._addons_coinstall_loader = self._ctx["coinstall_loader"]
        else:
            self._addons_coinstall_loader = LazyJSONLoader(
                self._ctx,
                TAARLITE_GUID_COINSTALL_BUCKET,
                TAARLITE_GUID_COINSTALL_KEY,
                "guid_coinstall",
            )

        if "ranking_loader" in self._ctx:
            self._guid_ranking_loader = self._ctx["ranking_loader"]
        else:
            self._guid_ranking_loader = LazyJSONLoader(
                self._ctx,
                TAARLITE_GUID_COINSTALL_BUCKET,
                TAARLITE_GUID_RANKING_KEY,
                "guid_ranking",
            )

        self._init_from_ctx()

        # Force access to the JSON models for each request at
        # recommender construction. This was lifted out of the
        # constructor for the LazyJSONLoader so that the
        # precomputation of the normalization tables can be done in
        # the recommender.
        _ = self._addons_coinstallations  # noqa
        _ = self._guid_rankings  # noqa

        self.logger.info("GUIDBasedRecommender is initialized")

    def _init_from_ctx(self):
        self.logger = self._ctx[IMozLogging].get_logger("taarlite")

        if self._addons_coinstallations is None:
            self.logger.error(ADDON_DL_ERR)

        # Compute the floor install incidence that recommended addons
        # must satisfy. Take 5% of the mean of all installed addons.
        self._min_installs = np.mean(list(self._guid_rankings.values())) * 0.05

        # Warn if the minimum number of installs drops below 100.
        if self._min_installs < 100:
            self.logger.warning(
                "minimum installs threshold low: [%s]" % self._min_installs
            )

    @property
    def _addons_coinstallations(self):
        result, refreshed = self._addons_coinstall_loader.get()
        if refreshed:
            self.logger.info("Refreshing guid_maps for normalization")
            self._precompute_normalization()
        return result

    @property
    def _guid_rankings(self):
        result, refreshed = self._guid_ranking_loader.get()
        if refreshed:
            self.logger.info("Refreshing guid_maps for normalization")
            self._precompute_normalization()
        return result

    def _precompute_normalization(self):
        if self._addons_coinstallations is None:
            self.logger.error("Cannot find addon coinstallations to normalize.")
            return

        # Capture the total number of times that a guid was
        # coinstalled with another GUID
        #
        # This is a map of guid->sum of coinstall counts
        guid_count_map = {}

        # Capture the number of times a GUID shows up per row
        # of coinstallation data.
        #
        # This is a map of guid->rows that this guid appears on
        row_count = {}

        guid_row_norm = {}

        for guidkey, coinstalls in self._addons_coinstallations.items():
            rowsum = sum(coinstalls.values())
            for coinstall_guid, coinstall_count in coinstalls.items():

                # Capture the total number of times a GUID was
                # coinstalled with other guids
                guid_count_map.setdefault(coinstall_guid, 0)
                guid_count_map[coinstall_guid] += coinstall_count

                # Capture the unique number of times a GUID is
                # coinstalled with other guids
                row_count.setdefault(coinstall_guid, 0)
                row_count[coinstall_guid] += 1

                if coinstall_guid not in guid_row_norm:
                    guid_row_norm[coinstall_guid] = []
                guid_row_norm[coinstall_guid].append(1.0 * coinstall_count / rowsum)

        self._guid_maps = {
            "count_map": guid_count_map,
            "row_count": row_count,
            "guid_row_norm": guid_row_norm,
        }

    def can_recommend(self, client_data):
        # We can't recommend if we don't have our data files.
        if self._addons_coinstallations is None:
            return False

        # If we have data coming from other sources, we can use that for
        # recommending.
        addon_guid = client_data.get("guid", None)
        if not isinstance(addon_guid, str):
            return False

        # Use a dictionary keyed on the query guid
        if addon_guid not in self._addons_coinstallations.keys():
            return False

        if not self._addons_coinstallations.get(addon_guid):
            return False

        return True

    @metrics.timer_decorator("guid_recommendation")
    def recommend(self, client_data, limit=4):
        """
        TAAR lite will yield 4 recommendations for the AMO page
        """

        # Force access to the JSON models for each request at the
        # start of the request to update normalization tables if
        # required.
        _ = self._addons_coinstallations  # noqa
        _ = self._guid_rankings  # noqa

        addon_guid = client_data.get("guid")

        normalize = client_data.get("normalize", NORM_MODE_ROWNORMSUM)

        norm_dict = {
            "none": lambda guid, x: x,
            NORM_MODE_ROWCOUNT: self.norm_row_count,
            NORM_MODE_ROWSUM: self.norm_row_sum,
            NORM_MODE_ROWNORMSUM: self.norm_rownorm_sum,
            NORM_MODE_GUIDCEPTION: self.norm_guidception,
        }

        if normalize is not None and normalize not in norm_dict.keys():
            # Yield no results if the normalization method is not
            # specified
            self.logger.warning(
                "Invalid normalization parameter detected: [%s]" % normalize
            )
            return []

        # Bind the normalization method
        norm_method = norm_dict[normalize]

        # Get the raw co-installation result dictionary
        result_dict = self._addons_coinstallations.get(addon_guid, {})

        # Collect addon GUIDs where the install incidence is below a
        # floor incidence.
        removal_keys = []
        for k, v in result_dict.items():
            if self._guid_rankings.get(k, 0) < self._min_installs:
                removal_keys.append(k)

        # Remove the collected addons that are not installed enough
        for k in removal_keys:
            del result_dict[k]

        # Apply normalization
        tmp_result_dict = norm_method(addon_guid, result_dict)

        # Augment the result_dict with the installation counts
        # and then we can sort using lexical sorting of strings.
        # The idea here is to get something in the form of
        # 0000.0000.0000
        # The computed weight takes the first and second segments of
        # integers. The third segment is the installation count of
        # the addon but is zero padded.
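        # For example (illustrative values, added comment): a weight of
        # 0.7478143914 for an addon with 8 installs formats as
        # "000000000.7478143914.0000000008", so a plain string sort
        # orders by weight first and breaks ties on install count.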
        result_dict = {}
        for k, v in tmp_result_dict.items():
            lex_value = "{0:020.10f}.{1:010d}".format(v, self._guid_rankings.get(k, 0))
            result_dict[k] = lex_value

        # Sort the result dictionary in descending order by weight
        result_list = sorted(result_dict.items(), key=lambda x: x[1], reverse=True)

        log_data = (str(addon_guid), [str(r) for r in result_list[:limit]])
        self.logger.info(
            "Addon: [%s] triggered these recommendation guids: [%s]" % log_data
        )

        return result_list[:limit]

    def norm_row_count(self, key_guid, input_coinstall_dict):
        """This normalization method counts the unique times that a
        GUID is coinstalled with any other GUID.

        This dampens the weight of any suggested GUID inversely
        proportional to its overall popularity.
        """
        uniq_guid_map = self._guid_maps["row_count"]

        output_result_dict = {}
        for result_guid, result_count in input_coinstall_dict.items():
            output_result_dict[result_guid] = (
                1.0 * result_count / uniq_guid_map[result_guid]
            )
        return output_result_dict

    def norm_row_sum(self, key_guid, input_coinstall_dict):
        """This normalization normalizes the weights for the suggested
        coinstallation GUIDs based on the sum of the weights for the
        coinstallation GUIDs given a key GUID.
        """
        guid_count_map = self._guid_maps["count_map"]

        def generate_row_sum_list():
            for guid, guid_weight in input_coinstall_dict.items():
                norm_guid_weight = guid_weight * 1.0 / guid_count_map[guid]
                yield guid, norm_guid_weight

        return dict(generate_row_sum_list())

    def norm_rownorm_sum(self, key_guid, input_coinstall_dict):
        """This normalization is the same as norm_row_sum, but we also
        divide the result by the sum of
        (addon coinstall instances)/(addon coinstall total instances)

        The testcase for this scenario lays out the math more
        explicitly.
        """
        tmp_dict = self._normalize_row_weights(input_coinstall_dict)
        guid_row_norm = self._guid_maps["guid_row_norm"]

        output_dict = {}
        for output_guid, output_guid_weight in tmp_dict.items():
            guid_row_norm_list = guid_row_norm.get(output_guid, [])
            if len(guid_row_norm_list) == 0:
                self.logger.warning(
                    "Can't find GUID_ROW_NORM data for [{}]".format(output_guid)
                )
                continue
            norm_sum = sum(guid_row_norm_list)
            if norm_sum == 0:
                self.logger.warning(
                    "Sum of GUID_ROW_NORM data for [{}] is zero.".format(output_guid)
                )
                continue
            output_dict[output_guid] = output_guid_weight / norm_sum

        return output_dict

    def norm_guidception(self, key_guid, input_coinstall_dict):
        tmp_dict = self._normalize_row_weights(input_coinstall_dict)

        return self._compute_recursive_results(tmp_dict, self.RECURSION_LEVELS)

    def _normalize_row_weights(self, coinstall_dict):
        # Compute an intermediary dictionary that is a row normalized
        # co-install. That is - each coinstalled guid weight is
        # divided by the sum of the weights for all coinstalled guids
        # on this row.
        tmp_dict = {}
        coinstall_total_weight = sum(coinstall_dict.values())
        for coinstall_guid, coinstall_weight in coinstall_dict.items():
            tmp_dict[coinstall_guid] = coinstall_weight / coinstall_total_weight
        return tmp_dict

    def _recursion_penalty(self, level):
        """ Return a factor to apply to the weight for a guid
        recommendation.
        """
        dampener = 1.0 - (1.0 * (self.RECURSION_LEVELS - level) / self.RECURSION_LEVELS)
        dampener *= dampener
        return dampener

    def _compute_recursive_results(self, row_normalized_coinstall, level):
        if level <= 0:
            return row_normalized_coinstall

        # consolidated_coinstall_dict will capture values
        consolidated_coinstall_dict = {}

        # Add this level's guid weight to the consolidated result
        dampener = self._recursion_penalty(level)
        for (
            recommendation_guid,
            recommendation_guid_weight,
        ) in row_normalized_coinstall.items():
            for guid, guid_weight in row_normalized_coinstall.items():
                weight = consolidated_coinstall_dict.get(guid, 0)
                weight += dampener * guid_weight
                consolidated_coinstall_dict[guid] = weight

        # Add in the next level
        level -= 1
        for guid in consolidated_coinstall_dict.keys():
            next_level_coinstalls = self._addons_coinstallations.get(guid, {})
            if next_level_coinstalls != {}:
                # Normalize the next bunch of suggestions
                next_level_coinstalls = self._normalize_row_weights(
                    next_level_coinstalls
                )

                next_level_results = self._compute_recursive_results(
                    next_level_coinstalls, level
                )
                for (next_level_guid, next_level_weight,) in next_level_results.items():
                    weight = consolidated_coinstall_dict.get(guid, 0)
                    weight += next_level_weight
                    consolidated_coinstall_dict[guid] = weight

        # normalize the final results
        return self._normalize_row_weights(consolidated_coinstall_dict)
@@ -10,6 +10,10 @@ import random
from .s3config import TAAR_WHITELIST_BUCKET
from .s3config import TAAR_WHITELIST_KEY

+import markus
+
+metrics = markus.get_metrics("taar")
+

class CuratedWhitelistCache:
    """

@@ -19,7 +23,7 @@ class CuratedWhitelistCache:
    def __init__(self, ctx):
        self._ctx = ctx
        self._data = LazyJSONLoader(
-            self._ctx, TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY
+            self._ctx, TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY, "whitelist",
        )

    def get_whitelist(self):

@@ -55,6 +59,7 @@ class CuratedRecommender(AbstractRecommender):
        self.logger.info("Curated can_recommend: {}".format(True))
        return True

+    @metrics.timer_decorator("hybrid_recommend")
    def recommend(self, client_data, limit, extra_data={}):
        """
        Curated recommendations are just random selections

@@ -162,7 +167,10 @@ class HybridRecommender(AbstractRecommender):
            list(merged_results), key=op.itemgetter(1), reverse=True
        )

-        log_data = (client_data["client_id"], str([r[0] for r in sorted_results]))
+        log_data = (
+            client_data["client_id"],
+            str([r[0] for r in sorted_results]),
+        )

        self.logger.info(
            "Hybrid recommendations client_id: [%s], guids: [%s]" % log_data
@@ -6,15 +6,21 @@ import json
import threading
import time

+import markus
+
+
+metrics = markus.get_metrics("taar")
+

class LazyJSONLoader:
-    def __init__(self, ctx, s3_bucket, s3_key, ttl=14400):
+    def __init__(self, ctx, s3_bucket, s3_key, metric_name="", ttl=14400):
        self._ctx = ctx
        self.logger = self._ctx[IMozLogging].get_logger("taar")
        self._clock = self._ctx[IClock]

        self._s3_bucket = s3_bucket
        self._s3_key = s3_key
+        self._metric_name = metric_name
        self._ttl = int(ttl)
        self._expiry_time = 0

@@ -54,6 +60,7 @@ class LazyJSONLoader:
        return self._refresh_cache(transform), True

    def _refresh_cache(self, transform=None):

        with self._lock:
            # If some requests get stale data while the S3 bucket is
            # being reloaded - it's not the end of the world.

@@ -70,6 +77,7 @@ class LazyJSONLoader:

        raw_data = None
        raw_bytes = None

        try:
            # We need to force a data reload from S3
            config = Config(connect_timeout=10, retries={"max_attempts": 3})

@@ -95,6 +103,15 @@ class LazyJSONLoader:
            if transform is not None:
                tmp = transform(tmp)
            self._cached_copy = tmp
+            metrics.timing(
+                self._metric_name,
+                value=load_time * 1000,
+                tags=[
+                    f"store:s3",
+                    f"bucket:{self._s3_bucket}",
+                    f"key:{self._s3_key}",
+                ],
+            )
        except ValueError:
            # In the event of an error, we want to try to reload
            # the data so force the expiry to 0, but leave the
@@ -9,6 +9,10 @@ from .lazys3 import LazyJSONLoader
from .s3config import TAAR_LOCALE_BUCKET
from .s3config import TAAR_LOCALE_KEY

+import markus
+
+metrics = markus.get_metrics("taar")
+

class LocaleRecommender(AbstractRecommender):
    """ A recommender class that returns top N addons based on the client geo-locale.

@@ -27,7 +31,7 @@ class LocaleRecommender(AbstractRecommender):
        self.logger = self._ctx[IMozLogging].get_logger("taar")

        self._top_addons_per_locale = LazyJSONLoader(
-            self._ctx, TAAR_LOCALE_BUCKET, TAAR_LOCALE_KEY
+            self._ctx, TAAR_LOCALE_BUCKET, TAAR_LOCALE_KEY, "locale"
        )

        self._init_from_ctx()

@@ -69,12 +73,14 @@ class LocaleRecommender(AbstractRecommender):

        return True

+    @metrics.timer_decorator("locale_recommend")
    def recommend(self, client_data, limit, extra_data={}):
        try:
            result_list = self._recommend(client_data, limit, extra_data)
        except Exception as e:
            result_list = []
            self._top_addons_per_locale.force_expiry()
+            metrics.incr("error_locale", value=1)
            self.logger.exception(
                "Locale recommender crashed for {}".format(
                    client_data.get("client_id", "no-client-id")
@@ -5,18 +5,15 @@
from taar.recommenders.ensemble_recommender import EnsembleRecommender
from taar.recommenders.randomizer import in_experiment, reorder_guids
from srgutil.interfaces import IMozLogging

from taar.context import default_context

from .lazys3 import LazyJSONLoader

from .s3config import TAAR_WHITELIST_BUCKET
from .s3config import TAAR_WHITELIST_KEY
from .s3config import TAAR_EXPERIMENT_PROB

# We need to build a default logger for the schema validation as there
# is no class to bind to yet.
ctx = default_context()
+import markus
+
+metrics = markus.get_metrics("taar")


class RecommenderFactory:

@@ -59,13 +56,12 @@ class RecommendationManager:
        # The whitelist data is only used for test client IDs

        self._whitelist_data = LazyJSONLoader(
-            self._ctx, TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY
+            self._ctx, TAAR_WHITELIST_BUCKET, TAAR_WHITELIST_KEY, "whitelist"
        )

-        self._experiment_prob = ctx.get(
-            "TAAR_EXPERIMENT_PROB", TAAR_EXPERIMENT_PROB
-        )
+        self._experiment_prob = ctx.get("TAAR_EXPERIMENT_PROB", TAAR_EXPERIMENT_PROB)

+    @metrics.timer_decorator("profile_recommendation")
    def recommend(self, client_id, limit, extra_data={}):
        """Return recommendations for the given client.
@@ -1,9 +1,13 @@
from decouple import config

-TAAR_ENSEMBLE_BUCKET = config("TAAR_ENSEMBLE_BUCKET", default="test_ensemble_bucket")
+TAAR_ENSEMBLE_BUCKET = config(
+    "TAAR_ENSEMBLE_BUCKET", default="test_ensemble_bucket"
+)
TAAR_ENSEMBLE_KEY = config("TAAR_ENSEMBLE_KEY", default="test_ensemble_key")

-TAAR_WHITELIST_BUCKET = config("TAAR_WHITELIST_BUCKET", default="test_whitelist_bucket")
+TAAR_WHITELIST_BUCKET = config(
+    "TAAR_WHITELIST_BUCKET", default="test_whitelist_bucket"
+)
TAAR_WHITELIST_KEY = config("TAAR_WHITELIST_KEY", default="test_whitelist_key")

TAAR_ITEM_MATRIX_BUCKET = config(

@@ -13,7 +17,9 @@ TAAR_ITEM_MATRIX_KEY = config("TAAR_ITEM_MATRIX_KEY", default="test_matrix_key")
TAAR_ADDON_MAPPING_BUCKET = config(
    "TAAR_ADDON_MAPPING_BUCKET", default="test_mapping_bucket"
)
-TAAR_ADDON_MAPPING_KEY = config("TAAR_ADDON_MAPPING_KEY", default="test_mapping_key")
+TAAR_ADDON_MAPPING_KEY = config(
+    "TAAR_ADDON_MAPPING_KEY", default="test_mapping_key"
+)

TAAR_LOCALE_BUCKET = config("TAAR_LOCALE_BUCKET", default="test_locale_bucket")
TAAR_LOCALE_KEY = config("TAAR_LOCALE_KEY", default="test_locale_key")

@@ -30,3 +36,15 @@ TAAR_SIMILARITY_LRCURVES_KEY = config(
)

TAAR_EXPERIMENT_PROB = config("TAAR_EXPERIMENT_PROB", default=0.0)


# TAAR-lite configuration below

TAARLITE_GUID_COINSTALL_BUCKET = config(
    "TAARLITE_GUID_COINSTALL_BUCKET", "telemetry-parquet"
)
TAARLITE_GUID_COINSTALL_KEY = config(
    "TAARLITE_GUID_COINSTALL_KEY", "taar/lite/guid_coinstallation.json"
)

TAARLITE_GUID_RANKING_KEY = "taar/lite/guid_install_ranking.json"
@@ -13,6 +13,9 @@ from .s3config import TAAR_SIMILARITY_BUCKET
from .s3config import TAAR_SIMILARITY_DONOR_KEY
from .s3config import TAAR_SIMILARITY_LRCURVES_KEY

+import markus
+
+metrics = markus.get_metrics("taar")

FLOOR_DISTANCE_ADJUSTMENT = 0.001

@@ -51,14 +54,20 @@ class SimilarityRecommender(AbstractRecommender):
            self._donors_pool = self._ctx["similarity_donors_pool"]
        else:
            self._donors_pool = LazyJSONLoader(
-                self._ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY
+                self._ctx,
+                TAAR_SIMILARITY_BUCKET,
+                TAAR_SIMILARITY_DONOR_KEY,
+                "similarity_donor",
            )

        if "similarity_lr_curves" in self._ctx:
            self._lr_curves = self._ctx["similarity_lr_curves"]
        else:
            self._lr_curves = LazyJSONLoader(
-                self._ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY
+                self._ctx,
+                TAAR_SIMILARITY_BUCKET,
+                TAAR_SIMILARITY_LRCURVES_KEY,
+                "similarity_curves",
            )

        self.logger = self._ctx[IMozLogging].get_logger("taar")

@@ -196,7 +205,7 @@ class SimilarityRecommender(AbstractRecommender):

        # Compute the distances between the user and the cached continuous features.
        cont_features = distance.cdist(
-            self.continuous_features, np.array([client_continuous_feats]), "canberra"
+            self.continuous_features, np.array([client_continuous_feats]), "canberra",
        )

        # Compute the distances between the user and the cached categorical features.

@@ -284,6 +293,7 @@ class SimilarityRecommender(AbstractRecommender):
        )
        return recommendations_out

+    @metrics.timer_decorator("similarity_recommend")
    def recommend(self, client_data, limit, extra_data={}):
        try:
            recommendations_out = self._recommend(client_data, limit, extra_data)

@@ -292,6 +302,7 @@ class SimilarityRecommender(AbstractRecommender):
            self._donors_pool.force_expiry()
            self._lr_curves.force_expiry()

+            metrics.incr("error_similarity", value=1)
            self.logger.exception(
                "Similarity recommender crashed for {}".format(
                    client_data.get("client_id", "no-client-id")
@@ -0,0 +1,28 @@
import re

RE_PLATFORM = re.compile('(linux|windows|macintosh|android|fxios).*firefox')

LINUX = 1
WINDOWS = 2
MACINTOSH = 3
ANDROID = 4
FXIOS = 5

OSNAME_TO_ID = {'linux': LINUX,
                'windows': WINDOWS,
                'macintosh': MACINTOSH,
                'android': ANDROID,
                'fxios': FXIOS}


def parse_ua(user_agent):
    """
    Return one of the constants for platform selection, otherwise
    return None if the platform cannot be determined. Any non-Firefox
    agent is automatically short-circuited to be None.
    """
    ua = user_agent.lower()
    matches = RE_PLATFORM.findall(ua)
    if len(matches) != 1:
        return None
    return OSNAME_TO_ID[matches[0]]
@@ -2,11 +2,6 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

-"""
-These are fixtures that are used for testing TAAR in a production
-environment with known stable client_ids
-"""
-
import hashlib
@@ -8,17 +8,83 @@ from srgutil.interfaces import IClock

FAKE_LOCALE_DATA = {
    "te-ST": [
-        "{1e6b8bce-7dc8-481c-9f19-123e41332b72}", "some-other@nice-addon.com",
-        "{66d1eed2-a390-47cd-8215-016e9fa9cc55}", "{5f1594c3-0d4c-49dd-9182-4fbbb25131a7}"
+        "{1e6b8bce-7dc8-481c-9f19-123e41332b72}",
+        "some-other@nice-addon.com",
+        "{66d1eed2-a390-47cd-8215-016e9fa9cc55}",
+        "{5f1594c3-0d4c-49dd-9182-4fbbb25131a7}",
    ],
-    "en": [
-        "some-uuid@test-addon.com", "other-addon@some-id.it"
-    ]
+    "en": ["some-uuid@test-addon.com", "other-addon@some-id.it"],
}


@pytest.fixture
def test_ctx():
    ctx = default_context()
-    ctx['clock'] = ctx[IClock]
+    ctx["clock"] = ctx[IClock]
    return ctx


+@pytest.fixture
+def TAARLITE_MOCK_DATA():
+    return {
+        "guid-1": {
+            "guid-2": 1000,
+            "guid-3": 100,
+            "guid-4": 10,
+            "guid-5": 1,
+            "guid-6": 1,
+        },
+        "guid-2": {
+            "guid-1": 50,
+            "guid-3": 40,
+            "guid-4": 20,
+            "guid-8": 30,
+            "guid-9": 10,
+        },
+        "guid-3": {"guid-1": 100, "guid-2": 40, "guid-4": 70},
+        "guid-4": {"guid-2": 20},
+        "guid-6": {"guid-1": 5, "guid-7": 100, "guid-8": 100, "guid-9": 100},
+        "guid-8": {"guid-2": 30},
+        "guid-9": {"guid-2": 10},
+    }
+
+
+@pytest.fixture
+def TAARLITE_TIE_MOCK_DATA():
+    return {
+        "guid-1": {"guid-2": 100, "guid-3": 100, "guid-4": 100, "guid-5": 100},
+        "guid-2": {"guid-1": 100, "guid-3": 100, "guid-4": 100, "guid-5": 100},
+        "guid-3": {"guid-1": 100, "guid-2": 100, "guid-4": 100, "guid-5": 100},
+        "guid-4": {"guid-1": 20, "guid-2": 20, "guid-3": 20, "guid-5": 20},
+        "guid-5": {"guid-1": 20, "guid-2": 20, "guid-3": 20, "guid-4": 20},
+    }
+
+
+@pytest.fixture
+def TAARLITE_MOCK_GUID_RANKING():
+    return {
+        "guid-1": 10,
+        "guid-2": 9,
+        "guid-3": 8,
+        "guid-4": 7,
+        "guid-5": 6,
+        "guid-6": 5,
+        "guid-7": 4,
+        "guid-8": 3,
+        "guid-9": 2,
+    }
+
+
+@pytest.fixture
+def TAARLITE_CUTOFF_GUID_RANKING():
+    return {
+        "guid-1": 10000,
+        "guid-2": 9000,
+        "guid-3": 8000,
+        "guid-4": 7,
+        "guid-5": 6000,
+        "guid-6": 5000,
+        "guid-7": 4000,
+        "guid-8": 3000,
+        "guid-9": 2000,
+    }
@@ -21,6 +21,9 @@ from taar.recommenders.collaborative_recommender import CollaborativeRecommender
from taar.recommenders.collaborative_recommender import positive_hash
import json

+from markus import TIMING
+from markus.testing import MetricsMock
+

"""
We need to generate a synthetic list of addons and relative weights

@@ -134,30 +137,36 @@ def test_empty_recommendations(test_ctx):

@mock_s3
def test_best_recommendation(test_ctx):
-    # Make sure the structure of the recommendations is correct and that we
-    # recommended the right addon.
-    ctx = install_mock_data(test_ctx)
-    r = CollaborativeRecommender(ctx)
-
-    # A non-empty set of addons should give a list of recommendations
-    fixture_client_data = {
-        "installed_addons": ["addon4.id"],
-        "client_id": "test_client",
-    }
-    assert r.can_recommend(fixture_client_data)
-    recommendations = r.recommend(fixture_client_data, 1)
-
-    assert isinstance(recommendations, list)
-    assert len(recommendations) == 1
-
-    # Verify that addon2 - the most heavily weighted addon - was
-    # recommended
-    result = recommendations[0]
-    assert type(result) is tuple
-    assert len(result) == 2
-    assert result[0] == "addon2.id"
-    assert type(result[1]) is numpy.float64
-    assert numpy.isclose(result[1], numpy.float64("0.3225"))
+    with MetricsMock() as mm:
+        # Make sure the structure of the recommendations is correct and that we
+        # recommended the right addon.
+        ctx = install_mock_data(test_ctx)
+        r = CollaborativeRecommender(ctx)
+
+        # A non-empty set of addons should give a list of recommendations
+        fixture_client_data = {
+            "installed_addons": ["addon4.id"],
+            "client_id": "test_client",
+        }
+        assert r.can_recommend(fixture_client_data)
+        recommendations = r.recommend(fixture_client_data, 1)
+
+        assert isinstance(recommendations, list)
+        assert len(recommendations) == 1
+
+        # Verify that addon2 - the most heavily weighted addon - was
+        # recommended
+        result = recommendations[0]
+        assert type(result) is tuple
+        assert len(result) == 2
+        assert result[0] == "addon2.id"
+        assert type(result[1]) is numpy.float64
+        assert numpy.isclose(result[1], numpy.float64("0.3225"))
+
+        assert mm.has_record(TIMING, stat="taar.item_matrix")
+        assert mm.has_record(TIMING, stat="taar.addon_mapping")
+        assert mm.has_record(TIMING, stat="taar.collaborative_recommend")


@mock_s3
@@ -2,7 +2,10 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

-from taar.recommenders.ensemble_recommender import WeightCache, EnsembleRecommender
+from taar.recommenders.ensemble_recommender import (
+    WeightCache,
+    EnsembleRecommender,
+)
from taar.recommenders.s3config import (
    TAAR_ENSEMBLE_BUCKET,
    TAAR_ENSEMBLE_KEY,

@@ -12,6 +15,9 @@ import boto3
import json
from .mocks import MockRecommenderFactory

+from markus import TIMING
+from markus.testing import MetricsMock
+
EXPECTED = {"collaborative": 1000, "similarity": 100, "locale": 10}


@@ -35,30 +41,34 @@ def test_weight_cache(test_ctx):

@mock_s3
def test_recommendations(test_ctx):
-    ctx = install_mock_ensemble_data(test_ctx)
-
-    EXPECTED_RESULTS = [
-        ("ghi", 3430.0),
-        ("def", 3320.0),
-        ("ijk", 3200.0),
-        ("hij", 3100.0),
-        ("lmn", 420.0),
-    ]
-
-    factory = MockRecommenderFactory()
-    ctx["recommender_factory"] = factory
-
-    ctx["recommender_map"] = {
-        "collaborative": factory.create("collaborative"),
-        "similarity": factory.create("similarity"),
-        "locale": factory.create("locale"),
-    }
-    r = EnsembleRecommender(ctx.child())
-    client = {"client_id": "12345"}  # Anything will work here
-
-    recommendation_list = r.recommend(client, 5)
-    assert isinstance(recommendation_list, list)
-    assert recommendation_list == EXPECTED_RESULTS
+    with MetricsMock() as mm:
+        ctx = install_mock_ensemble_data(test_ctx)
+
+        EXPECTED_RESULTS = [
+            ("ghi", 3430.0),
+            ("def", 3320.0),
+            ("ijk", 3200.0),
+            ("hij", 3100.0),
+            ("lmn", 420.0),
+        ]
+
+        factory = MockRecommenderFactory()
+        ctx["recommender_factory"] = factory
+
+        ctx["recommender_map"] = {
+            "collaborative": factory.create("collaborative"),
+            "similarity": factory.create("similarity"),
+            "locale": factory.create("locale"),
+        }
+        r = EnsembleRecommender(ctx.child())
+        client = {"client_id": "12345"}  # Anything will work here
+
+        recommendation_list = r.recommend(client, 5)
+        assert isinstance(recommendation_list, list)
+        assert recommendation_list == EXPECTED_RESULTS
+
+        assert mm.has_record(TIMING, "taar.ensemble")
+        assert mm.has_record(TIMING, "taar.ensemble_recommend")


@mock_s3
@ -0,0 +1,318 @@
|
|||
import json
|
||||
|
||||
from moto import mock_s3
|
||||
import boto3
|
||||
import pytest
|
||||
|
||||
from taar.recommenders.guid_based_recommender import GuidBasedRecommender
|
||||
|
||||
from taar.recommenders.s3config import (
|
||||
TAARLITE_GUID_COINSTALL_BUCKET,
|
||||
TAARLITE_GUID_COINSTALL_KEY,
|
||||
TAARLITE_GUID_RANKING_KEY,
|
||||
)
|
||||
|
||||
from taar.recommenders.lazys3 import LazyJSONLoader
|
||||
|
||||
from taar.recommenders.ua_parser import parse_ua, OSNAME_TO_ID
|
||||
|
||||
from markus import TIMING
|
||||
from markus.testing import MetricsMock
|
||||
|
||||
# The different kinds of results we can expect from TAARlite are
|
||||
# listed below. Note that the ordering of GUIDs returned, and even
|
||||
# the set of GUIDs returned may be altered by the different weight
|
||||
# normalization modes.
|
||||
#
|
||||
# Based on some preliminary clustering analysis, the 'rownorm_sum'
|
||||
# method appears to provide qualitatively better results than the
|
||||
# other normalization modes including no normalization.
|
||||
|
||||
|
||||
# Reading the RESULTS is not entirely obvious. The recommendation
|
||||
# list consists of 2-tuples containing a guid, followed by a lexically
|
||||
# sorted weight+install count.
|
||||
# The weights are formatted as a fixed with zero padded float, with
|
||||
# an addition suffix of a decimal and a zero padded instllation count
|
||||
# for the addon.
|
||||
#
|
||||
# The clearest example of this is the 'rownorm_sum_tiebreak' results
|
||||
# where each of the computed weights are the same (0.25), but the
|
||||
# installation count varies.
|
||||
RESULTS = {
|
||||
"default": [
|
||||
("guid-2", "000001000.0000000000.0000000009"),
|
||||
("guid-3", "000000100.0000000000.0000000008"),
|
||||
("guid-4", "000000010.0000000000.0000000007"),
|
||||
("guid-5", "000000001.0000000000.0000000006"),
|
||||
],
|
||||
"row_count": [
|
||||
("guid-3", "000000020.0000000000.0000000008"), # 50% of 40
|
||||
("guid-1", "000000016.6666666667.0000000010"), # 1/3 of 50
|
||||
("guid-8", "000000015.0000000000.0000000003"), # 50% of 30
|
||||
("guid-4", "000000006.6666666667.0000000007"),
|
||||
], # 1/3 of 20
|
||||
"rownorm_sum": [
|
||||
("guid-3", "000000000.7478143914.0000000008"),
|
||||
("guid-1", "000000000.4035916824.0000000010"),
|
||||
("guid-8", "000000000.3788819876.0000000003"),
|
||||
("guid-4", "000000000.2803125788.0000000007"),
|
||||
],
|
||||
"rownorm_sum_tiebreak": [
|
||||
("guid-1", "000000000.2500000000.0000000010"),
|
||||
("guid-3", "000000000.2500000000.0000000008"),
|
||||
("guid-4", "000000000.2500000000.0000000007"),
|
||||
("guid-5", "000000000.2500000000.0000000006"),
|
||||
],
|
||||
"rownorm_sum_tiebreak_cutoff": [
|
||||
("guid-1", "000000000.3333333333.0000010000"), # noqa
|
||||
("guid-3", "000000000.3333333333.0000008000"), # noqa
|
||||
("guid-5", "000000000.3333333333.0000006000"),
|
||||
], # noqa
|
||||
"row_sum": [
|
||||
("guid-1", "000000000.3225806452.0000000010"),
|
||||
("guid-3", "000000000.2857142857.0000000008"),
|
||||
("guid-8", "000000000.2307692308.0000000003"),
|
||||
("guid-4", "000000000.2000000000.0000000007"),
|
||||
],
|
||||
"guidception": [
|
||||
("guid-1", "000000000.2666666667.0000000010"),
|
||||
("guid-3", "000000000.2333333333.0000000008"),
|
||||
("guid-8", "000000000.2000000000.0000000003"),
|
||||
("guid-4", "000000000.1666666667.0000000007"),
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def install_mock_data(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx):
|
||||
conn = boto3.resource("s3", region_name="us-west-2")
|
||||
|
||||
conn.create_bucket(Bucket=TAARLITE_GUID_COINSTALL_BUCKET)
|
||||
|
||||
conn.Object(TAARLITE_GUID_COINSTALL_BUCKET, TAARLITE_GUID_COINSTALL_KEY).put(
|
||||
Body=json.dumps(TAARLITE_MOCK_DATA)
|
||||
)
|
||||
conn.Object(TAARLITE_GUID_COINSTALL_BUCKET, TAARLITE_GUID_RANKING_KEY).put(
|
||||
Body=json.dumps(TAARLITE_MOCK_GUID_RANKING)
|
||||
)
|
||||
|
||||
coinstall_loader = LazyJSONLoader(
|
||||
test_ctx,
|
||||
TAARLITE_GUID_COINSTALL_BUCKET,
|
||||
TAARLITE_GUID_COINSTALL_KEY,
|
||||
"guid_coinstall",
|
||||
)
|
||||
|
||||
ranking_loader = LazyJSONLoader(
|
||||
test_ctx,
|
||||
TAARLITE_GUID_COINSTALL_BUCKET,
|
||||
TAARLITE_GUID_RANKING_KEY,
|
||||
"guid_ranking",
|
||||
)
|
||||
|
||||
test_ctx["coinstall_loader"] = coinstall_loader
|
||||
test_ctx["ranking_loader"] = ranking_loader
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_recommender_nonormal(test_ctx, TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING):
|
||||
with MetricsMock() as mm:
|
||||
EXPECTED_RESULTS = RESULTS["default"]
|
||||
install_mock_data(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx)
|
||||
|
||||
recommender = GuidBasedRecommender(test_ctx)
|
||||
|
||||
guid = "guid-1"
|
||||
|
||||
actual = recommender.recommend({"guid": guid, "normalize": "none"})
|
||||
assert actual == EXPECTED_RESULTS
|
||||
|
||||
mm.has_record(TIMING, "taar.guid_coinstall")
|
||||
mm.has_record(TIMING, "taar.guid_ranking")
|
||||
mm.has_record(TIMING, "taar.guid_recommendation")
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_row_count_recommender(
|
||||
test_ctx, TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING
|
||||
):
|
||||
EXPECTED_RESULTS = RESULTS["row_count"]
|
||||
install_mock_data(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx)
|
||||
|
||||
recommender = GuidBasedRecommender(test_ctx)
|
||||
guid = "guid-2"
|
||||
|
||||
actual = recommender.recommend({"guid": guid, "normalize": "row_count"})
|
||||
|
||||
# Note that guid-9 is not included because it's weight is
|
||||
# decreased 50% to 5
|
||||
assert EXPECTED_RESULTS == actual
|
||||
|
||||
|
||||
@mock_s3
|
||||
def test_rownorm_sumrownorm(test_ctx, TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING):
|
||||
EXPECTED_RESULTS = RESULTS["rownorm_sum"]
|
||||
install_mock_data(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx)
|
||||
|
||||
recommender = GuidBasedRecommender(test_ctx)
|
||||
guid = "guid-2"
|
||||
|
||||
default_actual = recommender.recommend({"guid": guid})
|
||||
|
||||
actual = recommender.recommend({"guid": guid, "normalize": "rownorm_sum"})
|
||||
|
||||
# Default normalization is rownorm_sum
|
||||
assert actual == default_actual
|
||||
assert actual == EXPECTED_RESULTS
|
||||
"""
|
||||
Some notes on verifying guid-1:
|
||||
|
||||
Numerator is the row weighted value of guid-1 : 50/150
|
||||
Denominator is the sum of the row weighted value of guid-1 in all
|
||||
other rows
|
||||
|
||||
(guid-2) 50/150
|
||||
(guid-3) 100/210
|
||||
(guid-6) 5/305
|
||||
|
||||
This gives us: [0.3333333333333333,
|
||||
0.47619047619047616,
|
||||
0.01639344262295082]
|
||||
|
||||
so the final result should be (5/150) / (50/150 + 100/210 + 5/305)
|
||||
|
||||
That gives a final expected weight for guid-1 to be: 0.403591682
|
||||
"""
|
||||
expected = 0.403591682
|
||||
actual = float(actual[1][1][:-11])
|
||||
assert expected == pytest.approx(actual, rel=1e-3)


@mock_s3
def test_rowsum_recommender(test_ctx, TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING):
    EXPECTED_RESULTS = RESULTS["row_sum"]
    install_mock_data(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx)

    recommender = GuidBasedRecommender(test_ctx)
    guid = "guid-2"

    actual = recommender.recommend({"guid": guid, "normalize": "row_sum"})
    assert 4 == len(actual)

    expected_val = 50 / 155
    actual_val = float(actual[0][1][:-11])
    assert expected_val == pytest.approx(actual_val, rel=1e-3)

    assert actual == EXPECTED_RESULTS


@mock_s3
def test_guidception(test_ctx, TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING):
    EXPECTED_RESULTS = RESULTS["guidception"]
    install_mock_data(TAARLITE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx)

    recommender = GuidBasedRecommender(test_ctx)
    guid = "guid-2"

    actual = recommender.recommend({"guid": guid, "normalize": "guidception"})
    assert actual == EXPECTED_RESULTS


@mock_s3
def test_rownorm_sum_tiebreak(
    test_ctx, TAARLITE_TIE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING
):
    EXPECTED_RESULTS = RESULTS["rownorm_sum_tiebreak"]
    install_mock_data(TAARLITE_TIE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx)

    recommender = GuidBasedRecommender(test_ctx)
    guid = "guid-2"

    actual = recommender.recommend({"guid": guid, "normalize": "rownorm_sum"})

    # Note that the results have equal weights; the tie is broken by
    # the install rate.
    assert actual == EXPECTED_RESULTS

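The tie-break asserted above can be pictured with a toy example (illustrative only; it assumes that ties in normalized weight are resolved by overall install counts, which is what the fixture data exercises):

```
# Toy tie-break sketch: sort by (weight, install_count), both descending.
candidates = [("guid-3", 0.25), ("guid-5", 0.25), ("guid-1", 0.5)]
installs = {"guid-1": 500, "guid-3": 100, "guid-5": 400}
ranked = sorted(candidates, key=lambda kv: (kv[1], installs[kv[0]]), reverse=True)
assert ranked == [("guid-1", 0.5), ("guid-5", 0.25), ("guid-3", 0.25)]
```
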
@mock_s3
def test_missing_rownorm_data_issue_31(
    test_ctx, TAARLITE_TIE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING
):
    install_mock_data(TAARLITE_TIE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx)
    recommender = GuidBasedRecommender(test_ctx)

    EXPECTED_RESULTS = RESULTS["rownorm_sum_tiebreak"]

    # Explicitly destroy the guid-4 key in the row_norm data
    del recommender._guid_maps["guid_row_norm"]["guid-4"]
    for i, row in enumerate(EXPECTED_RESULTS):
        if row[0] == "guid-4":
            del EXPECTED_RESULTS[i]
            break

    guid = "guid-2"

    actual = recommender.recommend({"guid": guid, "normalize": "rownorm_sum"})

    assert actual == EXPECTED_RESULTS


@mock_s3
def test_divide_by_zero_rownorm_data_issue_31(
    test_ctx, TAARLITE_TIE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING
):
    install_mock_data(TAARLITE_TIE_MOCK_DATA, TAARLITE_MOCK_GUID_RANKING, test_ctx)
    recommender = GuidBasedRecommender(test_ctx)

    EXPECTED_RESULTS = RESULTS["rownorm_sum_tiebreak"]

    # Explicitly set the guid-4 key in the row_norm data to a sum of
    # zero weights
    recommender._guid_maps["guid_row_norm"]["guid-4"] = [0, 0, 0]

    # Drop the guid-4 key from the expected results, as a sum of 0
    # would otherwise generate a divide-by-zero error
    for i, row in enumerate(EXPECTED_RESULTS):
        if row[0] == "guid-4":
            del EXPECTED_RESULTS[i]
            break

    guid = "guid-2"

    actual = recommender.recommend({"guid": guid, "normalize": "rownorm_sum"})

    assert actual == EXPECTED_RESULTS

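Both issue-31 regression tests pin down the same defensive contract: a guid whose row-normalization data is missing, or sums to zero, must be skipped rather than crash the recommendation. A hypothetical guard illustrating that behaviour (not the actual TAAR implementation):

```
# Hypothetical normalization guard matching the behaviour tested above.
def safe_row_norm(raw_weight, row_norms):
    total = sum(row_norms)
    if total == 0:
        return None  # the caller drops this candidate entirely
    return raw_weight / total


assert safe_row_norm(10, [0, 0, 0]) is None
assert safe_row_norm(10, [1, 4]) == 2.0
```
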
def test_user_agent_strings():
    """
    The UA string parser should only care about selecting the right
    platform for Firefox UA strings. Any non-Firefox browser should
    get all available addons.
    """
    ua_strings = {
        "windows": "Mozilla/5.0 (Windows NT x.y; rv:10.0) Gecko/20100101 Firefox/10.0",
        "macintosh": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0",
        "linux": "Mozilla/5.0 (X11; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0",
        "android": "Mozilla/5.0 (Android; Mobile; rv:40.0) Gecko/40.0 Firefox/40.0",
    }

    not_fx_ua_strings = [
        # Chrome
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",  # noqa
        # Microsoft Edge
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246",  # noqa
        # Safari
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A",  # noqa
    ]

    # These are valid Firefox UA strings
    for platform, ua in ua_strings.items():
        platform_id = parse_ua(ua)
        assert OSNAME_TO_ID[platform] == platform_id

    # These are non-Firefox UA strings - we expect None back
    for ua in not_fx_ua_strings:
        actual_name = parse_ua(ua)
        assert actual_name is None
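For intuition, the contract this test pins down could be satisfied by a mapping as simple as the sketch below (hypothetical code; the real parse_ua and OSNAME_TO_ID live in the TAAR web service):

```
# Hypothetical re-implementation of the tested contract, for illustration.
OSNAMES = ("windows", "macintosh", "linux", "android")


def parse_ua_sketch(ua_string):
    ua = ua_string.lower()
    if "firefox" not in ua:
        return None  # non-Firefox browsers get no platform filtering
    for osname in OSNAMES:
        if osname in ua:
            return osname  # the real code maps this through OSNAME_TO_ID
    return None
```
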


@ -20,6 +20,9 @@ import json
from moto import mock_s3
import boto3

from markus import TIMING
from markus.testing import MetricsMock


def install_no_curated_data(ctx):
    ctx = ctx.child()

@ -75,17 +78,21 @@ def test_curated_can_recommend(test_ctx):

@mock_s3
def test_curated_recommendations(test_ctx):
    with MetricsMock() as mm:
        ctx = install_mock_curated_data(test_ctx)
        r = CuratedRecommender(ctx)

        # CuratedRecommender will always recommend something no matter
        # what

        for LIMIT in range(1, 5):
            guid_list = r.recommend({"client_id": "000000"}, limit=LIMIT)
            # The curated recommendations should always return some kind
            # of recommendation
            assert len(guid_list) == LIMIT

        assert mm.has_record(TIMING, "taar.whitelist")
        assert mm.has_record(TIMING, "taar.hybrid_recommend")


@mock_s3

@ -27,7 +27,7 @@ def app():

    # Clobber the default recommendation manager with a MagicMock
    mock_recommender = MagicMock()
    PROXY_MANAGER.setTaarRM(mock_recommender)

    configure_plugin(flask_app)

@ -11,6 +11,8 @@ import json
from taar.recommenders import LocaleRecommender
from taar.recommenders.s3config import TAAR_LOCALE_KEY, TAAR_LOCALE_BUCKET

from markus import TIMING
from markus.testing import MetricsMock

FAKE_LOCALE_DATA = {
    "te-ST": [

@ -68,21 +70,25 @@ def test_recommendations(test_ctx):
    The JSON output for this recommender should be a list of 2-tuples
    of (GUID, weight).
    """
    with MetricsMock() as mm:
        ctx = install_mock_data(test_ctx)
        r = LocaleRecommender(ctx)
        recommendations = r.recommend({"locale": "en"}, 10)

        # Make sure the structure of the recommendations is correct and
        # that we recommended the right addon.
        assert isinstance(recommendations, list)
        assert len(recommendations) == len(FAKE_LOCALE_DATA["en"])

        # Make sure that the reported addons are the ones from the fake data.
        for (addon_id, weight), (expected_id, expected_weight) in zip(
            recommendations, FAKE_LOCALE_DATA["en"]
        ):
            assert addon_id == expected_id
            assert weight == expected_weight

        assert mm.has_record(TIMING, "taar.locale")
        assert mm.has_record(TIMING, "taar.locale_recommend")


@mock_s3

@ -20,6 +20,9 @@ from .test_hybrid_recommender import install_mock_curated_data
import operator
from functools import reduce

from markus import TIMING
from markus.testing import MetricsMock


class StubRecommender(AbstractRecommender):
    """ A shared, stub recommender that can be used for testing.

@ -50,18 +53,12 @@ def install_mocks(ctx, mock_fetcher=None):
    ctx["recommender_factory"] = MockRecommenderFactory()

    DATA = {
        "ensemble_weights": {"collaborative": 1000, "similarity": 100, "locale": 10,}
    }

    conn = boto3.resource("s3", region_name="us-west-2")
    conn.create_bucket(Bucket=TAAR_ENSEMBLE_BUCKET)
    conn.Object(TAAR_ENSEMBLE_BUCKET, TAAR_ENSEMBLE_KEY).put(Body=json.dumps(DATA))

    return ctx

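The ensemble_weights above scale each sub-recommender's suggestions before the merged list is ranked; the relative magnitudes (1000/100/10) make collaborative scores dominate. A toy illustration of that kind of weighted blend (not the actual EnsembleRecommender implementation):

```
# Toy weighted blend, illustrative only: integer scores keep the math exact.
weights = {"collaborative": 1000, "similarity": 100, "locale": 10}
suggestions = {
    "collaborative": [("abc", 1), ("bcd", 2)],
    "similarity": [("abc", 3)],
    "locale": [("def", 4)],
}
combined = {}
for name, pairs in suggestions.items():
    for guid, score in pairs:
        combined[guid] = combined.get(guid, 0) + weights[name] * score
ranked = sorted(combined.items(), key=lambda kv: kv[1], reverse=True)
assert ranked == [("bcd", 2000), ("abc", 1300), ("def", 40)]
```
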
@ -97,11 +94,15 @@ def test_simple_recommendation(test_ctx):
        ("efg", 21.0),
    ]

    with MetricsMock() as mm:
        manager = RecommendationManager(ctx.child())
        recommendation_list = manager.recommend("some_ignored_id", 10)

        assert isinstance(recommendation_list, list)
        assert recommendation_list == EXPECTED_RESULTS

        assert mm.has_record(TIMING, stat="taar.ensemble")
        assert mm.has_record(TIMING, stat="taar.profile_recommendation")


@mock_s3

@ -110,7 +111,7 @@ def test_fixed_client_id_valid(test_ctx):
    ctx = install_mock_curated_data(ctx)

    manager = RecommendationManager(ctx.child())
    recommendation_list = manager.recommend("111111", 10)

    assert len(recommendation_list) == 10

@ -137,14 +138,14 @@ def test_experimental_randomization(test_ctx):
    ctx = install_mock_curated_data(ctx)

    manager = RecommendationManager(ctx.child())
    raw_list = manager.recommend("111111", 10)

    # Clobber the experiment probability to be 100% to force a
    # reordering.
    ctx["TAAR_EXPERIMENT_PROB"] = 1.0

    manager = RecommendationManager(ctx.child())
    rand_list = manager.recommend("111111", 10)

    """
    The two lists should be:

@ -162,4 +163,5 @@ def test_experimental_randomization(test_ctx):
        )
        is False
    )

    assert len(rand_list) == len(raw_list)

@ -22,6 +22,9 @@ from taar.recommenders.similarity_recommender import (
from .similarity_data import CONTINUOUS_FEATURE_FIXTURE_DATA
from .similarity_data import CATEGORICAL_FEATURE_FIXTURE_DATA

from markus import TIMING
from markus.testing import MetricsMock

from taar.recommenders.s3config import (
    TAAR_SIMILARITY_BUCKET,
    TAAR_SIMILARITY_DONOR_KEY,

@ -75,11 +78,11 @@ def install_no_data(ctx):
    conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY).put(Body="")

    ctx["similarity_donors_pool"] = LazyJSONLoader(
        ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY, "similarity_donor",
    )

    ctx["similarity_lr_curves"] = LazyJSONLoader(
        ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY, "similarity_curves",
    )

    return ctx

@ -102,11 +105,11 @@ def install_categorical_data(ctx):
    )

    ctx["similarity_donors_pool"] = LazyJSONLoader(
        ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY, "similarity_donor",
    )

    ctx["similarity_lr_curves"] = LazyJSONLoader(
        ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY, "similarity_curves",
    )

    return ctx

@ -128,11 +131,11 @@ def install_continuous_data(ctx):
    conn.Object(TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY).put(Body=lrs_data)

    ctx["similarity_donors_pool"] = LazyJSONLoader(
        ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_DONOR_KEY, "similarity_donor",
    )

    ctx["similarity_lr_curves"] = LazyJSONLoader(
        ctx, TAAR_SIMILARITY_BUCKET, TAAR_SIMILARITY_LRCURVES_KEY, "similarity_curves",
    )

    return ctx

@ -187,20 +190,25 @@ def test_can_recommend(test_ctx, caplog):

@mock_s3
def test_recommendations(test_ctx):
    with MetricsMock() as mm:
        # Create a new instance of a SimilarityRecommender.
        ctx = install_continuous_data(test_ctx)
        r = SimilarityRecommender(ctx)

        recommendation_list = r.recommend(generate_a_fake_taar_client(), 1)

        assert isinstance(recommendation_list, list)
        assert len(recommendation_list) == 1

        recommendation, weight = recommendation_list[0]

        # Make sure that the reported addons are the expected ones from the most similar donor.
        assert "{test-guid-1}" == recommendation
        assert type(weight) == np.float64

        assert mm.has_record(TIMING, stat="taar.similarity_donor")
        assert mm.has_record(TIMING, stat="taar.similarity_curves")
        assert mm.has_record(TIMING, stat="taar.similarity_recommend")


@mock_s3