Merge pull request #9985 from diox/rescore-to-add-slop-to-match-phrase
Move match_phrase queries against description & summary to a rescore, add slop
Commit f2f98c7096
@@ -38,7 +38,7 @@ Flow of a search query through AMO

Let's assume we search on addons-frontend (not legacy). The search query hits the API and gets handled by ``AddonSearchView``, which directly queries ElasticSearch and doesn't involve the database at all.

-There are a few filters that are described in the :ref:`/api/v4/addons/search/ docs <addon-search>` but most of them are not very relevant for raw search queries. Examples are filters by guid, platform, category, add-on type or appversion (application version compatibility).
+There are a few filters that are described in the :ref:`/api/v4/addons/search/ docs <addon-search>` but most of them are not very relevant for raw search queries. Examples are filters by guid, platform, category, add-on type or appversion (application version compatibility). Those filters are applied using a ``filter`` clause and shouldn't affect scoring.

Much more relevant for raw add-on searches (and this is primarily what is used when you search on the frontend) is ``SearchQueryFilter``.

@@ -70,12 +70,19 @@ containing more text like description, summary and tags.

**Applied rules** (merged via ``should``):

-1. Look for phrase matches inside the summary (``boost=3.0``)
-2. Look for phrase matches inside the description (``boost=2.0``)
+1. Look for matches inside the summary (``boost=3.0``, ``operator=and``)
+2. Look for matches inside the description (``boost=2.0``, ``operator=and``)

If the language of the request matches a known language-specific analyzer, those matches are made using a ``multi_match`` query against ``summary`` or ``description`` and the corresponding ``{field}_l10n_{analyzer}`` field, similar to how exact name matches are performed above, in order to support potential translations.

+Rescoring rules
+---------------
+
+On top of the two sets of rules above, a ``rescore`` query is applied with a ``window_size`` of ``10``. In production we have 5 shards, so this re-adjusts the scores of only the top 50 results returned. The rules used for rescoring are the same as the secondary rules above, with just one difference: they use ``match_phrase`` instead of ``match``, with a slop of ``10``.

General query flow:
-------------------

@@ -83,4 +90,4 @@ General query flow:

2. Fetch locale specific analyzer (`List of analyzers <https://github.com/mozilla/addons-server/blob/master/src/olympia/constants/search.py#L15-L61>`_)
3. Merge primary and secondary *should* rules
4. Create a ``function_score`` query that uses a ``field_value_factor`` function on the ``boost`` field that we set when indexing
-5. Add a specific query-time boost for webextension add-ons
+5. Add the ``rescore`` query to the mix
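
For illustration only, here is a rough sketch of what the resulting search body looks like for an English request searching for "tea pot" (based on the tests in this commit, not the literal request we send)::

    {
        "query": {
            "function_score": {
                "query": {"bool": {"should": ["<primary rules>", "<secondary rules>"]}},
                "functions": [{"field_value_factor": {"field": "boost"}}]
            }
        },
        "rescore": {
            "window_size": 10,
            "query": {
                "rescore_query": {
                    "bool": {
                        "should": [
                            {"multi_match": {
                                "query": "tea pot",
                                "type": "phrase",
                                "slop": 10,
                                "fields": ["summary", "summary_l10n_english"],
                                "boost": 3.0
                            }},
                            {"multi_match": {
                                "query": "tea pot",
                                "type": "phrase",
                                "slop": 10,
                                "fields": ["description", "description_l10n_english"],
                                "boost": 2.0
                            }}
                        ]
                    }
                }
            }
        }
    }

The ``rescore`` runs the more expensive ``match_phrase`` clauses only against the top ``window_size`` hits of each shard, so the bulk of the index is never evaluated against them.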
@@ -365,6 +365,7 @@ class TestAddonIndexer(TestCase):
            'fr': '',  # Empty description should be ignored in extract.
            'it': '<script>alert(42)</script>',
        }
        self.addon.summary_id = None
        self.addon.name = translations_name
        self.addon.description = translations_description
        self.addon.save()

@@ -380,12 +381,19 @@ class TestAddonIndexer(TestCase):
        ])
        assert extracted['name_l10n_english'] == [translations_name['en-US']]
        assert extracted['name_l10n_spanish'] == [translations_name['es']]
        assert extracted['name_l10n_italian'] == []
        assert (extracted['description_l10n_english'] ==
                [translations_description['en-US']])
        assert (extracted['description_l10n_spanish'] ==
                [translations_description['es']])
        assert extracted['description_l10n_french'] == []
        assert (extracted['description_l10n_italian'] ==
                ['<script>alert(42)</script>'])
        assert extracted['summary_l10n_english'] == []
        # The non-l10n fields are fallbacks in the addon's default locale, they
        # need to always contain a string.
        assert extracted['name'] == u'Name in ënglish'
        assert extracted['summary'] == ''

    def test_extract_translations_engb_default(self):
        """Make sure we do correctly extract things for en-GB default locale"""
@@ -124,7 +124,7 @@ class BaseSearchIndexer(object):
    def extract_field_api_translations(cls, obj, field, db_field=None):
        """
        Returns a dict containing translations that we need to store for
-        the API.
+        the API. Empty translations are skipped entirely.
        """
        if db_field is None:
            db_field = '%s_id' % field

@@ -141,23 +141,26 @@ class BaseSearchIndexer(object):
    @classmethod
    def extract_field_search_translation(cls, obj, field, default_locale):
        """
-        Returns the translation for this field in the object's default locale.
+        Returns the translation for this field in the object's default locale,
+        in the form of a dict with one entry (the field being the key and the
+        translation being the value, or an empty string if none was found).

        That field will be analyzed and indexed by ES *without*
        language-specific analyzers.
        """
        translations = dict(obj.translations[getattr(obj, '%s_id' % field)])
        default_locale = default_locale.lower() if default_locale else None
+        value = translations.get(default_locale, getattr(obj, field))

        return {
-            field: unicode(
-                translations.get(default_locale, getattr(obj, field)))
+            field: unicode(value) if value else ''
        }

    @classmethod
    def extract_field_analyzed_translations(cls, obj, field, db_field=None):
        """
        Returns a dict containing translations for each language-specific
-        analyzer for the given field.
+        analyzer for the given field. Empty translations are skipped entirely.
        """
        if db_field is None:
            db_field = '%s_id' % field

@@ -170,6 +173,6 @@ class BaseSearchIndexer(object):
            extend_with_me['%s_l10n_%s' % (field, analyzer)] = list(
                set(unicode(string) for locale, string
                    in obj.translations[getattr(obj, db_field)]
-                    if locale.lower() in languages))
+                    if locale.lower() in languages and string))

        return extend_with_me
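
To make the change concrete, here is a small sketch (my illustration, with made-up translation strings; the behaviour mirrors the test_indexers assertions above) of what the extraction now produces when one translation is empty:

    # Input translations for a field whose default locale is en-US.
    translations = {
        'en-US': 'Description in english',
        'es': 'Descripcion en espanol',
        'fr': '',  # empty string
    }

    # Roughly what extract_field_analyzed_translations() yields for
    # field='description': the empty French value is dropped by the new
    # "and string" condition instead of being indexed as ''.
    extracted = {
        'description_l10n_english': ['Description in english'],
        'description_l10n_spanish': ['Descripcion en espanol'],
        'description_l10n_french': [],
    }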
@@ -637,8 +637,9 @@ def addon_factory(
        'created': when,
        'last_updated': when,
    }
-    if type_ != amo.ADDON_PERSONA:
-        # Personas don't have a summary.
+    if type_ != amo.ADDON_PERSONA and 'summary' not in kw:
+        # Assign a dummy summary if none was specified in keyword args, unless
+        # we're creating a Persona since they don't have summaries.
        kwargs['summary'] = u'Summary for %s' % name
    if type_ not in [amo.ADDON_PERSONA, amo.ADDON_SEARCH]:
        # Personas and search engines don't need guids
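
For context, the ranking tests further down in this diff rely on this change: explicitly passing a summary keyword (even None) now wins over the auto-generated dummy summary. Roughly:

    # Illustration only (not part of the diff): creates an extension whose
    # indexed document has no summary at all, instead of the
    # 'Summary for GrApple Yummy' placeholder addon_factory used to add.
    addon_factory(
        name='GrApple Yummy', type=amo.ADDON_EXTENSION,
        average_daily_users=1, weekly_downloads=1, summary=None)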
@@ -427,7 +427,8 @@ class SearchQueryFilter(BaseFilterBackend):

        return should

-    def secondary_should_rules(self, search_query, analyzer):
+    def secondary_should_rules(
+            self, search_query, analyzer, rescore_mode=False):
        """Return "secondary" should rules for the query.

        These are the ones using the weakest boosts, they are applied to fields

@@ -435,51 +436,97 @@ class SearchQueryFilter(BaseFilterBackend):

        Applied rules:

-        * Look for phrase matches inside the summary (boost=3.0)
-        * Look for phrase matches inside the description (boost=2.0).
+        * Look for matches inside the summary (boost=3.0)
+        * Look for matches inside the description (boost=2.0).

        If we're using a supported language, both rules are done through a
        multi_match that considers both the default locale translation
        (using snowball analyzer) and the translation in the current language
        (using language-specific analyzer). If we're not using a supported
        language then only the first part is applied.

+        If rescore_mode is True, the matches applied are match_phrase queries
+        with a slop of 10 instead of regular matches. As those are more
+        expensive they are only done in the 'rescore' part of the query.
        """
+        if rescore_mode is False:
+            query_class = query.Match
+            query_kwargs = {
+                'operator': 'and',
+            }
+            query_class_name = 'Match'
+            multi_match_kwargs = {
+                'operator': 'and',
+            }
+        else:
+            query_class = query.MatchPhrase
+            query_kwargs = {
+                'slop': 10,
+            }
+            query_class_name = 'MatchPhrase'
+            multi_match_kwargs = {
+                'slop': 10,
+                'type': 'phrase',
+            }
+
        if analyzer:
            summary_query_name = (
-                'MultiMatch(MatchPhrase(summary),'
-                'MatchPhrase(summary_l10n_%s))' % analyzer)
+                'MultiMatch(%s(summary),%s(summary_l10n_%s))' % (
+                    query_class_name, query_class_name, analyzer))
            description_query_name = (
-                'MultiMatch(MatchPhrase(description),'
-                'MatchPhrase(description_l10n_%s))' % analyzer)
+                'MultiMatch(%s(description),%s(description_l10n_%s))' % (
+                    query_class_name, query_class_name, analyzer))
            should = [
+                # When *not* doing a rescore, we do regular non-phrase matches
+                # with 'operator': 'and' (see query_class/multi_match_kwargs
+                # above). This may seem wrong, the ES docs warn against this,
+                # but this is exactly what we want here: we want all terms
+                # to be present in either of the fields individually, not some
+                # in one and some in another.
                query.MultiMatch(
                    _name=summary_query_name,
                    query=search_query,
-                    type='phrase',
                    fields=['summary', 'summary_l10n_%s' % analyzer],
                    boost=3.0,
+                    **multi_match_kwargs
                ),
                query.MultiMatch(
                    _name=description_query_name,
                    query=search_query,
-                    type='phrase',
                    fields=['description', 'description_l10n_%s' % analyzer],
                    boost=2.0,
+                    **multi_match_kwargs
                ),
            ]
        else:
            should = [
-                query.MatchPhrase(summary={
-                    '_name': 'MatchPhrase(summary)',
-                    'query': search_query, 'boost': 3.0}),
-                query.MatchPhrase(description={
-                    '_name': 'MatchPhrase(description)',
-                    'query': search_query, 'boost': 2.0}),
+                query_class(
+                    summary=dict(
+                        _name='%s(summary)' % query_class_name,
+                        query=search_query,
+                        boost=3.0,
+                        **query_kwargs)),
+                query_class(
+                    description=dict(
+                        _name='%s(description)' % query_class_name,
+                        query=search_query,
+                        boost=2.0,
+                        **query_kwargs)),
            ]

        return should

-    def apply_search_query(self, search_query, qs):
+    def rescore_rules(self, search_query, analyzer):
+        """
+        Rules for the rescore part of the query. Currently just a more
+        expensive version of secondary_should_rules(), doing match_phrase
+        with a slop against summary & description, including translated
+        variants if possible.
+        """
+        return self.secondary_should_rules(
+            search_query, analyzer, rescore_mode=True)
+
+    def apply_search_query(self, search_query, qs, sort=None):
        lang = translation.get_language()
        analyzer = get_locale_analyzer(lang)

@@ -494,14 +541,25 @@ class SearchQueryFilter(BaseFilterBackend):
            query.SF('field_value_factor', field='boost'),
        ]

-        # Assemble everything together and return the search "queryset".
-        return qs.query(
+        # Assemble everything together
+        qs = qs.query(
            'function_score',
            query=query.Bool(should=primary_should + secondary_should),
            functions=functions)

+        if sort is None or sort == 'relevance':
+            # If we are searching by relevancy, rescore the top 10
+            # (window_size below) results per shard with more expensive rules
+            # using match_phrase + slop.
+            rescore_query = self.rescore_rules(search_query, analyzer)
+            qs = qs.extra(rescore={'window_size': 10, 'query': {
+                'rescore_query': query.Bool(should=rescore_query).to_dict()}})
+
+        return qs

    def filter_queryset(self, request, qs, view):
        search_query = request.GET.get('q', '').lower()
+        sort_param = request.GET.get('sort')

        if not search_query:
            return qs

@@ -510,7 +568,7 @@ class SearchQueryFilter(BaseFilterBackend):
            raise serializers.ValidationError(
                ugettext('Maximum query length exceeded.'))

-        return self.apply_search_query(search_query, qs)
+        return self.apply_search_query(search_query, qs, sort_param)


class SearchParameterFilter(BaseFilterBackend):
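
To make the two modes of secondary_should_rules() concrete, here is a rough sketch (my illustration, not part of the diff) of what the no-analyzer summary clause serializes to for a 'tea pot' query:

    # rescore_mode=False: a plain match with operator='and', so every term has
    # to be present in the field for it to contribute.
    {'match': {'summary': {
        '_name': 'Match(summary)', 'query': 'tea pot',
        'operator': 'and', 'boost': 3.0}}}

    # rescore_mode=True: a match_phrase with slop=10, which also rewards term
    # proximity/order but tolerates up to 10 positions of slack. It is only run
    # through the rescore, against the top window_size hits per shard.
    {'match_phrase': {'summary': {
        '_name': 'MatchPhrase(summary)', 'query': 'tea pot',
        'slop': 10, 'boost': 3.0}}}

The slop is what lets a query like "Youtube html5 Player" still phrase-match a description that spreads those terms out, while keeping the expensive phrase matching off the main query.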
@@ -38,8 +38,7 @@ class TestQueryFilter(FilterTestsBase):

    filter_classes = [SearchQueryFilter]

-    def _test_q(self):
-        qs = self._filter(data={'q': 'tea pot'})
+    def _test_q(self, qs):
        # Spot check a few queries.
        should = qs['query']['function_score']['query']['bool']['should']

@@ -78,25 +77,81 @@ class TestQueryFilter(FilterTestsBase):
        expected = {
            'multi_match': {
                '_name': (
-                    'MultiMatch(MatchPhrase(summary),'
-                    'MatchPhrase(summary_l10n_english))'),
+                    'MultiMatch(Match(summary),Match(summary_l10n_english))'),
                'query': 'tea pot',
-                'type': 'phrase',
+                'operator': 'and',
                'fields': ['summary', 'summary_l10n_english'],
                'boost': 3.0,
            }
        }
        assert expected in should

        expected = {
            'multi_match': {
                '_name': (
+                    'MultiMatch(Match(description),'
+                    'Match(description_l10n_english))'),
                'query': 'tea pot',
+                'operator': 'and',
                'fields': ['description', 'description_l10n_english'],
                'boost': 2.0,
            }
        }
        assert expected in should

+        functions = qs['query']['function_score']['functions']
+        assert functions[0] == {'field_value_factor': {'field': 'boost'}}
        return qs

+    def test_no_rescore_if_not_sorting_by_relevance(self):
+        qs = self._test_q(
+            self._filter(data={'q': 'tea pot', 'sort': 'rating'}))
+        assert 'rescore' not in qs

    def test_q(self):
-        qs = self._test_q()
+        qs = self._test_q(self._filter(data={'q': 'tea pot'}))
        functions = qs['query']['function_score']['functions']
        assert len(functions) == 1

+        expected_rescore = {
+            'bool': {
+                'should': [
+                    {
+                        'multi_match': {
+                            '_name': (
+                                'MultiMatch(MatchPhrase(summary),'
+                                'MatchPhrase(summary_l10n_english))'),
+                            'query': 'tea pot',
+                            'slop': 10,
+                            'type': 'phrase',
+                            'fields': ['summary', 'summary_l10n_english'],
+                            'boost': 3.0,
+                        },
+                    },
+                    {
+                        'multi_match': {
+                            '_name': (
+                                'MultiMatch(MatchPhrase(description),'
+                                'MatchPhrase(description_l10n_english))'),
+                            'query': 'tea pot',
+                            'slop': 10,
+                            'type': 'phrase',
+                            'fields': ['description',
+                                       'description_l10n_english'],
+                            'boost': 2.0,
+                        },
+                    }
+                ]
+            }
+        }
+
+        assert qs['rescore'] == {
+            'window_size': 10,
+            'query': {
+                'rescore_query': expected_rescore
+            }
+        }

    def test_q_too_long(self):
        with self.assertRaises(serializers.ValidationError):
            self._filter(data={'q': 'a' * 101})
@@ -70,7 +70,7 @@ class TestRankingScenarios(ESTestCase):
            # sense though!)
            # if found_score != expected_score:
            #     filename = 'src/olympia/search/tests/test_search_ranking.py'
-            #     with open('sed_me.sh', 'a+') as f:
+            #     with open('/code/tmp/sed_me.sh', 'a+') as f:
            #         f.write('sed -i s/%s/%s/ %s\n' % (
            #             expected_score, found_score, filename))
            #     continue

@@ -430,32 +430,32 @@ class TestRankingScenarios(ESTestCase):
            weekly_downloads=1123)
        amo.tests.addon_factory(
            name='GrApple Yummy', type=amo.ADDON_EXTENSION,
-            average_daily_users=1, weekly_downloads=1)
+            average_daily_users=1, weekly_downloads=1, summary=None)
        amo.tests.addon_factory(
            name='Delicious Bookmarks', type=amo.ADDON_EXTENSION,
-            average_daily_users=1, weekly_downloads=1)
+            average_daily_users=1, weekly_downloads=1, summary=None)

        # Some more or less Dummy data to test a few very specific scenarios
        # e.g for exact name matching
        amo.tests.addon_factory(
            name='Merge Windows', type=amo.ADDON_EXTENSION,
-            average_daily_users=1, weekly_downloads=1),
+            average_daily_users=1, weekly_downloads=1, summary=None)
        amo.tests.addon_factory(
            name='Merge All Windows', type=amo.ADDON_EXTENSION,
-            average_daily_users=1, weekly_downloads=1),
+            average_daily_users=1, weekly_downloads=1, summary=None)
        amo.tests.addon_factory(
            name='All Downloader Professional', type=amo.ADDON_EXTENSION,
-            average_daily_users=1, weekly_downloads=1),
+            average_daily_users=1, weekly_downloads=1, summary=None)

        amo.tests.addon_factory(
            name='test addon test11', type=amo.ADDON_EXTENSION,
-            average_daily_users=1, weekly_downloads=1),
+            average_daily_users=1, weekly_downloads=1, summary=None)
        amo.tests.addon_factory(
            name='test addon test21', type=amo.ADDON_EXTENSION,
-            average_daily_users=1, weekly_downloads=1),
+            average_daily_users=1, weekly_downloads=1, summary=None)
        amo.tests.addon_factory(
            name='test addon test31', type=amo.ADDON_EXTENSION,
-            average_daily_users=1, weekly_downloads=1),
+            average_daily_users=1, weekly_downloads=1, summary=None)

        names = {
            'fr': 'Foobar unique francais',

@@ -464,134 +464,143 @@ class TestRankingScenarios(ESTestCase):
        amo.tests.addon_factory(
            name=names, type=amo.ADDON_EXTENSION,
            default_locale='fr', slug='test-addon-test-special',
-            average_daily_users=1, weekly_downloads=1)
+            average_daily_users=1, weekly_downloads=1,
+            summary=None)

        amo.tests.addon_factory(
            name='1-Click YouTube Video Download',
            type=amo.ADDON_EXTENSION,
            average_daily_users=566337, weekly_downloads=150000,
+            summary=None,
            description=(
                'button, click that button, 1-Click Youtube Video '
                'Downloader is a click click great tool')),
        amo.tests.addon_factory(
            name='Amazon 1-Click Lock', type=amo.ADDON_EXTENSION,
-            average_daily_users=50, weekly_downloads=1),
+            average_daily_users=50, weekly_downloads=1, summary=None)

        cls.refresh()

    def test_scenario_tab_center_redux(self):
        self._check_scenario('tab center redux', (
-            ['Tab Center Redux', 69.21394],
-            ['Tab Mix Plus', 0.06495905],
-            ['Redux DevTools', 0.04430029],
+            ['Tab Center Redux', 69.5371],
+            ['Tab Mix Plus', 0.06526235],
+            ['Redux DevTools', 0.044507127],
        ))

    def test_scenario_open_image_new_tab(self):
        self._check_scenario('Open Image in New Tab', (
-            ['Open Image in New Tab', 24.277237],
-            ['Open image in a new tab', 5.6800475],
+            ['Open Image in New Tab', 34.543846],
+            ['Open image in a new tab', 10.555013],
        ))

    def test_scenario_coinhive(self):
        # TODO, should match "CoinBlock". Check word delimiting analysis maybe?
        self._check_scenario('CoinHive', (
-            ['Coinhive Blocker', 3.891288],
-            ['NoMiners', 0.017959397],  # via description
+            ['Coinhive Blocker', 4.6710896],
+            ['NoMiners', 0.32348818],  # via description
            # ['CoinBlock', 0],  # via prefix search
        ))

    def test_scenario_privacy(self):
        self._check_scenario('Privacy', (
-            ['Privacy Badger', 8.7432165],
-            ['Privacy Settings', 4.559415],
-            ['Google Privacy', 4.350088],  # More users, summary
-            ['Privacy Pass', 3.2087922],
-            ['Ghostery', 0.09441561],  # Crazy amount of users, summary
-            # summary + a lot of users, but not as many as ghostery
-            ['Blur', 0.0776396],
+            ['Privacy Badger', 9.063842],
+            ['Privacy Settings', 4.917904],
+            ['Google Privacy', 4.638222],  # More users, summary
+            ['Privacy Pass', 3.2094479],
+            ['Blur', 0.62396055],
+            ['Ghostery', 0.47795504],
        ))

    def test_scenario_firebu(self):
        self._check_scenario('firebu', (
-            ['Firebug', 4.117813],
-            ['Firefinder for Firebug', 1.0876373],
-            ['Firebug Autocompleter', 1.0655142],
-            ['Fire Drag', 0.6470381],
+            ['Firebug', 4.118729],
+            ['Firefinder for Firebug', 1.0878792],
+            ['Firebug Autocompleter', 1.0657512],
+            ['Fire Drag', 0.64718205],
        ))

    def test_scenario_fireb(self):
        self._check_scenario('fireb', (
-            ['Firebug', 4.117813],
-            ['Firefinder for Firebug', 1.0876373],
-            ['Firebug Autocompleter', 1.0655142],
-            ['Fire Drag', 0.6470381],
+            ['Firebug', 4.118729],
+            ['Firefinder for Firebug', 1.0878792],
+            ['Firebug Autocompleter', 1.0657512],
+            ['Fire Drag', 0.64718205],
        ))

    def test_scenario_menu_wizzard(self):
        self._check_scenario('Menu Wizzard', (
-            ['Menu Wizard', 0.10683497],  # (fuzzy, typo)
+            ['Menu Wizard', 0.10698298],  # (fuzzy, typo)
            # partial match + users
-            ['Add-ons Manager Context Menu', 0.0791911],
+            ['Add-ons Manager Context Menu', 0.07930083],
        ))

    def test_scenario_frame_demolition(self):
        self._check_scenario('Frame Demolition', (
-            ['Frame Demolition', 20.48827],
+            ['Frame Demolition', 20.534973],
        ))

    def test_scenario_demolition(self):
        # Find "Frame Demolition" via a typo
        self._check_scenario('Demolation', (
-            ['Frame Demolition', 0.057878494],
+            ['Frame Demolition', 0.057891317],
        ))

    def test_scenario_restyle(self):
        self._check_scenario('reStyle', (
-            ['reStyle', 26.352535],
+            ['reStyle', 26.360489],
        ))

    def test_scenario_megaupload_downloadhelper(self):
        # Doesn't find "RapidShare DownloadHelper" anymore
        # since we now query by "MegaUpload AND DownloadHelper"
        self._check_scenario('MegaUpload DownloadHelper', (
-            ['MegaUpload DownloadHelper', 42.920856],
+            ['MegaUpload DownloadHelper', 42.995304],
        ))

    def test_scenario_downloadhelper(self):
        # No direct match, "Download Flash and Video" has
        # huge amount of users that puts it first here
        self._check_scenario('DownloadHelper', (
-            ['RapidShare DownloadHelper', 3.1008768],
-            ['MegaUpload DownloadHelper', 1.7232388],
-            ['Download Flash and Video', 1.5117542],
-            ['1-Click YouTube Video Download', 1.141354],
+            ['RapidShare DownloadHelper', 3.1015685],
+            ['MegaUpload DownloadHelper', 1.7236232],
+            ['Download Flash and Video', 1.5120913],
+            ['1-Click YouTube Video Download', 1.1416086],
        ))

    def test_scenario_megaupload(self):
        self._check_scenario('MegaUpload', (
-            ['MegaUpload DownloadHelper', 3.269901],
-            ['Popup Blocker', 1.4292603],
+            ['MegaUpload DownloadHelper', 3.8486633],
+            ['Popup Blocker', 1.4295679],
        ))

    def test_scenario_no_flash(self):
        self._check_scenario('No Flash', (
-            ['No Flash', 46.64575],
-            ['Download Flash and Video', 4.4118795],
-            ['YouTube Flash Player', 3.5120416],
-            ['YouTube Flash Video Player', 3.202704],
+            ['No Flash', 47.049717],
+            ['Download Flash and Video', 4.7860165],
+            ['YouTube Flash Player', 3.7756333],
+            ['YouTube Flash Video Player', 3.627189],
        ))

        # Case should not matter.
        self._check_scenario('no flash', (
-            ['No Flash', 46.64575],
-            ['Download Flash and Video', 4.4118795],
-            ['YouTube Flash Player', 3.5120416],
-            ['YouTube Flash Video Player', 3.202704],
+            ['No Flash', 47.049717],
+            ['Download Flash and Video', 4.7860165],
+            ['YouTube Flash Player', 3.7756333],
+            ['YouTube Flash Video Player', 3.627189],
        ))

+    def test_scenario_youtube_html5_player(self):
+        # Both are found thanks to their descriptions (matches each
+        # individual term, then they get rescored with a match_phrase w/
+        # slop).
+        self._check_scenario('Youtube html5 Player', (
+            ['YouTube Flash Player', 0.41867542],
+            ['No Flash', 0.068471745],
+        ))

    def test_scenario_disable_hello_pocket_reader_plus(self):
        self._check_scenario('Disable Hello, Pocket & Reader+', (
-            ['Disable Hello, Pocket & Reader+', 59.37624],  # yeay!
+            ['Disable Hello, Pocket & Reader+', 59.869083],  # yeay!
        ))

    def test_scenario_grapple(self):

@@ -600,7 +609,7 @@ class TestRankingScenarios(ESTestCase):
        see `legacy_api.SearchTest` for various examples.
        """
        self._check_scenario('grapple', (
-            ['GrApple Yummy', 0.97180986],
+            ['GrApple Yummy', 0.7218929],
        ))

    def test_scenario_delicious(self):

@@ -609,36 +618,36 @@ class TestRankingScenarios(ESTestCase):
        see `legacy_api.SearchTest` for various examples.
        """
        self._check_scenario('delicious', (
-            ['Delicious Bookmarks', 1.1302524],
+            ['Delicious Bookmarks', 0.85394204],
        ))

    def test_score_boost_name_match(self):
        # Tests that we match directly "Merge Windows" and also find
        # "Merge All Windows" because of slop=1
        self._check_scenario('merge windows', (
-            ['Merge Windows', 12.554659],
-            ['Merge All Windows', 1.7936656],
+            ['Merge Windows', 10.207693],
+            ['Merge All Windows', 1.7960566],
        ), no_match=(
            'All Downloader Professional',
        ))

        self._check_scenario('merge all windows', (
-            ['Merge All Windows', 14.103567],
-            ['Merge Windows', 0.042702418],
-            ['All Downloader Professional', 0.0070751677],
+            ['Merge All Windows', 11.195793],
+            ['Merge Windows', 0.04285209],
+            ['All Downloader Professional', 0.0070999665],
        ))

    def test_score_boost_exact_match(self):
        """Test that we rank exact matches at the top."""
        self._check_scenario('test addon test21', (
-            ['test addon test21', 14.293872],
+            ['test addon test21', 11.341756],
        ))

    def test_score_boost_exact_match_description_hijack(self):
        """Test that we rank exact matches at the top."""
        self._check_scenario('Amazon 1-Click Lock', (
-            ['Amazon 1-Click Lock', 34.255657],
-            ['1-Click YouTube Video Download', 0.22216046],
+            ['Amazon 1-Click Lock', 26.223818],
+            ['1-Click YouTube Video Download', 0.22368877],
        ))

    def test_score_boost_exact_match_in_right_language(self):

@@ -646,13 +655,13 @@ class TestRankingScenarios(ESTestCase):
        # First in english. Straightforward: it should be an exact match, the
        # translation exists.
        self._check_scenario(u'foobar unique english', (
-            [u'Foobar unique english', 4.675893],
+            [u'Foobar unique english', 2.9022827],
        ), lang='en-US')

        # Then check in french. Also straightforward: it should be an exact
        # match, the translation exists, it's even the default locale.
        self._check_scenario(u'foobar unique francais', (
-            [u'Foobar unique francais', 14.146512],
+            [u'Foobar unique francais', 10.837958],
        ), lang='fr')

        # Check with a language that we don't have a translation for (mn), and

@@ -663,7 +672,7 @@ class TestRankingScenarios(ESTestCase):
        assert 'mn' not in SEARCH_LANGUAGE_TO_ANALYZER
        assert 'mn' in settings.LANGUAGES
        self._check_scenario(u'foobar unique francais', (
-            [u'Foobar unique francais', 12.54679],
+            [u'Foobar unique francais', 9.005518],
        ), lang='mn', expected_lang='fr')

        # Check with a language that we don't have a translation for (ca), and

@@ -674,12 +683,12 @@ class TestRankingScenarios(ESTestCase):
        assert 'ca' in SEARCH_LANGUAGE_TO_ANALYZER
        assert 'ca' in settings.LANGUAGES
        self._check_scenario(u'foobar unique francais', (
-            [u'Foobar unique francais', 11.289922],
+            [u'Foobar unique francais', 8.107916],
        ), lang='ca', expected_lang='fr')

        # Check with a language that we do have a translation for (en-US), but
        # we're requesting the string that matches the default locale (fr).
        # Note that the name returned follows the language requested.
        self._check_scenario(u'foobar unique francais', (
-            [u'Foobar unique english', 9.786429],
+            [u'Foobar unique english', 7.0274434],
        ), lang='en-US')