Merge pull request #9985 from diox/rescore-to-add-slop-to-match-phrase

Move match_phrase queries against description & summary to a rescore, add slop
This commit is contained in:
Mathieu Pillard 2018-11-15 16:03:56 +01:00 коммит произвёл GitHub
Родитель 2fd365ec53 3a679954b5
Коммит f2f98c7096
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
7 изменённых файлов: 247 добавлений и 106 удалений

Просмотреть файл

@ -38,7 +38,7 @@ Flow of a search query through AMO
Let's assume we search on addons-frontend (not legacy) the search query hits the API and gets handled by ``AddonSearchView``, which directly queries ElasticSearch and doesn't involve the database at all.
There are a few filters that are described in the :ref:`/api/v4/addons/search/ docs <addon-search>` but most of them are not very relevant for raw search queries. Examples are filters by guid, platform, category, add-on type or appversion (application version compatibility).
There are a few filters that are described in the :ref:`/api/v4/addons/search/ docs <addon-search>` but most of them are not very relevant for raw search queries. Examples are filters by guid, platform, category, add-on type or appversion (application version compatibility). Those filters are applied using a ``filter`` clause and shouldn't affect scoring.
Much more relevant for raw add-on searches (and this is primarily used when you use the search on the frontend) is ``SearchQueryFilter``.
@ -70,12 +70,19 @@ containing more text like description, summary and tags.
**Applied rules** (merged via ``should``):
1. Look for phrase matches inside the summary (``boost=3.0``)
2. Look for phrase matches inside the description (``boost=2.0``)
1. Look for matches inside the summary (``boost=3.0``, ``operator=and``)
2. Look for matches inside the description (``boost=2.0``, ``operator=and``)
If the language of the request matches a known language-specific analyzer, those are made using a ``multi_match`` query using ``summary`` or ``description`` and the corresponding ``{field}_l10n_{analyzer}``, similar to how exact name matches are performed above, in order to support potential translations.
Rescoring rules
---------------
On top of the two sets of rules above, a ``rescore`` query is applied with a ``window_size`` of ``10`` when sorting by relevance. The window applies per shard and we have 5 shards in production, so
this re-adjusts the score of at most the top 50 results. The rules used for rescoring are the same as the secondary rules above, with one difference: they use ``match_phrase`` instead of ``match``, with a slop of ``10``.
General query flow:
-------------------
@ -83,4 +90,4 @@ General query flow:
2. Fetch locale specific analyzer (`List of analyzers <https://github.com/mozilla/addons-server/blob/master/src/olympia/constants/search.py#L15-L61>`_)
3. Merge primary and secondary *should* rules
4. Create a ``function_score`` query that uses a ``field_value_factor`` function on ``boost`` field that we set when indexing
5. Add a specific query-time boost for webextension add-ons
5. Add the ``rescore`` query to the mix

Просмотреть файл

@ -365,6 +365,7 @@ class TestAddonIndexer(TestCase):
'fr': '', # Empty description should be ignored in extract.
'it': '<script>alert(42)</script>',
}
self.addon.summary_id = None
self.addon.name = translations_name
self.addon.description = translations_description
self.addon.save()
@ -380,12 +381,19 @@ class TestAddonIndexer(TestCase):
])
assert extracted['name_l10n_english'] == [translations_name['en-US']]
assert extracted['name_l10n_spanish'] == [translations_name['es']]
assert extracted['name_l10n_italian'] == []
assert (extracted['description_l10n_english'] ==
[translations_description['en-US']])
assert (extracted['description_l10n_spanish'] ==
[translations_description['es']])
assert extracted['description_l10n_french'] == []
assert (extracted['description_l10n_italian'] ==
['&lt;script&gt;alert(42)&lt;/script&gt;'])
assert extracted['summary_l10n_english'] == []
# The non-l10n fields are fallbacks in the addon's default locale, they
# need to always contain a string.
assert extracted['name'] == u'Name in ënglish'
assert extracted['summary'] == ''
def test_extract_translations_engb_default(self):
"""Make sure we do correctly extract things for en-GB default locale"""

Просмотреть файл

@ -124,7 +124,7 @@ class BaseSearchIndexer(object):
def extract_field_api_translations(cls, obj, field, db_field=None):
"""
Returns a dict containing translations that we need to store for
the API.
the API. Empty translations are skipped entirely.
"""
if db_field is None:
db_field = '%s_id' % field
@ -141,23 +141,26 @@ class BaseSearchIndexer(object):
@classmethod
def extract_field_search_translation(cls, obj, field, default_locale):
"""
Returns the translation for this field in the object's default locale.
Returns the translation for this field in the object's default locale,
in the form of a dict with one entry (the field being the key and the
translation being the value, or an empty string if none was found).
That field will be analyzed and indexed by ES *without*
language-specific analyzers.
"""
translations = dict(obj.translations[getattr(obj, '%s_id' % field)])
default_locale = default_locale.lower() if default_locale else None
value = translations.get(default_locale, getattr(obj, field))
return {
field: unicode(
translations.get(default_locale, getattr(obj, field)))
field: unicode(value) if value else ''
}
@classmethod
def extract_field_analyzed_translations(cls, obj, field, db_field=None):
"""
Returns a dict containing translations for each language-specific
analyzer for the given field.
analyzer for the given field. Empty translations are skipped entirely.
"""
if db_field is None:
db_field = '%s_id' % field
@ -170,6 +173,6 @@ class BaseSearchIndexer(object):
extend_with_me['%s_l10n_%s' % (field, analyzer)] = list(
set(unicode(string) for locale, string
in obj.translations[getattr(obj, db_field)]
if locale.lower() in languages))
if locale.lower() in languages and string))
return extend_with_me

Просмотреть файл

@ -637,8 +637,9 @@ def addon_factory(
'created': when,
'last_updated': when,
}
if type_ != amo.ADDON_PERSONA:
# Personas don't have a summary.
if type_ != amo.ADDON_PERSONA and 'summary' not in kw:
# Assign a dummy summary if none was specified in keyword args, unless
# we're creating a Persona since they don't have summaries.
kwargs['summary'] = u'Summary for %s' % name
if type_ not in [amo.ADDON_PERSONA, amo.ADDON_SEARCH]:
# Personas and search engines don't need guids

Просмотреть файл

@ -427,7 +427,8 @@ class SearchQueryFilter(BaseFilterBackend):
return should
def secondary_should_rules(
        self, search_query, analyzer, rescore_mode=False):
    """Return "secondary" should rules for the query.

    These are the ones using the weakest boosts, they are applied to the
    summary and description fields.

    Applied rules:

    * Look for matches inside the summary (boost=3.0)
    * Look for matches inside the description (boost=2.0).

    If we're using a supported language, both rules are done through a
    multi_match that considers both the default locale translation
    (using snowball analyzer) and the translation in the current language
    (using language-specific analyzer). If we're not using a supported
    language then only the first part is applied.

    If rescore_mode is True, the matches applied are match_phrase queries
    with a slop of 10 instead of regular matches. As those are more
    expensive they are only done in the 'rescore' part of the query.

    :param search_query: the text to search for.
    :param analyzer: name of the language-specific analyzer to target, or
        a falsy value when the current language has none.
    :param rescore_mode: False (the default) to build regular match
        queries requiring all terms; any other value builds the phrase
        variants.
    :return: a list of two queries, summary first, then description.
    """
    if rescore_mode is False:
        query_class = query.Match
        query_class_name = 'Match'
        query_kwargs = {'operator': 'and'}
        multi_match_kwargs = {'operator': 'and'}
    else:
        query_class = query.MatchPhrase
        query_class_name = 'MatchPhrase'
        query_kwargs = {'slop': 10}
        multi_match_kwargs = {'slop': 10, 'type': 'phrase'}

    # Both rules are generated from this single table so that the field
    # being queried and the name given to the query can never disagree.
    fields_and_boosts = (
        ('summary', 3.0),
        ('description', 2.0),
    )

    should = []
    for field, boost in fields_and_boosts:
        if analyzer:
            l10n_field = '%s_l10n_%s' % (field, analyzer)
            # When *not* doing a rescore, we do regular non-phrase matches
            # with 'operator': 'and' (see multi_match_kwargs above). This
            # may seem wrong, the ES docs warn against this, but this is
            # exactly what we want here: we want all terms to be present
            # in either of the fields individually, not some in one and
            # some in another.
            should.append(query.MultiMatch(
                _name='MultiMatch(%s(%s),%s(%s))' % (
                    query_class_name, field,
                    query_class_name, l10n_field),
                query=search_query,
                fields=[field, l10n_field],
                boost=boost,
                **multi_match_kwargs
            ))
        else:
            # No language-specific analyzer: only the default locale
            # translation stored in the plain field is searched.
            should.append(query_class(**{
                field: dict(
                    _name='%s(%s)' % (query_class_name, field),
                    query=search_query,
                    boost=boost,
                    **query_kwargs),
            }))
    return should
def apply_search_query(self, search_query, qs):
def rescore_rules(self, search_query, analyzer):
    """
    Rules for the rescore part of the query. Currently just a more
    expensive version of secondary_should_rules(), doing match_phrase with
    a slop against summary & description, including translated variants if
    possible.
    """
    # rescore_mode=True makes secondary_should_rules() build match_phrase
    # queries instead of regular match ones.
    return self.secondary_should_rules(
        search_query, analyzer, rescore_mode=True)
def apply_search_query(self, search_query, qs, sort=None):
lang = translation.get_language()
analyzer = get_locale_analyzer(lang)
@ -494,14 +541,25 @@ class SearchQueryFilter(BaseFilterBackend):
query.SF('field_value_factor', field='boost'),
]
# Assemble everything together and return the search "queryset".
return qs.query(
# Assemble everything together
qs = qs.query(
'function_score',
query=query.Bool(should=primary_should + secondary_should),
functions=functions)
if sort is None or sort == 'relevance':
# If we are searching by relevancy, rescore the top 10
# (window_size below) results per shard with more expensive rules
# using match_phrase + slop.
rescore_query = self.rescore_rules(search_query, analyzer)
qs = qs.extra(rescore={'window_size': 10, 'query': {
'rescore_query': query.Bool(should=rescore_query).to_dict()}})
return qs
def filter_queryset(self, request, qs, view):
search_query = request.GET.get('q', '').lower()
sort_param = request.GET.get('sort')
if not search_query:
return qs
@ -510,7 +568,7 @@ class SearchQueryFilter(BaseFilterBackend):
raise serializers.ValidationError(
ugettext('Maximum query length exceeded.'))
return self.apply_search_query(search_query, qs)
return self.apply_search_query(search_query, qs, sort_param)
class SearchParameterFilter(BaseFilterBackend):

Просмотреть файл

@ -38,8 +38,7 @@ class TestQueryFilter(FilterTestsBase):
filter_classes = [SearchQueryFilter]
def _test_q(self):
qs = self._filter(data={'q': 'tea pot'})
def _test_q(self, qs):
# Spot check a few queries.
should = qs['query']['function_score']['query']['bool']['should']
@ -78,25 +77,81 @@ class TestQueryFilter(FilterTestsBase):
expected = {
'multi_match': {
'_name': (
'MultiMatch(MatchPhrase(summary),'
'MatchPhrase(summary_l10n_english))'),
'MultiMatch(Match(summary),Match(summary_l10n_english))'),
'query': 'tea pot',
'type': 'phrase',
'operator': 'and',
'fields': ['summary', 'summary_l10n_english'],
'boost': 3.0,
}
}
assert expected in should
expected = {
'multi_match': {
'_name': (
'MultiMatch(Match(description),'
'Match(description_l10n_english))'),
'query': 'tea pot',
'operator': 'and',
'fields': ['description', 'description_l10n_english'],
'boost': 2.0,
}
}
assert expected in should
functions = qs['query']['function_score']['functions']
assert functions[0] == {'field_value_factor': {'field': 'boost'}}
return qs
def test_no_rescore_if_not_sorting_by_relevance(self):
qs = self._test_q(
self._filter(data={'q': 'tea pot', 'sort': 'rating'}))
assert 'rescore' not in qs
def test_q(self):
qs = self._test_q()
qs = self._test_q(self._filter(data={'q': 'tea pot'}))
functions = qs['query']['function_score']['functions']
assert len(functions) == 1
expected_rescore = {
'bool': {
'should': [
{
'multi_match': {
'_name': (
'MultiMatch(MatchPhrase(summary),'
'MatchPhrase(summary_l10n_english))'),
'query': 'tea pot',
'slop': 10,
'type': 'phrase',
'fields': ['summary', 'summary_l10n_english'],
'boost': 3.0,
},
},
{
'multi_match': {
'_name': (
'MultiMatch(MatchPhrase(description),'
'MatchPhrase(description_l10n_english))'),
'query': 'tea pot',
'slop': 10,
'type': 'phrase',
'fields': ['description',
'description_l10n_english'],
'boost': 2.0,
},
}
]
}
}
assert qs['rescore'] == {
'window_size': 10,
'query': {
'rescore_query': expected_rescore
}
}
def test_q_too_long(self):
with self.assertRaises(serializers.ValidationError):
self._filter(data={'q': 'a' * 101})

Просмотреть файл

@ -70,7 +70,7 @@ class TestRankingScenarios(ESTestCase):
# sense though!)
# if found_score != expected_score:
# filename = 'src/olympia/search/tests/test_search_ranking.py'
# with open('sed_me.sh', 'a+') as f:
# with open('/code/tmp/sed_me.sh', 'a+') as f:
# f.write('sed -i s/%s/%s/ %s\n' % (
# expected_score, found_score, filename))
# continue
@ -430,32 +430,32 @@ class TestRankingScenarios(ESTestCase):
weekly_downloads=1123)
amo.tests.addon_factory(
name='GrApple Yummy', type=amo.ADDON_EXTENSION,
average_daily_users=1, weekly_downloads=1)
average_daily_users=1, weekly_downloads=1, summary=None)
amo.tests.addon_factory(
name='Delicious Bookmarks', type=amo.ADDON_EXTENSION,
average_daily_users=1, weekly_downloads=1)
average_daily_users=1, weekly_downloads=1, summary=None)
# Some more or less Dummy data to test a few very specific scenarios
# e.g for exact name matching
amo.tests.addon_factory(
name='Merge Windows', type=amo.ADDON_EXTENSION,
average_daily_users=1, weekly_downloads=1),
average_daily_users=1, weekly_downloads=1, summary=None)
amo.tests.addon_factory(
name='Merge All Windows', type=amo.ADDON_EXTENSION,
average_daily_users=1, weekly_downloads=1),
average_daily_users=1, weekly_downloads=1, summary=None)
amo.tests.addon_factory(
name='All Downloader Professional', type=amo.ADDON_EXTENSION,
average_daily_users=1, weekly_downloads=1),
average_daily_users=1, weekly_downloads=1, summary=None)
amo.tests.addon_factory(
name='test addon test11', type=amo.ADDON_EXTENSION,
average_daily_users=1, weekly_downloads=1),
average_daily_users=1, weekly_downloads=1, summary=None)
amo.tests.addon_factory(
name='test addon test21', type=amo.ADDON_EXTENSION,
average_daily_users=1, weekly_downloads=1),
average_daily_users=1, weekly_downloads=1, summary=None)
amo.tests.addon_factory(
name='test addon test31', type=amo.ADDON_EXTENSION,
average_daily_users=1, weekly_downloads=1),
average_daily_users=1, weekly_downloads=1, summary=None)
names = {
'fr': 'Foobar unique francais',
@ -464,134 +464,143 @@ class TestRankingScenarios(ESTestCase):
amo.tests.addon_factory(
name=names, type=amo.ADDON_EXTENSION,
default_locale='fr', slug='test-addon-test-special',
average_daily_users=1, weekly_downloads=1)
average_daily_users=1, weekly_downloads=1,
summary=None)
amo.tests.addon_factory(
name='1-Click YouTube Video Download',
type=amo.ADDON_EXTENSION,
average_daily_users=566337, weekly_downloads=150000,
summary=None,
description=(
'button, click that button, 1-Click Youtube Video '
'Downloader is a click click great tool')),
amo.tests.addon_factory(
name='Amazon 1-Click Lock', type=amo.ADDON_EXTENSION,
average_daily_users=50, weekly_downloads=1),
average_daily_users=50, weekly_downloads=1, summary=None)
cls.refresh()
def test_scenario_tab_center_redux(self):
self._check_scenario('tab center redux', (
['Tab Center Redux', 69.21394],
['Tab Mix Plus', 0.06495905],
['Redux DevTools', 0.04430029],
['Tab Center Redux', 69.5371],
['Tab Mix Plus', 0.06526235],
['Redux DevTools', 0.044507127],
))
def test_scenario_open_image_new_tab(self):
self._check_scenario('Open Image in New Tab', (
['Open Image in New Tab', 24.277237],
['Open image in a new tab', 5.6800475],
['Open Image in New Tab', 34.543846],
['Open image in a new tab', 10.555013],
))
def test_scenario_coinhive(self):
# TODO, should match "CoinBlock". Check word delimiting analysis maybe?
self._check_scenario('CoinHive', (
['Coinhive Blocker', 3.891288],
['NoMiners', 0.017959397], # via description
['Coinhive Blocker', 4.6710896],
['NoMiners', 0.32348818], # via description
# ['CoinBlock', 0], # via prefix search
))
def test_scenario_privacy(self):
self._check_scenario('Privacy', (
['Privacy Badger', 8.7432165],
['Privacy Settings', 4.559415],
['Google Privacy', 4.350088], # More users, summary
['Privacy Pass', 3.2087922],
['Ghostery', 0.09441561], # Crazy amount of users, summary
# summary + a lot of users, but not as many as ghostery
['Blur', 0.0776396],
['Privacy Badger', 9.063842],
['Privacy Settings', 4.917904],
['Google Privacy', 4.638222], # More users, summary
['Privacy Pass', 3.2094479],
['Blur', 0.62396055],
['Ghostery', 0.47795504],
))
def test_scenario_firebu(self):
self._check_scenario('firebu', (
['Firebug', 4.117813],
['Firefinder for Firebug', 1.0876373],
['Firebug Autocompleter', 1.0655142],
['Fire Drag', 0.6470381],
['Firebug', 4.118729],
['Firefinder for Firebug', 1.0878792],
['Firebug Autocompleter', 1.0657512],
['Fire Drag', 0.64718205],
))
def test_scenario_fireb(self):
self._check_scenario('fireb', (
['Firebug', 4.117813],
['Firefinder for Firebug', 1.0876373],
['Firebug Autocompleter', 1.0655142],
['Fire Drag', 0.6470381],
['Firebug', 4.118729],
['Firefinder for Firebug', 1.0878792],
['Firebug Autocompleter', 1.0657512],
['Fire Drag', 0.64718205],
))
def test_scenario_menu_wizzard(self):
self._check_scenario('Menu Wizzard', (
['Menu Wizard', 0.10683497], # (fuzzy, typo)
['Menu Wizard', 0.10698298], # (fuzzy, typo)
# partial match + users
['Add-ons Manager Context Menu', 0.0791911],
['Add-ons Manager Context Menu', 0.07930083],
))
def test_scenario_frame_demolition(self):
self._check_scenario('Frame Demolition', (
['Frame Demolition', 20.48827],
['Frame Demolition', 20.534973],
))
def test_scenario_demolition(self):
# Find "Frame Demolition" via a typo
self._check_scenario('Demolation', (
['Frame Demolition', 0.057878494],
['Frame Demolition', 0.057891317],
))
def test_scenario_restyle(self):
self._check_scenario('reStyle', (
['reStyle', 26.352535],
['reStyle', 26.360489],
))
def test_scenario_megaupload_downloadhelper(self):
# Doesn't find "RapidShare DownloadHelper" anymore
# since we now query by "MegaUpload AND DownloadHelper"
self._check_scenario('MegaUpload DownloadHelper', (
['MegaUpload DownloadHelper', 42.920856],
['MegaUpload DownloadHelper', 42.995304],
))
def test_scenario_downloadhelper(self):
# No direct match, "Download Flash and Video" has
# huge amount of users that puts it first here
self._check_scenario('DownloadHelper', (
['RapidShare DownloadHelper', 3.1008768],
['MegaUpload DownloadHelper', 1.7232388],
['Download Flash and Video', 1.5117542],
['1-Click YouTube Video Download', 1.141354],
['RapidShare DownloadHelper', 3.1015685],
['MegaUpload DownloadHelper', 1.7236232],
['Download Flash and Video', 1.5120913],
['1-Click YouTube Video Download', 1.1416086],
))
def test_scenario_megaupload(self):
self._check_scenario('MegaUpload', (
['MegaUpload DownloadHelper', 3.269901],
['Popup Blocker', 1.4292603],
['MegaUpload DownloadHelper', 3.8486633],
['Popup Blocker', 1.4295679],
))
def test_scenario_no_flash(self):
self._check_scenario('No Flash', (
['No Flash', 46.64575],
['Download Flash and Video', 4.4118795],
['YouTube Flash Player', 3.5120416],
['YouTube Flash Video Player', 3.202704],
['No Flash', 47.049717],
['Download Flash and Video', 4.7860165],
['YouTube Flash Player', 3.7756333],
['YouTube Flash Video Player', 3.627189],
))
# Case should not matter.
self._check_scenario('no flash', (
['No Flash', 46.64575],
['Download Flash and Video', 4.4118795],
['YouTube Flash Player', 3.5120416],
['YouTube Flash Video Player', 3.202704],
['No Flash', 47.049717],
['Download Flash and Video', 4.7860165],
['YouTube Flash Player', 3.7756333],
['YouTube Flash Video Player', 3.627189],
))
def test_scenario_youtube_html5_player(self):
# Both are found thanks to their descriptions (each individual term
# matches, then they get rescored with a match_phrase w/ slop).
self._check_scenario('Youtube html5 Player', (
['YouTube Flash Player', 0.41867542],
['No Flash', 0.068471745],
))
def test_scenario_disable_hello_pocket_reader_plus(self):
self._check_scenario('Disable Hello, Pocket & Reader+', (
['Disable Hello, Pocket & Reader+', 59.37624], # yeay!
['Disable Hello, Pocket & Reader+', 59.869083], # yeay!
))
def test_scenario_grapple(self):
@ -600,7 +609,7 @@ class TestRankingScenarios(ESTestCase):
see `legacy_api.SearchTest` for various examples.
"""
self._check_scenario('grapple', (
['GrApple Yummy', 0.97180986],
['GrApple Yummy', 0.7218929],
))
def test_scenario_delicious(self):
@ -609,36 +618,36 @@ class TestRankingScenarios(ESTestCase):
see `legacy_api.SearchTest` for various examples.
"""
self._check_scenario('delicious', (
['Delicious Bookmarks', 1.1302524],
['Delicious Bookmarks', 0.85394204],
))
def test_score_boost_name_match(self):
# Tests that we match directly "Merge Windows" and also find
# "Merge All Windows" because of slop=1
self._check_scenario('merge windows', (
['Merge Windows', 12.554659],
['Merge All Windows', 1.7936656],
['Merge Windows', 10.207693],
['Merge All Windows', 1.7960566],
), no_match=(
'All Downloader Professional',
))
self._check_scenario('merge all windows', (
['Merge All Windows', 14.103567],
['Merge Windows', 0.042702418],
['All Downloader Professional', 0.0070751677],
['Merge All Windows', 11.195793],
['Merge Windows', 0.04285209],
['All Downloader Professional', 0.0070999665],
))
def test_score_boost_exact_match(self):
"""Test that we rank exact matches at the top."""
self._check_scenario('test addon test21', (
['test addon test21', 14.293872],
['test addon test21', 11.341756],
))
def test_score_boost_exact_match_description_hijack(self):
"""Test that we rank exact matches at the top."""
self._check_scenario('Amazon 1-Click Lock', (
['Amazon 1-Click Lock', 34.255657],
['1-Click YouTube Video Download', 0.22216046],
['Amazon 1-Click Lock', 26.223818],
['1-Click YouTube Video Download', 0.22368877],
))
def test_score_boost_exact_match_in_right_language(self):
@ -646,13 +655,13 @@ class TestRankingScenarios(ESTestCase):
# First in english. Straightforward: it should be an exact match, the
# translation exists.
self._check_scenario(u'foobar unique english', (
[u'Foobar unique english', 4.675893],
[u'Foobar unique english', 2.9022827],
), lang='en-US')
# Then check in french. Also straightforward: it should be an exact
# match, the translation exists, it's even the default locale.
self._check_scenario(u'foobar unique francais', (
[u'Foobar unique francais', 14.146512],
[u'Foobar unique francais', 10.837958],
), lang='fr')
# Check with a language that we don't have a translation for (mn), and
@ -663,7 +672,7 @@ class TestRankingScenarios(ESTestCase):
assert 'mn' not in SEARCH_LANGUAGE_TO_ANALYZER
assert 'mn' in settings.LANGUAGES
self._check_scenario(u'foobar unique francais', (
[u'Foobar unique francais', 12.54679],
[u'Foobar unique francais', 9.005518],
), lang='mn', expected_lang='fr')
# Check with a language that we don't have a translation for (ca), and
@ -674,12 +683,12 @@ class TestRankingScenarios(ESTestCase):
assert 'ca' in SEARCH_LANGUAGE_TO_ANALYZER
assert 'ca' in settings.LANGUAGES
self._check_scenario(u'foobar unique francais', (
[u'Foobar unique francais', 11.289922],
[u'Foobar unique francais', 8.107916],
), lang='ca', expected_lang='fr')
# Check with a language that we do have a translation for (en-US), but
# we're requesting the string that matches the default locale (fr).
# Note that the name returned follows the language requested.
self._check_scenario(u'foobar unique francais', (
[u'Foobar unique english', 9.786429],
[u'Foobar unique english', 7.0274434],
), lang='en-US')