Merge pull request #9985 from diox/rescore-to-add-slop-to-match-phrase
Move match_phrase queries against description & summary to a rescore, add slop
Commit f2f98c7096
@@ -38,7 +38,7 @@ Flow of a search query through AMO

Let's assume we search on addons-frontend (not legacy). The search query hits the API and gets handled by ``AddonSearchView``, which directly queries ElasticSearch and doesn't involve the database at all.

-There are a few filters that are described in the :ref:`/api/v4/addons/search/ docs <addon-search>` but most of them are not very relevant for raw search queries. Examples are filters by guid, platform, category, add-on type or appversion (application version compatibility).
+There are a few filters that are described in the :ref:`/api/v4/addons/search/ docs <addon-search>` but most of them are not very relevant for raw search queries. Examples are filters by guid, platform, category, add-on type or appversion (application version compatibility). Those filters are applied using a ``filter`` clause and shouldn't affect scoring.

Much more relevant for raw add-on searches (and this is primarily what is used when you search on the frontend) is ``SearchQueryFilter``.

@@ -70,12 +70,19 @@ containing more text like description, summary and tags.

**Applied rules** (merged via ``should``):

-1. Look for phrase matches inside the summary (``boost=3.0``)
-2. Look for phrase matches inside the description (``boost=2.0``)
+1. Look for matches inside the summary (``boost=3.0``, ``operator=and``)
+2. Look for matches inside the description (``boost=2.0``, ``operator=and``)

If the language of the request matches a known language-specific analyzer, those matches are made using a ``multi_match`` query against ``summary`` or ``description`` and the corresponding ``{field}_l10n_{analyzer}`` field, similar to how exact name matches are performed above, in order to support potential translations.

+Rescoring rules
+---------------
+
+On top of the two sets of rules above, a ``rescore`` query is applied with a ``window_size`` of ``10``. In production we have 5 shards, so this re-adjusts the scores of only the top 50 results returned. The rules used for rescoring are the same as the secondary rules above, with just one difference: they use ``match_phrase`` instead of ``match``, with a slop of ``10``.

General query flow:
-------------------

@@ -83,4 +90,4 @@ General query flow:

2. Fetch locale specific analyzer (`List of analyzers <https://github.com/mozilla/addons-server/blob/master/src/olympia/constants/search.py#L15-L61>`_)
3. Merge primary and secondary *should* rules
4. Create a ``function_score`` query that uses a ``field_value_factor`` function on the ``boost`` field that we set when indexing
-5. Add a specific query-time boost for webextension add-ons
+5. Add the ``rescore`` query to the mix
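
For illustration only, here is a rough sketch of what the resulting search body looks like for an English request searching for "tea pot" (based on the tests in this commit, not the literal request we send)::

    {
        "query": {
            "function_score": {
                "query": {"bool": {"should": ["<primary rules>", "<secondary rules>"]}},
                "functions": [{"field_value_factor": {"field": "boost"}}]
            }
        },
        "rescore": {
            "window_size": 10,
            "query": {
                "rescore_query": {
                    "bool": {
                        "should": [
                            {"multi_match": {
                                "query": "tea pot",
                                "type": "phrase",
                                "slop": 10,
                                "fields": ["summary", "summary_l10n_english"],
                                "boost": 3.0
                            }},
                            {"multi_match": {
                                "query": "tea pot",
                                "type": "phrase",
                                "slop": 10,
                                "fields": ["description", "description_l10n_english"],
                                "boost": 2.0
                            }}
                        ]
                    }
                }
            }
        }
    }

The ``rescore`` runs the more expensive ``match_phrase`` clauses only against the top ``window_size`` hits of each shard, so the bulk of the index is never evaluated against them.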
@@ -365,6 +365,7 @@ class TestAddonIndexer(TestCase):
            'fr': '',  # Empty description should be ignored in extract.
            'it': '<script>alert(42)</script>',
        }
        self.addon.summary_id = None
        self.addon.name = translations_name
        self.addon.description = translations_description
        self.addon.save()

@@ -380,12 +381,19 @@ class TestAddonIndexer(TestCase):
        ])
        assert extracted['name_l10n_english'] == [translations_name['en-US']]
        assert extracted['name_l10n_spanish'] == [translations_name['es']]
        assert extracted['name_l10n_italian'] == []
        assert (extracted['description_l10n_english'] ==
                [translations_description['en-US']])
        assert (extracted['description_l10n_spanish'] ==
                [translations_description['es']])
        assert extracted['description_l10n_french'] == []
        assert (extracted['description_l10n_italian'] ==
                ['<script>alert(42)</script>'])
        assert extracted['summary_l10n_english'] == []
        # The non-l10n fields are fallbacks in the addon's default locale, they
        # need to always contain a string.
        assert extracted['name'] == u'Name in ënglish'
        assert extracted['summary'] == ''

    def test_extract_translations_engb_default(self):
        """Make sure we do correctly extract things for en-GB default locale"""
@@ -124,7 +124,7 @@ class BaseSearchIndexer(object):
    def extract_field_api_translations(cls, obj, field, db_field=None):
        """
        Returns a dict containing translations that we need to store for
-        the API.
+        the API. Empty translations are skipped entirely.
        """
        if db_field is None:
            db_field = '%s_id' % field

@@ -141,23 +141,26 @@ class BaseSearchIndexer(object):
    @classmethod
    def extract_field_search_translation(cls, obj, field, default_locale):
        """
-        Returns the translation for this field in the object's default locale.
+        Returns the translation for this field in the object's default locale,
+        in the form of a dict with one entry (the field being the key and the
+        translation being the value, or an empty string if none was found).

        That field will be analyzed and indexed by ES *without*
        language-specific analyzers.
        """
        translations = dict(obj.translations[getattr(obj, '%s_id' % field)])
        default_locale = default_locale.lower() if default_locale else None
+        value = translations.get(default_locale, getattr(obj, field))

        return {
-            field: unicode(
-                translations.get(default_locale, getattr(obj, field)))
+            field: unicode(value) if value else ''
        }

    @classmethod
    def extract_field_analyzed_translations(cls, obj, field, db_field=None):
        """
        Returns a dict containing translations for each language-specific
-        analyzer for the given field.
+        analyzer for the given field. Empty translations are skipped entirely.
        """
        if db_field is None:
            db_field = '%s_id' % field

@@ -170,6 +173,6 @@ class BaseSearchIndexer(object):
            extend_with_me['%s_l10n_%s' % (field, analyzer)] = list(
                set(unicode(string) for locale, string
                    in obj.translations[getattr(obj, db_field)]
-                    if locale.lower() in languages))
+                    if locale.lower() in languages and string))

        return extend_with_me
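
To make the change concrete, here is a small sketch (my illustration, with made-up translation strings; the behaviour mirrors the test_indexers assertions above) of what the extraction now produces when one translation is empty:

    # Input translations for a field whose default locale is en-US.
    translations = {
        'en-US': 'Description in english',
        'es': 'Descripcion en espanol',
        'fr': '',  # empty string
    }

    # Roughly what extract_field_analyzed_translations() yields for
    # field='description': the empty French value is dropped by the new
    # "and string" condition instead of being indexed as ''.
    extracted = {
        'description_l10n_english': ['Description in english'],
        'description_l10n_spanish': ['Descripcion en espanol'],
        'description_l10n_french': [],
    }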
@@ -637,8 +637,9 @@ def addon_factory(
        'created': when,
        'last_updated': when,
    }
-    if type_ != amo.ADDON_PERSONA:
-        # Personas don't have a summary.
+    if type_ != amo.ADDON_PERSONA and 'summary' not in kw:
+        # Assign a dummy summary if none was specified in keyword args, unless
+        # we're creating a Persona since they don't have summaries.
        kwargs['summary'] = u'Summary for %s' % name
    if type_ not in [amo.ADDON_PERSONA, amo.ADDON_SEARCH]:
        # Personas and search engines don't need guids
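
For context, the ranking tests further down in this diff rely on this change: explicitly passing a summary keyword (even None) now wins over the auto-generated dummy summary. Roughly:

    # Illustration only (not part of the diff): creates an extension whose
    # indexed document has no summary at all, instead of the
    # 'Summary for GrApple Yummy' placeholder addon_factory used to add.
    addon_factory(
        name='GrApple Yummy', type=amo.ADDON_EXTENSION,
        average_daily_users=1, weekly_downloads=1, summary=None)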
@@ -427,7 +427,8 @@ class SearchQueryFilter(BaseFilterBackend):

        return should

-    def secondary_should_rules(self, search_query, analyzer):
+    def secondary_should_rules(
+            self, search_query, analyzer, rescore_mode=False):
        """Return "secondary" should rules for the query.

        These are the ones using the weakest boosts, they are applied to fields

@@ -435,51 +436,97 @@ class SearchQueryFilter(BaseFilterBackend):

        Applied rules:

-        * Look for phrase matches inside the summary (boost=3.0)
-        * Look for phrase matches inside the description (boost=2.0).
+        * Look for matches inside the summary (boost=3.0)
+        * Look for matches inside the description (boost=2.0).

        If we're using a supported language, both rules are done through a
        multi_match that considers both the default locale translation
        (using snowball analyzer) and the translation in the current language
        (using language-specific analyzer). If we're not using a supported
        language then only the first part is applied.

+        If rescore_mode is True, the matches applied are match_phrase queries
+        with a slop of 10 instead of regular matches. As those are more
+        expensive they are only done in the 'rescore' part of the query.
        """
+        if rescore_mode is False:
+            query_class = query.Match
+            query_kwargs = {
+                'operator': 'and',
+            }
+            query_class_name = 'Match'
+            multi_match_kwargs = {
+                'operator': 'and',
+            }
+        else:
+            query_class = query.MatchPhrase
+            query_kwargs = {
+                'slop': 10,
+            }
+            query_class_name = 'MatchPhrase'
+            multi_match_kwargs = {
+                'slop': 10,
+                'type': 'phrase',
+            }
+
        if analyzer:
            summary_query_name = (
-                'MultiMatch(MatchPhrase(summary),'
-                'MatchPhrase(summary_l10n_%s))' % analyzer)
+                'MultiMatch(%s(summary),%s(summary_l10n_%s))' % (
+                    query_class_name, query_class_name, analyzer))
            description_query_name = (
-                'MultiMatch(MatchPhrase(description),'
-                'MatchPhrase(description_l10n_%s))' % analyzer)
+                'MultiMatch(%s(description),%s(description_l10n_%s))' % (
+                    query_class_name, query_class_name, analyzer))
            should = [
+                # When *not* doing a rescore, we do regular non-phrase matches
+                # with 'operator': 'and' (see query_class/multi_match_kwargs
+                # above). This may seem wrong, the ES docs warn against this,
+                # but this is exactly what we want here: we want all terms
+                # to be present in either of the fields individually, not some
+                # in one and some in another.
                query.MultiMatch(
                    _name=summary_query_name,
                    query=search_query,
-                    type='phrase',
                    fields=['summary', 'summary_l10n_%s' % analyzer],
                    boost=3.0,
+                    **multi_match_kwargs
                ),
                query.MultiMatch(
                    _name=description_query_name,
                    query=search_query,
-                    type='phrase',
                    fields=['description', 'description_l10n_%s' % analyzer],
                    boost=2.0,
+                    **multi_match_kwargs
                ),
            ]
        else:
            should = [
-                query.MatchPhrase(summary={
-                    '_name': 'MatchPhrase(summary)',
-                    'query': search_query, 'boost': 3.0}),
-                query.MatchPhrase(description={
-                    '_name': 'MatchPhrase(description)',
-                    'query': search_query, 'boost': 2.0}),
+                query_class(
+                    summary=dict(
+                        _name='%s(summary)' % query_class_name,
+                        query=search_query,
+                        boost=3.0,
+                        **query_kwargs)),
+                query_class(
+                    description=dict(
+                        _name='%s(description)' % query_class_name,
+                        query=search_query,
+                        boost=2.0,
+                        **query_kwargs)),
            ]

        return should

-    def apply_search_query(self, search_query, qs):
+    def rescore_rules(self, search_query, analyzer):
+        """
+        Rules for the rescore part of the query. Currently just a more
+        expensive version of secondary_should_rules(), doing match_phrase
+        with a slop against summary & description, including translated
+        variants if possible.
+        """
+        return self.secondary_should_rules(
+            search_query, analyzer, rescore_mode=True)
+
+    def apply_search_query(self, search_query, qs, sort=None):
        lang = translation.get_language()
        analyzer = get_locale_analyzer(lang)

@@ -494,14 +541,25 @@ class SearchQueryFilter(BaseFilterBackend):
            query.SF('field_value_factor', field='boost'),
        ]

-        # Assemble everything together and return the search "queryset".
-        return qs.query(
+        # Assemble everything together
+        qs = qs.query(
            'function_score',
            query=query.Bool(should=primary_should + secondary_should),
            functions=functions)

+        if sort is None or sort == 'relevance':
+            # If we are searching by relevancy, rescore the top 10
+            # (window_size below) results per shard with more expensive rules
+            # using match_phrase + slop.
+            rescore_query = self.rescore_rules(search_query, analyzer)
+            qs = qs.extra(rescore={'window_size': 10, 'query': {
+                'rescore_query': query.Bool(should=rescore_query).to_dict()}})
+
+        return qs

    def filter_queryset(self, request, qs, view):
        search_query = request.GET.get('q', '').lower()
+        sort_param = request.GET.get('sort')

        if not search_query:
            return qs

@@ -510,7 +568,7 @@ class SearchQueryFilter(BaseFilterBackend):
            raise serializers.ValidationError(
                ugettext('Maximum query length exceeded.'))

-        return self.apply_search_query(search_query, qs)
+        return self.apply_search_query(search_query, qs, sort_param)


class SearchParameterFilter(BaseFilterBackend):
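
To make the two modes of secondary_should_rules() concrete, here is a rough sketch (my illustration, not part of the diff) of what the no-analyzer summary clause serializes to for a 'tea pot' query:

    # rescore_mode=False: a plain match with operator='and', so every term has
    # to be present in the field for it to contribute.
    {'match': {'summary': {
        '_name': 'Match(summary)', 'query': 'tea pot',
        'operator': 'and', 'boost': 3.0}}}

    # rescore_mode=True: a match_phrase with slop=10, which also rewards term
    # proximity/order but tolerates up to 10 positions of slack. It is only run
    # through the rescore, against the top window_size hits per shard.
    {'match_phrase': {'summary': {
        '_name': 'MatchPhrase(summary)', 'query': 'tea pot',
        'slop': 10, 'boost': 3.0}}}

The slop is what lets a query like "Youtube html5 Player" still phrase-match a description that spreads those terms out, while keeping the expensive phrase matching off the main query.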
@@ -38,8 +38,7 @@ class TestQueryFilter(FilterTestsBase):

    filter_classes = [SearchQueryFilter]

-    def _test_q(self):
-        qs = self._filter(data={'q': 'tea pot'})
+    def _test_q(self, qs):
        # Spot check a few queries.
        should = qs['query']['function_score']['query']['bool']['should']

@@ -78,25 +77,81 @@ class TestQueryFilter(FilterTestsBase):
        expected = {
            'multi_match': {
                '_name': (
-                    'MultiMatch(MatchPhrase(summary),'
-                    'MatchPhrase(summary_l10n_english))'),
+                    'MultiMatch(Match(summary),Match(summary_l10n_english))'),
                'query': 'tea pot',
-                'type': 'phrase',
+                'operator': 'and',
                'fields': ['summary', 'summary_l10n_english'],
                'boost': 3.0,
            }
        }
        assert expected in should

        expected = {
            'multi_match': {
                '_name': (
+                    'MultiMatch(Match(description),'
+                    'Match(description_l10n_english))'),
                'query': 'tea pot',
+                'operator': 'and',
                'fields': ['description', 'description_l10n_english'],
                'boost': 2.0,
            }
        }
        assert expected in should

+        functions = qs['query']['function_score']['functions']
+        assert functions[0] == {'field_value_factor': {'field': 'boost'}}
        return qs

+    def test_no_rescore_if_not_sorting_by_relevance(self):
+        qs = self._test_q(
+            self._filter(data={'q': 'tea pot', 'sort': 'rating'}))
+        assert 'rescore' not in qs

    def test_q(self):
-        qs = self._test_q()
+        qs = self._test_q(self._filter(data={'q': 'tea pot'}))
        functions = qs['query']['function_score']['functions']
        assert len(functions) == 1

+        expected_rescore = {
+            'bool': {
+                'should': [
+                    {
+                        'multi_match': {
+                            '_name': (
+                                'MultiMatch(MatchPhrase(summary),'
+                                'MatchPhrase(summary_l10n_english))'),
+                            'query': 'tea pot',
+                            'slop': 10,
+                            'type': 'phrase',
+                            'fields': ['summary', 'summary_l10n_english'],
+                            'boost': 3.0,
+                        },
+                    },
+                    {
+                        'multi_match': {
+                            '_name': (
+                                'MultiMatch(MatchPhrase(description),'
+                                'MatchPhrase(description_l10n_english))'),
+                            'query': 'tea pot',
+                            'slop': 10,
+                            'type': 'phrase',
+                            'fields': ['description',
+                                       'description_l10n_english'],
+                            'boost': 2.0,
+                        },
+                    }
+                ]
+            }
+        }
+
+        assert qs['rescore'] == {
+            'window_size': 10,
+            'query': {
+                'rescore_query': expected_rescore
+            }
+        }

    def test_q_too_long(self):
        with self.assertRaises(serializers.ValidationError):
            self._filter(data={'q': 'a' * 101})
@@ -70,7 +70,7 @@ class TestRankingScenarios(ESTestCase):
            # sense though!)
            # if found_score != expected_score:
            #     filename = 'src/olympia/search/tests/test_search_ranking.py'
-            #     with open('sed_me.sh', 'a+') as f:
+            #     with open('/code/tmp/sed_me.sh', 'a+') as f:
            #         f.write('sed -i s/%s/%s/ %s\n' % (
            #             expected_score, found_score, filename))
            #     continue

@@ -430,32 +430,32 @@ class TestRankingScenarios(ESTestCase):
            weekly_downloads=1123)
        amo.tests.addon_factory(
            name='GrApple Yummy', type=amo.ADDON_EXTENSION,
-            average_daily_users=1, weekly_downloads=1)
+            average_daily_users=1, weekly_downloads=1, summary=None)
        amo.tests.addon_factory(
            name='Delicious Bookmarks', type=amo.ADDON_EXTENSION,
-            average_daily_users=1, weekly_downloads=1)
+            average_daily_users=1, weekly_downloads=1, summary=None)

        # Some more or less Dummy data to test a few very specific scenarios
        # e.g for exact name matching
        amo.tests.addon_factory(
            name='Merge Windows', type=amo.ADDON_EXTENSION,
-            average_daily_users=1, weekly_downloads=1),
+            average_daily_users=1, weekly_downloads=1, summary=None)
        amo.tests.addon_factory(
            name='Merge All Windows', type=amo.ADDON_EXTENSION,
-            average_daily_users=1, weekly_downloads=1),
+            average_daily_users=1, weekly_downloads=1, summary=None)
        amo.tests.addon_factory(
            name='All Downloader Professional', type=amo.ADDON_EXTENSION,
-            average_daily_users=1, weekly_downloads=1),
+            average_daily_users=1, weekly_downloads=1, summary=None)

        amo.tests.addon_factory(
            name='test addon test11', type=amo.ADDON_EXTENSION,
-            average_daily_users=1, weekly_downloads=1),
+            average_daily_users=1, weekly_downloads=1, summary=None)
        amo.tests.addon_factory(
            name='test addon test21', type=amo.ADDON_EXTENSION,
-            average_daily_users=1, weekly_downloads=1),
+            average_daily_users=1, weekly_downloads=1, summary=None)
        amo.tests.addon_factory(
            name='test addon test31', type=amo.ADDON_EXTENSION,
-            average_daily_users=1, weekly_downloads=1),
+            average_daily_users=1, weekly_downloads=1, summary=None)

        names = {
            'fr': 'Foobar unique francais',

@@ -464,134 +464,143 @@ class TestRankingScenarios(ESTestCase):
        amo.tests.addon_factory(
            name=names, type=amo.ADDON_EXTENSION,
            default_locale='fr', slug='test-addon-test-special',
-            average_daily_users=1, weekly_downloads=1)
+            average_daily_users=1, weekly_downloads=1,
+            summary=None)

        amo.tests.addon_factory(
            name='1-Click YouTube Video Download',
            type=amo.ADDON_EXTENSION,
            average_daily_users=566337, weekly_downloads=150000,
+            summary=None,
            description=(
                'button, click that button, 1-Click Youtube Video '
                'Downloader is a click click great tool')),
        amo.tests.addon_factory(
            name='Amazon 1-Click Lock', type=amo.ADDON_EXTENSION,
-            average_daily_users=50, weekly_downloads=1),
+            average_daily_users=50, weekly_downloads=1, summary=None)

        cls.refresh()

    def test_scenario_tab_center_redux(self):
        self._check_scenario('tab center redux', (
-            ['Tab Center Redux', 69.21394],
-            ['Tab Mix Plus', 0.06495905],
-            ['Redux DevTools', 0.04430029],
+            ['Tab Center Redux', 69.5371],
+            ['Tab Mix Plus', 0.06526235],
+            ['Redux DevTools', 0.044507127],
        ))

    def test_scenario_open_image_new_tab(self):
        self._check_scenario('Open Image in New Tab', (
-            ['Open Image in New Tab', 24.277237],
-            ['Open image in a new tab', 5.6800475],
+            ['Open Image in New Tab', 34.543846],
+            ['Open image in a new tab', 10.555013],
        ))

    def test_scenario_coinhive(self):
        # TODO, should match "CoinBlock". Check word delimiting analysis maybe?
        self._check_scenario('CoinHive', (
-            ['Coinhive Blocker', 3.891288],
-            ['NoMiners', 0.017959397],  # via description
+            ['Coinhive Blocker', 4.6710896],
+            ['NoMiners', 0.32348818],  # via description
            # ['CoinBlock', 0],  # via prefix search
        ))

    def test_scenario_privacy(self):
        self._check_scenario('Privacy', (
-            ['Privacy Badger', 8.7432165],
-            ['Privacy Settings', 4.559415],
-            ['Google Privacy', 4.350088],  # More users, summary
-            ['Privacy Pass', 3.2087922],
-            ['Ghostery', 0.09441561],  # Crazy amount of users, summary
-            # summary + a lot of users, but not as many as ghostery
-            ['Blur', 0.0776396],
+            ['Privacy Badger', 9.063842],
+            ['Privacy Settings', 4.917904],
+            ['Google Privacy', 4.638222],  # More users, summary
+            ['Privacy Pass', 3.2094479],
+            ['Blur', 0.62396055],
+            ['Ghostery', 0.47795504],
        ))

    def test_scenario_firebu(self):
        self._check_scenario('firebu', (
-            ['Firebug', 4.117813],
-            ['Firefinder for Firebug', 1.0876373],
-            ['Firebug Autocompleter', 1.0655142],
-            ['Fire Drag', 0.6470381],
+            ['Firebug', 4.118729],
+            ['Firefinder for Firebug', 1.0878792],
+            ['Firebug Autocompleter', 1.0657512],
+            ['Fire Drag', 0.64718205],
        ))

    def test_scenario_fireb(self):
        self._check_scenario('fireb', (
-            ['Firebug', 4.117813],
-            ['Firefinder for Firebug', 1.0876373],
-            ['Firebug Autocompleter', 1.0655142],
-            ['Fire Drag', 0.6470381],
+            ['Firebug', 4.118729],
+            ['Firefinder for Firebug', 1.0878792],
+            ['Firebug Autocompleter', 1.0657512],
+            ['Fire Drag', 0.64718205],
        ))

    def test_scenario_menu_wizzard(self):
        self._check_scenario('Menu Wizzard', (
-            ['Menu Wizard', 0.10683497],  # (fuzzy, typo)
+            ['Menu Wizard', 0.10698298],  # (fuzzy, typo)
            # partial match + users
-            ['Add-ons Manager Context Menu', 0.0791911],
+            ['Add-ons Manager Context Menu', 0.07930083],
        ))

    def test_scenario_frame_demolition(self):
        self._check_scenario('Frame Demolition', (
-            ['Frame Demolition', 20.48827],
+            ['Frame Demolition', 20.534973],
        ))

    def test_scenario_demolition(self):
        # Find "Frame Demolition" via a typo
        self._check_scenario('Demolation', (
-            ['Frame Demolition', 0.057878494],
+            ['Frame Demolition', 0.057891317],
        ))

    def test_scenario_restyle(self):
        self._check_scenario('reStyle', (
-            ['reStyle', 26.352535],
+            ['reStyle', 26.360489],
        ))

    def test_scenario_megaupload_downloadhelper(self):
        # Doesn't find "RapidShare DownloadHelper" anymore
        # since we now query by "MegaUpload AND DownloadHelper"
        self._check_scenario('MegaUpload DownloadHelper', (
-            ['MegaUpload DownloadHelper', 42.920856],
+            ['MegaUpload DownloadHelper', 42.995304],
        ))

    def test_scenario_downloadhelper(self):
        # No direct match, "Download Flash and Video" has
        # huge amount of users that puts it first here
        self._check_scenario('DownloadHelper', (
-            ['RapidShare DownloadHelper', 3.1008768],
-            ['MegaUpload DownloadHelper', 1.7232388],
-            ['Download Flash and Video', 1.5117542],
-            ['1-Click YouTube Video Download', 1.141354],
+            ['RapidShare DownloadHelper', 3.1015685],
+            ['MegaUpload DownloadHelper', 1.7236232],
+            ['Download Flash and Video', 1.5120913],
+            ['1-Click YouTube Video Download', 1.1416086],
        ))

    def test_scenario_megaupload(self):
        self._check_scenario('MegaUpload', (
-            ['MegaUpload DownloadHelper', 3.269901],
-            ['Popup Blocker', 1.4292603],
+            ['MegaUpload DownloadHelper', 3.8486633],
+            ['Popup Blocker', 1.4295679],
        ))

    def test_scenario_no_flash(self):
        self._check_scenario('No Flash', (
-            ['No Flash', 46.64575],
-            ['Download Flash and Video', 4.4118795],
-            ['YouTube Flash Player', 3.5120416],
-            ['YouTube Flash Video Player', 3.202704],
+            ['No Flash', 47.049717],
+            ['Download Flash and Video', 4.7860165],
+            ['YouTube Flash Player', 3.7756333],
+            ['YouTube Flash Video Player', 3.627189],
        ))

        # Case should not matter.
        self._check_scenario('no flash', (
-            ['No Flash', 46.64575],
-            ['Download Flash and Video', 4.4118795],
-            ['YouTube Flash Player', 3.5120416],
-            ['YouTube Flash Video Player', 3.202704],
+            ['No Flash', 47.049717],
+            ['Download Flash and Video', 4.7860165],
+            ['YouTube Flash Player', 3.7756333],
+            ['YouTube Flash Video Player', 3.627189],
        ))

+    def test_scenario_youtube_html5_player(self):
+        # Both are found thanks to their descriptions (matches each
+        # individual term, then they get rescored with a match_phrase w/
+        # slop).
+        self._check_scenario('Youtube html5 Player', (
+            ['YouTube Flash Player', 0.41867542],
+            ['No Flash', 0.068471745],
+        ))

    def test_scenario_disable_hello_pocket_reader_plus(self):
        self._check_scenario('Disable Hello, Pocket & Reader+', (
-            ['Disable Hello, Pocket & Reader+', 59.37624],  # yeay!
+            ['Disable Hello, Pocket & Reader+', 59.869083],  # yeay!
        ))

    def test_scenario_grapple(self):

@@ -600,7 +609,7 @@ class TestRankingScenarios(ESTestCase):
        see `legacy_api.SearchTest` for various examples.
        """
        self._check_scenario('grapple', (
-            ['GrApple Yummy', 0.97180986],
+            ['GrApple Yummy', 0.7218929],
        ))

    def test_scenario_delicious(self):

@@ -609,36 +618,36 @@ class TestRankingScenarios(ESTestCase):
        see `legacy_api.SearchTest` for various examples.
        """
        self._check_scenario('delicious', (
-            ['Delicious Bookmarks', 1.1302524],
+            ['Delicious Bookmarks', 0.85394204],
        ))

    def test_score_boost_name_match(self):
        # Tests that we match directly "Merge Windows" and also find
        # "Merge All Windows" because of slop=1
        self._check_scenario('merge windows', (
-            ['Merge Windows', 12.554659],
-            ['Merge All Windows', 1.7936656],
+            ['Merge Windows', 10.207693],
+            ['Merge All Windows', 1.7960566],
        ), no_match=(
            'All Downloader Professional',
        ))

        self._check_scenario('merge all windows', (
-            ['Merge All Windows', 14.103567],
-            ['Merge Windows', 0.042702418],
-            ['All Downloader Professional', 0.0070751677],
+            ['Merge All Windows', 11.195793],
+            ['Merge Windows', 0.04285209],
+            ['All Downloader Professional', 0.0070999665],
        ))

    def test_score_boost_exact_match(self):
        """Test that we rank exact matches at the top."""
        self._check_scenario('test addon test21', (
-            ['test addon test21', 14.293872],
+            ['test addon test21', 11.341756],
        ))

    def test_score_boost_exact_match_description_hijack(self):
        """Test that we rank exact matches at the top."""
        self._check_scenario('Amazon 1-Click Lock', (
-            ['Amazon 1-Click Lock', 34.255657],
-            ['1-Click YouTube Video Download', 0.22216046],
+            ['Amazon 1-Click Lock', 26.223818],
+            ['1-Click YouTube Video Download', 0.22368877],
        ))

    def test_score_boost_exact_match_in_right_language(self):

@@ -646,13 +655,13 @@ class TestRankingScenarios(ESTestCase):
        # First in english. Straightforward: it should be an exact match, the
        # translation exists.
        self._check_scenario(u'foobar unique english', (
-            [u'Foobar unique english', 4.675893],
+            [u'Foobar unique english', 2.9022827],
        ), lang='en-US')

        # Then check in french. Also straightforward: it should be an exact
        # match, the translation exists, it's even the default locale.
        self._check_scenario(u'foobar unique francais', (
-            [u'Foobar unique francais', 14.146512],
+            [u'Foobar unique francais', 10.837958],
        ), lang='fr')

        # Check with a language that we don't have a translation for (mn), and

@@ -663,7 +672,7 @@ class TestRankingScenarios(ESTestCase):
        assert 'mn' not in SEARCH_LANGUAGE_TO_ANALYZER
        assert 'mn' in settings.LANGUAGES
        self._check_scenario(u'foobar unique francais', (
-            [u'Foobar unique francais', 12.54679],
+            [u'Foobar unique francais', 9.005518],
        ), lang='mn', expected_lang='fr')

        # Check with a language that we don't have a translation for (ca), and

@@ -674,12 +683,12 @@ class TestRankingScenarios(ESTestCase):
        assert 'ca' in SEARCH_LANGUAGE_TO_ANALYZER
        assert 'ca' in settings.LANGUAGES
        self._check_scenario(u'foobar unique francais', (
-            [u'Foobar unique francais', 11.289922],
+            [u'Foobar unique francais', 8.107916],
        ), lang='ca', expected_lang='fr')

        # Check with a language that we do have a translation for (en-US), but
        # we're requesting the string that matches the default locale (fr).
        # Note that the name returned follows the language requested.
        self._check_scenario(u'foobar unique francais', (
-            [u'Foobar unique english', 9.786429],
+            [u'Foobar unique english', 7.0274434],
        ), lang='en-US')