Search: Implement exact matching. (#6895)

* Search: Implement exact matching. Fixes #6837. This re-uses the existing `name_sort` field, a non-analyzed version of `name`, which is needed for exact matches.
* Fix flake8
* Add another test for exact matching
* Add a test for description hijack
* Use new name.raw field
* Remove l10n test for now, it only works accidentally.
* Fix comment
* Fix tests
2017-11-13 18:54:02 +01:00 · 2017-11-13 18:54:02 +01:00 · b8749341b3
--- a/src/olympia/addons/indexers.py
+++ b/src/olympia/addons/indexers.py
@ -22,7 +22,8 @@ class AddonIndexer(BaseSearchIndexer):
    """Fields we don't need to expose in the results, only used for filtering
    or sorting."""
    hidden_fields = (
-        'name_sort',
+        '*.raw',
+        '*_sort',
        'boost',
        'hotness',
        # Translated content that is used for filtering purposes is stored
@ -140,11 +141,17 @@ class AddonIndexer(BaseSearchIndexer):
                        },
                    },
                    'modified': {'type': 'date', 'index': False},
+                    'name': {
+                        'type': 'text',
                        # Adding word-delimiter to split on camelcase and
                        # punctuation.
-                    'name': {'type': 'text',
-                             'analyzer': 'standardPlusWordDelimiter'},
+                        'analyzer': 'standardPlusWordDelimiter',
+                        'fields': {
                            # Turn off analysis on name so we can sort by it.
+                            'raw': {'type': 'keyword'}
+                        },
+                    },
+                    # TODO: Can be removed once we have `name.raw` indexed
                    'name_sort': {'type': 'keyword'},
                    'persona': {
                        'type': 'object',
--- a/src/olympia/addons/tests/test_views.py
+++ b/src/olympia/addons/tests/test_views.py
@ -2218,7 +2218,7 @@ class TestAddonSearchView(ESTestCase):
        qset = AddonSearchView().get_queryset()

        assert set(qset.to_dict()['_source']['excludes']) == set(
-            ('name_sort', 'boost', 'hotness', 'name', 'description',
+            ('*.raw', '*_sort', 'boost', 'hotness', 'name', 'description',
             'name_l10n_*', 'description_l10n_*', 'summary', 'summary_l10n_*')
        )

--- a/src/olympia/search/filters.py
+++ b/src/olympia/search/filters.py
@ -272,9 +272,20 @@ class SearchQueryFilter(BaseFilterBackend):

        # Apply rules to search on few base fields. Some might not be present
        # in every document type / indexes.
-        for k, v in rules:
+        for query_cls, opts in rules:
            for field in ('name', 'slug', 'listed_authors.name'):
-                should.append(k(**{field: v}))
+                should.append(query_cls(**{field: opts}))
+
+        # Exact matches need to be queried against a non-analyzed field. Let's
+        # do a term query on `name.raw` for an exact match against the add-on
+        # name and boost it since this is likely what the user wants.
+        # Use a super-high boost to avoid `description` or `summary`
+        # getting in our way.
+        should.append(query.Term(**{
+            'name.raw': {
+                'value': search_query, 'boost': 100
+            }
+        }))

        # For name, also search in translated field with the right language
        # and analyzer.
--- a/src/olympia/search/tests/test_filters.py
+++ b/src/olympia/search/tests/test_filters.py
@ -134,6 +134,20 @@ class TestQueryFilter(FilterTestsBase):
            ]}}
        }

+    def test_q_exact(self):
+        qs = self._filter(data={'q': 'Adblock Plus'})
+        should = qs['query']['function_score']['query']['bool']['should']
+
+        expected = {
+            'term': {
+                'name.raw': {
+                    'boost': 100, 'value': u'adblock plus',
+                }
+            }
+        }
+
+        assert expected in should
+

 class TestReviewedContentFilter(FilterTestsBase):

--- a/src/olympia/search/tests/test_views.py
+++ b/src/olympia/search/tests/test_views.py
@ -727,29 +727,29 @@ class TestSearchResultScoring(ESTestCase):
        self.refresh()

        response = self.client.get(self.url, {'q': 'merge windows'})
-        result = self.get_results(response)
+        results = self.get_results(response)

        # Doesn't match "All Downloader Professional"
-        assert addons[2].pk not in result
+        assert addons[2].pk not in results

        # Matches both "Merge Windows" and "Merge All Windows" but can't
        # correctly predict their exact scoring since we don't have
        # an exact match that would prefer 'merge windows'. Both should be
        # the first two matches though.
-        assert addons[1].pk in result[:2]
-        assert addons[0].pk in result[:2]
+        assert addons[1].pk in results[:2]
+        assert addons[0].pk in results[:2]

        response = self.client.get(self.url, {'q': 'merge all windows'})
-        result = self.get_results(response)
+        results = self.get_results(response)

        # Make sure we match 'All Downloader Professional' but it's
        # term match frequency is much lower than the other two so it's
        # last.
-        assert addons[2].pk == result[-1]
+        assert addons[2].pk == results[-1]

        # Other two are first rank again.
-        assert addons[1].pk in result[:2]
-        assert addons[0].pk in result[:2]
+        assert addons[1].pk in results[:2]
+        assert addons[0].pk in results[:2]

    def test_score_boost_name_match_slop(self):
        addon = amo.tests.addon_factory(
@ -760,9 +760,54 @@ class TestSearchResultScoring(ESTestCase):

        # direct match
        response = self.client.get(self.url, {'q': 'merge windows'})
-        result = self.get_results(response)
+        results = self.get_results(response)

-        assert result[0] == addon.pk
+        assert results[0] == addon.pk
+
+    def test_score_boost_exact_match(self):
+        """Test that we rank exact matches at the top."""
+        addons = [
+            amo.tests.addon_factory(
+                name='test addon test11', type=amo.ADDON_EXTENSION,
+                average_daily_users=0, weekly_downloads=0),
+            amo.tests.addon_factory(
+                name='test addon test21', type=amo.ADDON_EXTENSION,
+                average_daily_users=0, weekly_downloads=0),
+            amo.tests.addon_factory(
+                name='test addon test31', type=amo.ADDON_EXTENSION,
+                average_daily_users=0, weekly_downloads=0),
+        ]
+
+        self.refresh()
+
+        response = self.client.get(self.url, {'q': 'test addon test21'})
+        results = self.get_results(response)
+
+        assert results[0] == addons[1].pk
+
+    def test_score_boost_exact_match_description_hijack(self):
+        """Test that a description can't hijack an exact name match."""
+        addons = [
+            amo.tests.addon_factory(
+                name='1-Click YouTube Video Download',
+                type=amo.ADDON_EXTENSION,
+                average_daily_users=566337, weekly_downloads=150000,
+                description=(
+                    'button, click that button, 1-Click Youtube Video '
+                    'Downloader is a click click great tool')),
+            amo.tests.addon_factory(
+                name='Amazon 1-Click Lock', type=amo.ADDON_EXTENSION,
+                average_daily_users=50, weekly_downloads=0),
+        ]
+
+        self.refresh()
+
+        response = self.client.get(self.url, {
+            'q': 'Amazon 1-Click Lock'
+        })
+        results = self.get_results(response)
+
+        assert results[0] == addons[1].pk


 class TestPersonaSearch(SearchBase):