From b8749341b3bb09b958908d025b35c4df982ff50c Mon Sep 17 00:00:00 2001
From: Christopher Grebs <cg@webshox.org>
Date: Mon, 13 Nov 2017 18:54:02 +0100
Subject: [PATCH] Search: Implement exact matching. (#6895)

* Search: Implement exact matching.

Fixes #6837

This re-uses the existing `name_sort` field that is a not analyzed
version of `name` which is needed for exact matches.

* Fix flake8

* Add another test for exact matching

* Add test that tests for description hijack

* Use new name.raw field

* Remove l10n test for now, it only works accidentally.

* Fix comment

* Fix tests
---
 src/olympia/addons/indexers.py           | 19 ++++---
 src/olympia/addons/tests/test_views.py   |  2 +-
 src/olympia/search/filters.py            | 15 +++++-
 src/olympia/search/tests/test_filters.py | 14 +++++
 src/olympia/search/tests/test_views.py   | 65 ++++++++++++++++++++----
 5 files changed, 96 insertions(+), 19 deletions(-)

diff --git a/src/olympia/addons/indexers.py b/src/olympia/addons/indexers.py
index 645c7bdce0..744123f468 100644
--- a/src/olympia/addons/indexers.py
+++ b/src/olympia/addons/indexers.py
@@ -22,7 +22,8 @@ class AddonIndexer(BaseSearchIndexer):
     """Fields we don't need to expose in the results, only used for filtering
     or sorting."""
     hidden_fields = (
-        'name_sort',
+        '*.raw',
+        '*_sort',
         'boost',
         'hotness',
         # Translated content that is used for filtering purposes is stored
@@ -140,11 +141,17 @@ class AddonIndexer(BaseSearchIndexer):
                         },
                     },
                     'modified': {'type': 'date', 'index': False},
-                    # Adding word-delimiter to split on camelcase and
-                    # punctuation.
-                    'name': {'type': 'text',
-                             'analyzer': 'standardPlusWordDelimiter'},
-                    # Turn off analysis on name so we can sort by it.
+                    'name': {
+                        'type': 'text',
+                        # Adding word-delimiter to split on camelcase and
+                        # punctuation.
+                        'analyzer': 'standardPlusWordDelimiter',
+                        'fields': {
+                            # Turn off analysis on name so we can sort by it.
+                            'raw': {'type': 'keyword'}
+                        },
+                    },
+                    # TODO: Can be removed once we have `name.raw` indexed
                     'name_sort': {'type': 'keyword'},
                     'persona': {
                         'type': 'object',
diff --git a/src/olympia/addons/tests/test_views.py b/src/olympia/addons/tests/test_views.py
index 76f75b34e7..f872f90d61 100644
--- a/src/olympia/addons/tests/test_views.py
+++ b/src/olympia/addons/tests/test_views.py
@@ -2218,7 +2218,7 @@ class TestAddonSearchView(ESTestCase):
         qset = AddonSearchView().get_queryset()
 
         assert set(qset.to_dict()['_source']['excludes']) == set(
-            ('name_sort', 'boost', 'hotness', 'name', 'description',
+            ('*.raw', '*_sort', 'boost', 'hotness', 'name', 'description',
              'name_l10n_*', 'description_l10n_*', 'summary', 'summary_l10n_*')
         )
 
diff --git a/src/olympia/search/filters.py b/src/olympia/search/filters.py
index 7e2ed343e0..20ff7019e4 100644
--- a/src/olympia/search/filters.py
+++ b/src/olympia/search/filters.py
@@ -272,9 +272,20 @@ class SearchQueryFilter(BaseFilterBackend):
 
         # Apply rules to search on few base fields. Some might not be present
         # in every document type / indexes.
-        for k, v in rules:
+        for query_cls, opts in rules:
             for field in ('name', 'slug', 'listed_authors.name'):
-                should.append(k(**{field: v}))
+                should.append(query_cls(**{field: opts}))
+
+        # Exact matches need to be queried against a non-analyzed field. Let's
+        # do a term query on `name.raw` for an exact match against the add-on
+        # name and boost it since this is likely what the user wants.
+        # Use a super-high boost to avoid `description` or `summary`
+        # getting in our way.
+        should.append(query.Term(**{
+            'name.raw': {
+                'value': search_query, 'boost': 100
+            }
+        }))
 
         # For name, also search in translated field with the right language
         # and analyzer.
diff --git a/src/olympia/search/tests/test_filters.py b/src/olympia/search/tests/test_filters.py
index abf0a5d4bc..40d4c57ec7 100644
--- a/src/olympia/search/tests/test_filters.py
+++ b/src/olympia/search/tests/test_filters.py
@@ -134,6 +134,20 @@ class TestQueryFilter(FilterTestsBase):
             ]}}
         }
 
+    def test_q_exact(self):
+        qs = self._filter(data={'q': 'Adblock Plus'})
+        should = qs['query']['function_score']['query']['bool']['should']
+
+        expected = {
+            'term': {
+                'name.raw': {
+                    'boost': 100, 'value': u'adblock plus',
+                }
+            }
+        }
+
+        assert expected in should
+
 
 class TestReviewedContentFilter(FilterTestsBase):
 
diff --git a/src/olympia/search/tests/test_views.py b/src/olympia/search/tests/test_views.py
index 40bfd1328c..d1648514de 100644
--- a/src/olympia/search/tests/test_views.py
+++ b/src/olympia/search/tests/test_views.py
@@ -727,29 +727,29 @@ class TestSearchResultScoring(ESTestCase):
         self.refresh()
 
         response = self.client.get(self.url, {'q': 'merge windows'})
-        result = self.get_results(response)
+        results = self.get_results(response)
 
         # Doesn't match "All Downloader Professional"
-        assert addons[2].pk not in result
+        assert addons[2].pk not in results
 
         # Matches both "Merge Windows" and "Merge All Windows" but can't
         # correctly predict their exact scoring since we don't have
         # an exact match that would prefer 'merge windows'. Both should be
         # the first two matches though.
-        assert addons[1].pk in result[:2]
-        assert addons[0].pk in result[:2]
+        assert addons[1].pk in results[:2]
+        assert addons[0].pk in results[:2]
 
         response = self.client.get(self.url, {'q': 'merge all windows'})
-        result = self.get_results(response)
+        results = self.get_results(response)
 
         # Make sure we match 'All Downloader Professional' but it's
         # term match frequency is much lower than the other two so it's
         # last.
-        assert addons[2].pk == result[-1]
+        assert addons[2].pk == results[-1]
 
         # Other two are first rank again.
-        assert addons[1].pk in result[:2]
-        assert addons[0].pk in result[:2]
+        assert addons[1].pk in results[:2]
+        assert addons[0].pk in results[:2]
 
     def test_score_boost_name_match_slop(self):
         addon = amo.tests.addon_factory(
@@ -760,9 +760,54 @@ class TestSearchResultScoring(ESTestCase):
 
         # direct match
         response = self.client.get(self.url, {'q': 'merge windows'})
-        result = self.get_results(response)
+        results = self.get_results(response)
 
-        assert result[0] == addon.pk
+        assert results[0] == addon.pk
+
+    def test_score_boost_exact_match(self):
+        """Test that we rank exact matches at the top."""
+        addons = [
+            amo.tests.addon_factory(
+                name='test addon test11', type=amo.ADDON_EXTENSION,
+                average_daily_users=0, weekly_downloads=0),
+            amo.tests.addon_factory(
+                name='test addon test21', type=amo.ADDON_EXTENSION,
+                average_daily_users=0, weekly_downloads=0),
+            amo.tests.addon_factory(
+                name='test addon test31', type=amo.ADDON_EXTENSION,
+                average_daily_users=0, weekly_downloads=0),
+        ]
+
+        self.refresh()
+
+        response = self.client.get(self.url, {'q': 'test addon test21'})
+        results = self.get_results(response)
+
+        assert results[0] == addons[1].pk
+
+    def test_score_boost_exact_match_description_hijack(self):
+        """Test that we rank exact matches at the top."""
+        addons = [
+            amo.tests.addon_factory(
+                name='1-Click YouTube Video Download',
+                type=amo.ADDON_EXTENSION,
+                average_daily_users=566337, weekly_downloads=150000,
+                description=(
+                    'button, click that button, 1-Click Youtube Video '
+                    'Downloader is a click click great tool')),
+            amo.tests.addon_factory(
+                name='Amazon 1-Click Lock', type=amo.ADDON_EXTENSION,
+                average_daily_users=50, weekly_downloads=0),
+        ]
+
+        self.refresh()
+
+        response = self.client.get(self.url, {
+            'q': 'Amazon 1-Click Lock'
+        })
+        results = self.get_results(response)
+
+        assert results[0] == addons[1].pk
 
 
 class TestPersonaSearch(SearchBase):