Search: Implement exact matching. (#6895)

* Search: Implement exact matching. Fixes #6837. This re-uses the existing `name_sort` field, which is a non-analyzed version of `name`; a non-analyzed field is needed for exact matches.
* Fix flake8
* Add another test for exact matching
* Add a test for description hijacking
* Use new name.raw field
* Remove l10n test for now, it only works accidentally.
* Fix comment
* Fix tests
2017-11-13 18:54:02 +01:00 · 2017-11-13 18:54:02 +01:00 · b8749341b3
--- a/src/olympia/addons/indexers.py
+++ b/src/olympia/addons/indexers.py
@ -22,7 +22,8 @@ class AddonIndexer(BaseSearchIndexer):
    """Fields we don't need to expose in the results, only used for filtering
    or sorting."""
    hidden_fields = (
-        'name_sort',
+        '*.raw',
        '*_sort',
        'boost',
        'hotness',
        # Translated content that is used for filtering purposes is stored
@ -140,11 +141,17 @@ class AddonIndexer(BaseSearchIndexer):
                        },
                    },
                    'modified': {'type': 'date', 'index': False},
-                    # Adding word-delimiter to split on camelcase and
+                    'name': {
-                    # punctuation.
+                        'type': 'text',
-                    'name': {'type': 'text',
+                        # Adding word-delimiter to split on camelcase and
-                             'analyzer': 'standardPlusWordDelimiter'},
+                        # punctuation.
-                    # Turn off analysis on name so we can sort by it.
+                        'analyzer': 'standardPlusWordDelimiter',
                        'fields': {
                            # Turn off analysis on name so we can sort by it.
                            'raw': {'type': 'keyword'}
                        },
                    },
                    # TODO: Can be removed once we have `name.raw` indexed
                    'name_sort': {'type': 'keyword'},
                    'persona': {
                        'type': 'object',
--- a/src/olympia/addons/tests/test_views.py
+++ b/src/olympia/addons/tests/test_views.py
@ -2218,7 +2218,7 @@ class TestAddonSearchView(ESTestCase):
        qset = AddonSearchView().get_queryset()
        assert set(qset.to_dict()['_source']['excludes']) == set(
-            ('name_sort', 'boost', 'hotness', 'name', 'description',
+            ('*.raw', '*_sort', 'boost', 'hotness', 'name', 'description',
             'name_l10n_*', 'description_l10n_*', 'summary', 'summary_l10n_*')
        )
--- a/src/olympia/search/filters.py
+++ b/src/olympia/search/filters.py
@ -272,9 +272,20 @@ class SearchQueryFilter(BaseFilterBackend):
        # Apply rules to search on few base fields. Some might not be present
        # in every document type / indexes.
-        for k, v in rules:
+        for query_cls, opts in rules:
            for field in ('name', 'slug', 'listed_authors.name'):
-                should.append(k(**{field: v}))
+                should.append(query_cls(**{field: opts}))
        # Exact matches need to be queried against a non-analyzed field. Let's
        # do a term query on `name.raw` for an exact match against the add-on
        # name and boost it since this is likely what the user wants.
        # Use a super-high boost to avoid `description` or `summary`
        # getting in our way.
        should.append(query.Term(**{
            'name.raw': {
                'value': search_query, 'boost': 100
            }
        }))
        # For name, also search in translated field with the right language
        # and analyzer.
--- a/src/olympia/search/tests/test_filters.py
+++ b/src/olympia/search/tests/test_filters.py
@ -134,6 +134,20 @@ class TestQueryFilter(FilterTestsBase):
            ]}}
        }
    def test_q_exact(self):
        """A boosted `term` clause on `name.raw` is part of the `should` list.

        The query is sent as 'Adblock Plus'; the clause is expected to carry
        the lower-cased value 'adblock plus' with a boost of 100.
        """
        filtered = self._filter(data={'q': 'Adblock Plus'})
        bool_query = filtered['query']['function_score']['query']['bool']
        exact_clause = {
            'term': {
                'name.raw': {'value': u'adblock plus', 'boost': 100},
            },
        }
        assert exact_clause in bool_query['should']
 class TestReviewedContentFilter(FilterTestsBase):
--- a/src/olympia/search/tests/test_views.py
+++ b/src/olympia/search/tests/test_views.py
@ -727,29 +727,29 @@ class TestSearchResultScoring(ESTestCase):
        self.refresh()
        response = self.client.get(self.url, {'q': 'merge windows'})
-        result = self.get_results(response)
+        results = self.get_results(response)
        # Doesn't match "All Downloader Professional"
-        assert addons[2].pk not in result
+        assert addons[2].pk not in results
        # Matches both "Merge Windows" and "Merge All Windows" but can't
        # correctly predict their exact scoring since we don't have
        # an exact match that would prefer 'merge windows'. Both should be
        # the first two matches though.
-        assert addons[1].pk in result[:2]
+        assert addons[1].pk in results[:2]
-        assert addons[0].pk in result[:2]
+        assert addons[0].pk in results[:2]
        response = self.client.get(self.url, {'q': 'merge all windows'})
-        result = self.get_results(response)
+        results = self.get_results(response)
        # Make sure we match 'All Downloader Professional' but its
        # term match frequency is much lower than the other two so it's
        # last.
-        assert addons[2].pk == result[-1]
+        assert addons[2].pk == results[-1]
        # Other two are first rank again.
-        assert addons[1].pk in result[:2]
+        assert addons[1].pk in results[:2]
-        assert addons[0].pk in result[:2]
+        assert addons[0].pk in results[:2]
    def test_score_boost_name_match_slop(self):
        addon = amo.tests.addon_factory(
@ -760,9 +760,54 @@ class TestSearchResultScoring(ESTestCase):
        # direct match
        response = self.client.get(self.url, {'q': 'merge windows'})
-        result = self.get_results(response)
+        results = self.get_results(response)
-        assert result[0] == addon.pk
+        assert results[0] == addon.pk
    def test_score_boost_exact_match(self):
        """Test that an exact name match is ranked first.

        Three add-ons with near-identical names and no usage numbers are
        created; searching for the full name of the second one must return
        it as the first result.
        """
        names = (
            'test addon test11',
            'test addon test21',
            'test addon test31',
        )
        # Created in the same order as `names`, so indexes line up.
        created = [
            amo.tests.addon_factory(
                name=name, type=amo.ADDON_EXTENSION,
                average_daily_users=0, weekly_downloads=0)
            for name in names
        ]
        self.refresh()
        response = self.client.get(self.url, {'q': names[1]})
        ranked = self.get_results(response)
        assert ranked[0] == created[1].pk
    def test_score_boost_exact_match_description_hijack(self):
        """Test that an exact name match beats a popular partial match.

        The first add-on has far higher usage numbers and repeats some of the
        query terms in both its name and its description; the second add-on's
        name is exactly the search query and must still come out on top.
        """
        # Popular add-on that only partially overlaps with the query.
        amo.tests.addon_factory(
            name='1-Click YouTube Video Download',
            type=amo.ADDON_EXTENSION,
            average_daily_users=566337, weekly_downloads=150000,
            description=(
                'button, click that button, 1-Click Youtube Video '
                'Downloader is a click click great tool'))
        # Barely-used add-on whose name is the query itself.
        exact_match = amo.tests.addon_factory(
            name='Amazon 1-Click Lock', type=amo.ADDON_EXTENSION,
            average_daily_users=50, weekly_downloads=0)
        self.refresh()
        response = self.client.get(self.url, {'q': 'Amazon 1-Click Lock'})
        top_hit = self.get_results(response)[0]
        assert top_hit == exact_match.pk
 class TestPersonaSearch(SearchBase):