Search: Implement exact matching. (#6895)

* Search: Implement exact matching.

Fixes #6837

This re-uses the existing `name_sort` field that is a not analyzed
version of `name` which is needed for exact matches.

* Fix flake8

* Add another test for exact matching

* Add test that tests for description hijack

* Use new name.raw field

* Remove l10n test for now, it only works accidentally.

* Fix comment

* Fix tests
This commit is contained in:
Christopher Grebs 2017-11-13 18:54:02 +01:00 коммит произвёл GitHub
Родитель 0af406cbdc
Коммит b8749341b3
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
5 изменённых файлов: 96 добавлений и 19 удалений

Просмотреть файл

@ -22,7 +22,8 @@ class AddonIndexer(BaseSearchIndexer):
"""Fields we don't need to expose in the results, only used for filtering
or sorting."""
hidden_fields = (
'name_sort',
'*.raw',
'*_sort',
'boost',
'hotness',
# Translated content that is used for filtering purposes is stored
@ -140,11 +141,17 @@ class AddonIndexer(BaseSearchIndexer):
},
},
'modified': {'type': 'date', 'index': False},
'name': {
'type': 'text',
# Adding word-delimiter to split on camelcase and
# punctuation.
'name': {'type': 'text',
'analyzer': 'standardPlusWordDelimiter'},
'analyzer': 'standardPlusWordDelimiter',
'fields': {
# Turn off analysis on name so we can sort by it.
'raw': {'type': 'keyword'}
},
},
# TODO: Can be removed once we have `name.raw` indexed
'name_sort': {'type': 'keyword'},
'persona': {
'type': 'object',

Просмотреть файл

@ -2218,7 +2218,7 @@ class TestAddonSearchView(ESTestCase):
qset = AddonSearchView().get_queryset()
assert set(qset.to_dict()['_source']['excludes']) == set(
('name_sort', 'boost', 'hotness', 'name', 'description',
('*.raw', '*_sort', 'boost', 'hotness', 'name', 'description',
'name_l10n_*', 'description_l10n_*', 'summary', 'summary_l10n_*')
)

Просмотреть файл

@ -272,9 +272,20 @@ class SearchQueryFilter(BaseFilterBackend):
# Apply rules to search on few base fields. Some might not be present
# in every document type / indexes.
for k, v in rules:
for query_cls, opts in rules:
for field in ('name', 'slug', 'listed_authors.name'):
should.append(k(**{field: v}))
should.append(query_cls(**{field: opts}))
# Exact matches need to be queried against a non-analyzed field. Let's
# do a term query on `name.raw` for an exact match against the add-on
# name and boost it since this is likely what the user wants.
# Use a super-high boost to avoid `description` or `summary`
# getting in our way.
should.append(query.Term(**{
'name.raw': {
'value': search_query, 'boost': 100
}
}))
# For name, also search in translated field with the right language
# and analyzer.

Просмотреть файл

@ -134,6 +134,20 @@ class TestQueryFilter(FilterTestsBase):
]}}
}
def test_q_exact(self):
qs = self._filter(data={'q': 'Adblock Plus'})
should = qs['query']['function_score']['query']['bool']['should']
expected = {
'term': {
'name.raw': {
'boost': 100, 'value': u'adblock plus',
}
}
}
assert expected in should
class TestReviewedContentFilter(FilterTestsBase):

Просмотреть файл

@ -727,29 +727,29 @@ class TestSearchResultScoring(ESTestCase):
self.refresh()
response = self.client.get(self.url, {'q': 'merge windows'})
result = self.get_results(response)
results = self.get_results(response)
# Doesn't match "All Downloader Professional"
assert addons[2].pk not in result
assert addons[2].pk not in results
# Matches both "Merge Windows" and "Merge All Windows" but can't
# correctly predict their exact scoring since we don't have
# an exact match that would prefer 'merge windows'. Both should be
# the first two matches though.
assert addons[1].pk in result[:2]
assert addons[0].pk in result[:2]
assert addons[1].pk in results[:2]
assert addons[0].pk in results[:2]
response = self.client.get(self.url, {'q': 'merge all windows'})
result = self.get_results(response)
results = self.get_results(response)
# Make sure we match 'All Downloader Professional' but it's
# term match frequency is much lower than the other two so it's
# last.
assert addons[2].pk == result[-1]
assert addons[2].pk == results[-1]
# Other two are first rank again.
assert addons[1].pk in result[:2]
assert addons[0].pk in result[:2]
assert addons[1].pk in results[:2]
assert addons[0].pk in results[:2]
def test_score_boost_name_match_slop(self):
addon = amo.tests.addon_factory(
@ -760,9 +760,54 @@ class TestSearchResultScoring(ESTestCase):
# direct match
response = self.client.get(self.url, {'q': 'merge windows'})
result = self.get_results(response)
results = self.get_results(response)
assert result[0] == addon.pk
assert results[0] == addon.pk
def test_score_boost_exact_match(self):
"""Test that we rank exact matches at the top."""
addons = [
amo.tests.addon_factory(
name='test addon test11', type=amo.ADDON_EXTENSION,
average_daily_users=0, weekly_downloads=0),
amo.tests.addon_factory(
name='test addon test21', type=amo.ADDON_EXTENSION,
average_daily_users=0, weekly_downloads=0),
amo.tests.addon_factory(
name='test addon test31', type=amo.ADDON_EXTENSION,
average_daily_users=0, weekly_downloads=0),
]
self.refresh()
response = self.client.get(self.url, {'q': 'test addon test21'})
results = self.get_results(response)
assert results[0] == addons[1].pk
def test_score_boost_exact_match_description_hijack(self):
"""Test that we rank exact matches at the top."""
addons = [
amo.tests.addon_factory(
name='1-Click YouTube Video Download',
type=amo.ADDON_EXTENSION,
average_daily_users=566337, weekly_downloads=150000,
description=(
'button, click that button, 1-Click Youtube Video '
'Downloader is a click click great tool')),
amo.tests.addon_factory(
name='Amazon 1-Click Lock', type=amo.ADDON_EXTENSION,
average_daily_users=50, weekly_downloads=0),
]
self.refresh()
response = self.client.get(self.url, {
'q': 'Amazon 1-Click Lock'
})
results = self.get_results(response)
assert results[0] == addons[1].pk
class TestPersonaSearch(SearchBase):