Search: Implement exact matching. (#6895)
* Search: Implement exact matching. Fixes #6837. This re-uses the existing `name_sort` field, which is a non-analyzed version of `name`, as needed for exact matches.
* Fix flake8
* Add another test for exact matching
* Add a test for description hijacking
* Use new name.raw field
* Remove l10n test for now, it only works accidentally.
* Fix comment
* Fix tests
This commit is contained in:
Родитель
0af406cbdc
Коммит
b8749341b3
|
@ -22,7 +22,8 @@ class AddonIndexer(BaseSearchIndexer):
|
|||
"""Fields we don't need to expose in the results, only used for filtering
|
||||
or sorting."""
|
||||
hidden_fields = (
|
||||
'name_sort',
|
||||
'*.raw',
|
||||
'*_sort',
|
||||
'boost',
|
||||
'hotness',
|
||||
# Translated content that is used for filtering purposes is stored
|
||||
|
@ -140,11 +141,17 @@ class AddonIndexer(BaseSearchIndexer):
|
|||
},
|
||||
},
|
||||
'modified': {'type': 'date', 'index': False},
|
||||
# Adding word-delimiter to split on camelcase and
|
||||
# punctuation.
|
||||
'name': {'type': 'text',
|
||||
'analyzer': 'standardPlusWordDelimiter'},
|
||||
# Turn off analysis on name so we can sort by it.
|
||||
'name': {
|
||||
'type': 'text',
|
||||
# Adding word-delimiter to split on camelcase and
|
||||
# punctuation.
|
||||
'analyzer': 'standardPlusWordDelimiter',
|
||||
'fields': {
|
||||
# Turn off analysis on name so we can sort by it.
|
||||
'raw': {'type': 'keyword'}
|
||||
},
|
||||
},
|
||||
# TODO: Can be removed once we have `name.raw` indexed
|
||||
'name_sort': {'type': 'keyword'},
|
||||
'persona': {
|
||||
'type': 'object',
|
||||
|
|
|
@ -2218,7 +2218,7 @@ class TestAddonSearchView(ESTestCase):
|
|||
qset = AddonSearchView().get_queryset()
|
||||
|
||||
assert set(qset.to_dict()['_source']['excludes']) == set(
|
||||
('name_sort', 'boost', 'hotness', 'name', 'description',
|
||||
('*.raw', '*_sort', 'boost', 'hotness', 'name', 'description',
|
||||
'name_l10n_*', 'description_l10n_*', 'summary', 'summary_l10n_*')
|
||||
)
|
||||
|
||||
|
|
|
@ -272,9 +272,20 @@ class SearchQueryFilter(BaseFilterBackend):
|
|||
|
||||
# Apply rules to search on few base fields. Some might not be present
|
||||
# in every document type / indexes.
|
||||
for k, v in rules:
|
||||
for query_cls, opts in rules:
|
||||
for field in ('name', 'slug', 'listed_authors.name'):
|
||||
should.append(k(**{field: v}))
|
||||
should.append(query_cls(**{field: opts}))
|
||||
|
||||
# Exact matches need to be queried against a non-analyzed field. Let's
|
||||
# do a term query on `name.raw` for an exact match against the add-on
|
||||
# name and boost it since this is likely what the user wants.
|
||||
# Use a super-high boost to avoid `description` or `summary`
|
||||
# getting in our way.
|
||||
should.append(query.Term(**{
|
||||
'name.raw': {
|
||||
'value': search_query, 'boost': 100
|
||||
}
|
||||
}))
|
||||
|
||||
# For name, also search in translated field with the right language
|
||||
# and analyzer.
|
||||
|
|
|
@ -134,6 +134,20 @@ class TestQueryFilter(FilterTestsBase):
|
|||
]}}
|
||||
}
|
||||
|
||||
def test_q_exact(self):
|
||||
qs = self._filter(data={'q': 'Adblock Plus'})
|
||||
should = qs['query']['function_score']['query']['bool']['should']
|
||||
|
||||
expected = {
|
||||
'term': {
|
||||
'name.raw': {
|
||||
'boost': 100, 'value': u'adblock plus',
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert expected in should
|
||||
|
||||
|
||||
class TestReviewedContentFilter(FilterTestsBase):
|
||||
|
||||
|
|
|
@ -727,29 +727,29 @@ class TestSearchResultScoring(ESTestCase):
|
|||
self.refresh()
|
||||
|
||||
response = self.client.get(self.url, {'q': 'merge windows'})
|
||||
result = self.get_results(response)
|
||||
results = self.get_results(response)
|
||||
|
||||
# Doesn't match "All Downloader Professional"
|
||||
assert addons[2].pk not in result
|
||||
assert addons[2].pk not in results
|
||||
|
||||
# Matches both "Merge Windows" and "Merge All Windows" but can't
|
||||
# correctly predict their exact scoring since we don't have
|
||||
# an exact match that would prefer 'merge windows'. Both should be
|
||||
# the first two matches though.
|
||||
assert addons[1].pk in result[:2]
|
||||
assert addons[0].pk in result[:2]
|
||||
assert addons[1].pk in results[:2]
|
||||
assert addons[0].pk in results[:2]
|
||||
|
||||
response = self.client.get(self.url, {'q': 'merge all windows'})
|
||||
result = self.get_results(response)
|
||||
results = self.get_results(response)
|
||||
|
||||
# Make sure we match 'All Downloader Professional' but its
|
||||
# term match frequency is much lower than the other two so it's
|
||||
# last.
|
||||
assert addons[2].pk == result[-1]
|
||||
assert addons[2].pk == results[-1]
|
||||
|
||||
# Other two are first rank again.
|
||||
assert addons[1].pk in result[:2]
|
||||
assert addons[0].pk in result[:2]
|
||||
assert addons[1].pk in results[:2]
|
||||
assert addons[0].pk in results[:2]
|
||||
|
||||
def test_score_boost_name_match_slop(self):
|
||||
addon = amo.tests.addon_factory(
|
||||
|
@ -760,9 +760,54 @@ class TestSearchResultScoring(ESTestCase):
|
|||
|
||||
# direct match
|
||||
response = self.client.get(self.url, {'q': 'merge windows'})
|
||||
result = self.get_results(response)
|
||||
results = self.get_results(response)
|
||||
|
||||
assert result[0] == addon.pk
|
||||
assert results[0] == addon.pk
|
||||
|
||||
def test_score_boost_exact_match(self):
|
||||
"""Test that we rank exact matches at the top."""
|
||||
addons = [
|
||||
amo.tests.addon_factory(
|
||||
name='test addon test11', type=amo.ADDON_EXTENSION,
|
||||
average_daily_users=0, weekly_downloads=0),
|
||||
amo.tests.addon_factory(
|
||||
name='test addon test21', type=amo.ADDON_EXTENSION,
|
||||
average_daily_users=0, weekly_downloads=0),
|
||||
amo.tests.addon_factory(
|
||||
name='test addon test31', type=amo.ADDON_EXTENSION,
|
||||
average_daily_users=0, weekly_downloads=0),
|
||||
]
|
||||
|
||||
self.refresh()
|
||||
|
||||
response = self.client.get(self.url, {'q': 'test addon test21'})
|
||||
results = self.get_results(response)
|
||||
|
||||
assert results[0] == addons[1].pk
|
||||
|
||||
def test_score_boost_exact_match_description_hijack(self):
|
||||
"""Test that exact name matches outrank description matches."""
|
||||
addons = [
|
||||
amo.tests.addon_factory(
|
||||
name='1-Click YouTube Video Download',
|
||||
type=amo.ADDON_EXTENSION,
|
||||
average_daily_users=566337, weekly_downloads=150000,
|
||||
description=(
|
||||
'button, click that button, 1-Click Youtube Video '
|
||||
'Downloader is a click click great tool')),
|
||||
amo.tests.addon_factory(
|
||||
name='Amazon 1-Click Lock', type=amo.ADDON_EXTENSION,
|
||||
average_daily_users=50, weekly_downloads=0),
|
||||
]
|
||||
|
||||
self.refresh()
|
||||
|
||||
response = self.client.get(self.url, {
|
||||
'q': 'Amazon 1-Click Lock'
|
||||
})
|
||||
results = self.get_results(response)
|
||||
|
||||
assert results[0] == addons[1].pk
|
||||
|
||||
|
||||
class TestPersonaSearch(SearchBase):
|
||||
|
|
Загрузка…
Ссылка в новой задаче