Search: Implement exact matching. (#6895)
* Search: Implement exact matching. Fixes #6837. This re-uses the existing `name_sort` field, which is a non-analyzed version of `name`, as needed for exact matches. * Fix flake8 * Add another test for exact matching * Add test that tests for description hijack * Use new name.raw field * Remove l10n test for now, it only works accidentally. * Fix comment * Fix tests
This commit is contained in:
Родитель
0af406cbdc
Коммит
b8749341b3
|
@ -22,7 +22,8 @@ class AddonIndexer(BaseSearchIndexer):
|
||||||
"""Fields we don't need to expose in the results, only used for filtering
|
"""Fields we don't need to expose in the results, only used for filtering
|
||||||
or sorting."""
|
or sorting."""
|
||||||
hidden_fields = (
|
hidden_fields = (
|
||||||
'name_sort',
|
'*.raw',
|
||||||
|
'*_sort',
|
||||||
'boost',
|
'boost',
|
||||||
'hotness',
|
'hotness',
|
||||||
# Translated content that is used for filtering purposes is stored
|
# Translated content that is used for filtering purposes is stored
|
||||||
|
@ -140,11 +141,17 @@ class AddonIndexer(BaseSearchIndexer):
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
'modified': {'type': 'date', 'index': False},
|
'modified': {'type': 'date', 'index': False},
|
||||||
# Adding word-delimiter to split on camelcase and
|
'name': {
|
||||||
# punctuation.
|
'type': 'text',
|
||||||
'name': {'type': 'text',
|
# Adding word-delimiter to split on camelcase and
|
||||||
'analyzer': 'standardPlusWordDelimiter'},
|
# punctuation.
|
||||||
# Turn off analysis on name so we can sort by it.
|
'analyzer': 'standardPlusWordDelimiter',
|
||||||
|
'fields': {
|
||||||
|
# Turn off analysis on name so we can sort by it.
|
||||||
|
'raw': {'type': 'keyword'}
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# TODO: Can be removed once we have `name.raw` indexed
|
||||||
'name_sort': {'type': 'keyword'},
|
'name_sort': {'type': 'keyword'},
|
||||||
'persona': {
|
'persona': {
|
||||||
'type': 'object',
|
'type': 'object',
|
||||||
|
|
|
@ -2218,7 +2218,7 @@ class TestAddonSearchView(ESTestCase):
|
||||||
qset = AddonSearchView().get_queryset()
|
qset = AddonSearchView().get_queryset()
|
||||||
|
|
||||||
assert set(qset.to_dict()['_source']['excludes']) == set(
|
assert set(qset.to_dict()['_source']['excludes']) == set(
|
||||||
('name_sort', 'boost', 'hotness', 'name', 'description',
|
('*.raw', '*_sort', 'boost', 'hotness', 'name', 'description',
|
||||||
'name_l10n_*', 'description_l10n_*', 'summary', 'summary_l10n_*')
|
'name_l10n_*', 'description_l10n_*', 'summary', 'summary_l10n_*')
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -272,9 +272,20 @@ class SearchQueryFilter(BaseFilterBackend):
|
||||||
|
|
||||||
# Apply rules to search on a few base fields. Some might not be present
|
# Apply rules to search on a few base fields. Some might not be present
|
||||||
# in every document type / index.
|
# in every document type / index.
|
||||||
for k, v in rules:
|
for query_cls, opts in rules:
|
||||||
for field in ('name', 'slug', 'listed_authors.name'):
|
for field in ('name', 'slug', 'listed_authors.name'):
|
||||||
should.append(k(**{field: v}))
|
should.append(query_cls(**{field: opts}))
|
||||||
|
|
||||||
|
# Exact matches need to be queried against a non-analyzed field. Let's
|
||||||
|
# do a term query on `name.raw` for an exact match against the add-on
|
||||||
|
# name and boost it since this is likely what the user wants.
|
||||||
|
# Use a super-high boost to avoid `description` or `summary`
|
||||||
|
# getting in our way.
|
||||||
|
should.append(query.Term(**{
|
||||||
|
'name.raw': {
|
||||||
|
'value': search_query, 'boost': 100
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
|
||||||
# For name, also search in translated field with the right language
|
# For name, also search in translated field with the right language
|
||||||
# and analyzer.
|
# and analyzer.
|
||||||
|
|
|
@ -134,6 +134,20 @@ class TestQueryFilter(FilterTestsBase):
|
||||||
]}}
|
]}}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def test_q_exact(self):
|
||||||
|
qs = self._filter(data={'q': 'Adblock Plus'})
|
||||||
|
should = qs['query']['function_score']['query']['bool']['should']
|
||||||
|
|
||||||
|
expected = {
|
||||||
|
'term': {
|
||||||
|
'name.raw': {
|
||||||
|
'boost': 100, 'value': u'adblock plus',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert expected in should
|
||||||
|
|
||||||
|
|
||||||
class TestReviewedContentFilter(FilterTestsBase):
|
class TestReviewedContentFilter(FilterTestsBase):
|
||||||
|
|
||||||
|
|
|
@ -727,29 +727,29 @@ class TestSearchResultScoring(ESTestCase):
|
||||||
self.refresh()
|
self.refresh()
|
||||||
|
|
||||||
response = self.client.get(self.url, {'q': 'merge windows'})
|
response = self.client.get(self.url, {'q': 'merge windows'})
|
||||||
result = self.get_results(response)
|
results = self.get_results(response)
|
||||||
|
|
||||||
# Doesn't match "All Downloader Professional"
|
# Doesn't match "All Downloader Professional"
|
||||||
assert addons[2].pk not in result
|
assert addons[2].pk not in results
|
||||||
|
|
||||||
# Matches both "Merge Windows" and "Merge All Windows" but can't
|
# Matches both "Merge Windows" and "Merge All Windows" but can't
|
||||||
# correctly predict their exact scoring since we don't have
|
# correctly predict their exact scoring since we don't have
|
||||||
# an exact match that would prefer 'merge windows'. Both should be
|
# an exact match that would prefer 'merge windows'. Both should be
|
||||||
# the first two matches though.
|
# the first two matches though.
|
||||||
assert addons[1].pk in result[:2]
|
assert addons[1].pk in results[:2]
|
||||||
assert addons[0].pk in result[:2]
|
assert addons[0].pk in results[:2]
|
||||||
|
|
||||||
response = self.client.get(self.url, {'q': 'merge all windows'})
|
response = self.client.get(self.url, {'q': 'merge all windows'})
|
||||||
result = self.get_results(response)
|
results = self.get_results(response)
|
||||||
|
|
||||||
# Make sure we match 'All Downloader Professional' but its
|
# Make sure we match 'All Downloader Professional' but its
|
||||||
# term match frequency is much lower than the other two so it's
|
# term match frequency is much lower than the other two so it's
|
||||||
# last.
|
# last.
|
||||||
assert addons[2].pk == result[-1]
|
assert addons[2].pk == results[-1]
|
||||||
|
|
||||||
# Other two are first rank again.
|
# Other two are first rank again.
|
||||||
assert addons[1].pk in result[:2]
|
assert addons[1].pk in results[:2]
|
||||||
assert addons[0].pk in result[:2]
|
assert addons[0].pk in results[:2]
|
||||||
|
|
||||||
def test_score_boost_name_match_slop(self):
|
def test_score_boost_name_match_slop(self):
|
||||||
addon = amo.tests.addon_factory(
|
addon = amo.tests.addon_factory(
|
||||||
|
@ -760,9 +760,54 @@ class TestSearchResultScoring(ESTestCase):
|
||||||
|
|
||||||
# direct match
|
# direct match
|
||||||
response = self.client.get(self.url, {'q': 'merge windows'})
|
response = self.client.get(self.url, {'q': 'merge windows'})
|
||||||
result = self.get_results(response)
|
results = self.get_results(response)
|
||||||
|
|
||||||
assert result[0] == addon.pk
|
assert results[0] == addon.pk
|
||||||
|
|
||||||
|
def test_score_boost_exact_match(self):
|
||||||
|
"""Test that we rank exact matches at the top."""
|
||||||
|
addons = [
|
||||||
|
amo.tests.addon_factory(
|
||||||
|
name='test addon test11', type=amo.ADDON_EXTENSION,
|
||||||
|
average_daily_users=0, weekly_downloads=0),
|
||||||
|
amo.tests.addon_factory(
|
||||||
|
name='test addon test21', type=amo.ADDON_EXTENSION,
|
||||||
|
average_daily_users=0, weekly_downloads=0),
|
||||||
|
amo.tests.addon_factory(
|
||||||
|
name='test addon test31', type=amo.ADDON_EXTENSION,
|
||||||
|
average_daily_users=0, weekly_downloads=0),
|
||||||
|
]
|
||||||
|
|
||||||
|
self.refresh()
|
||||||
|
|
||||||
|
response = self.client.get(self.url, {'q': 'test addon test21'})
|
||||||
|
results = self.get_results(response)
|
||||||
|
|
||||||
|
assert results[0] == addons[1].pk
|
||||||
|
|
||||||
|
def test_score_boost_exact_match_description_hijack(self):
|
||||||
|
"""Test that an exact name match beats a description keyword hijack."""
|
||||||
|
addons = [
|
||||||
|
amo.tests.addon_factory(
|
||||||
|
name='1-Click YouTube Video Download',
|
||||||
|
type=amo.ADDON_EXTENSION,
|
||||||
|
average_daily_users=566337, weekly_downloads=150000,
|
||||||
|
description=(
|
||||||
|
'button, click that button, 1-Click Youtube Video '
|
||||||
|
'Downloader is a click click great tool')),
|
||||||
|
amo.tests.addon_factory(
|
||||||
|
name='Amazon 1-Click Lock', type=amo.ADDON_EXTENSION,
|
||||||
|
average_daily_users=50, weekly_downloads=0),
|
||||||
|
]
|
||||||
|
|
||||||
|
self.refresh()
|
||||||
|
|
||||||
|
response = self.client.get(self.url, {
|
||||||
|
'q': 'Amazon 1-Click Lock'
|
||||||
|
})
|
||||||
|
results = self.get_results(response)
|
||||||
|
|
||||||
|
assert results[0] == addons[1].pk
|
||||||
|
|
||||||
|
|
||||||
class TestPersonaSearch(SearchBase):
|
class TestPersonaSearch(SearchBase):
|
||||||
|
|
Загрузка…
Ссылка в новой задаче