Decrease prefix for fuzzy name matching in search, but force all terms to be found

This commit is contained in:
Mathieu Pillard 2018-11-16 12:29:45 +01:00
Родитель f2f98c7096
Коммит f3523b772f
4 изменённых файлов: 34 добавлений и 19 удалений

Просмотреть файл

@ -57,7 +57,7 @@ to a specific set of fields: add-on name and author(s) name.
3. A ``phrase`` match on ``name`` that allows swapped terms (``boost=8.0``, ``slop=1``)
4. A ``match`` on ``name``, using the standard text analyzer (``boost=6.0``, ``analyzer=standard``, ``operator=and``)
5. A ``prefix`` match on ``name`` (``boost=3.0``)
6. If a query is < 20 characters long, a fuzzy match on ``name`` (``boost=4.0``, ``prefix_length=4``, ``fuzziness=AUTO``)
6. If a query is < 20 characters long, a fuzzy match on ``name`` (``boost=4.0``, ``prefix_length=2``, ``fuzziness=AUTO``, ``operator=and``)
All rules except 1 and 2 are applied to both ``name`` and ``listed_authors.name``.

Просмотреть файл

@ -409,7 +409,7 @@ class SearchQueryFilter(BaseFilterBackend):
if len(search_query) < self.MAX_QUERY_LENGTH_FOR_FUZZY_SEARCH:
rules.append((query.Match, {
'query': search_query, 'boost': 4.0,
'prefix_length': 4, 'fuzziness': 'AUTO'}))
'prefix_length': 2, 'fuzziness': 'AUTO', 'operator': 'and'}))
# Apply all the rules we built above to name and listed_authors.name.
for query_cls, definition in rules:

Просмотреть файл

@ -162,8 +162,9 @@ class TestQueryFilter(FilterTestsBase):
expected = {
'match': {
'name': {
'boost': 4.0, 'prefix_length': 4, 'query': 'blah',
'boost': 4.0, 'prefix_length': 2, 'query': 'blah',
'fuzziness': 'AUTO', '_name': 'FuzzyMatch(name)',
'operator': 'and',
}
}
}
@ -175,8 +176,9 @@ class TestQueryFilter(FilterTestsBase):
expected = {
'match': {
'name': {
'boost': 4.0, 'prefix_length': 4, 'query': 'search terms',
'boost': 4.0, 'prefix_length': 2, 'query': 'search terms',
'fuzziness': 'AUTO', '_name': 'FuzzyMatch(name)',
'operator': 'and',
}
}
}
@ -193,9 +195,10 @@ class TestQueryFilter(FilterTestsBase):
expected = {
'match': {
'name': {
'boost': 4.0, 'prefix_length': 4,
'boost': 4.0, 'prefix_length': 2,
'query': 'this search query is too long.',
'fuzziness': 'AUTO', '_name': 'FuzzyMatch(name)',
'operator': 'and',
}
}
}

Просмотреть файл

@ -473,8 +473,8 @@ class TestRankingScenarios(ESTestCase):
average_daily_users=566337, weekly_downloads=150000,
summary=None,
description=(
'button, click that button, 1-Click Youtube Video '
'Downloader is a click click great tool')),
'This addon contains Amazon 1-Click Lock in its description '
' but not in its name.')),
amo.tests.addon_factory(
name='Amazon 1-Click Lock', type=amo.ADDON_EXTENSION,
average_daily_users=50, weekly_downloads=1, summary=None)
@ -484,8 +484,11 @@ class TestRankingScenarios(ESTestCase):
def test_scenario_tab_center_redux(self):
self._check_scenario('tab center redux', (
['Tab Center Redux', 69.5371],
['Tab Mix Plus', 0.06526235],
['Redux DevTools', 0.044507127],
# Those used to be found but we now require all terms to be present
# through operator: and on the fuzzy name query (and they have
# nothing else to match).
# ['Tab Mix Plus', 0.06526235],
# ['Redux DevTools', 0.044507127],
))
def test_scenario_open_image_new_tab(self):
@ -530,9 +533,10 @@ class TestRankingScenarios(ESTestCase):
def test_scenario_menu_wizzard(self):
self._check_scenario('Menu Wizzard', (
['Menu Wizard', 0.10698298], # (fuzzy, typo)
# partial match + users
['Add-ons Manager Context Menu', 0.07930083],
['Menu Wizard', 0.43297172], # (fuzzy, typo)
# 'Add-ons Manager Context Menu' used to be found but we now
# require all terms to be present through operator: and on the
# fuzzy name query (and it has nothing else to match).
))
def test_scenario_frame_demolition(self):
@ -594,8 +598,8 @@ class TestRankingScenarios(ESTestCase):
# Both are found thanks to their descriptions (each individual term
# matches, then they get rescored with a match_phrase w/ slop).
self._check_scenario('Youtube html5 Player', (
['YouTube Flash Player', 0.41867542],
['No Flash', 0.068471745],
['YouTube Flash Player', 0.41527697],
['No Flash', 0.06847079],
))
def test_scenario_disable_hello_pocket_reader_plus(self):
@ -621,6 +625,11 @@ class TestRankingScenarios(ESTestCase):
['Delicious Bookmarks', 0.85394204],
))
def test_scenario_name_fuzzy(self):
self._check_scenario('opeb boocmarks tab', (
['Open Bookmarks in New Tab', 0.4235528],
))
def test_score_boost_name_match(self):
# Tests that we match directly "Merge Windows" and also find
# "Merge All Windows" because of slop=1
@ -633,21 +642,24 @@ class TestRankingScenarios(ESTestCase):
self._check_scenario('merge all windows', (
['Merge All Windows', 11.195793],
['Merge Windows', 0.04285209],
['All Downloader Professional', 0.0070999665],
# These used to be found but we now require all terms to be present
# through operator: and on the fuzzy name query (and they have
# nothing else to match).
# ['Merge Windows', 0.04285209],
# ['All Downloader Professional', 0.0070999665],
))
def test_score_boost_exact_match(self):
"""Test that we rank exact matches at the top."""
self._check_scenario('test addon test21', (
['test addon test21', 11.341756],
['test addon test21', 11.338805],
))
def test_score_boost_exact_match_description_hijack(self):
"""Test that we rank exact matches at the top."""
self._check_scenario('Amazon 1-Click Lock', (
['Amazon 1-Click Lock', 26.223818],
['1-Click YouTube Video Download', 0.22368877],
['Amazon 1-Click Lock', 26.224829],
['1-Click YouTube Video Download', 0.1229738],
))
def test_score_boost_exact_match_in_right_language(self):