Elasticsearch 7.x compatibility & cleanups (#19126)

* Elasticsearch 7.x compatibility & cleanups

* Fix double import

* Bump versions

* Add position back
Mathieu Pillard 2022-04-21 15:28:32 +02:00 committed by GitHub
Parent ff2442cfd5
Commit 4b97ad368c
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 422 additions and 441 deletions

View File

@@ -314,13 +314,16 @@ references:
defaults-with-elasticsearch: &defaults-with-elasticsearch
<<: *defaults
parameters:
elasticsearchversion:
type: string
docker:
- image: *python
- image: *redis
- image: *memcached
- image: *mysql
<<: *mysql-config
- image: docker.elastic.co/elasticsearch/elasticsearch:6.8.8
- image: docker.elastic.co/elasticsearch/elasticsearch:<< parameters.elasticsearchversion >>
environment:
# Disable all xpack related features to avoid unrelated logging in
# docker logs. https://github.com/mozilla/addons-server/issues/8887
@@ -628,8 +631,6 @@ jobs:
command: dockerize -wait tcp://localhost:9200 -timeout 1m
- run:
command: pytest -m "es_tests and not needs_locales_compilation and not static_assets" -v src/olympia/
environment:
ES_VERSION: 6.x
release-master:
<<: *defaults-release
@@ -659,7 +660,12 @@ workflows:
- docs
- main
- reviewers-and-zadmin
- es-tests
- es-tests:
matrix:
parameters:
elasticsearchversion:
- 6.8.23
- 7.17.2
- release-master:
filters:
branches:

View File

@@ -63,7 +63,7 @@ services:
- MYSQL_DATABASE=olympia
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:6.8.8
image: docker.elastic.co/elasticsearch/elasticsearch:6.8.23
environment:
# Disable all xpack related features to avoid unrelated logging
# in docker logs. https://github.com/mozilla/addons-server/issues/8887

View File

@@ -5,9 +5,10 @@ from olympia.constants.promoted import RECOMMENDED
import olympia.core.logger
from olympia import amo
from olympia.amo.indexers import BaseSearchIndexer
from olympia.amo.utils import attach_trans_dict
from olympia.amo.celery import create_chunked_tasks_signatures
from olympia.amo.utils import to_language
from olympia.constants.search import SEARCH_LANGUAGE_TO_ANALYZER
from olympia.lib.es.utils import create_index
from olympia.versions.compare import version_int
@@ -15,10 +16,154 @@ from olympia.versions.compare import version_int
log = olympia.core.logger.getLogger('z.es')
class AddonIndexer(BaseSearchIndexer):
"""Fields we don't need to expose in the results, only used for filtering
or sorting."""
class AddonIndexer:
"""
Base Indexer class for add-ons.
"""
@classmethod
def attach_translation_mappings(cls, mapping, field_names):
"""
For each field in field_names, attach a dict to the ES mapping
properties making "<field_name>_translations" an object containing
"string" and "lang" as non-indexed strings.
Used to store non-indexed, non-analyzed translations in ES that will be
sent back by the API for each item. It does not take care of the
indexed content for search; it's there only to store and return
raw translations.
"""
for field_name in field_names:
# _translations is the suffix in TranslationSerializer.
mapping['properties'][
'%s_translations' % field_name
] = cls.get_translations_definition()
@classmethod
def get_translations_definition(cls):
"""
Return the mapping to use for raw translations (to be returned directly
by the API, not used for analysis).
See attach_translation_mappings() for more information.
"""
return {
'type': 'object',
'properties': {
'lang': {'type': 'text', 'index': False},
'string': {'type': 'text', 'index': False},
},
}
@classmethod
def get_raw_field_definition(cls):
"""
Return the mapping to use for the "raw" version of a field. Meant to be
used as part of a 'fields': {'raw': ... } definition in the mapping of
an existing field.
Used for exact matches and sorting.
"""
# It needs to be a keyword to turn off all analysis; that means we
# don't get the lowercase filter applied by the standard &
# language-specific analyzers, so we need to do that ourselves through
# a custom normalizer for exact matches to work in a case-insensitive
# way.
return {
'type': 'keyword',
'normalizer': 'lowercase_keyword_normalizer',
}
@classmethod
def attach_language_specific_analyzers(cls, mapping, field_names):
"""
For each field in field_names, attach language-specific mappings that
will use specific analyzers for these fields in every language that we
support.
These mappings are used by the search filtering code if they exist.
"""
for lang, analyzer in SEARCH_LANGUAGE_TO_ANALYZER.items():
for field in field_names:
property_name = '%s_l10n_%s' % (field, lang)
mapping['properties'][property_name] = {
'type': 'text',
'analyzer': analyzer,
}
@classmethod
def attach_language_specific_analyzers_with_raw_variant(cls, mapping, field_names):
"""
Like attach_language_specific_analyzers() but with an extra field to
store the "raw" variant of the value, for exact matches.
"""
for lang, analyzer in SEARCH_LANGUAGE_TO_ANALYZER.items():
for field in field_names:
property_name = '%s_l10n_%s' % (field, lang)
mapping['properties'][property_name] = {
'type': 'text',
'analyzer': analyzer,
'fields': {
'raw': cls.get_raw_field_definition(),
},
}
@classmethod
def extract_field_api_translations(cls, obj, field, db_field=None):
"""
Returns a dict containing translations that we need to store for
the API. Empty translations are skipped entirely.
"""
if db_field is None:
db_field = '%s_id' % field
extend_with_me = {
'%s_translations'
% field: [
{'lang': to_language(lang), 'string': str(string)}
for lang, string in obj.translations[getattr(obj, db_field)]
if string
]
}
return extend_with_me
@classmethod
def extract_field_search_translation(cls, obj, field, default_locale):
"""
Returns the translation for this field in the object's default locale,
in the form of a dict with one entry (the field being the key and the
translation being the value, or an empty string if none was found).
That field will be analyzed and indexed by ES *without*
language-specific analyzers.
"""
translations = dict(obj.translations[getattr(obj, '%s_id' % field)])
default_locale = default_locale.lower() if default_locale else None
value = translations.get(default_locale, getattr(obj, field))
return {field: str(value) if value else ''}
@classmethod
def extract_field_analyzed_translations(cls, obj, field, db_field=None):
"""
Returns a dict containing, for the given field, translations for each
language that we have an analyzer for.
When no translation exists for a given language+field combo, the value
returned is an empty string, to avoid storing the word "None" since the
field does not understand null values.
"""
if db_field is None:
db_field = '%s_id' % field
translations = dict(obj.translations[getattr(obj, db_field)])
return {
'%s_l10n_%s' % (field, lang): translations.get(lang) or ''
for lang in SEARCH_LANGUAGE_TO_ANALYZER
}
# Fields we don't need to expose in the results, only used for filtering
# or sorting.
hidden_fields = (
'*.raw',
'boost',
@@ -53,7 +198,6 @@ class AddonIndexer(BaseSearchIndexer):
# for instance.
'tokenizer': 'standard',
'filter': [
'standard',
'custom_word_delimiter',
'lowercase',
'stop',
@@ -218,7 +362,6 @@ class AddonIndexer(BaseSearchIndexer):
@classmethod
def get_mapping(cls):
doc_name = cls.get_doctype_name()
appver_mapping = {
'properties': {
'max': {'type': 'long'},
@@ -266,102 +409,100 @@
},
}
mapping = {
doc_name: {
'properties': {
'id': {'type': 'long'},
'app': {'type': 'byte'},
'average_daily_users': {'type': 'long'},
'bayesian_rating': {'type': 'double'},
'boost': {'type': 'float', 'null_value': 1.0},
'category': {'type': 'integer'},
'colors': {
'type': 'nested',
'properties': {
'h': {'type': 'integer'},
's': {'type': 'integer'},
'l': {'type': 'integer'},
'ratio': {'type': 'double'},
},
'properties': {
'id': {'type': 'long'},
'app': {'type': 'byte'},
'average_daily_users': {'type': 'long'},
'bayesian_rating': {'type': 'double'},
'boost': {'type': 'float', 'null_value': 1.0},
'category': {'type': 'integer'},
'colors': {
'type': 'nested',
'properties': {
'h': {'type': 'integer'},
's': {'type': 'integer'},
'l': {'type': 'integer'},
'ratio': {'type': 'double'},
},
'contributions': {'type': 'text'},
'created': {'type': 'date'},
'current_version': version_mapping,
'default_locale': {'type': 'keyword', 'index': False},
'description': {'type': 'text', 'analyzer': 'snowball'},
'guid': {'type': 'keyword'},
'has_eula': {'type': 'boolean', 'index': False},
'has_privacy_policy': {'type': 'boolean', 'index': False},
'hotness': {'type': 'double'},
'icon_hash': {'type': 'keyword', 'index': False},
'icon_type': {'type': 'keyword', 'index': False},
'is_disabled': {'type': 'boolean'},
'is_experimental': {'type': 'boolean'},
'is_recommended': {'type': 'boolean'},
'last_updated': {'type': 'date'},
'listed_authors': {
'type': 'object',
'properties': {
'id': {'type': 'long'},
'name': {'type': 'text'},
'username': {'type': 'keyword'},
'is_public': {'type': 'boolean', 'index': False},
},
},
'modified': {'type': 'date', 'index': False},
'name': {
'type': 'text',
# Adding word-delimiter to split on camelcase, known
# words like 'tab', and punctuation, and eliminate
# duplicates.
'analyzer': 'standard_with_word_split',
'fields': {
# Raw field for exact matches and sorting.
'raw': cls.get_raw_field_definition(),
# Trigrams for partial matches.
'trigrams': {
'type': 'text',
'analyzer': 'trigram',
},
},
},
'previews': {
'type': 'object',
'properties': {
'id': {'type': 'long', 'index': False},
'caption_translations': cls.get_translations_definition(),
'modified': {'type': 'date', 'index': False},
'position': {'type': 'long', 'index': False},
'sizes': {
'type': 'object',
'properties': {
'thumbnail': {'type': 'short', 'index': False},
'image': {'type': 'short', 'index': False},
},
},
},
},
'promoted': {
'type': 'object',
'properties': {
'group_id': {'type': 'byte'},
'approved_for_apps': {'type': 'byte'},
},
},
'ratings': {
'type': 'object',
'properties': {
'count': {'type': 'short', 'index': False},
'average': {'type': 'float'},
},
},
'slug': {'type': 'keyword'},
'requires_payment': {'type': 'boolean', 'index': False},
'status': {'type': 'byte'},
'summary': {'type': 'text', 'analyzer': 'snowball'},
'tags': {'type': 'keyword'},
'type': {'type': 'byte'},
'weekly_downloads': {'type': 'long'},
},
'contributions': {'type': 'text'},
'created': {'type': 'date'},
'current_version': version_mapping,
'default_locale': {'type': 'keyword', 'index': False},
'description': {'type': 'text', 'analyzer': 'snowball'},
'guid': {'type': 'keyword'},
'has_eula': {'type': 'boolean', 'index': False},
'has_privacy_policy': {'type': 'boolean', 'index': False},
'hotness': {'type': 'double'},
'icon_hash': {'type': 'keyword', 'index': False},
'icon_type': {'type': 'keyword', 'index': False},
'is_disabled': {'type': 'boolean'},
'is_experimental': {'type': 'boolean'},
'is_recommended': {'type': 'boolean'},
'last_updated': {'type': 'date'},
'listed_authors': {
'type': 'object',
'properties': {
'id': {'type': 'long'},
'name': {'type': 'text'},
'username': {'type': 'keyword'},
'is_public': {'type': 'boolean', 'index': False},
},
},
'modified': {'type': 'date', 'index': False},
'name': {
'type': 'text',
# Adding word-delimiter to split on camelcase, known
# words like 'tab', and punctuation, and eliminate
# duplicates.
'analyzer': 'standard_with_word_split',
'fields': {
# Raw field for exact matches and sorting.
'raw': cls.get_raw_field_definition(),
# Trigrams for partial matches.
'trigrams': {
'type': 'text',
'analyzer': 'trigram',
},
},
},
'previews': {
'type': 'object',
'properties': {
'id': {'type': 'long', 'index': False},
'caption_translations': cls.get_translations_definition(),
'modified': {'type': 'date', 'index': False},
'position': {'type': 'long', 'index': False},
'sizes': {
'type': 'object',
'properties': {
'thumbnail': {'type': 'short', 'index': False},
'image': {'type': 'short', 'index': False},
},
},
},
},
'promoted': {
'type': 'object',
'properties': {
'group_id': {'type': 'byte'},
'approved_for_apps': {'type': 'byte'},
},
},
'ratings': {
'type': 'object',
'properties': {
'count': {'type': 'short', 'index': False},
'average': {'type': 'float', 'index': False},
},
},
'slug': {'type': 'keyword'},
'requires_payment': {'type': 'boolean', 'index': False},
'status': {'type': 'byte'},
'summary': {'type': 'text', 'analyzer': 'snowball'},
'tags': {'type': 'keyword'},
'type': {'type': 'byte'},
'weekly_downloads': {'type': 'long'},
},
}
@@ -603,9 +744,7 @@ class AddonIndexer(BaseSearchIndexer):
index_settings = copy.deepcopy(cls.index_settings)
config = {
'mappings': {
cls.get_doctype_name(): cls.get_mapping(),
},
'mappings': cls.get_mapping(),
'settings': {
# create_index will add its own index settings like number of
# shards and replicas.
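Note: the hunk above switches the create-index body to a typeless shape. A
minimal sketch of the before/after (field set trimmed for illustration):

    # 6.x: the mappings body is keyed by the doc type name.
    typed_body = {
        'mappings': {'addons': {'properties': {'id': {'type': 'long'}}}},
    }
    # 7.x: typeless; 'properties' sits directly under 'mappings'.
    typeless_body = {
        'mappings': {'properties': {'id': {'type': 'long'}}},
    }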

View File

@@ -112,7 +112,6 @@ def find_inconsistencies_between_es_and_db(ids, **kw):
db_addons = Addon.unfiltered.in_bulk(ids)
es_addons = (
Search(
doc_type=AddonIndexer.get_doctype_name(),
index=AddonIndexer.get_index_alias(),
using=amo.search.get_es(),
)
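Note: with doc_type removed, a query needs only the index and the client. A
minimal sketch under assumed names (local cluster, 'addons' index); doc types
were already ignored at search time on 6.x, and the 7.x DSL drops the argument:

    from elasticsearch import Elasticsearch
    from elasticsearch_dsl import Search

    es = Elasticsearch(['http://localhost:9200'])  # assumed local cluster
    es_addons = Search(index='addons', using=es).filter('terms', id=[1, 2, 3])
    response = es_addons.execute()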

View File

@@ -130,10 +130,7 @@ class TestAddonIndexer(TestCase):
return fields
def test_mapping(self):
doc_name = self.indexer.get_doctype_name()
assert doc_name
mapping_properties = self.indexer.get_mapping()[doc_name]['properties']
mapping_properties = self.indexer.get_mapping()['properties']
# Make sure the get_mapping() method does not return fields we did
# not expect to be present, or omitted fields we want.
@@ -181,10 +178,7 @@ class TestAddonIndexer(TestCase):
Old versions of ElasticSearch allowed 'no' and 'yes' strings;
this changed with ElasticSearch 5.x.
"""
doc_name = self.indexer.get_doctype_name()
assert doc_name
mapping_properties = self.indexer.get_mapping()[doc_name]['properties']
mapping_properties = self.indexer.get_mapping()['properties']
assert all(
isinstance(prop['index'], bool)
@@ -578,13 +572,12 @@ class TestAddonIndexerWithES(ESTestCase):
self.reindex(Addon)
indexer = AddonIndexer()
doc_name = indexer.get_doctype_name()
real_index_name = self.get_index_name('default')
mappings = self.es.indices.get_mapping(indexer.get_index_alias())[
alias = indexer.get_index_alias()
mappings = self.es.indices.get_mapping(alias, include_type_name=False)[
real_index_name
]['mappings']
actual_properties = mappings[doc_name]['properties']
indexer_properties = indexer.get_mapping()[doc_name]['properties']
actual_properties = mappings['properties']
indexer_properties = indexer.get_mapping()['properties']
assert set(actual_properties.keys()) == set(indexer_properties.keys())
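Note: include_type_name=False is what keeps this assertion version-agnostic;
it is accepted by the 6.8 mapping APIs and matches the 7.x default. A sketch
with an assumed index name:

    # No doc-type level between the index and its 'mappings' entry.
    mappings = es.indices.get_mapping(index='addons-index', include_type_name=False)
    properties = mappings['addons-index']['mappings']['properties']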

View File

@@ -672,7 +672,9 @@ class AddonSearchView(ListAPIView):
using=amo.search.get_es(),
index=AddonIndexer.get_index_alias(),
)
.extra(_source={'excludes': AddonIndexer.hidden_fields})
.extra(
_source={'excludes': AddonIndexer.hidden_fields}, track_total_hits=True
)
.params(search_type='dfs_query_then_fetch')
)
@@ -994,6 +996,15 @@ class AddonRecommendationView(AddonSearchView):
)
)
def get_results_count(self, results):
try:
# Elasticsearch 7.x and higher
total = results.hits.total['value']
except TypeError:
# Elasticsearch 6.x and lower
total = results.hits.total
return int(total)
def filter_queryset(self, qs):
qs = super().filter_queryset(qs)
guid_param = self.request.GET.get('guid')
@@ -1001,17 +1012,18 @@ class AddonRecommendationView(AddonSearchView):
guids, self.ab_outcome, self.fallback_reason = get_addon_recommendations(
guid_param, taar_enable
)
results_qs = qs.query(query.Bool(must=[Q('terms', guid=guids)]))
results_qs.execute() # To cache the results.
if results_qs.count() != 4 and is_outcome_recommended(self.ab_outcome):
recommended_qs = qs.query(query.Bool(must=[Q('terms', guid=guids)]))
results = recommended_qs.execute()
if self.get_results_count(results) != 4 and is_outcome_recommended(
self.ab_outcome
):
(
guids,
self.ab_outcome,
self.fallback_reason,
) = get_addon_recommendations_invalid()
return qs.query(query.Bool(must=[Q('terms', guid=guids)]))
return results_qs
return results
def paginate_queryset(self, queryset):
# We don't need pagination for the fixed number of results.
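Note: track_total_hits=True (added above) matters on 7.x, where hit counting
otherwise stops at 10,000 and the total comes back as a lower bound
({'value': 10000, 'relation': 'gte'}). A sketch, assuming an es client:

    from elasticsearch_dsl import Search

    search = (
        Search(using=es, index='addons')
        .extra(track_total_hits=True)  # request exact totals, as on 6.x
        .params(search_type='dfs_query_then_fetch')
    )
    response = search.execute()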

View File

@@ -1,184 +0,0 @@
from django.conf import settings
import olympia.core.logger
from olympia.constants.search import SEARCH_LANGUAGE_TO_ANALYZER
from .utils import to_language
log = olympia.core.logger.getLogger('z.es')
class BaseSearchIndexer:
"""
Base Indexer class for all search-related things (as opposed to
stats-related things).
Intended to be inherited by every document type that we want to put in
ElasticSearch for search-related purposes. A class inheriting from
BaseSearchIndexer should implement at least the following classmethods:
- get_model(cls)
- get_mapping(cls)
- extract_document(cls, obj)
- create_new_index(cls, index_name)
- reindex_tasks_group(cls, index_name)
"""
@classmethod
def get_index_alias(cls):
return settings.ES_INDEXES.get(cls.get_model().ES_ALIAS_KEY)
@classmethod
def get_doctype_name(cls):
"""Return the document type name for this indexer. We default to simply
use the db table from the corresponding model."""
return cls.get_model()._meta.db_table
@classmethod
def attach_translation_mappings(cls, mapping, field_names):
"""
For each field in field_names, attach a dict to the ES mapping
properties making "<field_name>_translations" an object containing
"string" and "lang" as non-indexed strings.
Used to store non-indexed, non-analyzed translations in ES that will be
sent back by the API for each item. It does not take care of the
indexed content for search; it's there only to store and return
raw translations.
"""
doc_name = cls.get_doctype_name()
for field_name in field_names:
# _translations is the suffix in TranslationSerializer.
mapping[doc_name]['properties'][
'%s_translations' % field_name
] = cls.get_translations_definition()
@classmethod
def get_translations_definition(cls):
"""
Return the mapping to use for raw translations (to be returned directly
by the API, not used for analysis).
See attach_translation_mappings() for more information.
"""
return {
'type': 'object',
'properties': {
'lang': {'type': 'text', 'index': False},
'string': {'type': 'text', 'index': False},
},
}
@classmethod
def get_raw_field_definition(cls):
"""
Return the mapping to use for the "raw" version of a field. Meant to be
used as part of a 'fields': {'raw': ... } definition in the mapping of
an existing field.
Used for exact matches and sorting.
"""
# It needs to be a keyword to turn off all analysis; that means we
# don't get the lowercase filter applied by the standard &
# language-specific analyzers, so we need to do that ourselves through
# a custom normalizer for exact matches to work in a case-insensitive
# way.
return {
'type': 'keyword',
'normalizer': 'lowercase_keyword_normalizer',
}
@classmethod
def attach_language_specific_analyzers(cls, mapping, field_names):
"""
For each field in field_names, attach language-specific mappings that
will use specific analyzers for these fields in every language that we
support.
These mappings are used by the search filtering code if they exist.
"""
doc_name = cls.get_doctype_name()
for lang, analyzer in SEARCH_LANGUAGE_TO_ANALYZER.items():
for field in field_names:
property_name = f'{field}_l10n_{lang}'
mapping[doc_name]['properties'][property_name] = {
'type': 'text',
'analyzer': analyzer,
}
@classmethod
def attach_language_specific_analyzers_with_raw_variant(cls, mapping, field_names):
"""
Like attach_language_specific_analyzers() but with an extra field to
store the "raw" variant of the value, for exact matches.
"""
doc_name = cls.get_doctype_name()
for lang, analyzer in SEARCH_LANGUAGE_TO_ANALYZER.items():
for field in field_names:
property_name = f'{field}_l10n_{lang}'
mapping[doc_name]['properties'][property_name] = {
'type': 'text',
'analyzer': analyzer,
'fields': {
'raw': cls.get_raw_field_definition(),
},
}
@classmethod
def extract_field_api_translations(cls, obj, field, db_field=None):
"""
Returns a dict containing translations that we need to store for
the API. Empty translations are skipped entirely.
"""
if db_field is None:
db_field = '%s_id' % field
extend_with_me = {
'%s_translations'
% field: [
{'lang': to_language(lang), 'string': str(string)}
for lang, string in obj.translations[getattr(obj, db_field)]
if string
]
}
return extend_with_me
@classmethod
def extract_field_search_translation(cls, obj, field, default_locale):
"""
Returns the translation for this field in the object's default locale,
in the form of a dict with one entry (the field being the key and the
translation being the value, or an empty string if none was found).
That field will be analyzed and indexed by ES *without*
language-specific analyzers.
"""
translations = dict(obj.translations[getattr(obj, '%s_id' % field)])
default_locale = default_locale.lower() if default_locale else None
value = translations.get(default_locale, getattr(obj, field))
return {field: str(value) if value else ''}
@classmethod
def extract_field_analyzed_translations(cls, obj, field, db_field=None):
"""
Returns a dict containing, for the given field, translations for each
language that we have an analyzer for.
When no translation exists for a given language+field combo, the value
returned is an empty string, to avoid storing the word "None" since the
field does not understand null values.
"""
if db_field is None:
db_field = '%s_id' % field
translations = dict(obj.translations[getattr(obj, db_field)])
return {
f'{field}_l10n_{lang}': translations.get(lang) or ''
for lang in SEARCH_LANGUAGE_TO_ANALYZER
}

View File

@@ -72,7 +72,13 @@ class ESPaginator(Paginator):
page = Page(result.hits, number, self)
# Overwrite the `count` with the total received from ES results.
self.count = int(page.object_list.total)
try:
# Elasticsearch 7.x and higher
total = page.object_list.total['value']
except TypeError:
# Elasticsearch 6.x and lower
total = page.object_list.total
self.count = int(total)
# Now that we have the count validate that the page number isn't higher
# than the possible number of pages and adjust accordingly.
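Note: the raw hits.total shapes this shim normalizes (values illustrative):

    hits_total_es6 = 123  # 6.x and lower: a bare integer
    hits_total_es7 = {'value': 123, 'relation': 'eq'}  # 7.x and higher

    def normalize_total(total):
        # Same try/except pattern as the paginator above.
        try:
            return int(total['value'])
        except TypeError:
            return int(total)

    assert normalize_total(hits_total_es6) == normalize_total(hits_total_es7)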

View File

@@ -1,16 +1,14 @@
# These two dicts are mapping between language codes in zamboni and language
# analyzers in elasticsearch.
# These two dicts map between language codes in addons-server and
# language analyzers in elasticsearch.
#
# Each key value of ANALYZER_MAP is language analyzer supported by
# elasticsearch. See
# http://www.elasticsearch.org/guide/reference/index-modules/analysis/lang-analyzer.html
# The keys are elasticsearch language analyzers while each value is a list of
# language codes supported by AMO.
#
# Each value of ANALYZER_MAP is a list which is supported by the key analyzer.
# All values are picked from AMO_LANGUAGES in settings.py.
#
# The rows commented out are that the language is not supported by
# The lines commented out are for languages that elasticsearch does not
# support yet. We should update this file when elasticsearch adds an
# analyzer for the language.
#
# http://www.elasticsearch.org/guide/reference/index-modules/analysis/lang-analyzer.html
SEARCH_ANALYZER_MAP = {
# '': ['af'], # Afrikaans

View File

@@ -24,6 +24,10 @@ unflag_reindexing_amo = Reindexing.objects.unflag_reindexing_amo
get_indices = Reindexing.objects.get_indices
def get_major_version(es):
return int(es.info()['version']['number'].split('.')[0])
def index_objects(
*, ids, indexer_class, index=None, transforms=None, manager_name=None
):
@@ -33,7 +37,6 @@ def index_objects(
manager_name = 'objects'
manager = getattr(indexer_class.get_model(), manager_name)
indices = Reindexing.objects.get_indices(index)
if transforms is None:
@@ -44,19 +47,26 @@ def index_objects(
qs = qs.transform(transform)
bulk = []
es = amo_search.get_es()
major_version = get_major_version(es)
for obj in qs:
data = indexer_class.extract_document(obj)
for index in indices:
bulk.append(
{
'_source': data,
'_id': obj.id,
'_type': indexer_class.get_doctype_name(),
'_index': index,
}
)
item = {
'_source': data,
'_id': obj.id,
'_index': index,
}
if major_version < 7:
# While on 6.x, we use the `addons` type when creating indices
# and when bulk-indexing. We completely ignore it on searches.
# When on 7.x, we don't pass type at all at creation or
# indexing, and continue to ignore it on searches.
# That should ensure we're compatible with both transparently.
item['_type'] = 'addons'
bulk.append(item)
es = amo_search.get_es()
return helpers.bulk(es, bulk)
@@ -110,8 +120,12 @@ def create_index(index, config=None):
'max_result_window': settings.ES_MAX_RESULT_WINDOW,
}
)
major_version = get_major_version(es)
if not es.indices.exists(index):
# See above, while on 6.x the mapping needs to include the `addons` doc
# type.
if major_version < 7:
config['mappings'] = {'addons': config['mappings']}
es.indices.create(index, body=config)
return index
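Note: how the version switch above plays out at indexing time, as a sketch
mirroring index_objects() without the task plumbing (client and index name
assumed):

    from elasticsearch import Elasticsearch, helpers

    es = Elasticsearch(['http://localhost:9200'])  # assumed local cluster
    major_version = int(es.info()['version']['number'].split('.')[0])

    item = {'_index': 'addons-index', '_id': 42, '_source': {'name': 'Tabby Cat'}}
    if major_version < 7:
        item['_type'] = 'addons'  # 6.x still expects a mapping type here
    helpers.bulk(es, [item])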

View File

@@ -2,6 +2,8 @@ from unittest import mock
from django.core import mail
from elasticsearch_dsl import Q, Search
from olympia import amo
from olympia.activity.models import ActivityLog
from olympia.addons.indexers import AddonIndexer
@@ -12,8 +14,6 @@ from olympia.ratings.models import Rating, RatingFlag
from olympia.amo.search import get_es
from olympia.users.models import UserProfile
from elasticsearch_dsl import Q, Search
class TestRatingModel(TestCase):
fixtures = ['ratings/test_models']

View File

@@ -49,7 +49,7 @@ class TestRankingScenarios(ESTestCase):
expected_name = addon[0]
expected_score = addon[1]
found_name = get_name_from_result(results[idx], expected_lang)
found_score = results[idx]['_score']
found_score = int(results[idx]['_score'])
assert found_name == expected_name, (
'Expected "{}" to be on position {} with score {} but '
@@ -659,21 +659,21 @@
cls.refresh()
def test_scenario_tabby_cat(self):
self._check_scenario('Tabby cat', (['Tabby Cat', 43117.04],))
self._check_scenario('Tabby cat', (['Tabby Cat', 43117],))
def test_scenario_tabbycat(self):
self._check_scenario(
'tabbycat',
(
['Tabby Cat', 6298.6855],
['OneTab', 622.352660],
['Tab Mix Plus', 544.4538],
['FoxyTab', 533.5872],
['Authenticator', 481.1612],
['Tab Center Redux', 410.79758],
['Open Bookmarks in New Tab', 379.56177],
['Open image in a new tab', 291.76306],
['Open Image in New Tab', 229.28279],
['Tabby Cat', 6298],
['OneTab', 622],
['Tab Mix Plus', 544],
['FoxyTab', 533],
['Authenticator', 481],
['Tab Center Redux', 410],
['Open Bookmarks in New Tab', 379],
['Open image in a new tab', 291],
['Open Image in New Tab', 229],
),
)
@@ -681,15 +681,15 @@
self._check_scenario(
'tabbbycat',
(
['Tabby Cat', 6298.6855],
['OneTab', 622.352660],
['Tab Mix Plus', 544.4538],
['FoxyTab', 533.5872],
['Authenticator', 481.1612],
['Tab Center Redux', 410.79758],
['Open Bookmarks in New Tab', 379.56177],
['Open image in a new tab', 291.76306],
['Open Image in New Tab', 229.28279],
['Tabby Cat', 6298],
['OneTab', 622],
['Tab Mix Plus', 544],
['FoxyTab', 533],
['Authenticator', 481],
['Tab Center Redux', 410],
['Open Bookmarks in New Tab', 379],
['Open image in a new tab', 291],
['Open Image in New Tab', 229],
),
)
@@ -697,15 +697,15 @@
self._check_scenario(
'tabbicat',
(
['Tabby Cat', 4938.7627],
['OneTab', 622.352660],
['Tab Mix Plus', 544.4538],
['FoxyTab', 533.5872],
['Authenticator', 481.1612],
['Tab Center Redux', 410.79758],
['Open Bookmarks in New Tab', 379.56177],
['Open image in a new tab', 291.76306],
['Open Image in New Tab', 229.28279],
['Tabby Cat', 4938],
['OneTab', 622],
['Tab Mix Plus', 544],
['FoxyTab', 533],
['Authenticator', 481],
['Tab Center Redux', 410],
['Open Bookmarks in New Tab', 379],
['Open image in a new tab', 291],
['Open Image in New Tab', 229],
),
)
@@ -713,19 +713,19 @@
# Tab Mix Plus and Redux DevTools to be found in this test but we now
# require all terms to be present through minimum_should_match on the
# fuzzy name query (and they have nothing else to match).
self._check_scenario('tab center redux', (['Tab Center Redux', 10784.336],))
self._check_scenario('tab center redux', (['Tab Center Redux', 10784],))
def test_scenario_websocket(self):
# Should *not* find add-ons that simply mention 'Source', 'Persona',
# or other words with just 'so' in their name.
self._check_scenario('websocket', (['Simple WebSocket Client', 1871.3523],))
self._check_scenario('websocket', (['Simple WebSocket Client', 1871],))
def test_scenario_open_image_new_tab(self):
self._check_scenario(
'Open Image in New Tab',
(
['Open Image in New Tab', 5553.028],
['Open image in a new tab', 1735.6191],
['Open Image in New Tab', 5553],
['Open image in a new tab', 1735],
),
)
@@ -734,8 +734,8 @@
self._check_scenario(
'CoinHive',
(
['Coinhive Blocker', 1899.6584],
['NoMiners', 69.78211], # via description
['Coinhive Blocker', 1899],
['NoMiners', 69], # via description
# ['CoinBlock', 0], # via prefix search
),
)
@@ -744,12 +744,12 @@
self._check_scenario(
'Privacy',
(
['Privacy Badger', 2645.5205],
['Google Privacy', 1922.5374], # More users, summary
['Privacy Settings', 1909.5417],
['Privacy Pass', 1815.3016],
['Ghostery', 183.42003],
['Blur', 173.68744],
['Privacy Badger', 2645],
['Google Privacy', 1922], # More users, summary
['Privacy Settings', 1909],
['Privacy Pass', 1815],
['Ghostery', 183],
['Blur', 173],
),
)
@@ -759,10 +759,10 @@
self._check_scenario(
'firebu',
(
['Firebug', 1998.017],
['Firefinder for Firebug', 1139.96],
['Firebug Autocompleter', 967.0083],
['Fire Drag', 793.19653],
['Firebug', 1998],
['Firefinder for Firebug', 1139],
['Firebug Autocompleter', 967],
['Fire Drag', 793],
),
)
@@ -770,10 +770,10 @@
self._check_scenario(
'fireb',
(
['Firebug', 1672.0083],
['Firefinder for Firebug', 990.9907],
['Firebug Autocompleter', 829.7388],
['Fire Drag', 793.19653],
['Firebug', 1672],
['Firefinder for Firebug', 990],
['Firebug Autocompleter', 829],
['Fire Drag', 793],
),
)
@@ -782,25 +782,23 @@
# but we now require all terms to be present through
# minimum_should_match on the fuzzy name query (and it has nothing else
# to match).
self._check_scenario(
'Menu Wizzard', (['Menu Wizard', 1523.5862],) # (fuzzy, typo)
)
self._check_scenario('Menu Wizzard', (['Menu Wizard', 1523],)) # (fuzzy, typo)
def test_scenario_frame_demolition(self):
self._check_scenario('Frame Demolition', (['Frame Demolition', 4601.814],))
self._check_scenario('Frame Demolition', (['Frame Demolition', 4601],))
def test_scenario_demolition(self):
# Find "Frame Demolition" via a typo
self._check_scenario('Frame Demolition', (['Frame Demolition', 4601.814],))
self._check_scenario('Frame Demolition', (['Frame Demolition', 4601],))
def test_scenario_restyle(self):
self._check_scenario('reStyle', (['reStyle', 4432.1533],))
self._check_scenario('reStyle', (['reStyle', 4432],))
def test_scenario_megaupload_downloadhelper(self):
# Doesn't find "RapidShare DownloadHelper" anymore
# since we now query by "MegaUpload AND DownloadHelper"
self._check_scenario(
'MegaUpload DownloadHelper', (['MegaUpload DownloadHelper', 5226.827],)
'MegaUpload DownloadHelper', (['MegaUpload DownloadHelper', 5226],)
)
def test_scenario_downloadhelper(self):
@@ -809,25 +807,25 @@
self._check_scenario(
'DownloadHelper',
(
['RapidShare DownloadHelper', 2083.029],
['MegaUpload DownloadHelper', 1409.281],
['Download Flash and Video', 915.1305],
['1-Click YouTube Video Download', 732.97125],
['All Downloader Professional', 76.08504],
['RapidShare DownloadHelper', 2083],
['MegaUpload DownloadHelper', 1409],
['Download Flash and Video', 915],
['1-Click YouTube Video Download', 732],
['All Downloader Professional', 76],
),
)
def test_scenario_megaupload(self):
self._check_scenario('MegaUpload', (['MegaUpload DownloadHelper', 1625.1984],))
self._check_scenario('MegaUpload', (['MegaUpload DownloadHelper', 1625],))
def test_scenario_no_flash(self):
self._check_scenario(
'No Flash',
(
['No Flash', 7230.9683],
['Download Flash and Video', 1565.2987],
['YouTube Flash Player', 1370.8643],
['YouTube Flash Video Player', 1260.1235],
['No Flash', 7230],
['Download Flash and Video', 1565],
['YouTube Flash Player', 1370],
['YouTube Flash Video Player', 1260],
),
)
@@ -835,10 +833,10 @@
self._check_scenario(
'no flash',
(
['No Flash', 7230.9683],
['Download Flash and Video', 1565.2987],
['YouTube Flash Player', 1370.8643],
['YouTube Flash Video Player', 1260.1235],
['No Flash', 7230],
['Download Flash and Video', 1565],
['YouTube Flash Player', 1370],
['YouTube Flash Video Player', 1260],
),
)
@@ -848,24 +846,24 @@
self._check_scenario(
'Youtube html5 Player',
(
['YouTube Flash Player', 472.92587],
['No Flash', 205.2885],
['YouTube Flash Player', 472],
['No Flash', 205],
),
)
def test_scenario_disable_hello_pocket_reader_plus(self):
self._check_scenario(
'Disable Hello, Pocket & Reader+',
(['Disable Hello, Pocket & Reader+', 8651.1045],), # yeay!
(['Disable Hello, Pocket & Reader+', 8651],), # yeay!
)
def test_scenario_grapple(self):
"""Making sure this scenario works via the API"""
self._check_scenario('grapple', (['GrApple Yummy', 280.32382],))
self._check_scenario('grapple', (['GrApple Yummy', 280],))
def test_scenario_delicious(self):
"""Making sure this scenario works via the API"""
self._check_scenario('delicious', (['Delicious Bookmarks', 313.91644],))
self._check_scenario('delicious', (['Delicious Bookmarks', 313],))
def test_scenario_name_fuzzy(self):
# Fuzzy + minimum_should_match combination means we find these 3 (only
@@ -873,9 +871,9 @@
self._check_scenario(
'opeb boocmarks tab',
(
['Open Bookmarks in New Tab', 1124.5222],
['Open image in a new tab', 204.52444],
['Open Image in New Tab', 160.72607],
['Open Bookmarks in New Tab', 1124],
['Open image in a new tab', 204],
['Open Image in New Tab', 160],
),
)
@@ -885,16 +883,16 @@
self._check_scenario(
'merge windows',
(
['Merge Windows', 1202.2235],
['Merge All Windows', 409.31805],
['Merge Windows', 1202],
['Merge All Windows', 409],
),
)
self._check_scenario(
'merge all windows',
(
['Merge All Windows', 1290.6941],
['Merge Windows', 188.86444],
['Merge All Windows', 1290],
['Merge Windows', 188],
),
)
@@ -903,9 +901,9 @@
self._check_scenario(
'test addon test21',
(
['test addon test21', 1283.0007],
['test addon test31', 183.39906],
['test addon test11', 173.83998],
['test addon test21', 1283],
['test addon test31', 183],
['test addon test11', 173],
),
)
@@ -914,8 +912,8 @@
self._check_scenario(
'Amazon 1-Click Lock',
(
['Amazon 1-Click Lock', 4794.2417],
['1-Click YouTube Video Download', 127.363266],
['Amazon 1-Click Lock', 4794],
['1-Click YouTube Video Download', 127],
),
)
@@ -925,14 +923,14 @@
# translation exists.
self._check_scenario(
'foobar unique english',
(['Foobar unique english', 784.86255],),
(['Foobar unique english', 784],),
lang='en-US',
)
# Then in canadian english. Should get the same score.
self._check_scenario(
'foobar unique english',
(['Foobar unique english', 784.86255],),
(['Foobar unique english', 784],),
lang='en-CA',
)
@@ -949,7 +947,7 @@
# the default_locale for this addon (fr).
self._check_scenario(
'foobar unique english',
(['Foobar unique francais', 784.86255],),
(['Foobar unique francais', 784],),
lang='en-GB',
expected_lang='fr',
)
@@ -958,7 +956,7 @@
# match, the translation exists, it's even the default locale.
self._check_scenario(
'foobar unique francais',
(['Foobar unique francais', 1065.5952],),
(['Foobar unique francais', 1065],),
lang='fr',
)
@@ -971,7 +969,7 @@
assert 'mn' in settings.AMO_LANGUAGES
self._check_scenario(
'foobar unique francais',
(['Foobar unique francais', 1057.3596],),
(['Foobar unique francais', 1057],),
lang='mn',
expected_lang='fr',
)
@@ -985,7 +983,7 @@
assert 'ca' in settings.AMO_LANGUAGES
self._check_scenario(
'foobar unique francais',
(['Foobar unique francais', 1057.3596],),
(['Foobar unique francais', 1057],),
lang='ca',
expected_lang='fr',
)
@@ -995,7 +993,7 @@
# Note that the name returned follows the language requested.
self._check_scenario(
'foobar unique francais',
(['Foobar unique english', 1057.3596],),
(['Foobar unique english', 1057],),
lang='en-US',
)
@@ -1003,14 +1001,14 @@
self._check_scenario(
'tab',
(
['Tabby Cat', 3729.772],
['Tab Mix Plus', 1351.80030],
['OneTab', 1144.28310],
['Tab Center Redux', 960.8551],
['FoxyTab', 902.628],
['Open Bookmarks in New Tab', 886.82874],
['Open image in a new tab', 715.3191],
['Open Image in New Tab', 560.6783],
['Tabby Cat', 3729],
['Tab Mix Plus', 1351],
['OneTab', 1144],
['Tab Center Redux', 960],
['FoxyTab', 902],
['Open Bookmarks in New Tab', 886],
['Open image in a new tab', 715],
['Open Image in New Tab', 560],
),
)
@@ -1019,9 +1017,9 @@
self._check_scenario(
'strip',
(
['Stripy Dog 1', 2902.6985], # recommended
['Stripy Dog 2', 2902.6985], # line
['Stripy Dog 3', 580.5397], # verified (no boost)
['Stripy Dog 4', 580.5397], # not promoted
['Stripy Dog 1', 2902], # recommended
['Stripy Dog 2', 2902], # line
['Stripy Dog 3', 580], # verified (no boost)
['Stripy Dog 4', 580], # not promoted
),
)