make the strings unique, use a custom analyzer
This commit is contained in:
Родитель
160d95bae7
Коммит
47bc4cb63d
|
@ -22,9 +22,10 @@ def extract(addon):
|
|||
d = dict(zip(attrs, attrgetter(*attrs)(addon)))
|
||||
# Coerce the Translation into a string.
|
||||
d['name_sort'] = unicode(addon.name).lower()
|
||||
d['name'] = [string for _, string in addon.translations[addon.name_id]]
|
||||
d['description'] = [string for
|
||||
_, string in addon.translations[addon.description_id]]
|
||||
translations = addon.translations
|
||||
d['name'] = list(set(string for _, string in translations[addon.name_id]))
|
||||
d['description'] = list(set(string for
|
||||
_, string in translations[addon.description_id]))
|
||||
d['app'] = [a.id for a in addon.compatible_apps]
|
||||
# This is an extra query, not good for perf.
|
||||
d['category'] = getattr(addon, 'category_ids', [])
|
||||
|
@ -38,6 +39,7 @@ def setup_mapping():
|
|||
m = {
|
||||
# Turn off analysis on name so we can sort by it.
|
||||
'name_sort': {'type': 'string', 'index': 'not_analyzed'},
|
||||
'name': {'type': 'string', 'analyzer': 'standardPlusWordDelimiter'},
|
||||
}
|
||||
es = elasticutils.get_es()
|
||||
try:
|
||||
|
|
|
@ -17,3 +17,26 @@ The reindex job uses celery to parallelize indexing. Running the job multiple
|
|||
times will replace each old index item with a new document.
|
||||
|
||||
The index is maintained incrementally through post_save and post_delete hooks.
|
||||
|
||||
|
||||
Settings
|
||||
--------
|
||||
|
||||
We use a custom analyzer for indexing add-on names since they're a little
|
||||
different from normal text. To get the same results as our servers, put this in
|
||||
your elasticsearch.yml::
|
||||
|
||||
    index:
|
||||
      analysis:
|
||||
        analyzer:
|
||||
          standardPlusWordDelimiter:
|
||||
            type: custom
|
||||
            tokenizer: standard
|
||||
            filter: [standard, wordDelim, lowercase, stop]
|
||||
        filter:
|
||||
          wordDelim:
|
||||
            type: word_delimiter
|
||||
            preserve_original: true
|
||||
|
||||
If you don't do this, your results will be slightly different, but you probably
|
||||
wouldn't notice.
|
||||
|
|
Загрузка…
Ссылка в новой задаче