Merge pull request #116 from yohanboniface/bug-1013874-upgrade-elasticsearch
Upgrading Elasticsearch (bug 1013874)
This commit is contained in:
Commit
b348528e02
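In outline, the diff below swaps the pyes/elasticutils client stack for the official elasticsearch-py client, drops the deprecated _boost mapping field in favour of a plain boost field applied at query time via function_score, and keys the addons and stats mappings off index aliases ('default' and 'stats'). A rough sketch of the client-level change, with illustrative index and doc_type names that are not taken from the diff:

from elasticsearch import Elasticsearch

# Old style (pyes, removed below): es.index(doc, index, doc_type, id=..., bulk=True)
doc = {'id': 42, 'name': 'my-addon', 'boost': 1.0}
es = Elasticsearch(['localhost:9200'], timeout=5)
es.index(index='addons', doc_type='addons', id=doc['id'], body=doc)
es.indices.refresh(index='addons')
hits = es.search(index='addons', doc_type='addons',
                 body={'query': {'match_all': {}}})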
@@ -4,9 +4,8 @@ python:
services:
- memcached
before_install:
- wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-0.90.13.deb && sudo dpkg -i elasticsearch-0.90.13.deb
- sudo /usr/share/elasticsearch/bin/plugin -install elasticsearch/elasticsearch-analysis-icu/1.13.0
- sudo /usr/share/elasticsearch/bin/elasticsearch -Des.config=scripts/elasticsearch/elasticsearch.yml
- wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.3.2.deb && sudo dpkg -i elasticsearch-1.3.2.deb
- sudo /usr/share/elasticsearch/bin/elasticsearch -d -D es.path.data=/tmp -D es.gateway.type=none -D es.index.store.type=memory -D es.discovery.zen.ping.multicast.enabled=false
install:
- make update_deps
- pip install --no-deps -r requirements/test.txt --find-links https://pyrepo.addons.mozilla.org/
@@ -21,7 +21,6 @@ import waffle
import amo
from amo.decorators import write
from amo.utils import chunked, walkfiles
from addons import search
from addons.models import Addon, AppSupport, FrozenAddon, Persona
from files.models import File
from lib.es.utils import raise_if_reindex_in_progress

@@ -474,10 +473,8 @@ def give_personas_versions():

@cronjobs.register
def reindex_addons(index=None, aliased=True, addon_type=None):
def reindex_addons(index=None, addon_type=None):
from . import tasks
# Make sure our mapping is up to date.
search.setup_mapping(index, aliased)
ids = (Addon.objects.values_list('id', flat=True)
.filter(_current_version__isnull=False,
status__in=amo.VALID_STATUSES,
@@ -4,14 +4,17 @@ from operator import attrgetter
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist

import pyes.exceptions as pyes

import amo
import amo.search
from amo.models import SearchMixin
from addons.cron import reindex_addons
from addons.models import Persona
from amo.utils import create_es_index_if_missing
from bandwagon.cron import reindex_collections
from bandwagon.models import Collection
from compat.cron import compatibility_report
from compat.models import AppCompat
from lib.es.utils import create_index
from users.cron import reindex_users
from users.models import UserProfile
from versions.compare import version_int

@@ -64,7 +67,7 @@ def extract(addon):
# This would otherwise get attached when by the transformer.
d['weekly_downloads'] = addon.persona.popularity
# Boost on popularity.
d['_boost'] = addon.persona.popularity ** .2
d['boost'] = addon.persona.popularity ** .2
d['has_theme_rereview'] = (
addon.persona.rereviewqueuetheme_set.exists())
except Persona.DoesNotExist:

@@ -73,10 +76,10 @@ def extract(addon):
else:
# Boost by the number of users on a logarithmic scale. The maximum
# boost (11,000,000 users for adblock) is about 5x.
d['_boost'] = addon.average_daily_users ** .2
d['boost'] = addon.average_daily_users ** .2
# Double the boost if the add-on is public.
if addon.status == amo.STATUS_PUBLIC and 'boost' in d:
d['_boost'] = max(d['_boost'], 1) * 4
d['boost'] = max(d['boost'], 1) * 4

# Indices for each language. languages is a list of locales we want to
# index with analyzer if the string's locale matches.
@@ -98,28 +101,40 @@ def extract(addon):
return d


def setup_mapping(index=None, aliased=True):
"""Set up the addons index mapping."""
def get_alias():
return settings.ES_INDEXES.get(SearchMixin.ES_ALIAS_KEY)


def create_new_index(index=None, config=None):
if config is None:
config = {}
if index is None:
index = get_alias()
config['settings'] = {'index': INDEX_SETTINGS}
config['mappings'] = get_mappings()
create_index(index, config)


def get_mappings():
# Mapping describes how elasticsearch handles a document during indexing.
# Most fields are detected and mapped automatically.
appver = {'dynamic': False, 'properties': {'max': {'type': 'long'},
'min': {'type': 'long'}}}
mapping = {
# Optional boosting during indexing.
'_boost': {'name': '_boost', 'null_value': 1.0},
appver = {
'dynamic': False,
'properties': {
'max': {'type': 'long'},
'min': {'type': 'long'}
}
}
mapping = {
'properties': {
'boost': {'type': 'float', 'null_value': 1.0},
# Turn off analysis on name so we can sort by it.
'name_sort': {'type': 'string', 'index': 'not_analyzed'},
# Adding word-delimiter to split on camelcase and punctuation.
'name': {'type': 'string',
'analyzer': 'standardPlusWordDelimiter'},
'summary': {'type': 'string',
'analyzer': 'snowball'},
'description': {'type': 'string',
'analyzer': 'snowball'},
'tags': {'type': 'string',
'index': 'not_analyzed',
'index_name': 'tag'},
'name': {'type': 'string', 'analyzer': 'standardPlusWordDelimiter'},
'summary': {'type': 'string', 'analyzer': 'snowball'},
'description': {'type': 'string', 'analyzer': 'snowball'},
'tags': {'type': 'string', 'index': 'not_analyzed', 'index_name': 'tag'},
'platforms': {'type': 'integer', 'index_name': 'platform'},
'appversion': {'properties': dict((app.id, appver)
for app in amo.APP_USAGE)},
@@ -127,10 +142,9 @@ def setup_mapping(index=None, aliased=True):
}
# Add room for language-specific indexes.
for analyzer in amo.SEARCH_ANALYZER_MAP:
if (not settings.ES_USE_PLUGINS and
analyzer in amo.SEARCH_ANALYZER_PLUGINS):
log.info('While creating mapping, skipping the %s analyzer'
% analyzer)
if (not settings.ES_USE_PLUGINS
and analyzer in amo.SEARCH_ANALYZER_PLUGINS):
log.info('While creating mapping, skipping the %s analyzer' % analyzer)
continue

mapping['properties']['name_' + analyzer] = {
@@ -146,14 +160,55 @@ def setup_mapping(index=None, aliased=True):
'analyzer': analyzer,
}

es = amo.search.get_es()
# Adjust the mapping for all models at once because fields are shared
# across all doc types in an index. If we forget to adjust one of them
# we'll get burned later on.
for model in Addon, AppCompat, Collection, UserProfile:
index = index or model._get_index()
index = create_es_index_if_missing(index, aliased=aliased)
models = (Addon, AppCompat, Collection, UserProfile)
return dict((m._meta.db_table, mapping) for m in models)


def reindex(index):
indexers = [
reindex_addons, reindex_collections, reindex_users, compatibility_report
]
for indexer in indexers:
log.info('Indexing %r' % indexer.__name__)
try:
es.put_mapping(model._meta.db_table, mapping, index)
except pyes.ElasticSearchException, e:
log.error(e)
indexer(index)
except Exception:
# We want to log this event but continue
log.error('Indexer %r failed' % indexer.__name__)


INDEX_SETTINGS = {
"analysis": {
"analyzer": {
"standardPlusWordDelimiter": {
"tokenizer": "standard",
"filter": ["standard", "wordDelim", "lowercase", "stop", "dict"]
}
},
"filter": {
"wordDelim": {
"type": "word_delimiter",
"preserve_original": True
},
"dict": {
"type": "dictionary_decompounder",
"word_list": [
"cool", "iris", "fire", "bug", "flag", "fox", "grease",
"monkey", "flash", "block", "forecast", "screen", "grab",
"cookie", "auto", "fill", "text", "all", "so", "think",
"mega", "upload", "download", "video", "map", "spring",
"fix", "input", "clip", "fly", "lang", "up", "down",
"persona", "css", "html", "http", "ball", "firefox",
"bookmark", "chat", "zilla", "edit", "menu", "menus",
"status", "bar", "with", "easy", "sync", "search", "google",
"time", "window", "js", "super", "scroll", "title", "close",
"undo", "user", "inspect", "inspector", "browser",
"context", "dictionary", "mail", "button", "url",
"password", "secure", "image", "new", "tab", "delete",
"click", "name", "smart", "down", "manager", "open",
"query", "net", "link", "blog", "this", "color", "select",
"key", "keys", "foxy", "translate", "word", ]
}
}
}
}
@@ -1,12 +1,9 @@
import os
import datetime

from nose.exc import SkipTest
from nose.tools import eq_
import mock

from django.conf import settings

import amo
import amo.tests
from addons import cron
@@ -26,7 +26,6 @@ from addons.models import (Addon, AddonCategory, AddonDependency,
BlacklistedSlug, Category, Charity, CompatOverride,
CompatOverrideRange, FrozenAddon,
IncompatibleVersions, Persona, Preview)
from addons.search import setup_mapping
from applications.models import Application, AppVersion
from constants.applications import DEVICE_TYPES
from devhub.models import ActivityLog, AddonLog, RssKey, SubmitStep

@@ -2181,12 +2180,10 @@ class TestSearchSignals(amo.tests.ESTestCase):

def setUp(self):
super(TestSearchSignals, self).setUp()
setup_mapping()
self.addCleanup(self.cleanup)

def cleanup(self):
for index in settings.ES_INDEXES.values():
self.es.delete_index_if_exists(index)
self.empty_index('default')

def test_no_addons(self):
eq_(Addon.search().count(), 0)
@@ -6,8 +6,8 @@ from django.db import models, transaction
from django.utils import encoding, translation

import caching.base
import elasticsearch
import multidb.pinning
import pyes.exceptions
import queryset_transform

from . import search

@@ -291,24 +291,26 @@ class OnChangeMixin(object):

class SearchMixin(object):

ES_ALIAS_KEY = 'default'

@classmethod
def _get_index(cls):
indexes = settings.ES_INDEXES
return indexes.get(cls._meta.db_table) or indexes['default']
return indexes.get(cls.ES_ALIAS_KEY)

@classmethod
def index(cls, document, id=None, bulk=False, index=None):
"""Wrapper around pyes.ES.index."""
def index(cls, document, id=None, refresh=False, index=None):
"""Wrapper around Elasticsearch.index."""
search.get_es().index(
document, index=index or cls._get_index(),
doc_type=cls._meta.db_table, id=id, bulk=bulk)
body=document, index=index or cls._get_index(),
doc_type=cls.get_mapping_type(), id=id, refresh=refresh)

@classmethod
def unindex(cls, id, index=None):
es = search.get_es()
try:
es.delete(index or cls._get_index(), cls._meta.db_table, id)
except pyes.exceptions.NotFoundException:
except elasticsearch.TransportError:
# Item wasn't found, whatevs.
pass

@@ -316,15 +318,8 @@ class SearchMixin(object):
def search(cls, index=None):
return search.ES(cls, index or cls._get_index())

# For compatibility with elasticutils > v0.5.
# TODO: Remove these when we've moved mkt to its own index.

@classmethod
def get_index(cls):
return cls._get_index()

@classmethod
def get_mapping_type_name(cls):
def get_mapping_type(cls):
return cls._meta.db_table
@@ -3,10 +3,7 @@ import logging
from django.conf import settings as dj_settings

from django_statsd.clients import statsd
from elasticutils import S as EU_S
from elasticutils.contrib.django import get_es as eu_get_es
from pyes import ES as pyes_ES
from pyes import VERSION as PYES_VERSION
from elasticsearch import Elasticsearch


log = logging.getLogger('z.es')
@@ -18,60 +15,14 @@ DEFAULT_INDEXES = ['default']
DEFAULT_DUMP_CURL = None


# Pulled from elasticutils 0.5 so we can upgrade elasticutils to a newer
# version which is based on pyelasticsearch and not break AMO.
def get_es(hosts=None, default_indexes=None, timeout=None, dump_curl=None,
**settings):
"""Create an ES object and return it.

:arg hosts: list of uris; ES hosts to connect to, defaults to
``['localhost:9200']``
:arg default_indexes: list of strings; the default indexes to use,
defaults to 'default'
:arg timeout: int; the timeout in seconds, defaults to 5
:arg dump_curl: function or None; function that dumps curl output,
see docs, defaults to None
:arg settings: other settings to pass into `pyes.es.ES`

Examples:

>>> es = get_es()

>>> es = get_es(hosts=['localhost:9200'])

>>> es = get_es(timeout=30) # good for indexing

>>> es = get_es(default_indexes=['sumo_prod_20120627']

>>> class CurlDumper(object):
... def write(self, text):
... print text
...
>>> es = get_es(dump_curl=CurlDumper())

"""
def get_es(hosts=None, timeout=None, **settings):
"""Create an ES object and return it."""
# Cheap way of de-None-ifying things
hosts = hosts or getattr(dj_settings, 'ES_HOSTS', DEFAULT_HOSTS)
default_indexes = default_indexes or [dj_settings.ES_INDEXES['default']]
timeout = (timeout if timeout is not None else
getattr(dj_settings, 'ES_TIMEOUT', DEFAULT_TIMEOUT))
dump_curl = dump_curl or getattr(dj_settings, 'DEFAULT_DUMP_CURL', False)

if not isinstance(default_indexes, list):
default_indexes = [default_indexes]

es = pyes_ES(hosts, default_indexes=default_indexes, timeout=timeout,
dump_curl=dump_curl, **settings)

# pyes 0.15 does this lame thing where it ignores dump_curl in
# the ES constructor and always sets it to None. So what we do
# is set it manually after the ES has been created and
# defaults['dump_curl'] is truthy. This might not work for all
# values of dump_curl.
if PYES_VERSION[0:2] == (0, 15) and dump_curl is not None:
es.dump_curl = dump_curl

return es
return Elasticsearch(hosts, timeout=timeout, **settings)


class ES(object):
@@ -112,6 +63,9 @@ class ES(object):
def facet(self, **kw):
return self._clone(next_step=('facet', kw.items()))

def source(self, *fields):
return self._clone(next_step=('source', fields))

def extra(self, **kw):
new = self._clone()
actions = 'values values_dict order_by query filter facet'.split()

@@ -147,6 +101,7 @@ class ES(object):
queries = []
sort = []
fields = ['id']
source = []
facets = {}
as_list = as_dict = False
for action, value in self.steps:
@@ -169,35 +124,56 @@ class ES(object):
queries.extend(self._process_queries(value))
elif action == 'filter':
filters.extend(self._process_filters(value))
elif action == 'source':
source.extend(value)
elif action == 'facet':
facets.update(value)
else:
raise NotImplementedError(action)

qs = {}
if len(filters) > 1:
qs['filter'] = {'and': filters}
elif filters:
qs['filter'] = filters[0]

if len(queries) > 1:
qs['query'] = {'bool': {'must': queries}}
qs = {'bool': {'must': queries}}
elif queries:
qs['query'] = queries[0]
qs = queries[0]
else:
qs = {"match_all": {}}

qs = {
"function_score": {
"query": qs,
"functions": [{"field_value_factor": {"field": "boost"}}]
}
}

if filters:
if len(filters) > 1:
filters = {"and": filters}
qs = {
"filtered": {
"query": qs,
"filter": filters
}
}

body = {"query": qs}
if sort:
body['sort'] = sort
if self.start:
body['from'] = self.start
if self.stop is not None:
body['size'] = self.stop - self.start
if facets:
body['facets'] = facets

if fields:
qs['fields'] = fields
if facets:
qs['facets'] = facets
if sort:
qs['sort'] = sort
if self.start:
qs['from'] = self.start
if self.stop is not None:
qs['size'] = self.stop - self.start
body['fields'] = fields
# As per version 1.0, ES has deprecated loading fields not stored from
# '_source', plus non leaf fields are not allowed in fields.
if source:
body['_source'] = source

self.fields, self.as_list, self.as_dict = fields, as_list, as_dict
return qs
return body

def _split(self, string):
if '__' in string:
@@ -261,7 +237,11 @@ class ES(object):
es = get_es()
try:
with statsd.timer('search.es.timer') as timer:
hits = es.search(qs, self.index, self.type._meta.db_table)
hits = es.search(
body=qs,
index=self.index,
doc_type=self.type._meta.db_table
)
except Exception:
log.error(qs)
raise
@@ -317,7 +297,12 @@ class DictSearchResults(SearchResults):
# Elasticsearch >= 1.0 style.
for h in hits:
hit = {}
for field, value in h['fields'].items():
fields = h['fields']
# If source is returned, it means that it has been asked, so
# take it.
if '_source' in h:
fields.update(h['_source'])
for field, value in fields.items():
if type(value) != list:
value = [value]
hit[field] = value
@@ -353,43 +338,3 @@ class ObjectSearchResults(SearchResults):
def __iter__(self):
objs = dict((obj.id, obj) for obj in self.objects)
return (objs[id] for id in self.ids if id in objs)


class TempS(EU_S):
# Temporary class override to mimic ElasticUtils v0.5 behavior.
# TODO: Remove this when we've moved mkt to its own index.

def get_es(self, **kwargs):
"""Returns the pyelasticsearch ElasticSearch object to use.

This uses the django get_es builder by default which takes
into account settings in ``settings.py``.

"""
return super(TempS, self).get_es(default_builder=eu_get_es)

def _do_search(self):
"""
Perform the search, then convert that raw format into a SearchResults
instance and return it.
"""
if not self._results_cache:
hits = self.raw()
if self.as_list:
ResultClass = ListSearchResults
elif self.as_dict or self.type is None:
ResultClass = DictSearchResults
else:
ResultClass = ObjectSearchResults
self._results_cache = ResultClass(self.type, hits, self.fields)
return self._results_cache

def _build_query(self):
query = super(TempS, self)._build_query()
if 'fields' in query:
if 'id' not in query['fields']:
query['fields'].append('id')
else:
query['fields'] = ['id']

return query
@@ -19,10 +19,7 @@ from django.test.client import Client
from django.utils import translation

import caching
import elasticutils.contrib.django as elasticutils
import mock
import pyelasticsearch.exceptions as pyelasticsearch
import pyes.exceptions as pyes
import test_utils
import tower
from dateutil.parser import parse as dateutil_parser
@@ -201,21 +198,7 @@ class TestClient(Client):
raise AttributeError


ES_patchers = [mock.patch('amo.search.get_es', spec=True),
mock.patch('elasticutils.contrib.django', spec=True)]


def start_es_mock():
for patch in ES_patchers:
patch.start()


def stop_es_mock():
for patch in ES_patchers:
patch.stop()

if hasattr(elasticutils, '_local') and hasattr(elasticutils._local, 'es'):
delattr(elasticutils._local, 'es')
Mocked_ES = mock.patch('amo.search.get_es', spec=True)


def mock_es(f):
@@ -225,11 +208,11 @@ def mock_es(f):
"""
@wraps(f)
def decorated(request, *args, **kwargs):
start_es_mock()
Mocked_ES.start()
try:
return f(request, *args, **kwargs)
finally:
stop_es_mock()
Mocked_ES.stop()
return decorated
@@ -243,7 +226,7 @@ class MockEsMixin(object):
@classmethod
def setUpClass(cls):
if cls.mock_es:
start_es_mock()
Mocked_ES.start()
try:
reset.send(None) # Reset all the ES tasks on hold.
super(MockEsMixin, cls).setUpClass()

@@ -251,7 +234,7 @@ class MockEsMixin(object):
# We need to unpatch here because tearDownClass will not be
# called.
if cls.mock_es:
stop_es_mock()
Mocked_ES.stop()
raise

@classmethod
@@ -260,7 +243,7 @@ class MockEsMixin(object):
super(MockEsMixin, cls).tearDownClass()
finally:
if cls.mock_es:
stop_es_mock()
Mocked_ES.stop()


class TestCase(MockEsMixin, RedisTest, test_utils.TestCase):
@@ -736,7 +719,7 @@ class ESTestCase(TestCase):

super(ESTestCase, cls).setUpClass()
try:
cls.es.cluster_health()
cls.es.cluster.health()
except Exception, e:
e.args = tuple([u'%s (it looks like ES is not running, '
'try starting it or set RUN_ES_TESTS=False)'
@@ -750,27 +733,10 @@ class ESTestCase(TestCase):
}

for index in set(settings.ES_INDEXES.values()):
# Get the index that's pointed to by the alias.
try:
indices = cls.es.get_alias(index)
index = indices[0]
except IndexError:
# There's no alias, just use the index.
print 'Found no alias for %s.' % index
index = index
except (pyes.IndexMissingException,
pyelasticsearch.ElasticHttpNotFoundError):
pass
cls.es.indices.delete(index, ignore=[404])

# Remove any alias as well.
try:
cls.es.delete_index(index)
except (pyes.IndexMissingException,
pyelasticsearch.ElasticHttpNotFoundError) as exc:
print 'Could not delete index %r: %s' % (index, exc)

addons.search.setup_mapping()
stats.search.setup_indexes()
addons.search.create_new_index()
stats.search.create_new_index()

@classmethod
def tearDownClass(cls):
@@ -799,7 +765,7 @@ class ESTestCase(TestCase):
@classmethod
def refresh(cls, index='default', timesleep=0):
process.send(None)
cls.es.refresh(settings.ES_INDEXES[index], timesleep=timesleep)
cls.es.indices.refresh(settings.ES_INDEXES[index])

@classmethod
def reindex(cls, model, index='default'):
@@ -817,3 +783,10 @@ class ESTestCase(TestCase):
addon_factory(),
addon_factory(),
]

@classmethod
def empty_index(cls, index):
cls.es.delete_by_query(
settings.ES_INDEXES[index],
body={"query": {"match_all": {}}}
)
@@ -60,121 +60,100 @@ class TestES(amo.tests.ESTestCase):
# Doing a filter creates a new ES object.
qs = Addon.search()
qs2 = qs.filter(type=1)
eq_(qs._build_query(), {'fields': ['id']})
eq_(qs2._build_query(), {'fields': ['id'],
'filter': {'term': {'type': 1}}})
assert 'filtered' not in qs._build_query()['query']
assert 'filtered' in qs2._build_query()['query']

def test_filter(self):
qs = Addon.search().filter(type=1)
eq_(qs._build_query(), {'fields': ['id'],
'filter': {'term': {'type': 1}}})
eq_(qs._build_query()['query']['filtered']['filter'],
[{'term': {'type': 1}}])

def test_in_filter(self):
qs = Addon.search().filter(type__in=[1, 2])
eq_(qs._build_query(), {'fields': ['id'],
'filter': {'in': {'type': [1, 2]}}})
eq_(qs._build_query()['query']['filtered']['filter'],
[{'in': {'type': [1, 2]}}])

def test_and(self):
qs = Addon.search().filter(type=1, category__in=[1, 2])
eq_(qs._build_query(), {'fields': ['id'],
'filter': {'and': [
{'term': {'type': 1}},
{'in': {'category': [1, 2]}},
]}})
eq_(qs._build_query()['query']['filtered']['filter'],
{'and': [{'term': {'type': 1}}, {'in': {'category': [1, 2]}}]})

def test_query(self):
qs = Addon.search().query(type=1)
eq_(qs._build_query(), {'fields': ['id'],
'query': {'term': {'type': 1}}})
eq_(qs._build_query()['query']['function_score']['query'],
{'term': {'type': 1}})

def test_query_match(self):
qs = Addon.search().query(name__match='woo woo')
eq_(qs._build_query(), {'fields': ['id'],
'query': {'match': {'name': 'woo woo'}}})
eq_(qs._build_query()['query']['function_score']['query'],
{'match': {'name': 'woo woo'}})

def test_query_multiple_and_range(self):
qs = Addon.search().query(type=1, status__gte=1)
eq_(qs._build_query(), {'fields': ['id'],
'query': {'bool': {'must': [
{'term': {'type': 1}},
{'range': {'status': {'gte': 1}}},
]}}})
eq_(qs._build_query()['query']['function_score']['query'],
{'bool': {'must': [{'term': {'type': 1}},
{'range': {'status': {'gte': 1}}}, ]}})

def test_query_or(self):
qs = Addon.search().query(or_=dict(type=1, status__gte=2))
eq_(qs._build_query(), {'fields': ['id'],
'query': {'bool': {'should': [
{'term': {'type': 1}},
{'range': {'status': {'gte': 2}}},
]}}})
eq_(qs._build_query()['query']['function_score']['query'],
{'bool': {'should': [{'term': {'type': 1}},
{'range': {'status': {'gte': 2}}}, ]}})

def test_query_or_and(self):
qs = Addon.search().query(or_=dict(type=1, status__gte=2), category=2)
eq_(qs._build_query(), {'fields': ['id'],
'query': {'bool': {'must': [
{'term': {'category': 2}},
{'bool': {'should': [
{'term': {'type': 1}},
{'range': {'status': {'gte': 2}}},
]}}
]}}})
eq_(qs._build_query()['query']['function_score']['query'],
{'bool': {'must': [{'term': {'category': 2}},
{'bool': {'should': [
{'term': {'type': 1}},
{'range': {'status': {'gte': 2}}}, ]}}]}})

def test_query_fuzzy(self):
fuzz = {'boost': 2, 'value': 'woo'}
qs = Addon.search().query(or_=dict(type=1, status__fuzzy=fuzz))
eq_(qs._build_query(), {'fields': ['id'],
'query': {'bool': {'should': [
{'fuzzy': {'status': fuzz}},
{'term': {'type': 1}},
]}}})
eq_(qs._build_query()['query']['function_score']['query'],
{'bool': {'should': [{'fuzzy': {'status': fuzz}},
{'term': {'type': 1}}, ]}})

def test_order_by_desc(self):
qs = Addon.search().order_by('-rating')
eq_(qs._build_query(), {'fields': ['id'],
'sort': [{'rating': 'desc'}]})
eq_(qs._build_query()['sort'], [{'rating': 'desc'}])

def test_order_by_asc(self):
qs = Addon.search().order_by('rating')
eq_(qs._build_query(), {'fields': ['id'],
'sort': ['rating']})
eq_(qs._build_query()['sort'], ['rating'])

def test_order_by_multiple(self):
qs = Addon.search().order_by('-rating', 'id')
eq_(qs._build_query(), {'fields': ['id'],
'sort': [{'rating': 'desc'}, 'id']})
eq_(qs._build_query()['sort'], [{'rating': 'desc'}, 'id'])

def test_slice(self):
qs = Addon.search()[5:12]
eq_(qs._build_query(), {'fields': ['id'],
'from': 5,
'size': 7})
eq_(qs._build_query()['from'], 5)
eq_(qs._build_query()['size'], 7)

def test_filter_or(self):
qs = Addon.search().filter(type=1).filter(or_=dict(status=1, app=2))
eq_(qs._build_query(), {'fields': ['id'],
'filter': {'and': [
{'term': {'type': 1}},
{'or': [{'term': {'status': 1}},
{'term': {'app': 2}}]},
]}})
eq_(qs._build_query()['query']['filtered']['filter'],
{'and': [
{'term': {'type': 1}},
{'or': [{'term': {'status': 1}}, {'term': {'app': 2}}]},
]})

qs = Addon.search().filter(type=1, or_=dict(status=1, app=2))
eq_(qs._build_query(), {'fields': ['id'],
'filter': {'and': [
{'term': {'type': 1}},
{'or': [{'term': {'status': 1}},
{'term': {'app': 2}}]},
]}})
eq_(qs._build_query()['query']['filtered']['filter'],
{'and': [
{'term': {'type': 1}},
{'or': [{'term': {'status': 1}}, {'term': {'app': 2}}]},
]})

def test_slice_stop(self):
qs = Addon.search()[:6]
eq_(qs._build_query(), {'fields': ['id'],
'size': 6})
eq_(qs._build_query()['size'], 6)

def test_slice_stop_zero(self):
qs = Addon.search()[:0]
eq_(qs._build_query(), {'fields': ['id'],
'size': 0})
eq_(qs._build_query()['size'], 0)

def test_getitem(self):
addons = list(Addon.search())
@@ -200,55 +179,54 @@ class TestES(amo.tests.ESTestCase):

def test_gte(self):
qs = Addon.search().filter(type__in=[1, 2], status__gte=4)
eq_(qs._build_query(), {'fields': ['id'],
'filter': {'and': [
{'in': {'type': [1, 2]}},
{'range': {'status': {'gte': 4}}},
]}})
eq_(qs._build_query()['query']['filtered']['filter'],
{'and': [
{'in': {'type': [1, 2]}},
{'range': {'status': {'gte': 4}}},
]})

def test_lte(self):
qs = Addon.search().filter(type__in=[1, 2], status__lte=4)
eq_(qs._build_query(), {'fields': ['id'],
'filter': {'and': [
{'in': {'type': [1, 2]}},
{'range': {'status': {'lte': 4}}},
]}})
eq_(qs._build_query()['query']['filtered']['filter'],
{'and': [
{'in': {'type': [1, 2]}},
{'range': {'status': {'lte': 4}}},
]})

def test_gt(self):
qs = Addon.search().filter(type__in=[1, 2], status__gt=4)
eq_(qs._build_query(), {'fields': ['id'],
'filter': {'and': [
{'in': {'type': [1, 2]}},
{'range': {'status': {'gt': 4}}},
]}})
eq_(qs._build_query()['query']['filtered']['filter'],
{'and': [
{'in': {'type': [1, 2]}},
{'range': {'status': {'gt': 4}}},
]})

def test_lt(self):
qs = Addon.search().filter(type__in=[1, 2], status__lt=4)
eq_(qs._build_query(), {'fields': ['id'],
'filter': {'and': [
{'range': {'status': {'lt': 4}}},
{'in': {'type': [1, 2]}},
]}})
eq_(qs._build_query()['query']['filtered']['filter'],
{'and': [
{'range': {'status': {'lt': 4}}},
{'in': {'type': [1, 2]}},
]})

def test_lt2(self):
qs = Addon.search().filter(status__lt=4)
eq_(qs._build_query(), {'fields': ['id'],
'filter': {'range': {'status': {'lt': 4}}}})
eq_(qs._build_query()['query']['filtered']['filter'],
[{'range': {'status': {'lt': 4}}}])

def test_range(self):
qs = Addon.search().filter(date__range=('a', 'b'))
eq_(qs._build_query(), {'fields': ['id'],
'filter': {'range': {'date': {'gte': 'a',
'lte': 'b'}}}})
eq_(qs._build_query()['query']['filtered']['filter'],
[{'range': {'date': {'gte': 'a', 'lte': 'b'}}}])

def test_prefix(self):
qs = Addon.search().query(name__startswith='woo')
eq_(qs._build_query(), {'fields': ['id'],
'query': {'prefix': {'name': 'woo'}}})
eq_(qs._build_query()['query']['function_score']['query'],
{'prefix': {'name': 'woo'}})

def test_values(self):
qs = Addon.search().values('name')
eq_(qs._build_query(), {'fields': ['id', 'name']})
eq_(qs._build_query()['fields'], ['id', 'name'])

def test_values_result(self):
addons = [([a.id], [a.slug]) for a in self._addons]
@@ -257,11 +235,11 @@ class TestES(amo.tests.ESTestCase):

def test_values_dict(self):
qs = Addon.search().values_dict('name')
eq_(qs._build_query(), {'fields': ['id', 'name']})
eq_(qs._build_query()['fields'], ['id', 'name'])

def test_empty_values_dict(self):
qs = Addon.search().values_dict()
eq_(qs._build_query(), {})
assert 'fields' not in qs._build_query()

def test_values_dict_result(self):
addons = [{'id': [a.id], 'slug': [a.slug]} for a in self._addons]
@@ -289,74 +267,67 @@ class TestES(amo.tests.ESTestCase):

def test_extra_values(self):
qs = Addon.search().extra(values=['name'])
eq_(qs._build_query(), {'fields': ['id', 'name']})
eq_(qs._build_query()['fields'], ['id', 'name'])

qs = Addon.search().values('status').extra(values=['name'])
eq_(qs._build_query(), {'fields': ['id', 'status', 'name']})
eq_(qs._build_query()['fields'], ['id', 'status', 'name'])

def test_extra_values_dict(self):
qs = Addon.search().extra(values_dict=['name'])
eq_(qs._build_query(), {'fields': ['id', 'name']})
eq_(qs._build_query()['fields'], ['id', 'name'])

qs = Addon.search().values_dict('status').extra(values_dict=['name'])
eq_(qs._build_query(), {'fields': ['id', 'status', 'name']})
eq_(qs._build_query()['fields'], ['id', 'status', 'name'])

def test_extra_order_by(self):
qs = Addon.search().extra(order_by=['-rating'])
eq_(qs._build_query(), {'fields': ['id'],
'sort': [{'rating': 'desc'}]})
eq_(qs._build_query()['sort'], [{'rating': 'desc'}])

qs = Addon.search().order_by('-id').extra(order_by=['-rating'])
eq_(qs._build_query(), {'fields': ['id'],
'sort': [{'id': 'desc'},
{'rating': 'desc'}]})
eq_(qs._build_query()['sort'], [{'id': 'desc'}, {'rating': 'desc'}])

def test_extra_query(self):
qs = Addon.search().extra(query={'type': 1})
eq_(qs._build_query(), {'fields': ['id'],
'query': {'term': {'type': 1}}})
eq_(qs._build_query()['query']['function_score']['query'],
{'term': {'type': 1}})

qs = Addon.search().filter(status=1).extra(query={'type': 1})
eq_(qs._build_query(), {'fields': ['id'],
'query': {'term': {'type': 1}},
'filter': {'term': {'status': 1}}})
filtered = qs._build_query()['query']['filtered']
eq_(filtered['query']['function_score']['query'], {'term': {'type': 1}})
eq_(filtered['filter'], [{'term': {'status': 1}}])

def test_extra_filter(self):
qs = Addon.search().extra(filter={'category__in': [1, 2]})
eq_(qs._build_query(), {'fields': ['id'],
'filter': {'in': {'category': [1, 2]}}})
eq_(qs._build_query()['query']['filtered']['filter'],
[{'in': {'category': [1, 2]}}])

qs = (Addon.search().filter(type=1)
.extra(filter={'category__in': [1, 2]}))
eq_(qs._build_query(), {'fields': ['id'],
'filter': {'and': [
{'term': {'type': 1}},
{'in': {'category': [1, 2]}},
]}})
eq_(qs._build_query()['query']['filtered']['filter'],
{'and': [{'term': {'type': 1}}, {'in': {'category': [1, 2]}}, ]})

def test_extra_filter_or(self):
qs = Addon.search().extra(filter={'or_': {'status': 1, 'app': 2}})
eq_(qs._build_query(), {'fields': ['id'],
'filter': {'or': [
{'term': {'status': 1}},
{'term': {'app': 2}}]}})
eq_(qs._build_query()['query']['filtered']['filter'],
[{'or': [{'term': {'status': 1}}, {'term': {'app': 2}}]}])

qs = (Addon.search().filter(type=1)
.extra(filter={'or_': {'status': 1, 'app': 2}}))
eq_(qs._build_query(), {'fields': ['id'],
'filter': {'and': [
{'term': {'type': 1}},
{'or': [{'term': {'status': 1}},
{'term': {'app': 2}}]},
]}})
eq_(qs._build_query()['query']['filtered']['filter'],
{'and': [{'term': {'type': 1}},
{'or': [{'term': {'status': 1}}, {'term': {'app': 2}}]}]})

def test_facet_range(self):
facet = {'range': {'status': [{'lte': 3}, {'gte': 5}]}}
# Pass a copy so edits aren't propagated back here.
qs = Addon.search().filter(app=1).facet(by_status=dict(facet))
eq_(qs._build_query(), {'fields': ['id'],
'filter': {'term': {'app': 1}},
'facets': {'by_status': facet}})
eq_(qs._build_query()['query']['filtered']['filter'],
[{'term': {'app': 1}}])
eq_(qs._build_query()['facets'], {'by_status': facet})

def test_source(self):
qs = Addon.search().source('versions')
eq_(qs._build_query()['_source'], ['versions'])


class TestPaginator(amo.tests.ESTestCase):
@@ -35,10 +35,8 @@ from django.utils.functional import Promise
from django.utils.http import urlquote

import bleach
import elasticutils.contrib.django as elasticutils
import html5lib
import jinja2
import pyes.exceptions as pyes
import pytz
from babel import Locale
from cef import log_cef as _log_cef

@@ -123,7 +121,7 @@ def paginate(request, queryset, per_page=20, count=None):
``.count()`` on the queryset. This can be good if the queryset would
produce an expensive count query.
"""
p = (ESPaginator if isinstance(queryset, (amo.search.ES, elasticutils.S))
p = (ESPaginator if isinstance(queryset, amo.search.ES)
else paginator.Paginator)(queryset, per_page)

if count is not None:
@@ -919,59 +917,6 @@ def rm_local_tmp_file(path):
return os.unlink(path)


def timestamp_index(index):
"""Returns index-YYYYMMDDHHMMSS with the current time."""
return '%s-%s' % (index, datetime.datetime.now().strftime('%Y%m%d%H%M%S'))


def create_es_index_if_missing(index, config=None, aliased=False):
"""Creates an index if it's not present.

Returns the index name. It may change if it was aliased.

Options:

- index: name of the index.
- config: if provided, used as the settings option for the
ES calls.
- aliased: If set to true, the index is suffixed with a timestamp
and an alias with the index name is created.
"""
es = amo.search.get_es()

if aliased:
alias = index
try:
indices = es.get_alias(alias)
if len(indices) > 1:
raise ValueError("The %r alias should not point to "
"several indices" % index)
# we're good here - the alias and the index exist
return indices[0]
except pyes.IndexMissingException:
# no alias exists, so we want to
# create a fresh one and a fresh index
index = timestamp_index(index)

if settings.IN_TEST_SUITE:
if not config:
config = {}
# Be nice to ES running on ci.mozilla.org
config.update({'number_of_shards': 3,
'number_of_replicas': 0})
try:
es.create_index_if_missing(index, settings=config)
if aliased:
try:
es.add_alias(alias, [index])
except pyes.ElasticSearchException, exc:
log.info('ES error creating alias: %s' % exc)
except pyes.ElasticSearchException, exc:
log.info('ES error creating index: %s' % exc)

return index


def timer(*func, **kwargs):
"""
Outputs statsd timings for the decorated method, ignored if not
@@ -149,7 +149,7 @@ def _drop_collection_recs(**kw):


@cronjobs.register
def reindex_collections(index=None, aliased=True):
def reindex_collections(index=None):
from . import tasks
ids = (Collection.objects.exclude(type=amo.COLLECTION_SYNCHRONIZED)
.values_list('id', flat=True))
@@ -19,11 +19,11 @@ def extract(collection):
in translations[collection.description_id]))

# Boost by the number of subscribers.
d['_boost'] = collection.subscribers ** .2
d['boost'] = collection.subscribers ** .2

# Double the boost if the collection is public.
if collection.listed:
d['_boost'] = max(d['_boost'], 1) * 4
d['boost'] = max(d['boost'], 1) * 4

# Indices for each language. languages is a list of locales we want to
# index with analyzer if the string's locale matches.
@@ -4,7 +4,6 @@ import math
from django.core.files.storage import default_storage as storage
from django.db.models import Count

import elasticutils.contrib.django as elasticutils
from celeryutils import task

import amo
@@ -20,7 +20,7 @@ log = logging.getLogger('z.compat')


@cronjobs.register
def compatibility_report(index=None, aliased=True):
def compatibility_report(index=None):
docs = defaultdict(dict)
indices = get_indices(index)

@@ -123,5 +123,6 @@ def compatibility_report(index=None, aliased=True):
for chunk in amo.utils.chunked(docs.values(), 150):
for doc in chunk:
for index in indices:
AppCompat.index(doc, id=doc['id'], bulk=True, index=index)
amo.search.get_es().flush_bulk(forced=True)
AppCompat.index(doc, id=doc['id'], refresh=False, index=index)
es = amo.search.get_es()
es.indices.refresh()
@@ -65,7 +65,6 @@ SEARCH_ANALYZER_PLUGINS = [
'polish',
]

# Which stemmer to use for each langauge.
#
# Note: We use the keys of this dict for supported stop words, also, which is
@@ -600,6 +600,7 @@ class TestThemeQueueRereview(ThemeReviewTestMixin, amo.tests.TestCase):
assert (copy_mock2.call_args_list[0][0][1]
.endswith('preview_large.jpg'))


class TestDeletedThemeLookup(amo.tests.TestCase):
fixtures = ['base/users', 'editors/user_persona_reviewer',
'editors/user_senior_persona_reviewer']

@@ -627,6 +628,7 @@ class TestThemeSearch(amo.tests.ESTestCase):
def setUp(self):
self.addon = addon_factory(type=amo.ADDON_PERSONA, name='themeteam',
status=amo.STATUS_PENDING)
self.refresh('default')

def search(self, q, flagged=False, rereview=False):
get_query = {'q': q, 'queue_type': ('rereview' if rereview else

@@ -643,11 +645,13 @@ class TestThemeSearch(amo.tests.ESTestCase):

def test_flagged(self):
self.addon.update(status=amo.STATUS_REVIEW_PENDING)
self.refresh('default')
eq_(self.search('theme', flagged=True)[0]['id'], self.addon.id)

def test_rereview(self):
RereviewQueueTheme.objects.create(theme=self.addon.persona)
self.addon.save()
self.refresh('default')
eq_(self.search('theme', rereview=True)[0]['id'], self.addon.id)
@@ -15,7 +15,7 @@ import constants.editors as rvw
from access import acl
from addons.models import Addon, Persona
from amo.decorators import json_view, post_required
from amo.search import TempS
from amo.search import ES
from amo.urlresolvers import reverse
from amo.utils import days_ago, paginate
from devhub.models import ActivityLog

@@ -199,7 +199,7 @@ def themes_search(request):
flagged = search_form.cleaned_data['queue_type'] == 'flagged'

# ES query on name.
themes = TempS(Addon).filter(type=amo.ADDON_PERSONA)
themes = Addon.search().filter(type=amo.ADDON_PERSONA)
if rereview:
themes = themes.filter(has_theme_rereview=True)
else:
@@ -2,8 +2,7 @@ import logging

from django.shortcuts import render

from pyes.exceptions import ElasticSearchException
from pyes.urllib3.connectionpool import HTTPError
from elasticsearch import TransportError


log = logging.getLogger('z.es')

@@ -12,7 +11,6 @@ log = logging.getLogger('z.es')
class ElasticsearchExceptionMiddleware(object):

def process_exception(self, request, exception):
if (issubclass(exception.__class__, (ElasticSearchException,
HTTPError))):
if issubclass(exception.__class__, TransportError):
log.error(u'Elasticsearch error: %s' % exception)
return render(request, 'search/down.html', status=503)
@@ -1,8 +1,7 @@
import mock
from nose.tools import eq_
from pyes.exceptions import ElasticSearchException, IndexMissingException
from pyes.urllib3.connectionpool import HTTPError, MaxRetryError, TimeoutError
from test_utils import RequestFactory
from elasticsearch import TransportError

import amo.tests
from search.middleware import ElasticsearchExceptionMiddleware as ESM

@@ -15,19 +14,10 @@ class TestElasticsearchExceptionMiddleware(amo.tests.TestCase):

@mock.patch('search.middleware.render')
def test_exceptions_we_catch(self, render_mock):
# These are instantiated with an error string.
for e in [ElasticSearchException, IndexMissingException]:
ESM().process_exception(self.request, e('ES ERROR'))
render_mock.assert_called_with(self.request, 'search/down.html',
status=503)
render_mock.reset_mock()

# These are just Exception classes.
for e in [HTTPError, MaxRetryError, TimeoutError]:
ESM().process_exception(self.request, e('ES ERROR'))
render_mock.assert_called_with(self.request, 'search/down.html',
status=503)
render_mock.reset_mock()
ESM().process_exception(self.request, TransportError(400, 'ES ERROR'))
render_mock.assert_called_with(self.request, 'search/down.html',
status=503)
render_mock.reset_mock()

@mock.patch('search.middleware.render')
def test_exceptions_we_do_not_catch(self, render_mock):
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
import json
import mock
import urlparse

from django.conf import settings
@@ -85,14 +85,14 @@ def addon_total_contributions():


@cronjobs.register
def index_latest_stats(index=None, aliased=True):
def index_latest_stats(index=None):
raise_if_reindex_in_progress('amo')
fmt = lambda d: d.strftime('%Y-%m-%d')
latest = UpdateCount.search(index).order_by('-date').values_dict()
if latest:
latest = latest[0]['date']
else:
latest = datetime.date.today() - datetime.timedelta(days=1)
fmt = lambda d: d.strftime('%Y-%m-%d')
date_range = '%s:%s' % (fmt(latest), fmt(datetime.date.today()))
latest = fmt(datetime.date.today() - datetime.timedelta(days=1))
date_range = '%s:%s' % (latest, fmt(datetime.date.today()))
cron_log.info('index_stats --date=%s' % date_range)
call_command('index_stats', addons=None, date=date_range)
@@ -37,7 +37,12 @@ class AddonCollectionCount(models.Model):
db_table = 'stats_addons_collections_counts'


class CollectionCount(SearchMixin, models.Model):
class StatsSearchMixin(SearchMixin):

ES_ALIAS_KEY = 'stats'


class CollectionCount(StatsSearchMixin, models.Model):
collection = models.ForeignKey('bandwagon.Collection')
count = models.PositiveIntegerField()
date = models.DateField()

@@ -57,7 +62,7 @@ class CollectionStats(models.Model):
db_table = 'stats_collections'


class DownloadCount(SearchMixin, models.Model):
class DownloadCount(StatsSearchMixin, models.Model):
addon = models.ForeignKey('addons.Addon')
count = models.PositiveIntegerField()
date = models.DateField()

@@ -68,7 +73,7 @@ class DownloadCount(SearchMixin, models.Model):


# TODO: remove when the script is proven to work correctly.
class DownloadCountTmp(SearchMixin, models.Model):
class DownloadCountTmp(StatsSearchMixin, models.Model):
addon = models.ForeignKey('addons.Addon')
count = models.PositiveIntegerField()
date = models.DateField()

@@ -78,7 +83,7 @@ class DownloadCountTmp(SearchMixin, models.Model):
db_table = 'download_counts_tmp'


class UpdateCount(SearchMixin, models.Model):
class UpdateCount(StatsSearchMixin, models.Model):
addon = models.ForeignKey('addons.Addon')
count = models.PositiveIntegerField()
date = models.DateField()

@@ -93,7 +98,7 @@ class UpdateCount(SearchMixin, models.Model):


# TODO: remove when the script is proven to work correctly.
class UpdateCountTmp(SearchMixin, models.Model):
class UpdateCountTmp(StatsSearchMixin, models.Model):
addon = models.ForeignKey('addons.Addon')
count = models.PositiveIntegerField()
date = models.DateField()

@@ -123,7 +128,7 @@ class ThemeUpdateCountManager(models.Manager):
return dict((d['addon_id'], d['avg']) for d in averages)


class ThemeUpdateCount(SearchMixin, models.Model):
class ThemeUpdateCount(StatsSearchMixin, models.Model):
"""Daily users taken from the ADI data (coming from Hive)."""
addon = models.ForeignKey('addons.Addon')
count = models.PositiveIntegerField()

@@ -384,7 +389,7 @@ class ClientData(models.Model):
'is_chromeless', 'language', 'region')


class ThemeUserCount(SearchMixin, models.Model):
class ThemeUserCount(StatsSearchMixin, models.Model):
"""Theme popularity (weekly average of users).

This is filled in by a cron job reading the popularity from the theme
@@ -1,10 +1,14 @@
import collections

from django.conf import settings
from django.core.management import call_command

import amo
import amo.search
from amo.utils import create_es_index_if_missing
from applications.models import AppVersion
from stats.models import CollectionCount, DownloadCount, UpdateCount
from lib.es.utils import create_index
from stats.models import (CollectionCount, DownloadCount, StatsSearchMixin,
UpdateCount)


def es_dict(items):
@@ -123,24 +127,42 @@ def get_all_app_versions():
return dict(rv)


def setup_indexes(index=None, aliased=True):
es = amo.search.get_es()
for model in CollectionCount, DownloadCount, UpdateCount:
index = index or model._get_index()
index = create_es_index_if_missing(index, aliased=aliased)
def get_alias():
return settings.ES_INDEXES.get(StatsSearchMixin.ES_ALIAS_KEY)

mapping = {
'properties': {
'id': {'type': 'long'},
'count': {'type': 'long'},
'data': {'dynamic': 'true',
'properties': {
'v': {'type': 'long'},
'k': {'type': 'string'}
}
},
'date': {'format': 'dateOptionalTime',
'type': 'date'}

def create_new_index(index=None, config=None):
if config is None:
config = {}
if index is None:
index = get_alias()
config['mappings'] = get_mappings()
create_index(index, config)


def reindex(index):
call_command('index_stats')


def get_mappings():
mapping = {
'properties': {
'id': {'type': 'long'},
'boost': {'type': 'float', 'null_value': 1.0},
'count': {'type': 'long'},
'data': {
'dynamic': 'true',
'properties': {
'v': {'type': 'long'},
'k': {'type': 'string'}
}
},
'date': {
'format': 'dateOptionalTime',
'type': 'date'
}
}
es.put_mapping(model._meta.db_table, mapping, index)
}

models = (CollectionCount, DownloadCount, UpdateCount)
return dict((m._meta.db_table, mapping) for m in models)
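Since put_mapping is no longer called per model, the stats mappings now have to be installed when the index is created. With the elasticsearch-py client that amounts to something like the following sketch (the timestamped index name is made up; get_mappings() is the helper defined above and 'stats' is the alias key from settings.ES_INDEXES):

from elasticsearch import Elasticsearch

es = Elasticsearch(['localhost:9200'])
es.indices.create(index='stats-20140101000000',
                  body={'mappings': get_mappings()})
es.indices.put_alias(index='stats-20140101000000', name='stats')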
@@ -289,8 +289,8 @@ def index_update_counts(ids, **kw):
key = '%s-%s' % (update.addon_id, update.date)
data = search.extract_update_count(update)
for index in indices:
UpdateCount.index(data, bulk=True, id=key, index=index)
es.flush_bulk(forced=True)
UpdateCount.index(data, refresh=False, id=key, index=index)
es.indices.refresh(index)
except Exception, exc:
index_update_counts.retry(args=[ids], exc=exc, **kw)
raise

@@ -310,9 +310,9 @@ def index_download_counts(ids, **kw):
key = '%s-%s' % (dl.addon_id, dl.date)
data = search.extract_download_count(dl)
for index in indices:
DownloadCount.index(data, bulk=True, id=key, index=index)
DownloadCount.index(data, refresh=False, id=key, index=index)

es.flush_bulk(forced=True)
es.indices.refresh(index)
except Exception, exc:
index_download_counts.retry(args=[ids], exc=exc)
raise

@@ -339,8 +339,8 @@ def index_collection_counts(ids, **kw):
AddonCollectionCount.objects.filter(**filters),
CollectionStats.objects.filter(**filters))
for index in indices:
CollectionCount.index(data, bulk=True, id=key, index=index)
es.flush_bulk(forced=True)
CollectionCount.index(data, refresh=False, id=key, index=index)
es.indices.refresh(index)
except Exception, exc:
index_collection_counts.retry(args=[ids], exc=exc)
raise

@@ -362,8 +362,8 @@ def index_theme_user_counts(ids, **kw):
key = '%s-%s' % (user_count.addon_id, user_count.date)
data = search.extract_theme_user_count(user_count)
for index in indices:
ThemeUserCount.index(data, bulk=True, id=key, index=index)
es.flush_bulk(forced=True)
ThemeUserCount.index(data, refresh=False, id=key, index=index)
es.indices.refresh(index)
except Exception, exc:
index_theme_user_counts.retry(args=[ids], exc=exc)
raise
@@ -162,7 +162,7 @@ class TestIndexLatest(amo.tests.ESTestCase):
def test_index_latest(self):
latest = datetime.date.today() - datetime.timedelta(days=5)
UpdateCount.index({'date': latest})
self.refresh('update_counts')
self.refresh('stats')

start = latest.strftime('%Y-%m-%d')
finish = datetime.date.today().strftime('%Y-%m-%d')
@@ -87,7 +87,7 @@ class ESStatsTest(StatsTest, amo.tests.ESTestCase):
tasks.index_download_counts(list(downloads))
user_counts = ThemeUserCount.objects.values_list('id', flat=True)
tasks.index_theme_user_counts(list(user_counts))
self.refresh('update_counts')
self.refresh('stats')


class TestSeriesSecurity(StatsTest):
@ -849,7 +849,7 @@ class TestCollections(amo.tests.ESTestCase):
|
|||
'votes_down': x, 'downloads': x})}
|
||||
CollectionCount.index(data, id='%s-%s' % (x, self.collection.pk))
|
||||
|
||||
self.refresh('stats_collections_counts')
|
||||
self.refresh('stats')
|
||||
|
||||
def tests_collection_anon(self):
|
||||
res = self.client.get(self.url)
|
||||
|
|
|
@ -18,6 +18,7 @@ from django.utils.cache import add_never_cache_headers, patch_cache_control
|
|||
from django.utils.datastructures import SortedDict
|
||||
|
||||
from cache_nuggets.lib import memoize
|
||||
from dateutil.parser import parse
|
||||
from product_details import product_details
|
||||
|
||||
import amo
|
||||
|
@ -56,7 +57,7 @@ def dashboard(request):
|
|||
'stats_base_url': stats_base_url})
|
||||
|
||||
|
||||
def get_series(model, extra_field=None, **filters):
|
||||
def get_series(model, extra_field=None, source=None, **filters):
|
||||
"""
|
||||
Get a generator of dicts for the stats model given by the filters.
|
||||
|
||||
|
@@ -67,13 +68,17 @@ def get_series(model, extra_field=None, **filters):
extra = () if extra_field is None else (extra_field,)
# Put a slice on it so we get more than 10 (the default), but limit to 365.
qs = (model.search().order_by('-date').filter(**filters)
.values_dict('date', 'count', *extra))[:365]
for val in qs:
.values_dict('date', 'count', *extra))
if source:
qs = qs.source(source)
for val in qs[:365]:
# Convert the datetimes to a date.
date_ = date(*val['date'][0].timetuple()[:3])
date_ = parse(val['date'][0]).date()
rv = dict(count=val['count'][0], date=date_, end=date_)
if extra_field:
rv['data'] = extract(val[extra_field])
if source:
rv['data'] = extract(val[source])
yield rv
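The new source argument replaces the old '_source.sources'-style extra_field strings: the caller names a stored field and get_series copies it into the 'data' key of each point. A hedged usage sketch, mirroring the sources_series view further down in this diff and assuming get_series lives in stats.views as the hunk context suggests (the add-on id and date range are made up)::

    from stats.models import DownloadCount
    from stats.views import get_series

    series = get_series(DownloadCount, source='sources',
                        addon=1234, date__range=('2014-01-01', '2014-12-31'))
    for point in series:
        # Each point carries date/count, plus 'data' when source is set.
        print point['date'], point['count'], point.get('data')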
@@ -200,7 +205,7 @@ def sources_series(request, addon, group, start, end, format):
date_range = check_series_params_or_404(group, start, end, format)
check_stats_permission(request, addon)

series = get_series(DownloadCount, extra_field='_source.sources',
series = get_series(DownloadCount, source='sources',
addon=addon.id, date__range=date_range)

if format == 'csv':

@@ -235,13 +240,13 @@ def usage_breakdown_series(request, addon, group,
check_stats_permission(request, addon)

fields = {
'applications': '_source.apps',
'locales': '_source.locales',
'oses': '_source.os',
'versions': '_source.versions',
'statuses': '_source.status',
'applications': 'apps',
'locales': 'locales',
'oses': 'os',
'versions': 'versions',
'statuses': 'status',
}
series = get_series(UpdateCount, extra_field=fields[field],
series = get_series(UpdateCount, source=fields[field],
addon=addon.id, date__range=date_range)
if field == 'locales':
series = process_locales(series)

@@ -624,7 +629,7 @@ def _collection_query(request, collection, start=None, end=None):
.values_dict())[:365]
series = []
for val in qs:
date_ = date(*val['date'].timetuple()[:3])
date_ = parse(val['date']).date()
series.append(dict(count=val['count'], date=date_, end=date_,
data=extract(val['data'])))
return series

@@ -47,7 +47,7 @@ def update_user_ratings():


@cronjobs.register
def reindex_users(index=None, aliased=True):
def reindex_users(index=None):
from . import tasks
ids = UserProfile.objects.values_list('id', flat=True)
taskset = [tasks.index_users.subtask(args=[chunk], kwargs=dict(index=index))
@@ -10,7 +10,8 @@ the most relevant hits.

Also check out `elasticsearch-head <http://mobz.github.io/elasticsearch-head/>`_,
a plugin with web front-end to elasticsearch that can be easier than talking to
elasticsearch over curl.
elasticsearch over curl, or `Marvel <http://www.elasticsearch.org/overview/marvel/>`_,
which includes a query editor with autocompletion.

Installation
------------
@@ -21,41 +22,10 @@ Elasticsearch comes with most package managers.::

If Elasticsearch isn't packaged for your system, you can install it
manually, `here are some good instructions on how to do so
<http://www.elasticsearch.org/tutorials/2010/07/01/setting-up-elasticsearch.html>`_.
<http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_installing_elasticsearch.html>`_.

For running Olympia you must install the
`ICU Analysis Plugin <http://www.elasticsearch.org/guide/reference/index-modules/analysis/icu-plugin/>`_.
See the `ICU Github Page <https://github.com/elasticsearch/elasticsearch-analysis-icu>`_
for instructions on installing this plugin.

On an Ubuntu box, this would mean running::

sudo /usr/share/elasticsearch/bin/plugin -install elasticsearch/elasticsearch-analysis-icu/1.13.0

Settings
--------

.. literalinclude:: /../scripts/elasticsearch/elasticsearch.yml

We use a custom analyzer for indexing add-on names since they're a little
different from normal text.

To get the same results as our servers, configure Elasticsearch by copying the
:src:`scripts/elasticsearch/elasticsearch.yml` (available in the
``scripts/elasticsearch/`` folder of your install) to your system:

* If on OS X, copy that file into
``/usr/local/Cellar/elasticsearch/*/config/``.
* On Linux, the directory is ``/etc/elasticsearch/``.

.. note::

If you are on a linux box, make sure to comment out the 4 lines relevant to
the path configuration, unless it corresponds to an existing
``/usr/local/var`` folder and you want it to be stored there.

If you don't do this your results will be slightly different, but you probably
won't notice.
On Ubuntu, you should just download and install a .deb from the
`download page <http://www.elasticsearch.org/download/>`_.

Launching and Setting Up
------------------------
@@ -63,6 +33,9 @@ Launching and Setting Up
Launch the Elasticsearch service. If you used homebrew, ``brew info
elasticsearch`` will show you the commands to launch. If you used aptitude,
Elasticsearch will come with a start-stop daemon in /etc/init.d.
On Ubuntu, if you have installed from a .deb, you can type:

sudo service elasticsearch start

Olympia has commands that set up mappings and indexes objects such as add-ons
and apps for you. Setting up the mappings is analogous to defining the
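A hedged way to drive that setup from Python rather than the shell, assuming the reindex management command added later in this diff (the with_stats option is inferred from the command's kwargs and may differ)::

    from django.core.management import call_command

    # Build timestamped indexes, reindex add-ons (and, optionally, stats),
    # then swap the aliases over to the new indexes.
    call_command('reindex', with_stats=True)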
@@ -102,8 +75,9 @@ maintained incrementally through post_save and post_delete hooks::
Querying Elasticsearch in Django
--------------------------------

We use `elasticutils <http://github.com/mozilla/elasticutils>`_, a Python
library that gives us a search API to elasticsearch.
For now, we have our own query builder (which is an historical clone of
`elasticutils <http://github.com/mozilla/elasticutils>`_), but we will
switch to the official one very soon.

We attach elasticutils to Django models with a mixin. This lets us do things
like ``.search()`` which returns an object which acts a lot like Django's ORM's
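A hedged sketch of what that query-builder chain looks like in practice, based only on the calls visible elsewhere in this diff (filter, order_by, values_dict and slicing); the model and the filter value are illustrative::

    from addons.models import Addon

    qs = (Addon.search()
          .filter(status=4)               # illustrative filter value
          .order_by('-weekly_downloads')
          .values_dict('id', 'name'))[:10]

    for doc in qs:
        print doc['id'], doc['name']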
@@ -1,88 +1,26 @@
import json
import logging
import os
import re
import sys
import time
import traceback
from optparse import make_option

import requests
from celery_tasktree import task_with_callbacks, TaskTree

from django.conf import settings as django_settings
from django.core.management import call_command
import elasticsearch

from django.conf import settings
from django.core.management.base import BaseCommand, CommandError

from addons.cron import reindex_addons
from amo.utils import timestamp_index
from apps.addons.search import setup_mapping as put_amo_mapping
from bandwagon.cron import reindex_collections
from compat.cron import compatibility_report
from amo.search import get_es
from apps.addons import search as addons_search
from apps.stats import search as stats_search
from lib.es.utils import (is_reindexing_amo, unflag_reindexing_amo,
flag_reindexing_amo)
from stats.search import setup_indexes as put_stats_mapping
from users.cron import reindex_users


_INDEXES = {}
_ALIASES = django_settings.ES_INDEXES.copy()
# Remove stats indexes. They may be added later via the --with-stats option.
_STATS_ALIASES = {}
for k, v in _ALIASES.items():
if 'stats' in v:
_ALIASES.pop(k)
_STATS_ALIASES[k] = v


def index_stats(index=None, aliased=True):
"""Indexes the previous 365 days."""
call_command('index_stats', addons=None)


_INDEXES = {'stats': [index_stats],
'apps': [reindex_addons,
reindex_collections,
reindex_users,
compatibility_report]}
flag_reindexing_amo, timestamp_index)

logger = logging.getLogger('z.elasticsearch')
DEFAULT_NUM_REPLICAS = 0
DEFAULT_NUM_SHARDS = 3

if hasattr(django_settings, 'ES_URLS'):
base_url = django_settings.ES_URLS[0]
else:
base_url = 'http://127.0.0.1:9200'


def url(path):
return '%s%s' % (base_url, path)


def _action(name, **kw):
return {name: kw}


def call_es(path, *args, **kw):
method = kw.pop('method', 'GET')
status = kw.pop('status', 200)
if isinstance(status, int):
status = [status]

if not path.startswith('/'):
path = '/' + path

method = getattr(requests, method.lower())
res = method(url(path), *args, **kw)

if res.status_code not in status:
error = CommandError('Call on %r failed.\n%s' % (path, res.content))
error.content = res.content
error.json = res.json()
raise error

return res
ES = get_es()


def log(msg, stdout=sys.stdout):
@@ -91,116 +29,45 @@ def log(msg, stdout=sys.stdout):

@task_with_callbacks
def delete_indexes(indexes, stdout=sys.stdout):
"""Removes the indexes.

- indexes: list of indexes names to remove.
"""
# now call the server - can we do this with a single call?
for index in indexes:
log('Removing index %r' % index, stdout=stdout)
call_es(index, method='DELETE')
indices = ','.join(indexes)
log('Removing indices %r' % indices, stdout=stdout)
ES.indices.delete(indices, ignore=[404, 500])


@task_with_callbacks
def run_aliases_actions(actions, stdout=sys.stdout):
"""Run actions on aliases.

- action: list of action/index/alias items
"""
# we also want to rename or delete the current index in case we have one
dump = []
aliases = []

for action, index, alias in actions:
dump.append({action: {'index': index, 'alias': alias}})

if action == 'add':
aliases.append(alias)

post_data = json.dumps({'actions': dump})

# now call the server
log('Rebuilding aliases with actions: %s' % dump, stdout=stdout)
try:
call_es('_aliases', post_data, method='POST')
except CommandError, e:
log('Initial command error: %s' % e, stdout=stdout)
# XXX Did not find a better way to extract the info
error = e.json['error']
res = re.search('(Invalid alias name \[)(?P<index>.*?)(\])', error)
if res is None:
raise

index = res.groupdict()['index']
log('Removing index %r' % index, stdout=stdout)
call_es(index, method='DELETE')

# Now trying again
log('Trying again to rebuild the aliases', stdout=stdout)
call_es('_aliases', post_data, method='POST')
def update_aliases(actions, stdout=sys.stdout):
log('Rebuilding aliases with actions: %s' % actions, stdout=stdout)
ES.indices.update_aliases({'actions': actions}, ignore=404)


@task_with_callbacks
def create_mapping(new_index, alias, num_replicas=DEFAULT_NUM_REPLICAS,
num_shards=DEFAULT_NUM_SHARDS, stdout=sys.stdout):
"""Creates a mapping for the new index.

- new_index: new index name.
- alias: alias name
- num_replicas: number of replicas in ES
- num_shards: number of shards in ES
"""
log('Create the mapping for index %r, alias: %r' % (new_index, alias),
def create_new_index(module, new_index, stdout=sys.stdout):
alias = module.get_alias()
log('Create the index {0}, for alias: {1}'.format(new_index, alias),
stdout=stdout)

if requests.head(url('/' + alias)).status_code == 200:
res = call_es('%s/_settings' % (alias)).json()
config = {}

# Retrieve settings from last index, if any
if ES.indices.exists(alias):
res = ES.indices.get_settings(alias)
idx_settings = res.get(alias, {}).get('settings', {})
else:
idx_settings = {}
config['number_of_replicas'] = idx_settings.get(
'number_of_replicas',
settings.ES_DEFAULT_NUM_REPLICAS
)
config['number_of_shards'] = idx_settings.get(
'number_of_shards',
settings.ES_DEFAULT_NUM_SHARDS
)

settings = {
'number_of_replicas': idx_settings.get('number_of_replicas',
num_replicas),
'number_of_shards': idx_settings.get('number_of_shards',
num_shards)
}

# Create mapping without aliases since we do it manually
if not 'stats' in alias:
put_amo_mapping(new_index, aliased=False)
else:
put_stats_mapping(new_index, aliased=False)

# Create new index
index_url = url('/%s' % new_index)

# if the index already exists we can keep it
if requests.head(index_url).status_code == 200:
return

call_es(index_url, json.dumps(settings), method='PUT',
status=(200, 201))
module.create_new_index(new_index, config)


@task_with_callbacks
def create_index(index, is_stats, stdout=sys.stdout):
"""Create the index.

- index: name of the index
- is_stats: if True, we're indexing stats
"""
log('Running all indexes for %r' % index, stdout=stdout)
indexers = is_stats and _INDEXES['stats'] or _INDEXES['apps']

for indexer in indexers:
log('Indexing %r' % indexer.__name__, stdout=stdout)
try:
indexer(index, aliased=False)
except Exception:
# We want to log this event but continue
log('Indexer %r failed' % indexer.__name__, stdout=stdout)
traceback.print_exc()
def index_data(module, index, stdout=sys.stdout):
log('Reindexing {0}'.format(index), stdout=stdout)
module.reindex(index)


@task_with_callbacks
@@ -232,9 +99,6 @@ Current Aliases configuration:
class Command(BaseCommand):
help = 'Reindex all ES indexes'
option_list = BaseCommand.option_list + (
make_option('--prefix', action='store',
help='Indexes prefixes, like test_',
default=''),
make_option('--force', action='store_true',
help=('Bypass the database flag that says '
'another indexation is ongoing'),

@@ -261,12 +125,11 @@ class Command(BaseCommand):
raise CommandError('Indexation already occuring - use --force to '
'bypass')

prefix = kwargs.get('prefix', '')
log('Starting the reindexation', stdout=self.stdout)

modules = [addons_search]
if kwargs.get('with_stats', False):
# Add the stats indexes back.
_ALIASES.update(_STATS_ALIASES)
modules.append(stats_search)

if kwargs.get('wipe', False):
confirm = raw_input('Are you sure you want to wipe all AMO '

@@ -277,28 +140,20 @@ class Command(BaseCommand):

if confirm == 'yes':
unflag_database(stdout=self.stdout)
for index in set(_ALIASES.values()):
requests.delete(url('/%s') % index)
for index in set(m.get_alias() for m in modules):
ES.indices.delete(index)
else:
raise CommandError("Aborted.")
elif force:
unflag_database(stdout=self.stdout)

# Get list current aliases at /_aliases.
all_aliases = requests.get(url('/_aliases')).json()
alias_actions = []

# building the list of indexes
indexes = set([prefix + index for index in
_ALIASES.values()])

actions = []

def add_action(*elmt):
if elmt in actions:
def add_alias_action(action, index, alias):
action = {action: {'index': index, 'alias': alias}}
if action in alias_actions:
return
actions.append(elmt)

all_aliases = all_aliases.items()
alias_actions.append(action)

# creating a task tree
log('Building the task tree', stdout=self.stdout)

@@ -308,46 +163,46 @@ class Command(BaseCommand):
to_remove = []

# for each index, we create a new time-stamped index
for alias in indexes:
is_stats = 'stats' in alias
for module in modules:
old_index = None
alias = module.get_alias()

for aliased_index, alias_ in all_aliases:
if alias in alias_['aliases'].keys():
try:
olds = ES.indices.get_alias(alias)
except elasticsearch.TransportError:
pass
else:
for old_index in olds.keys():
# mark the index to be removed later
old_index = aliased_index
to_remove.append(aliased_index)

# mark the alias to be removed as well
add_action('remove', aliased_index, alias)
to_remove.append(old_index)
add_alias_action('remove', old_index, alias)

# create a new index, using the alias name with a timestamp
new_index = timestamp_index(alias)

# if old_index is None that could mean it's a full index
# In that case we want to continue index in it
future_alias = url('/%s' % alias)
if requests.head(future_alias).status_code == 200:
if ES.indices.exists(alias):
old_index = alias

# flag the database
step1 = tree.add_task(flag_database,
args=[new_index, old_index, alias],
kwargs={'stdout': self.stdout})
step2 = step1.add_task(create_mapping,
args=[new_index, alias],
step2 = step1.add_task(create_new_index,
args=[module, new_index],
kwargs={'stdout': self.stdout})
step3 = step2.add_task(create_index,
args=[new_index, is_stats],
step3 = step2.add_task(index_data,
args=[module, new_index],
kwargs={'stdout': self.stdout})
last_action = step3

# adding new index to the alias
add_action('add', new_index, alias)
add_alias_action('add', new_index, alias)

# Alias the new index and remove the old aliases, if any.
renaming_step = last_action.add_task(run_aliases_actions,
args=[actions],
renaming_step = last_action.add_task(update_aliases,
args=[alias_actions],
kwargs={'stdout': self.stdout})

# unflag the database - there's no need to duplicate the

@@ -356,8 +211,9 @@ class Command(BaseCommand):
kwargs={'stdout': self.stdout})

# Delete the old indexes, if any
delete.add_task(delete_indexes,
args=[to_remove], kwargs={'stdout': self.stdout})
if to_remove:
delete.add_task(delete_indexes,
args=[to_remove], kwargs={'stdout': self.stdout})

# let's do it
log('Running all indexation tasks', stdout=self.stdout)

@@ -365,7 +221,8 @@ class Command(BaseCommand):
os.environ['FORCE_INDEXING'] = '1'
try:
tree.apply_async()
time.sleep(10)  # give celeryd some time to flag the DB
if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
time.sleep(10)  # give celeryd some time to flag the DB
while is_reindexing_amo():
sys.stdout.write('.')
sys.stdout.flush()

@@ -376,7 +233,7 @@ class Command(BaseCommand):
sys.stdout.write('\n')

# let's return the /_aliases values
aliases = call_es('_aliases').json()
aliases = ES.indices.get_aliases()
aliases = json.dumps(aliases, sort_keys=True, indent=4)
summary = _SUMMARY % (len(indexes), aliases)
summary = _SUMMARY % (len(modules), aliases)
log(summary, stdout=self.stdout)
@@ -12,9 +12,10 @@ import amo.search
import amo.tests
from amo.urlresolvers import reverse
from amo.utils import urlparams
from es.management.commands.reindex import call_es
from lib.es.utils import is_reindexing_amo, unflag_reindexing_amo

ES = amo.search.get_es()


class TestIndexCommand(amo.tests.ESTestCase):

@@ -26,13 +27,13 @@ class TestIndexCommand(amo.tests.ESTestCase):
self.url = reverse('search.search')

# Any index created during the test will be deleted.
self.indices = call_es('_status').json()['indices'].keys()
self.indices = ES.indices.status()['indices'].keys()

def tearDown(self):
current_indices = call_es('_status').json()['indices'].keys()
current_indices = ES.indices.status()['indices'].keys()
for index in current_indices:
if index not in self.indices:
call_es(index, method='DELETE')
ES.indices.delete(index, ignore=404)

def check_results(self, expected):
"""Make sure the expected addons are listed in a standard search."""

@@ -55,7 +56,7 @@ class TestIndexCommand(amo.tests.ESTestCase):

def get_indices_aliases(self):
"""Return the test indices with an alias."""
indices = call_es('_aliases').json()
indices = ES.indices.get_aliases()
items = [(index, aliases['aliases'].keys()[0])
for index, aliases in indices.items()
if len(aliases['aliases']) > 0 and index.startswith('test')]

@@ -102,7 +103,7 @@ class TestIndexCommand(amo.tests.ESTestCase):
wanted.append(amo.tests.addon_factory())
connection._commit()
connection.clean_savepoints()
amo.search.get_es().refresh()
self.refresh()
self.check_results(wanted)

if len(wanted) == old_addons_count:

@@ -117,7 +118,7 @@ class TestIndexCommand(amo.tests.ESTestCase):
# The reindexation is done, let's double check we have all our docs.
connection._commit()
connection.clean_savepoints()
amo.search.get_es().refresh()
self.refresh()
self.check_results(wanted)

# New indices have been created, and aliases now point to them.

@@ -1,9 +1,17 @@
import os
import datetime
import logging

from django.core.management.base import CommandError
from django.conf import settings

from elasticsearch import helpers

import amo.search
from .models import Reindexing
from django.core.management.base import CommandError

from .models import Reindexing

log = logging.getLogger('z.es')

# shortcut functions
is_reindexing_amo = Reindexing.objects.is_reindexing_amo
@@ -25,13 +33,19 @@ def index_objects(ids, model, search, index=None, transforms=None):
for t in transforms:
qs = qs.transform(t)

bulk = []
for ob in qs:
data = search.extract(ob)

for index in indices:
model.index(data, bulk=True, id=ob.id, index=index)
bulk.append({
"_source": data,
"_id": ob.id,
"_type": ob.get_mapping_type(),
"_index": index
})

amo.search.get_es().flush_bulk(forced=True)
es = amo.search.get_es()
return helpers.bulk(es, bulk)


def raise_if_reindex_in_progress(site):
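index_objects now collects one action dict per document and hands the list to elasticsearch.helpers.bulk. A self-contained hedged sketch of that action format (client, index and type names are illustrative)::

    from elasticsearch import Elasticsearch, helpers

    es = Elasticsearch(['http://127.0.0.1:9200'])  # hypothetical local node

    # _index/_type/_id route each document, _source is the document body.
    actions = [
        {'_index': 'addons_example', '_type': 'addons', '_id': 42,
         '_source': {'id': 42, 'name': 'Example Add-on', 'boost': 1.0}},
    ]

    # Returns (number of successful actions, list of errors).
    success, errors = helpers.bulk(es, actions)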
@@ -44,3 +58,36 @@ def raise_if_reindex_in_progress(site):
if already_reindexing and 'FORCE_INDEXING' not in os.environ:
raise CommandError("Indexation already occuring. Add a FORCE_INDEXING "
"variable in the environ to force it")


def timestamp_index(index):
"""Returns index-YYYYMMDDHHMMSS with the current time."""
return '%s-%s' % (index, datetime.datetime.now().strftime('%Y%m%d%H%M%S'))


def create_index(index, config=None):
"""Creates an index if it's not present.

Returns the index name.

Options:

- index: name of the index.
- config: if provided, used as the settings option for the
ES calls.
"""
es = amo.search.get_es()

if settings.IN_TEST_SUITE:
if not config:
config = {}
# Be nice to ES running on ci.mozilla.org
config.update({
'number_of_shards': 3,
'number_of_replicas': 0
})

if not es.indices.exists(index):
es.indices.create(index, body=config, ignore=400)

return index
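Taken together, timestamp_index and create_index are what the reindex command uses to build a fresh physical index behind an alias. A hedged usage sketch (the alias name and settings are illustrative; the bare settings keys match how create_index itself passes config to es.indices.create)::

    from lib.es.utils import create_index, timestamp_index

    # e.g. "addons-20140901123000", created only if it does not exist yet.
    new_index = create_index(timestamp_index('addons'),
                             {'number_of_shards': 3, 'number_of_replicas': 0})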
@@ -80,7 +80,7 @@ loggers = {
'newrelic': {
'level': 'WARNING',
},
'elasticutils': {
'elasticsearch': {
'level': 'WARNING',
},
'suds': {

@@ -1097,7 +1097,7 @@ LOGGING = {
'amqplib': {'handlers': ['null']},
'caching.invalidation': {'handlers': ['null']},
'caching': {'level': logging.WARNING},
'pyes': {'handlers': ['null']},
'elasticsearch': {'handlers': ['null']},
'rdflib': {'handlers': ['null']},
'suds': {'handlers': ['null']},
'z.task': {'level': logging.INFO},

@@ -1277,12 +1277,11 @@ BUILDER_VERSIONS_URL = ('https://builder.addons.mozilla.org/repackage/' +
## elasticsearch
ES_HOSTS = ['127.0.0.1:9200']
ES_URLS = ['http://%s' % h for h in ES_HOSTS]
ES_INDEXES = {'default': 'addons',
'update_counts': 'addons_stats',
'download_counts': 'addons_stats',
'stats_contributions': 'addons_stats',
'stats_collections_counts': 'addons_stats',
'users_install': 'addons_stats'}
ES_INDEXES = {
'default': 'addons',
'stats': 'addons_stats',
}

ES_TIMEOUT = 30
ES_DEFAULT_NUM_REPLICAS = 2
ES_DEFAULT_NUM_SHARDS = 5
@@ -1,20 +1,19 @@
from django.conf import settings
# from django.conf import settings

import elasticutils
# import elasticutils

from addons.search import setup_mapping
# from addons.search import setup_mapping

def columns():
es = elasticutils.get_es()
index = settings.ES_INDEXES['default']
return es.get_mapping('addons', index)['addons']['properties'].keys()
# def columns():
# es = elasticutils.get_es()
# index = settings.ES_INDEXES['default']
# return es.get_mapping('addons', index)['addons']['properties'].keys()


def run():
if 'name_finnish' not in columns():
print 'ok'
setup_mapping()
else:
print 'skippint'
assert 'name_finnish' in columns()

# def run():
# if 'name_finnish' not in columns():
# print 'ok'
# setup_mapping()
# else:
# print 'skippint'
# assert 'name_finnish' in columns()
@@ -40,7 +40,7 @@ django-statsd-mozilla==0.3.10
django-storages==1.1.8
django-waffle==0.9.2
easy-thumbnails==1.4
elasticutils==0.8.2
elasticsearch==1.0.0
email-reply-parser==0.2.0
fastchardet==0.2.0
feedparser==5.1.3

@@ -74,8 +74,6 @@ polib==1.0.3
protobuf==2.5.0
pyasn1==0.1.7
PyBrowserID==0.6
pyelasticsearch==0.6.1
pyes==0.16.0
PyJWT-mozilla==0.1.4.2
PyMySQL==0.5
pymemcache==1.2

@@ -99,6 +97,7 @@ SQLAlchemy==0.7.5
statsd==2.0.3
suds==0.4
thrift==0.9.1
urllib3==1.9

## Not on pypi.
-e git+https://github.com/mozilla/amo-validator.git@1.3.1#egg=amo-validator
@@ -1,28 +0,0 @@
cluster:
name: wooyeah

# Don't try to cluster with other machines during local development.
# Remove the following 3 lines to enable default clustering.
network.host: localhost
discovery.zen.ping.multicast.enabled: false
discovery.zen.ping.unicast.hosts: ["localhost"]

path:
logs: /usr/local/var/log
data: /usr/local/var/data
plugins: /usr/local/var/lib/elasticsearch/plugins

index:
analysis:
analyzer:
standardPlusWordDelimiter:
type: custom
tokenizer: standard
filter: [standard, wordDelim, lowercase, stop, dict]
filter:
wordDelim:
type: word_delimiter
preserve_original: true
dict:
type: dictionary_decompounder
word_list: [cool, iris, fire, bug, flag, fox, grease, monkey, flash, block, forecast, screen, grab, cookie, auto, fill, text, all, so, think, mega, upload, download, video, map, spring, fix, input, clip, fly, lang, up, down, persona, css, html, all, http, ball, firefox, bookmark, chat, zilla, edit, menu, menus, status, bar, with, easy, sync, search, google, time, window, js, super, scroll, title, close, undo, user, inspect, inspector, browser, context, dictionary, mail, button, url, password, secure, image, new, tab, delete, click, name, smart, down, manager, open, query, net, link, blog, this, color, select, key, keys, foxy, translate, word]

@@ -1,7 +0,0 @@
#!/usr/bin/env python
import os

if __name__ == '__main__':
cf = os.path.join(os.path.dirname(__file__), 'elasticsearch.yml')
cf = os.path.abspath(cf)
os.system('elasticsearch -f -D es.config=%s' % cf)
@@ -1,94 +0,0 @@
cool
iris
fire
bug
flag
fox
grease
monkey
flash
block
forecast
screen
grab
cookie
auto
fill
text
all
so
think
mega
upload
download
video
map
spring
fix
input
clip
fly
lang
up
down
persona
css
html
all
http
ball
firefox
bookmark
chat
zilla
edit
menu
menus
status
bar
with
easy
sync
search
google
time
window
js
super
scroll
title
close
undo
user
inspect
inspector
browser
context
dictionary
mail
button
url
password
secure
image
new
tab
delete
click
name
smart
down
manager
open
query
net
link
blog
this
color
select
key
keys
foxy
translate
word