Mirror of https://github.com/mozilla/kitsune.git
Incremental question update and fixes
* unhardcodes indexes
* ditches ElasticMeta--elasticutils uses Model._meta.db_table for queries, so we should use that for building indexes, too
* implements incremental updates for Question and adds test
* adds some test harness code for future elastic tests
* adds basic docs for setting up Elastic
Parent: 5ba45381e7
Commit: f7f5bf5a2f
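For context, the hunks below all converge on the same lookup pattern: the model's database table name (``Model._meta.db_table``) doubles as the Elastic doc_type, and ``ES_INDEXES`` may map that table name to its own index, falling back to the default. A minimal sketch of that pattern (``index_for`` is a hypothetical helper added here only for illustration; the commit itself inlines the expression in each app)::

    from django.conf import settings

    def index_for(model):
        """Return the Elastic index for a model, falling back to 'default'.

        Mirrors the expression used in the diff:
        ES_INDEXES.get(Model._meta.db_table) or ES_INDEXES['default'].
        """
        return (settings.ES_INDEXES.get(model._meta.db_table)
                or settings.ES_INDEXES['default'])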
@@ -66,8 +66,8 @@ def setup_mapping(index):
     # TODO: If the mapping is there already and we do a put_mapping,
     # does that stomp on the existing mapping or raise an error?
     try:
-        es.put_mapping(Post.ElasticMeta.type, mapping, index)
-    except pyes.ElasticSearchException, e:
+        es.put_mapping(Post._meta.db_table, mapping, index)
+    except pyes.exceptions.ElasticSearchException, e:
         log.error(e)
 
 
@@ -110,12 +110,12 @@ def index_post(post, bulk=False, force_insert=False, es=None):
 
     index = settings.ES_INDEXES['default']
     try:
-        es.index(post, index, doc_type=Post.ElasticMeta.type,
+        es.index(post, index, doc_type=Post._meta.db_table,
                  id=post['id'], bulk=bulk, force_insert=force_insert)
     except pyes.urllib3.TimeoutError:
         # If we have a timeout, try it again rather than die. If we
         # have a second one, that will cause everything to die.
-        es.index(post, index, doc_type=Post.ElasticMeta.type,
+        es.index(post, index, doc_type=Post._meta.db_table,
                  id=post['id'], bulk=bulk, force_insert=force_insert)
 
 
@@ -126,12 +126,12 @@ def reindex_documents():
     from forums.models import Post
     from django.conf import settings
 
-    index = settings.ES_INDEXES['default']
+    index = (settings.ES_INDEXES.get(Post._meta.db_table)
+             or settings.ES_INDEXES['default'])
 
-    log.info('reindex posts: %s %s', index,
-             Post.ElasticMeta.type)
+    log.info('reindex posts: %s %s', index, Post._meta.db_table)
 
-    es = pyes.ES(settings.ES_HOSTS, timeout=4.0)
+    es = pyes.ES(settings.ES_HOSTS, timeout=10.0)
 
     log.info('setting up mapping....')
     setup_mapping(index)
@@ -186,9 +186,6 @@ class Post(ActionMixin, ModelBase):
     class Meta:
         ordering = ['created']
 
-    class ElasticMeta(object):
-        type = 'forums'
-
     class SphinxMeta(object):
         index = 'discussion_forums'
         filter_mapping = {'author_ord': crc32}
@@ -73,8 +73,8 @@ def setup_mapping(index):
     # TODO: If the mapping is there already and we do a put_mapping,
     # does that stomp on the existing mapping or raise an error?
     try:
-        es.put_mapping(Question.ElasticMeta.type, mapping, index)
-    except pyes.ElasticSearchException, e:
+        es.put_mapping(Question._meta.db_table, mapping, index)
+    except pyes.exceptions.ElasticSearchException, e:
         log.error(e)
 
 
@@ -164,14 +164,16 @@ def index_doc(doc, bulk=False, force_insert=False, es=None):
     if es is None:
         es = elasticutils.get_es()
 
-    index = settings.ES_INDEXES['default']
+    index = (settings.ES_INDEXES.get(Question._meta.db_table)
+             or settings.ES_INDEXES['default'])
+
     try:
-        es.index(doc, index, doc_type=Question.ElasticMeta.type,
+        es.index(doc, index, doc_type=Question._meta.db_table,
                  id=doc['id'], bulk=bulk, force_insert=force_insert)
     except pyes.urllib3.TimeoutError:
         # If we have a timeout, try it again rather than die. If we
         # have a second one, that will cause everything to die.
-        es.index(doc, index, doc_type=Question.ElasticMeta.type,
+        es.index(doc, index, doc_type=Question._meta.db_table,
                  id=doc['id'], bulk=bulk, force_insert=force_insert)
 
 
@@ -190,9 +192,9 @@ def reindex_questions():
     index = settings.ES_INDEXES['default']
 
     log.info('reindex questions: %s %s', index,
-             Question.ElasticMeta.type)
+             Question._meta.db_table)
 
-    es = pyes.ES(settings.ES_HOSTS, timeout=4.0)
+    es = pyes.ES(settings.ES_HOSTS, timeout=10.0)
 
     log.info('setting up mapping....')
     setup_mapping(index)
@@ -8,6 +8,7 @@ from django.contrib.contenttypes.models import ContentType
 from django.core.cache import cache
 from django.db import models
 from django.db.models.signals import post_save
+from django.dispatch import receiver
 
 from product_details import product_details
 from redis.exceptions import ConnectionError
@@ -21,7 +22,7 @@ import questions as constants
 from questions.karma_actions import AnswerAction, SolutionAction
 from questions.question_config import products
 from questions.tasks import (update_question_votes, update_answer_pages,
-                             log_answer)
+                             log_answer, index_questions)
 from search import S
 from search.utils import crc32
 from sumo.helpers import urlparams
@@ -65,9 +66,6 @@ class Question(ModelBase, BigVocabTaggableMixin):
              'Can change/remove the solution to a question'),
            )
 
-    class ElasticMeta(object):
-        type = 'question'
-
     class SphinxMeta(object):
         index = 'questions'
         filter_mapping = {
@@ -249,6 +247,13 @@ class Question(ModelBase, BigVocabTaggableMixin):
         return Answer.objects.filter(pk=self.solution_id).exists()
 
 
+@receiver(post_save, sender=Question,
+          dispatch_uid='questions.search.index')
+def update_question_search_index(sender, instance, **kw):
+    # TODO: waffle here
+    index_questions.delay([instance.id])
+
+
 class QuestionMetaData(ModelBase):
     """Metadata associated with a support question."""
     question = models.ForeignKey('Question', related_name='metadata_set')
@@ -86,3 +86,11 @@ def log_answer(answer):
         FirstAnswerAction(answer.creator, answer.created.date()).save()
 
     unpin_this_thread()
+
+
+@task
+def index_questions(ids, **kw):
+    from questions import es_search
+    from questions.models import Question
+    for q in Question.uncached.filter(id__in=ids):
+        es_search.index_docs(es_search.extract_question(q))
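Taken together with the ``post_save`` receiver added above, this task completes the incremental path: saving a ``Question`` queues ``index_questions``, which re-extracts and indexes just the affected rows. A rough way to exercise it by hand from a Django shell (a sketch, assuming Celery runs tasks eagerly so ``.delay()`` executes inline)::

    from questions.models import Question
    from questions.tasks import index_questions

    q = Question.objects.order_by('-id')[0]   # any existing question
    index_questions.delay([q.id])             # same call the receiver makes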
@@ -3,7 +3,10 @@ from datetime import datetime
 from django.conf import settings
 from django.template.defaultfilters import slugify
 
+from elasticutils import get_es
+
 from nose.tools import eq_
+from nose import SkipTest
 
 from questions.models import Question
 from sumo.tests import LocalizingClient, TestCase
@@ -39,3 +42,28 @@ def tags_eq(tagged_object, tag_names):
     """Assert that the names of the tags on tagged_object are tag_names."""
     eq_(sorted([t.name for t in tagged_object.tags.all()]),
         sorted(tag_names))
+
+
+# TODO: Have to define this here, since I need the data that TestCaseBase
+# generates. Should we turn ESTestCase into a mixin?
+class ESTestCase(TestCaseBase):
+    @classmethod
+    def setUpClass(cls):
+        super(ESTestCase, cls).setUpClass()
+        if getattr(settings, 'ES_HOSTS', None) is None:
+            raise SkipTest
+
+        # Delete test indexes if they exist.
+        cls.es = get_es()
+        for index in settings.ES_INDEXES.values():
+            cls.es.delete_index_if_exists(index)
+
+        from search.utils import es_reindex
+
+        es_reindex()
+
+    @classmethod
+    def tearDownClass(cls):
+        for index in settings.ES_INDEXES.values():
+            cls.es.delete_index_if_exists(index)
+        super(ESTestCase, cls).tearDownClass()
@@ -0,0 +1,24 @@
+from questions.tests import ESTestCase
+from questions.models import Question
+
+import elasticutils
+import uuid
+
+from nose.tools import eq_
+
+
+class TestQuestionUpdate(ESTestCase):
+    def test_added(self):
+        # Use a uuid since it's "unique" and makes sure we're not
+        # accidentally picking up a Question we don't want.
+        title = str(uuid.uuid4())
+        question = Question(title=title,
+                            content='Lorem Ipsum Dolor',
+                            creator_id=118533)
+
+        # Assert that it's not in the index before saving.
+        eq_(elasticutils.S(Question).query(title=title).count(), 0)
+
+        question.save()
+
+        eq_(elasticutils.S(Question).query(title=title).count(), 1)
@@ -41,16 +41,17 @@ def es_reindex():
 
     es = elasticutils.get_es()
 
-    # TODO: unhardcode this
-    es.delete_index("sumo")
+    # Go through and delete, then recreate the indexes.
+    for index in settings.ES_INDEXES.values():
+        es.delete_index_if_exists(index)
 
-    try:
-        es.create_index_if_missing("sumo")
-    except pyes.exceptions.ElasticSearchException:
-        # TODO: Why would this throw an exception? We should handle
-        # it. Maybe Elastic isn't running or something in which case
-        # proceeding is an exercise in futility.
-        pass
+        try:
+            es.create_index_if_missing(index)
+        except pyes.exceptions.ElasticSearchException:
+            # TODO: Why would this throw an exception? We should handle
+            # it. Maybe Elastic isn't running or something in which case
+            # proceeding is an exercise in futility.
+            pass
 
     # Reindex questions.
     import questions.es_search
@@ -68,6 +68,11 @@ class TestCase(test_utils.TestCase):
         super(TestCase, self).setUp()
         settings.REDIS_BACKENDS = settings.REDIS_TEST_BACKENDS
 
+    @classmethod
+    def setUpClass(cls):
+        super(TestCase, cls).setUpClass()
+        settings.ES_INDEXES = settings.TEST_ES_INDEXES
+
 
 class MigrationTests(TestCase):
     """Sanity checks for the SQL migration scripts"""
@@ -116,7 +121,6 @@ class MigrationTests(TestCase):
 
 
 class MobileTestCase(TestCase):
-
     def setUp(self):
         super(MobileTestCase, self).setUp()
         self.client.cookies[settings.MOBILE_COOKIE] = 'on'
@@ -55,8 +55,8 @@ def setup_mapping(index):
     # TODO: If the mapping is there already and we do a put_mapping,
     # does that stomp on the existing mapping or raise an error?
    try:
-        es.put_mapping(Document.ElasticMeta.type, mapping, index)
-    except pyes.ElasticSearchException, e:
+        es.put_mapping(Document._meta.db_table, mapping, index)
+    except pyes.exceptions.ElasticSearchException, e:
         log.error(e)
 
 
@@ -91,14 +91,16 @@ def index_doc(doc, bulk=False, force_insert=False, es=None):
     if es is None:
         es = elasticutils.get_es()
 
-    index = settings.ES_INDEXES['default']
+    index = (settings.ES_INDEXES.get(Document._meta.db_table)
+             or settings.ES_INDEXES['default'])
+
     try:
-        es.index(doc, index, doc_type=Document.ElasticMeta.type,
+        es.index(doc, index, doc_type=Document._meta.db_table,
                  id=doc['id'], bulk=bulk, force_insert=force_insert)
     except pyes.urllib3.TimeoutError:
         # If we have a timeout, try it again rather than die. If we
         # have a second one, that will cause everything to die.
-        es.index(doc, index, doc_type=Document.ElasticMeta.type,
+        es.index(doc, index, doc_type=Document._meta.db_table,
                  id=doc['id'], bulk=bulk, force_insert=force_insert)
 
 
@@ -111,10 +113,9 @@ def reindex_documents():
 
     index = settings.ES_INDEXES['default']
 
-    log.info('reindex documents: %s %s', index,
-             Document.ElasticMeta.type)
+    log.info('reindex documents: %s %s', index, Document._meta.db_table)
 
-    es = pyes.ES(settings.ES_HOSTS, timeout=4.0)
+    es = pyes.ES(settings.ES_HOSTS, timeout=10.0)
 
     log.info('setting up mapping....')
     setup_mapping(index)
@@ -248,9 +248,6 @@ class Document(NotificationsMixin, ModelBase, BigVocabTaggableMixin):
                             ('slug', 'locale'))
         permissions = [('archive_document', 'Can archive document')]
 
-    class ElasticMeta(object):
-        type = 'wiki'
-
     class SphinxMeta(object):
         index = 'wiki_pages'
         filter_mapping = {
@@ -4,11 +4,13 @@
 Search
 ======
 
-Kitsune uses `Sphinx Search <http://www.sphinxsearch.com>`_ to power its
-on-site search facility.
+Kitsune is in the process of switching from `Sphinx Search
+<http://www.sphinxsearch.com>`_ to `Elastic Search
+<http://www.elasticsearch.org/>`_ to power its on-site search
+facility.
 
-Sphinx search gives us a number of advantages over MySQL's full-text search or
-Google's site search.
+Both of these give us a number of advantages over MySQL's full-text
+search or Google's site search.
 
 * Much faster than MySQL.
 * And reduces load on MySQL.
@@ -17,6 +19,17 @@ Google's site search.
 * We don't rely on Google reindexing the site.
 * We can fine-tune the algorithm ourselves.
 
+.. Note::
+
+   Right now we're rewriting our search system to use Elastic and
+   switching between Sphinx and Elastic. At some point, the results
+   we're getting with our Elastic-based code will be good enough to
+   switch over. At that point, we'll remove the Sphinx-based search
+   code.
+
+   Until then, we have instructions for installing both Sphinx Search
+   and Elastic Search.
+
 
 Installing Sphinx Search
 ========================
@@ -118,3 +131,58 @@ You can also stop ``searchd``::
 This method not only lets you maintain a running Sphinx instance that doesn't
 get wiped out by the tests, but also lets you see some very interesting output
 from Sphinx about indexing rate and statistics.
+
+
+Installing Elastic Search
+=========================
+
+There's an installation guide on the Elastic Search site.
+
+http://www.elasticsearch.org/guide/reference/setup/installation.html
+
+The directory you install Elastic in will hereafter be referred to as
+``ELASTICDIR``.
+
+You can configure Elastic Search with the configuration file at
+``ELASTICDIR/config/elasticsearch.yml``.
+
+Elastic Search uses three settings in ``settings.py`` that you can
+override in ``settings_local.py``::
+
+    # Connection information for Elastic
+    ES_HOSTS = ['127.0.0.1:9200']
+    ES_INDEXES = {'default': 'sumo'}
+    TEST_ES_INDEXES = {'default': 'sumo_test'}
+
+.. Warning::
+
+   The host setting must match the host and port in
+   ``ELASTICDIR/config/elasticsearch.yml``. So if you change it in
+   one place, you must also change it in the other.
+
+.. Warning::
+
+   Make sure the index name values in ``ES_INDEXES`` and
+   ``TEST_ES_INDEXES`` are **not** the same. If they are the same,
+   then running unit tests will nix your index.
+
+
+Using Elastic Search
+====================
+
+Start Elastic Search by::
+
+    $ ELASTICDIR/bin/elasticsearch
+
+That launches Elastic Search in the background.
+
+Do a complete reindexing of everything by::
+
+    $ ./manage.py esreindex
+
+This will delete the existing indexes, create new ones, and reindex
+everything in your database. On my machine it takes about 30 minutes.
+
+You can see Elastic Search statistics/health with::
+
+    $ ./manage.py eswhazzup
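Given the three settings documented above, a ``settings_local.py`` override might look like the following (a sketch with illustrative index names; the only hard requirements from the docs are that the host matches ``elasticsearch.yml`` and that the test index differs from the real one)::

    # settings_local.py -- example override (illustrative values)
    ES_HOSTS = ['127.0.0.1:9200']  # must match ELASTICDIR/config/elasticsearch.yml
    ES_INDEXES = {'default': 'sumo_dev'}  # index the running site reads/writes
    TEST_ES_INDEXES = {'default': 'sumo_dev_test'}  # must differ, or tests nix your index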
@@ -575,6 +575,7 @@ SESSION_EXISTS_COOKIE = 'sumo_session'
 # Connection information for Elastic
 ES_HOSTS = ['127.0.0.1:9200']
 ES_INDEXES = {'default': 'sumo'}
+TEST_ES_INDEXES = {'default': 'sumo_test'}
 
 #
 # Connection information for Sphinx search