[bug 709080] Refactor ES code into SearchMixin

This reduces the triplification of everything in es_search modules.

Also, I introduced ES_INDEXING_TIMEOUT, fixed documentation, and
cleaned up some other things while I was poking around.

Also also, I nixed all the ES mapping constants. Calling put_mapping
with typos will kick up an error, so there's no need to additionally
have constants around to prevent typos.
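
For illustration, this is roughly what a model looks like once it opts
into the mixin (a sketch only: ExampleModel and its title field are made
up, but the hooks mirror the ones introduced in this change):

    from django.db import models
    from sumo.models import ModelBase
    from search.models import SearchMixin

    class ExampleModel(ModelBase, SearchMixin):
        title = models.CharField(max_length=255)

        @classmethod
        def get_mapping(cls):
            # Plain dicts now; a typo'd value here makes put_mapping
            # complain, so the old constants weren't buying us anything.
            return {'properties': {
                'id': {'type': 'integer'},
                'title': {'type': 'string', 'analyzer': 'snowball'},
            }}

        def extract_document(self):
            # Dict of indexable fields for this instance.
            return {'id': self.id, 'title': self.title}

    # Register it so reindexing and get_doctype_stats() pick it up.
    ExampleModel.register_search_model()
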
Will Kahn-Greene 2012-01-06 15:07:03 -05:00
Parent 0dd3794295
Commit 4a0130c0c9
18 changed files with 634 additions and 863 deletions


@ -1,177 +0,0 @@
import elasticutils
import logging
import pyes
import time
from search.es_utils import (TYPE, INTEGER, STRING, ANALYZED, ANALYZER,
SNOWBALL, TERM_VECTOR, YES, STORE, BOOLEAN,
INDEX, WITH_POS_OFFSETS, DATE, get_index)
log = logging.getLogger('k.forums.es_search')
def setup_mapping(index):
from forums.models import Thread
mapping = {
'properties': {
'id': {TYPE: INTEGER},
'thread_id': {TYPE: INTEGER},
'forum_id': {TYPE: INTEGER},
'title': {TYPE: STRING, INDEX: ANALYZED, ANALYZER: SNOWBALL},
'is_sticky': {TYPE: BOOLEAN},
'is_locked': {TYPE: BOOLEAN},
'author_id': {TYPE: INTEGER},
'author_ord': {TYPE: STRING},
'content': {TYPE: STRING, INDEX: ANALYZED, ANALYZER: SNOWBALL,
STORE: YES, TERM_VECTOR: WITH_POS_OFFSETS},
'created': {TYPE: DATE},
'updated': {TYPE: DATE},
'replies': {TYPE: INTEGER}
}
}
es = elasticutils.get_es()
try:
es.put_mapping(Thread._meta.db_table, mapping, index)
except pyes.exceptions.ElasticSearchException, e:
log.error(e)
def extract_thread(thread):
"""Extracts interesting thing from a Thread and its Posts"""
d = {}
d['id'] = thread.id
d['forum_id'] = thread.forum.id
d['title'] = thread.title
d['is_sticky'] = thread.is_sticky
d['is_locked'] = thread.is_locked
d['created'] = thread.created
if thread.last_post is not None:
d['updated'] = thread.last_post.created
else:
d['updates'] = None
d['replies'] = thread.replies
author_ids = set()
author_ords = set()
content = []
for post in thread.post_set.all():
author_ids.add(post.author.id)
author_ords.add(post.author.username)
content.append(post.content)
d['author_id'] = list(author_ids)
d['author_ord'] = list(author_ords)
d['content'] = content
return d
def index_thread(thread, bulk=False, force_insert=False, es=None,
refresh=False):
from forums.models import Thread
if es is None:
es = elasticutils.get_es()
index = get_index(Thread)
try:
es.index(thread, index, doc_type=Thread._meta.db_table,
id=thread['id'], bulk=bulk, force_insert=force_insert)
except pyes.urllib3.TimeoutError:
# If we have a timeout, try it again rather than die. If we
# have a second one, that will cause everything to die.
es.index(thread, index, doc_type=Thread._meta.db_table,
id=thread['id'], bulk=bulk, force_insert=force_insert)
if refresh:
es.refresh(timesleep=0)
def unindex_threads(ids):
from forums.models import Thread
es = elasticutils.get_es()
index = get_index(Thread)
for thread_id in ids:
try:
es.delete(index, doc_type=Thread._meta.db_table, id=thread_id)
except pyes.exception.NotFoundException:
# If the document isn't in the index, then we ignore it.
# TODO: Is that right?
pass
def unindex_posts(ids):
from forums.models import Post
for post_id in ids:
try:
post = Post.objects.get(post_id)
index_thread(extract_thread(post.thread))
except Post.ObjectNotFound:
pass
def reindex_documents(percent=100):
"""Iterate over this to update the mapping and index all documents.
Yields number of documents done.
Note: This only gets called from the command line. Ergo we do
some logging so the user knows what's going on.
:arg percent: The percentage of questions to index. Defaults to
100--e.g. all of them.
"""
from forums.models import Thread
from django.conf import settings
index = get_index(Thread)
start_time = time.time()
log.info('reindex threads: %s %s', index, Thread._meta.db_table)
es = pyes.ES(settings.ES_HOSTS, timeout=10.0)
log.info('setting up mapping....')
setup_mapping(index)
log.info('iterating through threads....')
total = Thread.objects.count()
to_index = int(total * (percent / 100.0))
log.info('total threads: %s (to be indexed %s)', total, to_index)
total = to_index
t = 0
for thread in Thread.objects.order_by('id').all():
t += 1
if t % 1000 == 0:
time_to_go = (total - t) * ((time.time() - start_time) / t)
if time_to_go < 60:
time_to_go = "%d secs" % time_to_go
else:
time_to_go = "%d min" % (time_to_go / 60)
log.info('%s/%s... (%s to go)', t, total, time_to_go)
es.flush_bulk(forced=True)
if t > total:
break
index_thread(extract_thread(thread), bulk=True, es=es)
yield t
es.flush_bulk(forced=True)
log.info('done!')
es.refresh()


@ -1,7 +1,6 @@
import datetime
from django.db import models
from django.conf import settings
from django.contrib.auth.models import User
from django.db.models.signals import post_save, pre_delete
from django.dispatch import receiver
@ -15,7 +14,7 @@ from sumo.helpers import urlparams, wiki_to_html
from sumo.urlresolvers import reverse
from sumo.models import ModelBase
from search import searcher
from search import es_utils
from search.models import SearchMixin
from search.utils import crc32
import waffle
@ -94,7 +93,7 @@ class Forum(NotificationsMixin, ModelBase):
self.last_post = _last_post_from(posts, exclude_post=exclude_post)
class Thread(NotificationsMixin, ModelBase):
class Thread(NotificationsMixin, ModelBase, SearchMixin):
title = models.CharField(max_length=255)
forum = models.ForeignKey('Forum')
created = models.DateTimeField(default=datetime.datetime.now,
@ -180,27 +179,85 @@ class Thread(NotificationsMixin, ModelBase):
# If self.last_post is None, and this was called from Post.delete,
# then Post.delete will erase the thread, as well.
@classmethod
def get_mapping(cls):
mapping = {
'properties': {
'id': {'type': 'integer'},
'thread_id': {'type': 'integer'},
'forum_id': {'type': 'integer'},
'title': {'type': 'string', 'analyzer': 'snowball'},
'is_sticky': {'type': 'boolean'},
'is_locked': {'type': 'boolean'},
'author_id': {'type': 'integer'},
'author_ord': {'type': 'string'},
'content': {'type': 'string', 'analyzer': 'snowball',
'store': 'yes',
'term_vector': 'with_positions_offsets'},
'created': {'type': 'date'},
'updated': {'type': 'date'},
'replies': {'type': 'integer'}
}
}
return mapping
@receiver(post_save, sender=Thread,
dispatch_uid='forums.search.index.thread.save')
def update_thread_in_index(sender, instance, **kw):
# raw is True when saving a model exactly as presented--like when
# loading fixtures. In this case we don't want to trigger.
if not settings.ES_LIVE_INDEXING or kw.get('raw'):
return
def extract_document(self):
"""Extracts interesting thing from a Thread and its Posts"""
d = {}
d['id'] = self.id
d['forum_id'] = self.forum.id
d['title'] = self.title
d['is_sticky'] = self.is_sticky
d['is_locked'] = self.is_locked
d['created'] = self.created
from forums.tasks import index_threads
es_utils.add_index_task(index_threads.delay, (instance.id,))
if self.last_post is not None:
d['updated'] = self.last_post.created
else:
d['updates'] = None
d['replies'] = self.replies
author_ids = set()
author_ords = set()
content = []
for post in self.post_set.all():
author_ids.add(post.author.id)
author_ords.add(post.author.username)
content.append(post.content)
d['author_id'] = list(author_ids)
d['author_ord'] = list(author_ords)
d['content'] = content
return d
@receiver(pre_delete, sender=Thread,
dispatch_uid='forums.search.index.thread.delete')
def remove_thread_from_index(sender, instance, **kw):
if not settings.ES_LIVE_INDEXING:
return
# Register this as a model we index in ES.
Thread.register_search_model()
from forums.tasks import unindex_threads
unindex_threads([instance.id])
def _update_t_index(sender, instance, **kw):
"""Given a Thread, creates an index task"""
if not kw.get('raw'):
obj = instance
obj.__class__.add_index_task((obj.id,))
def _remove_t_index(sender, instance, **kw):
"""Given a Thread, create an unindex task"""
if not kw.get('raw'):
obj = instance
obj.__class__.add_unindex_task((obj.id,))
f_t_es_post_save = receiver(
post_save, sender=Thread,
dispatch_uid='f.t.es.post_save')(_update_t_index)
f_t_es_pre_delete = receiver(
pre_delete, sender=Thread,
dispatch_uid='f.t.es.pre_delete')(_remove_t_index)
class Post(ActionMixin, ModelBase):
@ -285,26 +342,19 @@ class Post(ActionMixin, ModelBase):
return wiki_to_html(self.content)
@receiver(post_save, sender=Post,
dispatch_uid='forums.search.index.post.save')
def update_post_in_index(sender, instance, **kw):
# raw is True when saving a model exactly as presented--like when
# loading fixtures. In this case we don't want to trigger.
if not settings.ES_LIVE_INDEXING or kw.get('raw'):
return
from forums.tasks import index_threads
es_utils.add_index_task(index_threads.delay, (instance.thread_id,))
def _update_post_index(sender, instance, **kw):
"""Given a Post, update the Thread in the index"""
if not kw.get('raw'):
obj = instance.thread
obj.__class__.add_index_task((obj.id,))
@receiver(pre_delete, sender=Post,
dispatch_uid='forums.search.index.post.delete')
def remove_post_from_index(sender, instance, **kw):
if not settings.ES_LIVE_INDEXING:
return
from forums.tasks import index_threads
es_utils.add_index_task(index_threads.delay, (instance.thread_id,))
f_p_es_post_save = receiver(
post_save, sender=Post,
dispatch_uid='f_p_es_post_save')(_update_post_index)
f_p_es_pre_delete = receiver(
pre_delete, sender=Post,
dispatch_uid='f_p_es_pre_delete')(_update_post_index)
def discussion_searcher(request):


@ -1,215 +0,0 @@
import elasticutils
import logging
import pyes
import time
from search.es_utils import (TYPE, LONG, STRING, ANALYZER,
SNOWBALL, TERM_VECTOR, STORE, YES, BOOLEAN,
WITH_POS_OFFSETS, DATE, INTEGER, get_index)
log = logging.getLogger('k.questions.es_search')
def setup_mapping(index):
from questions.models import Question
mapping = {
'properties': {
'id': {TYPE: LONG},
'question_id': {TYPE: LONG},
'title': {TYPE: STRING, ANALYZER: SNOWBALL},
'question_content':
{TYPE: STRING, ANALYZER: SNOWBALL,
# TODO: Stored because originally, this is the only field we
# were excerpting on. Standardize one way or the other.
STORE: YES, TERM_VECTOR: WITH_POS_OFFSETS},
'answer_content':
{TYPE: STRING, ANALYZER: SNOWBALL},
'replies': {TYPE: INTEGER},
'is_solved': {TYPE: BOOLEAN},
'is_locked': {TYPE: BOOLEAN},
'has_answers': {TYPE: BOOLEAN},
'has_helpful': {TYPE: BOOLEAN},
'created': {TYPE: DATE},
'updated': {TYPE: DATE},
'question_creator': {TYPE: STRING},
'answer_creator': {TYPE: STRING},
'question_votes': {TYPE: INTEGER},
'answer_votes': {TYPE: INTEGER},
'tag': {TYPE: STRING}
}
}
es = elasticutils.get_es()
# TODO: If the mapping is there already and we do a put_mapping,
# does that stomp on the existing mapping or raise an error?
try:
es.put_mapping(Question._meta.db_table, mapping, index)
except pyes.exceptions.ElasticSearchException, e:
log.error(e)
def extract_question(question):
"""Extracts indexable attributes from a Question and its answers."""
question_data = {}
question_data['id'] = question.id
question_data['title'] = question.title
question_data['question_content'] = question.content
question_data['replies'] = question.num_answers
question_data['is_solved'] = bool(question.solution_id)
question_data['is_locked'] = question.is_locked
question_data['has_answers'] = bool(question.num_answers)
question_data['created'] = question.created
question_data['updated'] = question.updated
question_data['question_creator'] = question.creator.username
question_data['question_votes'] = question.num_votes_past_week
question_data['tag'] = [tag['name'] for tag in question.tags.values()]
# Array of strings.
answer_content = []
# has_helpful is true if at least one answer is marked as
# helpful.
has_helpful = False
# answer_creator is the set of all answer creator user names.
answer_creator = set()
# answer_votes is the sum of votes for all of the answers.
answer_votes = 0
for ans in question.answers.all():
answer_content.append(ans.content)
has_helpful = has_helpful or bool(ans.num_helpful_votes)
answer_creator.add(ans.creator.username)
answer_votes += ans.upvotes
question_data['answer_content'] = answer_content
question_data['has_helpful'] = has_helpful
question_data['answer_creator'] = list(answer_creator)
question_data['answer_votes'] = answer_votes
return question_data
def index_doc(doc, bulk=False, force_insert=False, es=None, refresh=False):
from questions.models import Question
if es is None:
es = elasticutils.get_es()
index = get_index(Question)
try:
es.index(doc, index, doc_type=Question._meta.db_table,
id=doc['id'], bulk=bulk, force_insert=force_insert)
except pyes.urllib3.TimeoutError:
# If we have a timeout, try it again rather than die. If we
# have a second one, that will cause everything to die.
es.index(doc, index, doc_type=Question._meta.db_table,
id=doc['id'], bulk=bulk, force_insert=force_insert)
if refresh:
es.refresh(timesleep=0)
def unindex_questions(ids):
"""Removes Questions from the index."""
from questions.models import Question
es = elasticutils.get_es()
index = get_index(Question)
for question_id in ids:
# TODO wrap this in a try/except--amongst other things, this will
# only be in the index if the Question had no Answers.
try:
es.delete(index, doc_type=Question._meta.db_table,
id=question_id)
except pyes.exceptions.NotFoundException:
# If the document isn't in the index, then we ignore it.
# TODO: Is that right?
pass
def unindex_answers(ids):
"""Removes Answers from the index.
:arg ids: list of question ids
"""
# Answers are rolled up in Question documents, so we reindex the
# Question.
from questions.models import Question
for question_id in ids:
try:
# TODO: test the case where we delete the question
# twice.
question = Question.objects.get(id=question_id)
index_doc(extract_question(question))
except Question.ObjectDoesNotExist:
pass
def reindex_questions(percent=100):
"""Iterate over this to update the mapping and index all documents.
Yields number of documents done.
Note: This gets run from the command line, so we log stuff to let
the user know what's going on.
:arg percent: The percentage of questions to index. Defaults to
100--e.g. all of them.
"""
from questions.models import Question
from django.conf import settings
index = get_index(Question)
start_time = time.time()
log.info('reindex questions: %s %s', index,
Question._meta.db_table)
es = pyes.ES(settings.ES_HOSTS, timeout=10.0)
log.info('setting up mapping....')
setup_mapping(index)
log.info('iterating through questions....')
total = Question.objects.count()
to_index = int(total * (percent / 100.0))
log.info('total questions: %s (to be indexed: %s)', total, to_index)
total = to_index
t = 0
for q in Question.objects.order_by('id').all():
t += 1
if t % 1000 == 0:
time_to_go = (total - t) * ((time.time() - start_time) / t)
if time_to_go < 60:
time_to_go = "%d secs" % time_to_go
else:
time_to_go = "%d min" % (time_to_go / 60)
log.info('%s/%s... (%s to go)', t, total, time_to_go)
es.flush_bulk(forced=True)
if t > total:
break
index_doc(extract_question(q), bulk=True, es=es)
yield t
es.flush_bulk(forced=True)
log.info('done!')
es.refresh()


@ -24,9 +24,9 @@ from questions.karma_actions import (AnswerAction, FirstAnswerAction,
SolutionAction)
from questions.question_config import products
from questions.tasks import (update_question_votes, update_answer_pages,
log_answer, index_questions, unindex_questions)
log_answer)
from search import searcher
from search import es_utils
from search.models import SearchMixin
from search.utils import crc32
from sumo.helpers import urlparams
from sumo.models import ModelBase
@ -41,7 +41,7 @@ from upload.models import ImageAttachment
log = logging.getLogger('k.questions')
class Question(ModelBase, BigVocabTaggableMixin):
class Question(ModelBase, BigVocabTaggableMixin, SearchMixin):
"""A support question."""
title = models.CharField(max_length=255)
creator = models.ForeignKey(User, related_name='questions')
@ -282,51 +282,122 @@ class Question(ModelBase, BigVocabTaggableMixin):
cache.add(cache_key, tags)
return tags
@classmethod
def get_mapping(cls):
mapping = {
'properties': {
'id': {'type': 'long'},
'question_id': {'type': 'long'},
'title': {'type': 'string', 'analyzer': 'snowball'},
'question_content':
{'type': 'string', 'analyzer': 'snowball',
# TODO: Stored because originally, this is the
# only field we were excerpting on. Standardize
# one way or the other.
'store': 'yes', 'term_vector': 'with_positions_offsets'},
'answer_content':
{'type': 'string', 'analyzer': 'snowball'},
'replies': {'type': 'integer'},
'is_solved': {'type': 'boolean'},
'is_locked': {'type': 'boolean'},
'has_answers': {'type': 'boolean'},
'has_helpful': {'type': 'boolean'},
'created': {'type': 'date'},
'updated': {'type': 'date'},
'question_creator': {'type': 'string'},
'answer_creator': {'type': 'string'},
'question_votes': {'type': 'integer'},
'answer_votes': {'type': 'integer'},
'tag': {'type': 'string'}
}
}
return mapping
@receiver(post_save, sender=Question,
dispatch_uid='questions.search.index.question.save')
def update_question_in_index(sender, instance, **kw):
# raw is True when saving a model exactly as presented--like when
# loading fixtures. In this case we don't want to trigger.
if not settings.ES_LIVE_INDEXING or kw.get('raw'):
return
def extract_document(self):
"""Extracts indexable attributes from a Question and its answers."""
d = {}
es_utils.add_index_task(index_questions.delay, (instance.id,))
d['id'] = self.id
d['title'] = self.title
d['question_content'] = self.content
d['replies'] = self.num_answers
d['is_solved'] = bool(self.solution_id)
d['is_locked'] = self.is_locked
d['has_answers'] = bool(self.num_answers)
d['created'] = self.created
d['updated'] = self.updated
d['question_creator'] = self.creator.username
d['question_votes'] = self.num_votes_past_week
d['tag'] = [tag['name'] for tag in self.tags.values()]
# Array of strings.
answer_content = []
# has_helpful is true if at least one answer is marked as
# helpful.
has_helpful = False
# answer_creator is the set of all answer creator user names.
answer_creator = set()
# answer_votes is the sum of votes for all of the answers.
answer_votes = 0
for ans in self.answers.all():
answer_content.append(ans.content)
has_helpful = has_helpful or bool(ans.num_helpful_votes)
answer_creator.add(ans.creator.username)
answer_votes += ans.upvotes
d['answer_content'] = answer_content
d['has_helpful'] = has_helpful
d['answer_creator'] = list(answer_creator)
d['answer_votes'] = answer_votes
return d
@receiver(post_save, sender=TaggedItem,
dispatch_uid='questions.search.index.tags.save')
def update_question_tags_in_index(sender, instance, **kwargs):
# raw is True when saving a model exactly as presented--like when
# loading fixtures. In this case we don't want to trigger.
if not settings.ES_LIVE_INDEXING or kwargs.get('raw'):
return
es_utils.add_index_task(index_questions.delay,
(instance.content_object.id,))
# Register this as a model we index in ES.
Question.register_search_model()
@receiver(pre_delete, sender=Question,
dispatch_uid='questions.search.index.question.delete')
def remove_question_from_index(sender, instance, **kw):
if (not settings.ES_LIVE_INDEXING or kw.get('raw') or
not isinstance(instance.content_object, Question)):
return
unindex_questions([instance.id])
def _update_qs_index(sender, instance, **kw):
"""Given a Question, creates an index task"""
if not kw.get('raw'):
obj = instance
obj.__class__.add_index_task((obj.id,))
@receiver(pre_delete, sender=TaggedItem,
dispatch_uid='questions.search.index.tags.delete')
def update_question_in_index_on_tags_delete(sender, instance, **kwargs):
# raw is True when saving a model exactly as presented--like when
# loading fixtures. In this case we don't want to trigger.
if (not settings.ES_LIVE_INDEXING or kwargs.get('raw') or
not isinstance(instance.content_object, Question)):
return
def _update_tag_index(sender, instance, **kw):
"""Given a TaggedItem for a Question, creates an index task"""
obj = instance.content_object
if not kw.get('raw') and isinstance(obj, Question):
obj.__class__.add_index_task((obj.id,))
es_utils.add_index_task(index_questions.delay,
(instance.content_object.id,))
def _remove_qs_index(sender, instance, **kw):
"""Given a Question, creates an unindex task"""
if not kw.get('raw'):
obj = instance
obj.__class__.add_unindex_task((obj.id,))
q_es_post_save = receiver(
post_save, sender=Question,
dispatch_uid='q.es.post_save')(_update_qs_index)
q_es_pre_delete = receiver(
pre_delete, sender=Question,
dispatch_uid='q.es.pre_delete')(_remove_qs_index)
q_tag_es_post_save = receiver(
post_save, sender=TaggedItem,
dispatch_uid='q.es.post_save')(_update_tag_index)
q_tag_es_pre_delete = receiver(
pre_delete, sender=TaggedItem,
dispatch_uid='q.tag.es.pre_delete')(_update_tag_index)
class QuestionMetaData(ModelBase):
@ -524,24 +595,19 @@ post_save.connect(answer_connector, sender=Answer,
dispatch_uid='question_answer_activity')
@receiver(post_save, sender=Answer,
dispatch_uid='questions.search.index.answer.save')
def update_answer_in_index(sender, instance, **kw):
# raw is True when saving a model exactly as presented--like when
# loading fixtures. In this case we don't want to trigger.
if not settings.ES_LIVE_INDEXING or kw.get('raw'):
return
es_utils.add_index_task(index_questions.delay, (instance.question_id,))
def _update_ans_index(sender, instance, **kw):
"""Given an Answer for a Question, create an index task"""
if not kw.get('raw'):
obj = instance.question
obj.__class__.add_index_task((obj.id,))
@receiver(pre_delete, sender=Answer,
dispatch_uid='questions.search.index.answer.delete')
def remove_answer_from_index(sender, instance, **kw):
if not settings.ES_LIVE_INDEXING:
return
es_utils.add_index_task(index_questions.delay, (instance.question_id,))
q_ans_es_post_save = receiver(
post_save, sender=Answer,
dispatch_uid='q.ans.es.post_save')(_update_ans_index)
q_ans_es_pre_delete = receiver(
pre_delete, sender=Answer,
dispatch_uid='q.ans.es.pre_delete')(_update_ans_index)
class QuestionVote(ModelBase):
@ -570,31 +636,26 @@ class AnswerVote(ModelBase):
VoteMetadata.objects.create(vote=self, key=key, value=value)
@receiver(post_save, sender=AnswerVote,
dispatch_uid='questions.search.index.answervote.save')
def update_answervote_in_index(sender, instance, **kw):
# TODO: We only need to update the helpful bit. It's possible
# we could ignore all AnswerVotes that aren't helpful and if
# they're marked as helpful, then update the index. Look into
# this.
def _update_ansv_index(sender, instance, **kw):
"""Given an AnswerVote for an Answer for a Question, creates an
index task
# raw is True when saving a model exactly as presented--like when
# loading fixtures. In this case we don't want to trigger.
if not settings.ES_LIVE_INDEXING or kw.get('raw'):
return
es_utils.add_index_task(index_questions.delay, (
instance.answer.question_id,))
"""
if not kw.get('raw'):
obj = instance.answer.question
obj.__class__.add_index_task((obj.id,))
@receiver(pre_delete, sender=AnswerVote,
dispatch_uid='questions.search.index.answervote.delete')
def remove_answervote_from_index(sender, instance, **kw):
if not settings.ES_LIVE_INDEXING:
return
es_utils.add_index_task(index_questions.delay, (
instance.answer.question_id,))
# TODO: We only need to update the helpful bit. It's possible
# we could ignore all AnswerVotes that aren't helpful and if
# they're marked as helpful, then update the index. Look into
# this.
q_av_es_post_save = receiver(
post_save, sender=AnswerVote,
dispatch_uid='q.av.es.post_save')(_update_ansv_index)
q_av_es_pre_delete = receiver(
post_save, sender=AnswerVote,
dispatch_uid='q.av.es.pre_delete')(_update_ansv_index)
class VoteMetadata(ModelBase):


@ -61,6 +61,10 @@ class QuestionUpdateTests(ElasticTestCase):
eq_(elasticutils.S(Question).count(), 0)
def test_questions_tags(self):
"""Make sure that adding tags to a Question causes it to
refresh the index.
"""
tag = u'hiphop'
eq_(elasticutils.S(Question).filter(tag=tag).count(), 0)
q = question(save=True)


@ -1,13 +1,12 @@
from itertools import chain, count, izip
import logging
from threading import local
from pprint import pprint
import time
import elasticutils
from pprint import pprint
import pyes
from django.conf import settings
from django.core import signals
ESTimeoutError = pyes.urllib3.TimeoutError
@ -15,69 +14,7 @@ ESMaxRetryError = pyes.urllib3.MaxRetryError
ESIndexMissingException = pyes.exceptions.IndexMissingException
TYPE = 'type'
ANALYZER = 'analyzer'
INDEX = 'index'
STORE = 'store'
TERM_VECTOR = 'term_vector'
NOT_INDEXED = 'not_indexed'
LONG = 'long'
INTEGER = 'integer'
STRING = 'string'
BOOLEAN = 'boolean'
DATE = 'date'
ANALYZED = 'analyzed'
NOTANALYZED = 'not_analyzed'
SNOWBALL = 'snowball'
YES = 'yes'
WITH_POS_OFFSETS = 'with_positions_offsets'
_local_tasks = local()
_local_tasks.es_index_task_set = set()
def add_index_task(fun, *args):
"""Adds an index task.
Note: args and its contents **must** be hashable.
:arg fun: the function to call
:arg args: arguments to the function
"""
_local_tasks.es_index_task_set.add((fun, args))
def generate_tasks(**kwargs):
"""Goes through thread local index update tasks set and generates
celery tasks for all tasks in the set.
Because this works off of a set, it naturally de-dupes the tasks,
so if four tasks get tossed into the set that are identical, we
execute it only once.
"""
lt = _local_tasks
for fun, args in lt.es_index_task_set:
fun(*args)
lt.es_index_task_set.clear()
signals.request_finished.connect(generate_tasks)
def get_index(model):
"""Returns the index name for this model."""
return (settings.ES_INDEXES.get(model._meta.db_table)
or settings.ES_INDEXES['default'])
log = logging.getLogger('search.es_utils')
def get_doctype_stats():
@ -86,30 +23,79 @@ def get_doctype_stats():
For example:
>>> get_doctype_stats()
{'questions': 1000, 'forums': 1000, 'wiki': 1000}
{'questions_question': 14216, 'forums_thread': 419, 'wiki_document': 759}
:throws pyes.urllib3.MaxRetryError: if it can't connect to elasticsearch
:throws pyes.exceptions.IndexMissingException: if the index doesn't exist
"""
# TODO: We have to import these here, otherwise we have an import
# loop es_utils -> models.py -> es_utils. This should get fixed by
# having the models register themselves as indexable with es_utils
# or something like that. Then es_utils won't have to explicitly
# know about models.
from forums.models import Thread
from questions.models import Question
from wiki.models import Document
from search.models import get_search_models
stats = {}
for name, model in (('questions', Question),
('forums', Thread),
('wiki', Document)):
stats[name] = elasticutils.S(model).count()
for cls in get_search_models():
stats[cls._meta.db_table] = elasticutils.S(cls).count()
return stats
def reindex_model(cls, percent=100):
"""Reindexes all the objects for a single mode.
Yields number of documents done.
Note: This gets run from the command line, so we log stuff to let
the user know what's going on.
:arg cls: the model class
:arg percent: The percentage of questions to index. Defaults to
100--e.g. all of them.
"""
doc_type = cls._meta.db_table
index = cls._get_index()
start_time = time.time()
log.info('reindex %s into %s index', doc_type, index)
es = pyes.ES(settings.ES_HOSTS, timeout=settings.ES_INDEXING_TIMEOUT)
log.info('setting up mapping....')
mapping = cls.get_mapping()
es.put_mapping(doc_type, mapping, index)
log.info('iterating through %s....', doc_type)
total = cls.objects.count()
to_index = int(total * (percent / 100.0))
log.info('total %s: %s (to be indexed: %s)', doc_type, total, to_index)
total = to_index
t = 0
for obj in cls.objects.order_by('id').all():
t += 1
if t % 1000 == 0:
time_to_go = (total - t) * ((time.time() - start_time) / t)
if time_to_go < 60:
time_to_go = "%d secs" % time_to_go
else:
time_to_go = "%d min" % (time_to_go / 60)
log.info('%s/%s... (%s to go)', t, total, time_to_go)
if t % settings.ES_FLUSH_BULK_EVERY == 0:
es.flush_bulk()
if t > total:
break
cls.index(obj.extract_document(), bulk=True, es=es)
yield t
es.flush_bulk(forced=True)
log.info('done!')
es.refresh()
def es_reindex_with_progress(percent=100):
"""Rebuild Elastic indexes as you iterate over yielded progress ratios.
@ -118,55 +104,43 @@ def es_reindex_with_progress(percent=100):
development where doing a full reindex takes an hour.
"""
# TODO: We have to import these here, otherwise we have an import
# loop es_utils -> models.py -> es_utils. This should get fixed by
# having the models register themselves as indexable with es_utils
# or something like that. Then es_utils won't have to explicitly
# know about models.
import forums.es_search
from forums.models import Thread
import questions.es_search
from questions.models import Question
import wiki.es_search
from wiki.models import Document
from search.models import get_search_models
es = elasticutils.get_es()
# Go through and delete, then recreate the indexes.
for index in settings.ES_INDEXES.values():
es.delete_index_if_exists(index)
es.create_index_if_missing(index) # Should always be missing.
es.create_index(index)
search_models = get_search_models()
total = sum([cls.objects.count() for cls in search_models])
to_index = [reindex_model(cls, percent) for cls in search_models]
# TODO: Having the knowledge of apps' internals repeated here is lame.
total = (Question.objects.count() +
Thread.objects.count() +
Document.objects.count())
return (float(done) / total for done, _ in
izip(count(1),
chain(questions.es_search.reindex_questions(percent),
wiki.es_search.reindex_documents(percent),
forums.es_search.reindex_documents(percent))))
izip(count(1), chain(*to_index)))
def es_reindex(percent=100):
"""Rebuild ElasticSearch indexes."""
"""Rebuild ElasticSearch indexes"""
[x for x in es_reindex_with_progress(percent) if False]
def es_whazzup():
"""Runs cluster_stats on the Elastic system."""
# We create a logger because elasticutils uses it.
logging.basicConfig()
"""Runs cluster_stats on the Elastic system"""
es = elasticutils.get_es()
# TODO: It'd be better to show more useful information than raw
# cluster_stats.
try:
pprint(es.cluster_stats())
except pyes.urllib3.connectionpool.MaxRetryError:
print ('ERROR: Your elasticsearch process is not running or '
'ES_HOSTS is set wrong in your settings_local.py file.')
log.error('Your elasticsearch process is not running or ES_HOSTS '
'is set wrong in your settings_local.py file.')
return
print 'Totals:'
log.info('Totals:')
for name, count in get_doctype_stats().items():
print '* %s: %d' % (name, count)
log.info(' * %s: %d', name, count)


@ -1,3 +1,4 @@
import logging
from django.core.management.base import BaseCommand, CommandError
from optparse import make_option
from search.es_utils import es_reindex
@ -10,6 +11,7 @@ class Command(BaseCommand):
help='Reindex a percentage of things'),)
def handle(self, *args, **options):
logging.basicConfig(level=logging.INFO)
percent = options['percent']
if percent > 100 or percent < 1:
raise CommandError('percent should be between 1 and 100')


@ -1,5 +1,5 @@
import logging
from django.core.management.base import BaseCommand
from search.es_utils import es_whazzup
@ -7,4 +7,5 @@ class Command(BaseCommand):
help = 'Shows elastic stats.'
def handle(self, *args, **options):
logging.basicConfig(level=logging.INFO)
es_whazzup()


@ -1,3 +1,152 @@
# We do this here to guarantee that es_utils gets imported and thus
# its request_finished signal handler is registered.
import search.es_utils
import elasticutils
import logging
import pyes
from threading import local
from django.conf import settings
from django.core import signals
from search.tasks import index_task, unindex_task
log = logging.getLogger('es_search')
# db_table name -> model Class for search models
_search_models = {}
def get_search_models():
"""Returns a list of model classes"""
# TODO: if we do weakrefs, then we should remove dead refs here.
values = _search_models.values()
# Sort to stabilize.
values.sort(key=lambda cls: cls._meta.db_table)
return values
_local_tasks = local()
_local_tasks.es_index_task_set = set()
class SearchMixin(object):
"""This mixin adds ES indexing support for the model.
When using this mixin, make sure to implement:
* get_mapping
* extract_document
Additionally, after defining your model, register it as a
search model::
MyModel.register_search_model()
"""
@classmethod
def register_search_model(cls):
"""Registers a model as being involved with ES indexing"""
# TODO: Fix this to use weakrefs
_search_models[cls._meta.db_table] = cls
@classmethod
def get_mapping(self):
"""Returns the ES mapping defition for this document type
This must be implemented. It should return an ES mapping.
For examples, see the codebase.
"""
raise NotImplementedError
def extract_document(self):
"""Extracts the ES index document for this instance
This must be implemented. It should return a dict representing
the document to be indexed.
For examples, see the codebase.
"""
raise NotImplementedError
@classmethod
def _get_index(cls):
"""Returns the index for this class"""
indexes = settings.ES_INDEXES
return indexes.get(cls._meta.db_table) or indexes['default']
@classmethod
def add_index_task(cls, ids):
"""Adds an index task.
:arg ids: tuple of ids
"""
_local_tasks.es_index_task_set.add((index_task.delay, (cls, ids)))
@classmethod
def add_unindex_task(cls, ids):
"""Creates a task to remove this document from the ES index
:arg ids: tuple of ids
"""
_local_tasks.es_index_task_set.add((unindex_task.delay, (cls, ids)))
@classmethod
def index(cls, document, bulk=False, force_insert=False, refresh=False,
es=None):
"""Indexes a single document"""
if not settings.ES_LIVE_INDEXING:
return
if es is None:
es = elasticutils.get_es()
index = cls._get_index()
doc_type = cls._meta.db_table
# TODO: handle pyes.urllib3.TimeoutErrors here.
es.index(document, index=index, doc_type=doc_type, id=document['id'],
bulk=bulk, force_insert=force_insert)
if refresh:
es.refresh(timesleep=0)
@classmethod
def unindex(cls, id):
"""Removes a document from the index"""
if not settings.ES_LIVE_INDEXING:
return
index = cls._get_index()
doc_type = cls._meta.db_table
try:
elasticutils.get_es().delete(index, doc_type, id)
except pyes.exceptions.NotFoundException:
# Ignore the case where we try to delete something that's
# not there.
pass
def generate_tasks(**kwargs):
"""Goes through thread local index update tasks set and generates
celery tasks for all tasks in the set.
Because this works off of a set, it naturally de-dupes the tasks,
so if four tasks get tossed into the set that are identical, we
execute it only once.
"""
lt = _local_tasks
for fun, args in lt.es_index_task_set:
fun(*args)
lt.es_index_task_set.clear()
signals.request_finished.connect(generate_tasks)
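
An aside on the de-duping: the thread-local set stores (task, args)
tuples, so saving the same object several times in one request still
queues a single task when generate_tasks runs at request_finished. A
standalone sketch of the idea (illustrative only, not Kitsune code):

    executed = []

    def fake_index_task(obj_id):
        executed.append(obj_id)

    pending = set()
    for _ in range(4):
        # The same (callable, args) tuple every time; the set keeps one.
        pending.add((fake_index_task, (1,)))

    for fun, args in pending:
        fun(*args)
    pending.clear()

    assert executed == [1]  # four adds, one execution
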


@ -52,3 +52,17 @@ def reindex_with_progress(waffle_when_done=False):
finally:
cache.delete(ES_REINDEX_PROGRESS)
cache.delete(ES_WAFFLE_WHEN_DONE)
@task
def index_task(cls, ids, **kw):
"""Indexes documents specified by cls and ids"""
for obj in cls.uncached.filter(id__in=ids):
cls.index(obj.extract_document(), refresh=True)
@task
def unindex_task(cls, ids, **kw):
"""Unindexes documents specified by cls and ids"""
for id in ids:
cls.unindex(id)


@ -2,50 +2,51 @@ import json
from nose.tools import eq_
from sumo.tests import TestCase, LocalizingClient, ElasticTestCase
from sumo.tests import LocalizingClient, ElasticTestCase
from sumo.urlresolvers import reverse
from search.models import generate_tasks
from questions.tests import question, answer, answer_vote
from questions.models import Question
from wiki.tests import document, revision
from forums.tests import thread, post
from search import es_utils
import mock
class ElasticSearchTasksTests(TestCase):
def test_tasks(self):
class ElasticSearchTasksTests(ElasticTestCase):
@mock.patch.object(Question, 'index')
def test_tasks(self, index_fun):
"""Tests to make sure tasks are added and run"""
times_run = []
q = question()
# Don't call self.refresh here since that calls generate_tasks().
def run_task(*args):
times_run.append(1)
eq_(index_fun.call_count, 0)
es_utils.add_index_task(run_task, (1,))
q.save()
generate_tasks()
eq_(len(times_run), 0)
eq_(index_fun.call_count, 1)
es_utils.generate_tasks()
eq_(len(times_run), 1)
def test_tasks_squashed(self):
@mock.patch.object(Question, 'index')
def test_tasks_squashed(self, index_fun):
"""Tests to make sure tasks are squashed"""
times_run = []
q = question()
# Don't call self.refresh here since that calls generate_tasks().
def run_task(*args):
times_run.append(1)
eq_(index_fun.call_count, 0)
es_utils.add_index_task(run_task, (1,))
es_utils.add_index_task(run_task, (1,))
es_utils.add_index_task(run_task, (1,))
es_utils.add_index_task(run_task, (1,))
q.save()
q.save()
q.save()
q.save()
eq_(len(times_run), 0)
eq_(index_fun.call_count, 0)
es_utils.generate_tasks()
generate_tasks()
eq_(len(times_run), 1)
eq_(index_fun.call_count, 1)
class ElasticSearchViewTests(ElasticTestCase):


@ -3,6 +3,7 @@ from django.db import models
import caching.base
# Our apps should subclass ManagerBase instead of models.Manager or
# caching.base.CachingManager directly.
ManagerBase = caching.base.CachingManager


@ -83,7 +83,7 @@ class ElasticTestCase(TestCase):
# index is ready to be queried. Given that, it's almost
# always the case that we want to run all the generated tasks,
# then refresh.
from search.es_utils import generate_tasks
from search.models import generate_tasks
generate_tasks()
es = get_es()


@ -1,162 +0,0 @@
import elasticutils
import logging
import pyes
import time
from search.es_utils import (TYPE, INTEGER, STRING, INDEX, NOTANALYZED,
ANALYZER, SNOWBALL, BOOLEAN, DATE, get_index)
log = logging.getLogger('k.wiki.es_search')
def setup_mapping(index):
from wiki.models import Document
mapping = {
'properties': {
'id': {TYPE: INTEGER},
'title': {TYPE: STRING, ANALYZER: SNOWBALL},
'locale': {TYPE: STRING, INDEX: NOTANALYZED},
'current': {TYPE: INTEGER},
'parent_id': {TYPE: INTEGER},
'content':
{TYPE: STRING, ANALYZER: SNOWBALL},
'category': {TYPE: INTEGER},
'slug': {TYPE: STRING},
'is_archived': {TYPE: BOOLEAN},
'summary': {TYPE: STRING, ANALYZER: SNOWBALL},
'keywords': {TYPE: STRING, ANALYZER: SNOWBALL},
'updated': {TYPE: DATE},
'tag': {TYPE: STRING}
}
}
es = elasticutils.get_es()
# TODO: If the mapping is there already and we do a put_mapping,
# does that stomp on the existing mapping or raise an error?
try:
es.put_mapping(Document._meta.db_table, mapping, index)
except pyes.exceptions.ElasticSearchException, e:
log.error(e)
def extract_document(doc):
"""Extracts indexable attributes from a Document"""
d = {}
d['id'] = doc.id
d['title'] = doc.title
d['locale'] = doc.locale
d['parent_id'] = doc.parent.id if doc.parent else None
d['content'] = doc.html
d['category'] = doc.category
d['slug'] = doc.slug
d['is_archived'] = doc.is_archived
if doc.parent is None:
d['tag'] = [tag['name'] for tag in doc.tags.values()]
else:
# Translations inherit tags from their parents.
d['tag'] = [tag['name'] for tag in doc.parent.tags.values()]
if doc.current_revision:
d['summary'] = doc.current_revision.summary
d['keywords'] = doc.current_revision.keywords
d['updated'] = doc.current_revision.created
d['current'] = doc.current_revision.id
else:
d['summary'] = None
d['keywords'] = None
d['updated'] = None
d['current'] = None
return d
def index_doc(doc, bulk=False, force_insert=False, es=None, refresh=False):
from wiki.models import Document
if es is None:
es = elasticutils.get_es()
index = get_index(Document)
try:
es.index(doc, index, doc_type=Document._meta.db_table,
id=doc['id'], bulk=bulk, force_insert=force_insert)
except pyes.urllib3.TimeoutError:
# If we have a timeout, try it again rather than die. If we
# have a second one, that will cause everything to die.
es.index(doc, index, doc_type=Document._meta.db_table,
id=doc['id'], bulk=bulk, force_insert=force_insert)
if refresh:
es.refresh(timesleep=0)
def unindex_documents(ids):
from wiki.models import Document
es = elasticutils.get_es()
index = get_index(Document)
for doc_id in ids:
try:
es.delete(index, doc_type=Document._meta.db_table, id=doc_id)
except pyes.exceptions.NotFoundException:
# If the document isn't in the index, then we ignore it.
# TODO: Is that right?
pass
def reindex_documents(percent):
"""Iterate over this to update the mapping and index all documents.
Yields number of documents done.
Note: This gets called from the commandline, so we do some logging
so the user knows what's going on.
:arg percent: The percentage of questions to index. Defaults to
100--e.g. all of them.
"""
from wiki.models import Document
from django.conf import settings
index = get_index(Document)
start_time = time.time()
log.info('reindex documents: %s %s', index, Document._meta.db_table)
es = pyes.ES(settings.ES_HOSTS, timeout=10.0)
log.info('setting up mapping....')
setup_mapping(index)
log.info('iterating through documents....')
total = Document.objects.count()
to_index = int(total * (percent / 100.0))
log.info('total documents: %s (to be indexed: %s)', total, to_index)
total = to_index
t = 0
for d in Document.objects.order_by('id').all():
t += 1
if t % 1000 == 0:
time_to_go = (total - t) * ((time.time() - start_time) / t)
if time_to_go < 60:
time_to_go = "%d secs" % time_to_go
else:
time_to_go = "%d min" % (time_to_go / 60)
log.info('%s/%s... (%s to go)', t, total, time_to_go)
es.flush_bulk(forced=True)
if t > total:
break
index_doc(extract_document(d), bulk=True, es=es)
yield t
es.flush_bulk(forced=True)
log.info('done!')
es.refresh()


@ -18,7 +18,7 @@ from tidings.models import NotificationsMixin
from tower import ugettext_lazy as _lazy, ugettext as _
from search import searcher
from search import es_utils
from search.models import SearchMixin
from search.utils import crc32
from sumo import ProgrammingError
from sumo_locales import LOCALES
@ -188,7 +188,8 @@ class _NotDocumentView(Exception):
"""A URL not pointing to the document view was passed to from_url()."""
class Document(NotificationsMixin, ModelBase, BigVocabTaggableMixin):
class Document(NotificationsMixin, ModelBase, BigVocabTaggableMixin,
SearchMixin):
"""A localized knowledgebase document, not revision-specific."""
title = models.CharField(max_length=255, db_index=True)
slug = models.CharField(max_length=255, db_index=True)
@ -627,55 +628,93 @@ class Document(NotificationsMixin, ModelBase, BigVocabTaggableMixin):
from wiki.events import EditDocumentEvent
return EditDocumentEvent.is_notifying(user, self)
@classmethod
def get_mapping(cls):
mapping = {
'properties': {
'id': {'type': 'integer'},
'title': {'type': 'string', 'analyzer': 'snowball'},
'locale': {'type': 'string', 'index': 'not_analyzed'},
'current': {'type': 'integer'},
'parent_id': {'type': 'integer'},
'content':
{'type': 'string', 'analyzer': 'snowball'},
'category': {'type': 'integer'},
'slug': {'type': 'string'},
'is_archived': {'type': 'boolean'},
'summary': {'type': 'string', 'analyzer': 'snowball'},
'keywords': {'type': 'string', 'analyzer': 'snowball'},
'updated': {'type': 'date'},
'tag': {'type': 'string'}
}
}
return mapping
@receiver(post_save, sender=Document,
dispatch_uid='wiki.search.index.document.save')
def update_document_from_index(sender, instance, **kw):
# raw is True when saving a model exactly as presented--like when
# loading fixtures. In this case we don't want to trigger.
if not settings.ES_LIVE_INDEXING or kw.get('raw'):
return
from wiki.tasks import index_documents
es_utils.add_index_task(index_documents.delay, (instance.id,))
def extract_document(self):
d = {}
d['id'] = self.id
d['title'] = self.title
d['locale'] = self.locale
d['parent_id'] = self.parent.id if self.parent else None
d['content'] = self.html
d['category'] = self.category
d['slug'] = self.slug
d['is_archived'] = self.is_archived
if self.parent is None:
d['tag'] = [tag['name'] for tag in self.tags.values()]
else:
# Translations inherit tags from their parents.
d['tag'] = [tag['name'] for tag in self.parent.tags.values()]
if self.current_revision:
d['summary'] = self.current_revision.summary
d['keywords'] = self.current_revision.keywords
d['updated'] = self.current_revision.created
d['current'] = self.current_revision.id
else:
d['summary'] = None
d['keywords'] = None
d['updated'] = None
d['current'] = None
return d
@receiver(post_save, sender=TaggedItem,
dispatch_uid='wiki.search.index.tags.save')
def update_wiki_tags_in_index(sender, instance, **kwargs):
# raw is True when saving a model exactly as presented--like when
# loading fixtures. In this case we don't want to trigger.
if (not settings.ES_LIVE_INDEXING or kwargs.get('raw') or
not isinstance(instance.content_object, Document)):
return
from wiki.tasks import index_documents
es_utils.add_index_task(index_documents.delay,
(instance.content_object.id,))
# Register this as a model we index in ES.
Document.register_search_model()
@receiver(pre_delete, sender=Document,
dispatch_uid='wiki.search.index.document.delete')
def remove_document_from_index(sender, instance, **kw):
if not settings.ES_LIVE_INDEXING:
return
from wiki.tasks import unindex_documents
unindex_documents([instance.id])
def _update_w_index(sender, instance, **kw):
"""Given a Document, creates an index task"""
if not kw.get('raw'):
obj = instance
obj.__class__.add_index_task((obj.id,))
@receiver(pre_delete, sender=TaggedItem,
dispatch_uid='wiki.search.index.tags.delete')
def update_wiki_in_index_on_tags_delete(sender, instance, **kwargs):
# raw is True when saving a model exactly as presented--like when
# loading fixtures. In this case we don't want to trigger.
if (not settings.ES_LIVE_INDEXING or kwargs.get('raw') or
not isinstance(instance.content_object, Document)):
return
def _remove_w_index(sender, instance, **kw):
"""Given a Document, create an unindex task"""
if not kw.get('raw'):
obj = instance
obj.__class__.add_unindex_task((obj.id,))
from wiki.tasks import index_documents
es_utils.add_index_task(index_documents.delay,
(instance.content_object.id,))
def _update_tag_index(sender, instance, **kw):
"""Given a TaggedItem for a Document, creates an index task"""
obj = instance.content_object
if not kw.get('raw') and isinstance(obj, Document):
obj.__class__.add_index_task((obj.id,))
w_es_post_save = receiver(
post_save, sender=Document,
dispatch_uid='w.es.post_save')(_update_w_index)
w_es_pre_delete = receiver(
pre_delete, sender=Document,
dispatch_uid='w.es.pre_delete')(_remove_w_index)
w_tag_post_save = receiver(
post_save, sender=TaggedItem,
dispatch_uid='w.tag.es.post_save')(_update_tag_index)
w_tag_pre_delete = receiver(
pre_delete, sender=TaggedItem,
dispatch_uid='w.tag.es.pre_delete')(_update_tag_index)
class Revision(ModelBase):


@ -2,7 +2,6 @@ import elasticutils
from nose.tools import eq_
from sumo.tests import ElasticTestCase
from wiki.es_search import extract_document
from wiki.tests import document
from wiki.models import Document
@ -31,14 +30,18 @@ class TestPostUpdate(ElasticTestCase):
doc2.tags.add(u'badtag')
# Verify the parent has the right tags.
doc_dict = extract_document(doc1)
doc_dict = doc1.extract_document()
eq_(doc_dict['tag'], [u'desktop', u'windows'])
# Verify the translation has the parent's tags.
doc_dict = extract_document(doc2)
doc_dict = doc2.extract_document()
eq_(doc_dict['tag'], [u'desktop', u'windows'])
def test_wiki_tags(self):
"""Make sure that adding tags to a Document causes it to
refresh the index.
"""
tag = u'hiphop'
eq_(elasticutils.S(Document).filter(tag=tag).count(), 0)
doc = document(save=True)


@ -166,22 +166,46 @@ override in ``settings_local.py``::
``ELASTICDIR/config/elasticsearch.yml``. So if you change it in
one place, you must also change it in the other.
You can also set ``USE_ELASTIC`` in your ``settings_local.py`` file.
This affects whether Kitsune does Elastic indexing when data changes
in the ``post_save`` and ``pre_delete`` hooks. For tests,
``USE_ELASTIC`` is set to ``False`` except for Elastic specific tests.
There are a few other settings you can set in your settings_local.py
There are a few other settings you can set in your ``settings_local.py``
file that override Elastic Utils defaults. See `the Elastic Utils
docs <http://elasticutils.readthedocs.org/en/latest/installation.html#configure>`_
for details.
.. Note::
Other things you can change:
One problem I have on my machine is that it takes a while for
Elastic to do stuff. ``ES_TIMEOUT`` defaults to 1, but I set it to
2 in my ``settings_local.py`` file which reduces the number of
timeout errors I get.
``ES_LIVE_INDEXING``
You can also set ``ES_LIVE_INDEXING`` in your
``settings_local.py`` file. This affects whether Kitsune does
Elastic indexing when data changes in the ``post_save`` and
``pre_delete`` hooks.
For tests, ``ES_LIVE_INDEXING`` is set to ``False`` except for
Elastic specific tests so we're not spending a ton of time
indexing things we're not using.
``ES_FLUSH_BULK_EVERY``
We do bulk indexing meaning we queue up a bunch and then push them
through all at the same time. This requires memory to queue them,
so if you've got low memory, dropping this value to something
lower (but still greater than 1) could help.
``ES_TIMEOUT``
This affects timeouts for search-related requests.
If you're having problems with ES being slow, raising this number
can be helpful.
``ES_INDEXING_TIMEOUT``
This affects all index-related operations including creating
indexes, deleting indexes, creating mappings, indexing documents
and calling flush_bulk.
If you're having problems with indexing operations timing out,
raising this number can sometimes help.
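
Putting those together, a ``settings_local.py`` for a slow development
machine might look something like this (the values are examples, not
recommendations)::

    ES_LIVE_INDEXING = True       # index in the post_save/pre_delete hooks
    ES_TIMEOUT = 10               # timeout for search-side requests
    ES_INDEXING_TIMEOUT = 60      # timeout for mappings, indexing, flush_bulk
    ES_FLUSH_BULK_EVERY = 100     # smaller bulk batches if memory is tight
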
Using Elastic Search


@ -587,9 +587,11 @@ SESSION_EXISTS_COOKIE = 'sumo_session'
ES_HOSTS = ['127.0.0.1:9200']
ES_INDEXES = {'default': 'sumo'}
ES_LIVE_INDEXING = False # Keep indexes up to date as objects are made/deleted
ES_TIMEOUT = 5 # 5 second timeouts for querying/indexing
ES_TIMEOUT = 5 # 5 second timeouts for querying
ES_INDEXING_TIMEOUT = 30 # 30 second timeouts for all things indexing
# Seconds between updating admin progress bar:
ES_REINDEX_PROGRESS_BAR_INTERVAL = 5
ES_FLUSH_BULK_EVERY = 1000
#
# Connection information for Sphinx search