[579205] Implement search-based article suggestions when asking a new question.

* Added offset and limit kwargs to SearchClient.query(): handy now for AAQ suggestion search, maybe handy later for more efficient pagination.
* Refactored search clients. They now have a _prepare() method for setting up the Sphinx client prior to querying, so subclasses no longer have to duplicate query().
This commit is contained in:
Erik Rose 2010-07-27 09:39:07 -07:00 коммит произвёл James Socol
Родитель a8bb131f36
Коммит 2b7a896dfc
7 изменённых файлов: 159 добавлений и 78 удалений

Просмотреть файл

@ -40,7 +40,7 @@ MSG_CONTENT_LONG = _lazy(u'Please keep the length of your content to %(limit_val
class EditQuestionForm(forms.Form):
"""Form to edit an existing question"""
def __init__(self, user=None, product=None, category=None, *args,
def __init__(self, product=None, category=None, *args,
**kwargs):
"""Init the form.
@ -156,11 +156,12 @@ class EditQuestionForm(forms.Form):
class NewQuestionForm(EditQuestionForm):
"""Form to start a new question"""
def __init__(self, user=None, product=None, category=None, *args,
def __init__(self, product=None, category=None, *args,
**kwargs):
"""Add fields particular to new questions."""
super(NewQuestionForm, self).__init__(user, product, category, *args,
**kwargs)
super(NewQuestionForm, self).__init__(product=product,
category=category,
*args, **kwargs)
# Collect user agent only when making a question for the first time.
# Otherwise, we could grab moderators' user agents.

Просмотреть файл

@ -87,24 +87,14 @@
{% if search_results and not form %}
<h3>{{ _("We've found some articles and previously answered questions that may solve your issue:") }}</h3>
<ul class="search-results">
<li>
<a href="#">Lorem ipsum dolor sit amet?</a>
<p>
Fermentum magnis ullamcorper mauris elementum suspendisse, velit litora feugiat aliquet, accumsan mi fusce aliquam leo ullamcorper dapibus, gravida lectus feugiat habitasse cubilia, sociis...
</p>
</li>
<li>
<a href="#">Lorem ipsum dolor sit amet?</a>
<p>
Fermentum magnis ullamcorper mauris elementum suspendisse, velit litora feugiat aliquet, accumsan mi fusce aliquam leo ullamcorper dapibus, gravida lectus feugiat habitasse cubilia, sociis...
</p>
</li>
<li>
<a href="#">Lorem ipsum dolor sit amet?</a>
<p>
Fermentum magnis ullamcorper mauris elementum suspendisse, velit litora feugiat aliquet, accumsan mi fusce aliquam leo ullamcorper dapibus, gravida lectus feugiat habitasse cubilia, sociis...
</p>
</li>
{% for result in search_results %}
<li>
<a href="{{ result['url'] }}">{{ result['title'] }}</a>
<p>
{{ result['excerpt_html']|safe }}
</p>
</li>
{% endfor %}
</ul>
<form action="#question-form" method="get">

Просмотреть файл

@ -1,9 +1,9 @@
import logging
from itertools import islice
import json
import logging
from django.contrib.auth.decorators import permission_required
from django.core.exceptions import PermissionDenied
from django.core.exceptions import PermissionDenied, ObjectDoesNotExist
from django.contrib.contenttypes.models import ContentType
from django.http import (HttpResponseRedirect, HttpResponse,
HttpResponseBadRequest, HttpResponseForbidden)
@ -21,6 +21,9 @@ from tower import ugettext as _
from tower import ugettext_lazy as _lazy
from access.decorators import has_perm_or_owns_or_403
from search.clients import WikiClient, QuestionsClient
from search.utils import locale_or_default, sphinx_locale
from sumo.models import WikiPage
from sumo.urlresolvers import reverse
from sumo.helpers import urlparams
from sumo.utils import paginate
@ -125,12 +128,22 @@ def new_question(request):
articles = None
if request.method == 'GET':
search = request.GET.get('search', None)
search_results = True if search else None # TODO - get search results
if request.GET.get('showform', False):
search = request.GET.get('search', '')
if search:
search_results = _search_suggestions(search,
locale_or_default(request.locale))
tried_search = True
else:
search_results = []
tried_search = False
if ((tried_search and not search_results) or
request.GET.get('showform')):
# Before we show him the form, make sure the user is auth'd:
if not request.user.is_authenticated():
return HttpResponseRedirect(settings.LOGIN_URL)
form = NewQuestionForm(product=product, category=category,
form = NewQuestionForm(product=product,
category=category,
initial={'title': search})
else:
form = None
@ -580,6 +593,69 @@ def unwatch_question(request, question_id):
return HttpResponseRedirect(question.get_absolute_url())
def _search_suggestions(query, locale):
    """Find wiki pages and questions that may already answer `query`.

    query -- full text to search on
    locale -- locale the wiki page search is limited to

    Returns a list of at most settings.QUESTIONS_MAX_SUGGESTIONS dicts:

        { 'url': URL where the article can be viewed,
          'title': Title of the article,
          'excerpt_html': Excerpt of the article with search terms
                          highlighted, formatted in HTML }

    Wiki pages always come first; questions are only used to pad the list
    when too few wiki pages are found.

    """
    def to_suggestion(hit, model, client, id_of):
        """Convert one Sphinx hit into a template-ready dict.

        An empty dict means the object behind the hit could not be found.

        """
        try:
            match = model.objects.get(pk=id_of(hit))
        except ObjectDoesNotExist:
            return {}
        return {'url': match.get_absolute_url(),
                'title': match.title,
                'excerpt_html': client.excerpt(match.content, query)}

    def usable(hits, model, client, id_of):
        """Lazily map hits to suggestions, dropping the ones not found."""
        candidates = (to_suggestion(hit, model, client, id_of)
                      for hit in hits)
        return (found for found in candidates if found)

    wanted = settings.QUESTIONS_MAX_SUGGESTIONS
    # Ask Sphinx for a few extra hits so that hits whose objects have since
    # disappeared from the database don't leave us short.
    fetch_limit = wanted + settings.QUESTIONS_SUGGESTION_SLOP

    # Wiki pages first, restricted to this locale and to the non-negative
    # default categories:
    wiki_client = WikiClient()
    locale_filter = {'filter': 'locale',
                     'value': (sphinx_locale(locale),)}
    category_filter = {'filter': 'category',
                       'value': [cat for cat in
                                 settings.SEARCH_DEFAULT_CATEGORIES
                                 if cat >= 0]}
    wiki_hits = wiki_client.query(query,
                                  filters=[locale_filter, category_filter],
                                  limit=fetch_limit)
    # islice stops pulling (and thus stops building excerpts) as soon as we
    # have enough suggestions.
    suggestions = list(islice(
        usable(wiki_hits, WikiPage, wiki_client, lambda hit: hit['id']),
        wanted))

    # Not enough wiki pages? Pad the list out with other questions.
    shortfall = wanted - len(suggestions)
    if shortfall > 0:
        question_client = QuestionsClient()  # questions is en-US only
        question_hits = question_client.query(
            query, limit=fetch_limit - len(suggestions))
        suggestions.extend(islice(
            usable(question_hits, Question, question_client,
                   lambda hit: hit['attrs']['question_id']),
            shortfall))

    return suggestions
def _answers_data(request, question_id, form=None, watch_form=None):
"""Return a map of the minimal info necessary to draw an answers page."""
question = get_object_or_404(Question, pk=question_id)

Просмотреть файл

@ -56,7 +56,6 @@ class SearchClient(object):
def __init__(self):
self.sphinx = SphinxClient()
self.sphinx.SetServer(settings.SPHINX_HOST, settings.SPHINX_PORT)
self.sphinx.SetLimits(0, settings.SEARCH_MAX_RESULTS)
# initialize regexes for markup cleaning
self.truncate_pattern = re.compile(r'\s.*', re.MULTILINE)
@ -72,7 +71,7 @@ class SearchClient(object):
self.compiled_patterns.append(p)
def _process_filters(self, filters=None):
def _prepare_filters(self, filters=None):
"""Process filters and filter ranges."""
sc = self.sphinx
sc.ResetFilters()
@ -87,6 +86,9 @@ class SearchClient(object):
sc.SetFilter(f['filter'], f['value'],
f.get('exclude', False))
def _prepare(self):
"""Override to twiddle `self.sphinx` before the query gets sent."""
def _query_sphinx(self, query=''):
"""
Pass the query to the SphinxClient() and return the results.
@ -110,14 +112,15 @@ class SearchClient(object):
else:
return []
def query(self, query, filters=None):
"""
Query the search index.
"""
self._process_filters(filters)
def query(self, query, filters=None, offset=0,
limit=settings.SEARCH_MAX_RESULTS):
"""Query the search index."""
self._prepare_filters(filters)
self.sphinx.SetFieldWeights(self.weights)
self.sphinx.SetLimits(offset, limit)
self._prepare()
return self._query_sphinx(query)
def excerpt(self, result, query):
@ -173,22 +176,12 @@ class QuestionsClient(SearchClient):
super(QuestionsClient, self).__init__()
self.groupsort = '@group desc'
def query(self, query, filters=None):
"""
Query the questions index.
Returns a list of matching questions by grouping the answers
together.
"""
self._process_filters(filters)
sc = self.sphinx
sc.SetFieldWeights(self.weights)
sc.SetGroupBy('question_id', constants.SPH_GROUPBY_ATTR,
def _prepare(self):
"""Prepare to group the answers together."""
super(QuestionsClient, self)._prepare()
self.sphinx.SetGroupBy('question_id', constants.SPH_GROUPBY_ATTR,
self.groupsort)
return self._query_sphinx(query)
def set_groupsort(self, groupsort=''):
self.groupsort = groupsort
@ -212,21 +205,15 @@ class DiscussionClient(SearchClient):
super(DiscussionClient, self).__init__()
self.groupsort = '@group desc'
def query(self, query, filters=None):
def _prepare(self):
"""Group posts together, and ensure thread['attrs']['updated'] is the
last post's updated date.
"""
Query the search index.
Returns a list of matching threads by grouping posts together.
Ensures thread['attrs']['updated'] is the last post's updated date.
"""
self._process_filters(filters)
sc = self.sphinx
sc.SetFieldWeights(self.weights)
sc.SetGroupBy('thread_id', constants.SPH_GROUPBY_ATTR, self.groupsort)
sc.SetSortMode(constants.SPH_SORT_ATTR_ASC, 'created')
return self._query_sphinx(query)
super(DiscussionClient, self)._prepare()
self.sphinx.SetGroupBy('thread_id', constants.SPH_GROUPBY_ATTR,
self.groupsort)
self.sphinx.SetSortMode(constants.SPH_SORT_ATTR_ASC, 'created')
def set_groupsort(self, groupsort=''):
self.groupsort = groupsort

Просмотреть файл

@ -3,6 +3,8 @@ import zlib
from django.conf import settings
from sumo_locales import LOCALES
crc32 = lambda x: zlib.crc32(x.encode('utf-8')) & 0xffffffff
@ -11,9 +13,10 @@ call = lambda x: subprocess.Popen(x, stdout=subprocess.PIPE).communicate()
def reindex(rotate=False):
"""
Reindexes sphinx. Note this is only to be used in dev and test
environments.
"""Reindex sphinx.
Note this is only to be used in dev and test environments.
"""
calls = [settings.SPHINX_INDEXER, '--all', '--config',
settings.SPHINX_CONFIG_PATH]
@ -24,18 +27,36 @@ def reindex(rotate=False):
def start_sphinx():
"""
Starts sphinx. Note this is only to be used in dev and test environments.
"""
"""Start sphinx.
Note this is only to be used in dev and test environments.
"""
call([settings.SPHINX_SEARCHD, '--config',
settings.SPHINX_CONFIG_PATH])
def stop_sphinx():
"""
Stops sphinx. Note this is only to be used in dev and test environments.
"""
"""Stop sphinx.
Note this is only to be used in dev and test environments.
"""
call([settings.SPHINX_SEARCHD, '--stop', '--config',
settings.SPHINX_CONFIG_PATH])
def locale_or_default(locale):
    """Return `locale` if it is a known locale, else the default locale.

    A locale is known when it is a member of LOCALES. The default is taken
    from Django's LANGUAGE_CODE setting.

    """
    if locale in LOCALES:
        return locale
    return settings.LANGUAGE_CODE
def sphinx_locale(locale):
    """Return the Sphinx-ready form of a locale string such as 'en-US'.

    The result is the crc32 of the `internal` attribute of the locale's
    entry in LOCALES.

    """
    internal_name = LOCALES[locale].internal
    return crc32(internal_name)

Просмотреть файл

@ -18,7 +18,7 @@ from questions.models import Question
from sumo.utils import paginate, urlencode
from .clients import (QuestionsClient, WikiClient,
DiscussionClient, SearchError)
from .utils import crc32
from .utils import crc32, locale_or_default, sphinx_locale
import search as constants
from sumo_locales import LOCALES
@ -198,9 +198,7 @@ def search(request):
json.dumps({'error': _('Invalid callback function.')}),
mimetype=mimetype, status=400)
language = request.GET.get('language', request.locale)
if not language in LOCALES:
language = settings.LANGUAGE_CODE
language = locale_or_default(request.GET.get('language', request.locale))
r = request.GET.copy()
a = request.GET.get('a', '0')
@ -241,7 +239,7 @@ def search(request):
return search_
cleaned = search_form.cleaned_data
search_locale = (crc32(LOCALES[language].internal),)
search_locale = (sphinx_locale(language),)
try:
page = int(request.GET.get('page', 1))

Просмотреть файл

@ -329,7 +329,7 @@ SEARCH_RESULTS_PER_PAGE = 10
#
# Search default settings
# comma-separated tuple of category IDs
# comma-separated tuple of included category IDs. Negative IDs are excluded.
SEARCH_DEFAULT_CATEGORIES = (1, 17, 18, -3,)
SEARCH_SUMMARY_LENGTH = 275
# because of markup cleanup, search summaries lengths vary quite a bit
@ -358,6 +358,14 @@ IMAGE_UPLOAD_PATH = 'uploads/images/'
# String must not contain double quotes!
IMAGE_ALLOWED_MIMETYPES = 'image/jpeg,image/png,image/gif'
# Max number of wiki pages or other questions to suggest as possible answers
# to the question you're about to ask
QUESTIONS_MAX_SUGGESTIONS = 5
# Number of extra suggestion results to pull from Sphinx to make up for
# possibly deleted wiki pages or questions. To be safe, set this to the number
# of things that could be deleted between indexer runs.
QUESTIONS_SUGGESTION_SLOP = 3
# Email
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'