зеркало из https://github.com/mozilla/kitsune.git
[579205] Implement search-based article suggestions when asking a new question.
* Added offset and limit kwargs to SearchClient.query(): handy now for AAQ suggestion search, maybe handy later for more efficient pagination. * Refactored search clients. They now have a _prepare() method for setting up the sphinx client prior to querying, so we don't have to repeat query().
This commit is contained in:
Родитель
a8bb131f36
Коммит
2b7a896dfc
|
@ -40,7 +40,7 @@ MSG_CONTENT_LONG = _lazy(u'Please keep the length of your content to %(limit_val
|
|||
class EditQuestionForm(forms.Form):
|
||||
"""Form to edit an existing question"""
|
||||
|
||||
def __init__(self, user=None, product=None, category=None, *args,
|
||||
def __init__(self, product=None, category=None, *args,
|
||||
**kwargs):
|
||||
"""Init the form.
|
||||
|
||||
|
@ -156,11 +156,12 @@ class EditQuestionForm(forms.Form):
|
|||
class NewQuestionForm(EditQuestionForm):
|
||||
"""Form to start a new question"""
|
||||
|
||||
def __init__(self, user=None, product=None, category=None, *args,
|
||||
def __init__(self, product=None, category=None, *args,
|
||||
**kwargs):
|
||||
"""Add fields particular to new questions."""
|
||||
super(NewQuestionForm, self).__init__(user, product, category, *args,
|
||||
**kwargs)
|
||||
super(NewQuestionForm, self).__init__(product=product,
|
||||
category=category,
|
||||
*args, **kwargs)
|
||||
|
||||
# Collect user agent only when making a question for the first time.
|
||||
# Otherwise, we could grab moderators' user agents.
|
||||
|
|
|
@ -87,24 +87,14 @@
|
|||
{% if search_results and not form %}
|
||||
<h3>{{ _("We've found some articles and previously answered questions that may solve your issue:") }}</h3>
|
||||
<ul class="search-results">
|
||||
<li>
|
||||
<a href="#">Lorem ipsum dolor sit amet?</a>
|
||||
<p>
|
||||
Fermentum magnis ullamcorper mauris elementum suspendisse, velit litora feugiat aliquet, accumsan mi fusce aliquam leo ullamcorper dapibus, gravida lectus feugiat habitasse cubilia, sociis...
|
||||
</p>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#">Lorem ipsum dolor sit amet?</a>
|
||||
<p>
|
||||
Fermentum magnis ullamcorper mauris elementum suspendisse, velit litora feugiat aliquet, accumsan mi fusce aliquam leo ullamcorper dapibus, gravida lectus feugiat habitasse cubilia, sociis...
|
||||
</p>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#">Lorem ipsum dolor sit amet?</a>
|
||||
<p>
|
||||
Fermentum magnis ullamcorper mauris elementum suspendisse, velit litora feugiat aliquet, accumsan mi fusce aliquam leo ullamcorper dapibus, gravida lectus feugiat habitasse cubilia, sociis...
|
||||
</p>
|
||||
</li>
|
||||
{% for result in search_results %}
|
||||
<li>
|
||||
<a href="{{ result['url'] }}">{{ result['title'] }}</a>
|
||||
<p>
|
||||
{{ result['excerpt_html']|safe }}
|
||||
</p>
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
|
||||
<form action="#question-form" method="get">
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
import logging
|
||||
from itertools import islice
|
||||
import json
|
||||
import logging
|
||||
|
||||
from django.contrib.auth.decorators import permission_required
|
||||
from django.core.exceptions import PermissionDenied
|
||||
from django.core.exceptions import PermissionDenied, ObjectDoesNotExist
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
from django.http import (HttpResponseRedirect, HttpResponse,
|
||||
HttpResponseBadRequest, HttpResponseForbidden)
|
||||
|
@ -21,6 +21,9 @@ from tower import ugettext as _
|
|||
from tower import ugettext_lazy as _lazy
|
||||
|
||||
from access.decorators import has_perm_or_owns_or_403
|
||||
from search.clients import WikiClient, QuestionsClient
|
||||
from search.utils import locale_or_default, sphinx_locale
|
||||
from sumo.models import WikiPage
|
||||
from sumo.urlresolvers import reverse
|
||||
from sumo.helpers import urlparams
|
||||
from sumo.utils import paginate
|
||||
|
@ -125,12 +128,22 @@ def new_question(request):
|
|||
articles = None
|
||||
|
||||
if request.method == 'GET':
|
||||
search = request.GET.get('search', None)
|
||||
search_results = True if search else None # TODO - get search results
|
||||
if request.GET.get('showform', False):
|
||||
search = request.GET.get('search', '')
|
||||
if search:
|
||||
search_results = _search_suggestions(search,
|
||||
locale_or_default(request.locale))
|
||||
tried_search = True
|
||||
else:
|
||||
search_results = []
|
||||
tried_search = False
|
||||
|
||||
if ((tried_search and not search_results) or
|
||||
request.GET.get('showform')):
|
||||
# Before we show him the form, make sure the user is auth'd:
|
||||
if not request.user.is_authenticated():
|
||||
return HttpResponseRedirect(settings.LOGIN_URL)
|
||||
form = NewQuestionForm(product=product, category=category,
|
||||
form = NewQuestionForm(product=product,
|
||||
category=category,
|
||||
initial={'title': search})
|
||||
else:
|
||||
form = None
|
||||
|
@ -580,6 +593,69 @@ def unwatch_question(request, question_id):
|
|||
return HttpResponseRedirect(question.get_absolute_url())
|
||||
|
||||
|
||||
def _search_suggestions(query, locale):
    """Return a list of the wiki pages and questions most relevant to `query`.

    query -- full text to search on
    locale -- locale to limit wiki page results to

    Each item returned is a dict:
        { 'url': URL where the article can be viewed,
          'title': Title of the article,
          'excerpt_html': Excerpt of the article with search terms
                          highlighted, formatted in HTML }

    Wiki pages always come first. Questions are only used to pad the list
    out when fewer than settings.QUESTIONS_MAX_SUGGESTIONS wiki pages are
    found, so wiki pages are effectively weighted infinitely higher.

    """
    def suggestions(hits, model, client, pk_of):
        """Lazily yield template-ready dicts for the Sphinx `hits`.

        Hits whose `model` row can no longer be found are skipped.

        """
        for hit in hits:
            try:
                obj = model.objects.get(pk=pk_of(hit))
            except ObjectDoesNotExist:
                continue
            yield {'url': obj.get_absolute_url(),
                   'title': obj.title,
                   'excerpt_html': client.excerpt(obj.content, query)}

    suggestion_cap = settings.QUESTIONS_MAX_SUGGESTIONS
    # Fetch a few more hits than we need, to make up for indexed objects
    # that have since been deleted:
    fetch_limit = suggestion_cap + settings.QUESTIONS_SUGGESTION_SLOP

    # Wiki pages come first:
    wiki_client = WikiClient()
    locale_filter = {'filter': 'locale',
                     'value': (sphinx_locale(locale),)}
    category_filter = {'filter': 'category',
                       'value': [c for c in
                                 settings.SEARCH_DEFAULT_CATEGORIES
                                 if c >= 0]}
    wiki_hits = wiki_client.query(query,
                                  filters=[locale_filter, category_filter],
                                  limit=fetch_limit)
    # Excerpts are built lazily, so we stop as soon as we have enough:
    found = list(islice(suggestions(wiki_hits, WikiPage, wiki_client,
                                    lambda hit: hit['id']),
                        suggestion_cap))

    # Not enough wiki pages to fill the page? Pad it out with questions:
    if len(found) < suggestion_cap:
        question_client = QuestionsClient()  # questions is en-US only
        question_hits = question_client.query(
            query, limit=fetch_limit - len(found))
        still_needed = suggestion_cap - len(found)
        found.extend(islice(
            suggestions(question_hits, Question, question_client,
                        lambda hit: hit['attrs']['question_id']),
            still_needed))

    return found
|
||||
|
||||
|
||||
def _answers_data(request, question_id, form=None, watch_form=None):
|
||||
"""Return a map of the minimal info necessary to draw an answers page."""
|
||||
question = get_object_or_404(Question, pk=question_id)
|
||||
|
|
|
@ -56,7 +56,6 @@ class SearchClient(object):
|
|||
def __init__(self):
|
||||
self.sphinx = SphinxClient()
|
||||
self.sphinx.SetServer(settings.SPHINX_HOST, settings.SPHINX_PORT)
|
||||
self.sphinx.SetLimits(0, settings.SEARCH_MAX_RESULTS)
|
||||
|
||||
# initialize regexes for markup cleaning
|
||||
self.truncate_pattern = re.compile(r'\s.*', re.MULTILINE)
|
||||
|
@ -72,7 +71,7 @@ class SearchClient(object):
|
|||
|
||||
self.compiled_patterns.append(p)
|
||||
|
||||
def _process_filters(self, filters=None):
|
||||
def _prepare_filters(self, filters=None):
|
||||
"""Process filters and filter ranges."""
|
||||
sc = self.sphinx
|
||||
sc.ResetFilters()
|
||||
|
@ -87,6 +86,9 @@ class SearchClient(object):
|
|||
sc.SetFilter(f['filter'], f['value'],
|
||||
f.get('exclude', False))
|
||||
|
||||
def _prepare(self):
|
||||
"""Override to twiddle `self.sphinx` before the query gets sent."""
|
||||
|
||||
def _query_sphinx(self, query=''):
|
||||
"""
|
||||
Pass the query to the SphinxClient() and return the results.
|
||||
|
@ -110,14 +112,15 @@ class SearchClient(object):
|
|||
else:
|
||||
return []
|
||||
|
||||
def query(self, query, filters=None):
|
||||
"""
|
||||
Query the search index.
|
||||
"""
|
||||
self._process_filters(filters)
|
||||
def query(self, query, filters=None, offset=0,
|
||||
limit=settings.SEARCH_MAX_RESULTS):
|
||||
"""Query the search index."""
|
||||
self._prepare_filters(filters)
|
||||
|
||||
self.sphinx.SetFieldWeights(self.weights)
|
||||
self.sphinx.SetLimits(offset, limit)
|
||||
|
||||
self._prepare()
|
||||
return self._query_sphinx(query)
|
||||
|
||||
def excerpt(self, result, query):
|
||||
|
@ -173,22 +176,12 @@ class QuestionsClient(SearchClient):
|
|||
super(QuestionsClient, self).__init__()
|
||||
self.groupsort = '@group desc'
|
||||
|
||||
def query(self, query, filters=None):
|
||||
"""
|
||||
Query the questions index.
|
||||
|
||||
Returns a list of matching questions by grouping the answers
|
||||
together.
|
||||
"""
|
||||
self._process_filters(filters)
|
||||
|
||||
sc = self.sphinx
|
||||
sc.SetFieldWeights(self.weights)
|
||||
sc.SetGroupBy('question_id', constants.SPH_GROUPBY_ATTR,
|
||||
def _prepare(self):
|
||||
"""Prepare to group the answers together."""
|
||||
super(QuestionsClient, self)._prepare()
|
||||
self.sphinx.SetGroupBy('question_id', constants.SPH_GROUPBY_ATTR,
|
||||
self.groupsort)
|
||||
|
||||
return self._query_sphinx(query)
|
||||
|
||||
def set_groupsort(self, groupsort=''):
|
||||
self.groupsort = groupsort
|
||||
|
||||
|
@ -212,21 +205,15 @@ class DiscussionClient(SearchClient):
|
|||
super(DiscussionClient, self).__init__()
|
||||
self.groupsort = '@group desc'
|
||||
|
||||
def query(self, query, filters=None):
|
||||
def _prepare(self):
|
||||
"""Group posts together, and ensure thread['attrs']['updated'] is the
|
||||
last post's updated date.
|
||||
|
||||
"""
|
||||
Query the search index.
|
||||
|
||||
Returns a list of matching threads by grouping posts together.
|
||||
Ensures thread['attrs']['updated'] is the last post's updated date.
|
||||
"""
|
||||
self._process_filters(filters)
|
||||
|
||||
sc = self.sphinx
|
||||
sc.SetFieldWeights(self.weights)
|
||||
sc.SetGroupBy('thread_id', constants.SPH_GROUPBY_ATTR, self.groupsort)
|
||||
sc.SetSortMode(constants.SPH_SORT_ATTR_ASC, 'created')
|
||||
|
||||
return self._query_sphinx(query)
|
||||
super(DiscussionClient, self)._prepare()
|
||||
self.sphinx.SetGroupBy('thread_id', constants.SPH_GROUPBY_ATTR,
|
||||
self.groupsort)
|
||||
self.sphinx.SetSortMode(constants.SPH_SORT_ATTR_ASC, 'created')
|
||||
|
||||
def set_groupsort(self, groupsort=''):
|
||||
self.groupsort = groupsort
|
||||
|
|
|
@ -3,6 +3,8 @@ import zlib
|
|||
|
||||
from django.conf import settings
|
||||
|
||||
from sumo_locales import LOCALES
|
||||
|
||||
|
||||
crc32 = lambda x: zlib.crc32(x.encode('utf-8')) & 0xffffffff
|
||||
|
||||
|
@ -11,9 +13,10 @@ call = lambda x: subprocess.Popen(x, stdout=subprocess.PIPE).communicate()
|
|||
|
||||
|
||||
def reindex(rotate=False):
|
||||
"""
|
||||
Reindexes sphinx. Note this is only to be used in dev and test
|
||||
environments.
|
||||
"""Reindex sphinx.
|
||||
|
||||
Note this is only to be used in dev and test environments.
|
||||
|
||||
"""
|
||||
calls = [settings.SPHINX_INDEXER, '--all', '--config',
|
||||
settings.SPHINX_CONFIG_PATH]
|
||||
|
@ -24,18 +27,36 @@ def reindex(rotate=False):
|
|||
|
||||
|
||||
def start_sphinx():
|
||||
"""
|
||||
Starts sphinx. Note this is only to be used in dev and test environments.
|
||||
"""
|
||||
"""Start sphinx.
|
||||
|
||||
Note this is only to be used in dev and test environments.
|
||||
|
||||
"""
|
||||
call([settings.SPHINX_SEARCHD, '--config',
|
||||
settings.SPHINX_CONFIG_PATH])
|
||||
|
||||
|
||||
def stop_sphinx():
|
||||
"""
|
||||
Stops sphinx. Note this is only to be used in dev and test environments.
|
||||
"""
|
||||
"""Stop sphinx.
|
||||
|
||||
Note this is only to be used in dev and test environments.
|
||||
|
||||
"""
|
||||
call([settings.SPHINX_SEARCHD, '--stop', '--config',
|
||||
settings.SPHINX_CONFIG_PATH])
|
||||
|
||||
|
||||
def locale_or_default(locale):
    """Return `locale` if it is a known locale; otherwise return a default.

    The default is taken from Django's LANGUAGE_CODE setting.

    """
    if locale in LOCALES:
        return locale
    return settings.LANGUAGE_CODE
|
||||
|
||||
|
||||
def sphinx_locale(locale):
    """Return the Sphinx-ready form of a locale string like 'en-US'.

    The value is the crc32 of the locale's `internal` name in LOCALES.

    """
    internal_name = LOCALES[locale].internal
    return crc32(internal_name)
|
||||
|
|
|
@ -18,7 +18,7 @@ from questions.models import Question
|
|||
from sumo.utils import paginate, urlencode
|
||||
from .clients import (QuestionsClient, WikiClient,
|
||||
DiscussionClient, SearchError)
|
||||
from .utils import crc32
|
||||
from .utils import crc32, locale_or_default, sphinx_locale
|
||||
import search as constants
|
||||
from sumo_locales import LOCALES
|
||||
|
||||
|
@ -198,9 +198,7 @@ def search(request):
|
|||
json.dumps({'error': _('Invalid callback function.')}),
|
||||
mimetype=mimetype, status=400)
|
||||
|
||||
language = request.GET.get('language', request.locale)
|
||||
if not language in LOCALES:
|
||||
language = settings.LANGUAGE_CODE
|
||||
language = locale_or_default(request.GET.get('language', request.locale))
|
||||
r = request.GET.copy()
|
||||
a = request.GET.get('a', '0')
|
||||
|
||||
|
@ -241,7 +239,7 @@ def search(request):
|
|||
return search_
|
||||
|
||||
cleaned = search_form.cleaned_data
|
||||
search_locale = (crc32(LOCALES[language].internal),)
|
||||
search_locale = (sphinx_locale(language),)
|
||||
|
||||
try:
|
||||
page = int(request.GET.get('page', 1))
|
||||
|
|
10
settings.py
10
settings.py
|
@ -329,7 +329,7 @@ SEARCH_RESULTS_PER_PAGE = 10
|
|||
|
||||
#
|
||||
# Search default settings
|
||||
# comma-separated tuple of category IDs
|
||||
# comma-separated tuple of included category IDs. Negative IDs are excluded.
|
||||
SEARCH_DEFAULT_CATEGORIES = (1, 17, 18, -3,)
|
||||
SEARCH_SUMMARY_LENGTH = 275
|
||||
# because of markup cleanup, search summaries lengths vary quite a bit
|
||||
|
@ -358,6 +358,14 @@ IMAGE_UPLOAD_PATH = 'uploads/images/'
|
|||
# String must not contain double quotes!
|
||||
IMAGE_ALLOWED_MIMETYPES = 'image/jpeg,image/png,image/gif'
|
||||
|
||||
# Max number of wiki pages or other questions to suggest as possible answers
|
||||
# to the question you're about to ask
|
||||
QUESTIONS_MAX_SUGGESTIONS = 5
|
||||
# Number of extra suggestion results to pull from Sphinx to make up for
|
||||
# possibly deleted wiki pages or questions. To be safe, set this to the number
|
||||
# of things that could be deleted between indexer runs.
|
||||
QUESTIONS_SUGGESTION_SLOP = 3
|
||||
|
||||
# Email
|
||||
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче