From 2b7a896dfc339bbae74156e0c76aa6eb07e67ff9 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Tue, 27 Jul 2010 09:39:07 -0700 Subject: [PATCH] [579205] Implement search-based article suggestions when asking a new question. * Added offset and limit kwargs to SearchClient.query(): handy now for AAQ suggestion search, maybe handy later for more efficient pagination. * Refactored search clients. They now have a _prepare() method for setting up the sphinx client prior to querying, so we don't have to repeat query(). --- apps/questions/forms.py | 9 +- .../templates/questions/new_question.html | 26 ++---- apps/questions/views.py | 88 +++++++++++++++++-- apps/search/clients.py | 57 +++++------- apps/search/utils.py | 39 ++++++-- apps/search/views.py | 8 +- settings.py | 10 ++- 7 files changed, 159 insertions(+), 78 deletions(-) diff --git a/apps/questions/forms.py b/apps/questions/forms.py index 21fa78749..f2a1f2c68 100644 --- a/apps/questions/forms.py +++ b/apps/questions/forms.py @@ -40,7 +40,7 @@ MSG_CONTENT_LONG = _lazy(u'Please keep the length of your content to %(limit_val class EditQuestionForm(forms.Form): """Form to edit an existing question""" - def __init__(self, user=None, product=None, category=None, *args, + def __init__(self, product=None, category=None, *args, **kwargs): """Init the form. @@ -156,11 +156,12 @@ class EditQuestionForm(forms.Form): class NewQuestionForm(EditQuestionForm): """Form to start a new question""" - def __init__(self, user=None, product=None, category=None, *args, + def __init__(self, product=None, category=None, *args, **kwargs): """Add fields particular to new questions.""" - super(NewQuestionForm, self).__init__(user, product, category, *args, - **kwargs) + super(NewQuestionForm, self).__init__(product=product, + category=category, + *args, **kwargs) # Collect user agent only when making a question for the first time. # Otherwise, we could grab moderators' user agents. 
diff --git a/apps/questions/templates/questions/new_question.html b/apps/questions/templates/questions/new_question.html index 2e7146d9f..13a54fa85 100644 --- a/apps/questions/templates/questions/new_question.html +++ b/apps/questions/templates/questions/new_question.html @@ -87,24 +87,14 @@ {% if search_results and not form %}

{{ _("We've found some articles and previously answered questions that may solve your issue:") }}

diff --git a/apps/questions/views.py b/apps/questions/views.py index 37085b9a8..fa649dcea 100644 --- a/apps/questions/views.py +++ b/apps/questions/views.py @@ -1,9 +1,9 @@ -import logging +from itertools import islice import json import logging from django.contrib.auth.decorators import permission_required -from django.core.exceptions import PermissionDenied +from django.core.exceptions import PermissionDenied, ObjectDoesNotExist from django.contrib.contenttypes.models import ContentType from django.http import (HttpResponseRedirect, HttpResponse, HttpResponseBadRequest, HttpResponseForbidden) @@ -21,6 +21,9 @@ from tower import ugettext as _ from tower import ugettext_lazy as _lazy from access.decorators import has_perm_or_owns_or_403 +from search.clients import WikiClient, QuestionsClient +from search.utils import locale_or_default, sphinx_locale +from sumo.models import WikiPage from sumo.urlresolvers import reverse from sumo.helpers import urlparams from sumo.utils import paginate @@ -125,12 +128,22 @@ def new_question(request): articles = None if request.method == 'GET': - search = request.GET.get('search', None) - search_results = True if search else None # TODO - get search results - if request.GET.get('showform', False): + search = request.GET.get('search', '') + if search: + search_results = _search_suggestions(search, + locale_or_default(request.locale)) + tried_search = True + else: + search_results = [] + tried_search = False + + if ((tried_search and not search_results) or + request.GET.get('showform')): + # Before we show him the form, make sure the user is auth'd: if not request.user.is_authenticated(): return HttpResponseRedirect(settings.LOGIN_URL) - form = NewQuestionForm(product=product, category=category, + form = NewQuestionForm(product=product, + category=category, initial={'title': search}) else: form = None @@ -580,6 +593,69 @@ def unwatch_question(request, question_id): return HttpResponseRedirect(question.get_absolute_url()) +def 
_search_suggestions(query, locale): + """Return an iterable of the most relevant wiki pages and questions. + + query -- full text to search on + locale -- locale to limit to + + Items returned are dicts: + { 'url': URL where the article can be viewed, + 'title': Title of the article, + 'excerpt_html': Excerpt of the article with search terms hilighted, + formatted in HTML } + + Weights wiki pages infinitely higher than questions at the moment. + + """ + def prepare(result, model, searcher, result_to_id): + """Turn a search result from a Sphinx client into a dict for templates. + + Return {} if an object corresponding to the result cannot be found. + + """ + try: + obj = model.objects.get(pk=result_to_id(result)) + except ObjectDoesNotExist: + return {} + return {'url': obj.get_absolute_url(), + 'title': obj.title, + 'excerpt_html': searcher.excerpt(obj.content, query)} + + max_suggestions = settings.QUESTIONS_MAX_SUGGESTIONS + query_limit = max_suggestions + settings.QUESTIONS_SUGGESTION_SLOP + + # Search wiki pages: + wiki_searcher = WikiClient() + filters = [{'filter': 'locale', + 'value': (sphinx_locale(locale),)}, + {'filter': 'category', + 'value': [x for x in settings.SEARCH_DEFAULT_CATEGORIES + if x >= 0]}] + # Lazily build excerpts from results. 
Stop when we have enough: + raw_results = wiki_searcher.query(query, filters=filters, + limit=query_limit) + results = islice((p for p in + (prepare(r, WikiPage, wiki_searcher, lambda x: x['id']) + for r in raw_results) if p), + max_suggestions) + results = list(results) + + # If we didn't find enough wiki pages to fill the page, pad it out with + # other questions: + if len(results) < max_suggestions: + question_searcher = QuestionsClient() # questions is en-US only + raw_results = question_searcher.query(query, + limit=query_limit - len(results)) + results.extend(islice((p for p in + (prepare(r, Question, question_searcher, + lambda x: x['attrs']['question_id']) + for r in raw_results) if p), + max_suggestions - len(results))) + + return results + + def _answers_data(request, question_id, form=None, watch_form=None): """Return a map of the minimal info necessary to draw an answers page.""" question = get_object_or_404(Question, pk=question_id) diff --git a/apps/search/clients.py b/apps/search/clients.py index 1ab2bf4f7..618a11c59 100644 --- a/apps/search/clients.py +++ b/apps/search/clients.py @@ -56,7 +56,6 @@ class SearchClient(object): def __init__(self): self.sphinx = SphinxClient() self.sphinx.SetServer(settings.SPHINX_HOST, settings.SPHINX_PORT) - self.sphinx.SetLimits(0, settings.SEARCH_MAX_RESULTS) # initialize regexes for markup cleaning self.truncate_pattern = re.compile(r'\s.*', re.MULTILINE) @@ -72,7 +71,7 @@ class SearchClient(object): self.compiled_patterns.append(p) - def _process_filters(self, filters=None): + def _prepare_filters(self, filters=None): """Process filters and filter ranges.""" sc = self.sphinx sc.ResetFilters() @@ -87,6 +86,9 @@ class SearchClient(object): sc.SetFilter(f['filter'], f['value'], f.get('exclude', False)) + def _prepare(self): + """Override to twiddle `self.sphinx` before the query gets sent.""" + def _query_sphinx(self, query=''): """ Pass the query to the SphinxClient() and return the results. 
@@ -110,14 +112,15 @@ class SearchClient(object): else: return [] - def query(self, query, filters=None): - """ - Query the search index. - """ - self._process_filters(filters) + def query(self, query, filters=None, offset=0, + limit=settings.SEARCH_MAX_RESULTS): + """Query the search index.""" + self._prepare_filters(filters) self.sphinx.SetFieldWeights(self.weights) + self.sphinx.SetLimits(offset, limit) + self._prepare() return self._query_sphinx(query) def excerpt(self, result, query): @@ -173,22 +176,12 @@ class QuestionsClient(SearchClient): super(QuestionsClient, self).__init__() self.groupsort = '@group desc' - def query(self, query, filters=None): - """ - Query the questions index. - - Returns a list of matching questions by grouping the answers - together. - """ - self._process_filters(filters) - - sc = self.sphinx - sc.SetFieldWeights(self.weights) - sc.SetGroupBy('question_id', constants.SPH_GROUPBY_ATTR, + def _prepare(self): + """Prepare to group the answers together.""" + super(QuestionsClient, self)._prepare() + self.sphinx.SetGroupBy('question_id', constants.SPH_GROUPBY_ATTR, self.groupsort) - return self._query_sphinx(query) - def set_groupsort(self, groupsort=''): self.groupsort = groupsort @@ -212,21 +205,15 @@ class DiscussionClient(SearchClient): super(DiscussionClient, self).__init__() self.groupsort = '@group desc' - def query(self, query, filters=None): + def _prepare(self): + """Group posts together, and ensure thread['attrs']['updated'] is the + last post's updated date. + """ - Query the search index. - - Returns a list of matching threads by grouping posts together. - Ensures thread['attrs']['updated'] is the last post's updated date. 
- """ - self._process_filters(filters) - - sc = self.sphinx - sc.SetFieldWeights(self.weights) - sc.SetGroupBy('thread_id', constants.SPH_GROUPBY_ATTR, self.groupsort) - sc.SetSortMode(constants.SPH_SORT_ATTR_ASC, 'created') - - return self._query_sphinx(query) + super(DiscussionClient, self)._prepare() + self.sphinx.SetGroupBy('thread_id', constants.SPH_GROUPBY_ATTR, + self.groupsort) + self.sphinx.SetSortMode(constants.SPH_SORT_ATTR_ASC, 'created') def set_groupsort(self, groupsort=''): self.groupsort = groupsort diff --git a/apps/search/utils.py b/apps/search/utils.py index 8a8458cb7..9664e8177 100644 --- a/apps/search/utils.py +++ b/apps/search/utils.py @@ -3,6 +3,8 @@ import zlib from django.conf import settings +from sumo_locales import LOCALES + crc32 = lambda x: zlib.crc32(x.encode('utf-8')) & 0xffffffff @@ -11,9 +13,10 @@ call = lambda x: subprocess.Popen(x, stdout=subprocess.PIPE).communicate() def reindex(rotate=False): - """ - Reindexes sphinx. Note this is only to be used in dev and test - environments. + """Reindex sphinx. + + Note this is only to be used in dev and test environments. + """ calls = [settings.SPHINX_INDEXER, '--all', '--config', settings.SPHINX_CONFIG_PATH] @@ -24,18 +27,36 @@ def reindex(rotate=False): def start_sphinx(): - """ - Starts sphinx. Note this is only to be used in dev and test environments. - """ + """Start sphinx. + Note this is only to be used in dev and test environments. + + """ call([settings.SPHINX_SEARCHD, '--config', settings.SPHINX_CONFIG_PATH]) def stop_sphinx(): - """ - Stops sphinx. Note this is only to be used in dev and test environments. - """ + """Stop sphinx. + Note this is only to be used in dev and test environments. + + """ call([settings.SPHINX_SEARCHD, '--stop', '--config', settings.SPHINX_CONFIG_PATH]) + + +def locale_or_default(locale): + """Return `locale` or, if `locale` isn't a known locale, a default. + + Default is taken from Django's LANGUAGE_CODE setting. 
+ + """ + if locale not in LOCALES: + locale = settings.LANGUAGE_CODE + return locale + + +def sphinx_locale(locale): + """Given a locale string like 'en-US', return a Sphinx-ready locale.""" + return crc32(LOCALES[locale].internal) diff --git a/apps/search/views.py b/apps/search/views.py index 7050e051a..b8957954b 100644 --- a/apps/search/views.py +++ b/apps/search/views.py @@ -18,7 +18,7 @@ from questions.models import Question from sumo.utils import paginate, urlencode from .clients import (QuestionsClient, WikiClient, DiscussionClient, SearchError) -from .utils import crc32 +from .utils import crc32, locale_or_default, sphinx_locale import search as constants from sumo_locales import LOCALES @@ -198,9 +198,7 @@ def search(request): json.dumps({'error': _('Invalid callback function.')}), mimetype=mimetype, status=400) - language = request.GET.get('language', request.locale) - if not language in LOCALES: - language = settings.LANGUAGE_CODE + language = locale_or_default(request.GET.get('language', request.locale)) r = request.GET.copy() a = request.GET.get('a', '0') @@ -241,7 +239,7 @@ def search(request): return search_ cleaned = search_form.cleaned_data - search_locale = (crc32(LOCALES[language].internal),) + search_locale = (sphinx_locale(language),) try: page = int(request.GET.get('page', 1)) diff --git a/settings.py b/settings.py index 701a810d2..64be0b093 100644 --- a/settings.py +++ b/settings.py @@ -329,7 +329,7 @@ SEARCH_RESULTS_PER_PAGE = 10 # # Search default settings -# comma-separated tuple of category IDs +# comma-separated tuple of included category IDs. Negative IDs are excluded. SEARCH_DEFAULT_CATEGORIES = (1, 17, 18, -3,) SEARCH_SUMMARY_LENGTH = 275 # because of markup cleanup, search summaries lengths vary quite a bit @@ -358,6 +358,14 @@ IMAGE_UPLOAD_PATH = 'uploads/images/' # String must not contain double quotes! 
IMAGE_ALLOWED_MIMETYPES = 'image/jpeg,image/png,image/gif' +# Max number of wiki pages or other questions to suggest that might answer +# the question you're about to ask +QUESTIONS_MAX_SUGGESTIONS = 5 +# Number of extra suggestion results to pull from Sphinx to make up for +# possibly deleted wiki pages or questions. To be safe, set this to the number +# of things that could be deleted between indexer runs. +QUESTIONS_SUGGESTION_SLOP = 3 + # Email EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'