Mirror of https://github.com/mozilla/kitsune.git
Added API for oSUMO.
This commit is contained in:
Parent
62a5803df0
Commit
cf8ccaa583
@ -33,6 +33,7 @@ Part 2: Developer's Guide
   karma
   vendor
   wikidocs
+  osumo
   notes
   licenses

@ -0,0 +1,23 @@
.. _osumo-chapter:

============
Offline SUMO
============

The primary documentation for Offline SUMO lives at
https://osumo.readthedocs.org. The source lives at
https://github.com/mozilla/osumo.

Offline SUMO requires a component in Kitsune, and this component relies
heavily on Redis: all the articles are generated once a day and stored in
Redis. Make sure Redis is available.

The code for Offline SUMO's bundle generation lives under
`kitsune/offline`. Inside, the following files are defined:

- utils.py does the actual bundle generation.
- index.py is responsible for generating the index for offline search.
- urls.py defines the URLs for Django.
- views.py implements the three views.
- cron.py has the cron job that runs daily for bundle generation.
- tests/ has the unit tests.

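The daily cron job (added in cron.py below) is the normal way bundles get generated, but they can also be regenerated by hand. A minimal sketch, run from a Django shell, using only the build_kb_bundles function this commit adds; the product list argument is optional:

    # Regenerate offline bundles outside the daily cron schedule.
    # Requires a working Redis 'default' connection, as described above.
    from kitsune.offline.cron import build_kb_bundles
    build_kb_bundles(products=('firefox',))
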
@ -0,0 +1,72 @@
import datetime
import logging

from django.conf import settings
from django.contrib import admin, messages
from django.shortcuts import render

from kitsune.offline.cron import build_kb_bundles
from kitsune.sumo.redis_utils import redis_client


log = logging.getLogger('k.offline')


def offline_admin(request):
    redis = redis_client('default')

    action = request.POST.get('action')
    if action == 'generate_all':
        log.info('Requested regenerating all bundles.')
        build_kb_bundles()
        messages.add_message(request, messages.SUCCESS,
                             'Bundles regenerated!')
    elif action == 'delete_all':
        if redis.delete(*redis.keys('osumo:*')):
            messages.add_message(request, messages.SUCCESS,
                                 'Deleted all bundles!')
        else:
            messages.add_message(request, messages.ERROR,
                                 'Bundle deletion failed.')

    keys = redis.keys('osumo:*')
    bundles = []
    totalsize = 0
    for key in keys:
        bundle = {}
        # Reverse operation to redis_bundle_name; the schema is:
        # osumo:locale~product
        tmp = key.split(':')[1].split('~')

        locale, bundle['product'] = tuple(tmp)
        # Get the non-.lower()'ed version of the locale.
        locale = settings.LANGUAGE_URL_MAP[locale]
        bundle['locale'] = settings.LOCALES[locale].english

        bundle['hash'] = redis.hget(key, 'hash')

        updated = float(redis.hget(key, 'updated'))
        updated = datetime.datetime.fromtimestamp(updated)
        bundle['updated'] = updated.strftime('%Y-%m-%d %H:%M:%S')

        bundle['size'] = round(len(redis.hget(key, 'bundle')) / 1024.0, 2)
        totalsize += bundle['size']

        bundles.append(bundle)

    # Sort by locale and then by product.
    bundles.sort(key=lambda x: x['locale'] + x['product'])

    totalsize /= 1024
    totalsize = round(totalsize, 2)

    return render(request,
                  'admin/offline.html',
                  {'title': 'Offline SUMO Administration',
                   'bundles': bundles,
                   'totalsize': totalsize})


admin.site.register_view('offline',
                         offline_admin,
                         'Offline SUMO Administration')

@ -0,0 +1,46 @@
import logging
import time

from django.conf import settings

from cronjobs import register
from statsd import statsd

from kitsune.offline.utils import (
    bundle_for_product,
    merge_bundles,
    insert_bundle_into_redis
)
from kitsune.products.models import Product
from kitsune.sumo.utils import uselocale
from kitsune.sumo.redis_utils import redis_client


log = logging.getLogger('k.offline')


@register
def build_kb_bundles(products=('firefox-os', 'firefox', 'mobile')):
    redis = redis_client('default')

    if not redis:
        raise IOError('Redis not available. Cannot generate offline bundles.')

    start_time = time.time()
    size = 0

    products = [Product.objects.get(slug=p) for p in products]
    with statsd.timer('offline.build_kb_bundles.time_elapsed'):
        for locale in settings.SUMO_LANGUAGES:
            for product in products:
                with uselocale(locale):
                    bundle = merge_bundles(bundle_for_product(product, locale))

                size += len(insert_bundle_into_redis(redis,
                                                     product.slug,
                                                     locale,
                                                     bundle)[0])

    time_taken = time.time() - start_time
    log.info('Generated all offline bundles. '
             'Size: {0}. Took {1} seconds.'.format(size, time_taken))

@ -0,0 +1,171 @@
# -*- coding: utf-8 -*-
from __future__ import division

import math
import string
import re


_whitespace_regex = re.compile(r'\s|-', flags=re.U)
_alpha_regex = re.compile(r'\w', flags=re.U)


def find_word_locations_with_spaces(s):
    """Builds an index in the format of {word: locations}.

    This is an English-like search. For languages that do not use spaces
    to separate words, use find_word_locations_without_spaces.

    This is a future-proof function. If we need to add location-based
    indexing for better searches with multiple search terms (especially
    for languages like Chinese, Japanese, and Korean), we need each
    word's location.

    In this routine, words at the end of a sentence are separated from the
    next word by a gap of 2, and by a gap of 1 if they are separated by a
    comma (or the like).

    Right now, the routine is only used to get the word counts in
    TFIDFIndex.
    """
    s = s.lower()
    words = [u'']
    for c in s:
        if c in '\'"[]1234567890/\\()_':
            continue
        elif c in '.!?':  # We want to treat . as a big stop. Add two spaces.
            words.append(u'')
            words.append(u'')
        elif _whitespace_regex.match(c) or c in string.punctuation:
            words.append(u'')
        elif _alpha_regex.match(c) is not None:
            words[-1] += c
        else:
            # Characters we don't care about, such as control characters.
            # It's okay to skip them.
            continue

    locations = {}
    for i, w in enumerate(words):
        if w:
            locations.setdefault(w, []).append(i)

    return locations


def find_word_locations_without_spaces(s):
    """Builds an index in the format of {word: locations}.

    This method is for languages like Chinese where there are no spaces
    to denote the beginning and end of a word.
    """
    words = [u'']
    for c in s:
        if c in u'\'"[]1234567890/\\()_()【】『』、¥《》’‘”“':
            continue
        # These are at least the sentence-ending punctuation marks in Chinese.
        elif c in u'。!?':
            words.append(u'')
            words.append(u'')
        # Yes, East Asian languages can still contain whitespace.
        elif _whitespace_regex.match(c) or c in u";:,、" + string.punctuation:
            words.append(u'')
        elif _alpha_regex.match(c) is not None:
            words.append(c)
        else:
            # Something weird, but it is totally okay. This character is
            # probably not significant (maybe invisible).
            continue

    locations = {}
    for i, w in enumerate(words):
        if w:
            locations.setdefault(w, []).append(i)
    return locations


class TFIDFIndex(object):
    """This is an index for search and ranking based on TF-IDF.

    TF-IDF (Term Frequency - Inverse Document Frequency) is a relatively
    simple and intuitive NLP technique that scores words in a document,
    given a corpus, based on how important each word is.

    A full explanation of this is provided at
    http://osumo.readthedocs.org/en/latest/offlinesearch.html#index-structure.
    """
    def __init__(self):
        self.doc_count = 0
        self.global_word_freq = {}
        self.local_word_freq = {}
        self.docs_words_boosts = {}

    def feed(self, doc_id, texts, get_locations):
        self.doc_count += 1
        self.local_word_freq.setdefault(doc_id, {})
        self.docs_words_boosts.setdefault(doc_id, {})

        for text, boost in texts:
            locations = get_locations(text)
            for w, loc in locations.iteritems():
                global_freq = self.global_word_freq.setdefault(w, 0)
                local_freq = len(loc)
                self.global_word_freq[w] = global_freq + local_freq

                old_local_freq = self.local_word_freq[doc_id].setdefault(w, 0)
                self.local_word_freq[doc_id][w] = old_local_freq + local_freq

                boost = max(self.docs_words_boosts[doc_id].get(w, 0), boost)

                if boost != 1:  # Save some space.
                    self.docs_words_boosts[doc_id][w] = boost

    def _f(self, term, doc_id):
        """The frequency of a certain term in a certain document."""
        return self.local_word_freq[doc_id][term]

    def _tf(self, term, doc_id):
        """The term-frequency term of the TF-IDF formula.

        Adapted from Wikipedia:
        tf(t, d) = 0.5 + \\frac{0.5 \\cdot f(t, d)}{\\max\\{f(w, d) : w \\in d\\}}
        """
        o = self._f(term, doc_id) / max(self.local_word_freq[doc_id].values())
        return 0.5 + (0.5 * o)

    def _idf(self, term):
        """The inverse-document-frequency term of the TF-IDF formula.

        Adapted from Wikipedia:
        idf(t, D) = \\log_2 \\frac{|D|}{|\\{d \\in D : t \\in d\\}|}
        """
        appearance = 0
        for doc_id, words in self.local_word_freq.iteritems():
            appearance += 1 if term in words else 0

        return math.log(self.doc_count / appearance, 2)

    def tfidf(self, term, doc_id):
        """The whole TF-IDF formula put together.

        Adapted from Wikipedia.
        """
        boost = self.docs_words_boosts[doc_id].get(term, 1)
        return self._tf(term, doc_id) * self._idf(term) * boost

    def tfidf_doc(self, doc_id):
        """Computes the TF-IDF score for each term in a document."""
        doc = self.local_word_freq[doc_id]
        scores = []
        for word in doc:
            scores.append((word, round(self.tfidf(word, doc_id), 2)))
        scores.sort(key=lambda x: x[1], reverse=True)
        return scores

    def offline_index(self):
        """Builds the offline index."""
        index = {}
        for doc_id in self.local_word_freq:
            scores = self.tfidf_doc(doc_id)
            for word, score in scores:
                l = index.setdefault(word, [])
                l.append((doc_id, score))
        return index

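To make the scoring concrete, here is a minimal sketch of building and querying the index with two hypothetical documents; it uses only the functions and methods defined above:

    from kitsune.offline.index import (TFIDFIndex,
                                       find_word_locations_with_spaces)

    builder = TFIDFIndex()
    # feed() takes (doc_id, [(text, boost), ...], get_locations), the same
    # shape bundle_for_product uses: titles get a 1.2 boost.
    builder.feed(1, [(u'firefox crashes', 1.2), (u'fix crashes', 1)],
                 find_word_locations_with_spaces)
    builder.feed(2, [(u'install firefox', 1.2), (u'how to install', 1)],
                 find_word_locations_with_spaces)

    index = builder.offline_index()
    # index maps word -> [(doc_id, score), ...]. 'crashes' appears only in
    # document 1, so its idf (and thus its score there) is higher than that
    # of 'firefox', which appears in both documents and scores 0.
    print index['crashes']
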
@ -0,0 +1,59 @@
{% extends "kadmin/base.html" %}

{% block content_title %}
  <h1>Offline SUMO Administration</h1>
{% endblock %}

{% block content %}
  <section>
    <h2>Currently available bundles</h2>
    {% if bundles %}
      <table>
        <thead>
          <tr>
            <th>Locale</th>
            <th>Product</th>
            <th>Bundle Hash</th>
            <th>Last updated (server time)</th>
            <th>Size</th>
          </tr>
        </thead>
        <tbody>
          {% for bundle in bundles %}
            <tr>
              <td>{{ bundle.locale }}</td>
              <td>{{ bundle.product }}</td>
              <td>{{ bundle.hash }}</td>
              <td>{{ bundle.updated }}</td>
              <td>{{ bundle.size }} KB</td>
            </tr>
          {% endfor %}
          <tr>
            <td><strong>All</strong></td>
            <td><strong>All</strong></td>
            <td> --- </td>
            <td> --- </td>
            <td><strong>{{ totalsize }} MB</strong></td>
          </tr>
        </tbody>
      </table>
      <p>Note: sizes are the raw sizes of the JSON. They may not reflect
      the actual size in Redis or the size downloaded, due to compression.</p>
    {% else %}
      <p>No bundles are in Redis. Please generate them.</p>
    {% endif %}
  </section>

  <section>
    <h2>Database administration</h2>
    <form method="POST">
      {% csrf_token %}
      <input type="hidden" name="action" value="generate_all" />
      <input type="submit" value="Regenerate all bundles (this may take a while)" />
    </form>
    <form method="POST">
      {% csrf_token %}
      <input type="hidden" name="action" value="delete_all" />
      <input type="submit" value="Delete all bundles" />
    </form>
  </section>
{% endblock %}

@ -0,0 +1,298 @@
# -*- coding: utf-8 -*-
import time

from nose.tools import eq_

from kitsune.offline import utils
from kitsune.products.tests import product, topic
from kitsune.sumo.tests import TestCase
from kitsune.wiki.tests import document, revision


def _create_doc(title='', product=None, topic=None, is_archived=False):
    title = 'test ' + title if title else 'test'
    doc = document(title=title, save=True, is_archived=is_archived)
    revision(summary='summary', is_approved=True, document=doc, save=True)

    if is_archived:
        expected = {
            'key': 'en-US~' + doc.slug,
            'title': doc.title,
            'archived': True,
            'slug': doc.slug
        }
    else:
        updated = time.mktime(doc.current_revision.created.timetuple())
        expected = {
            'key': 'en-US~' + doc.slug,
            'title': title,
            'html': doc.html,
            'updated': updated,
            'slug': doc.slug,
            'id': doc.id,
            'archived': False
        }

    if product:
        doc.products.add(product)

    if topic:
        doc.topics.add(topic)

    return doc, expected


def _create_product_bundle(prefix='moo'):
    p = product(title=prefix + 'firefox', save=True)
    t1 = topic(title=prefix + 'topic1', product=p, save=True)
    t2 = topic(title=prefix + 'topic2', product=p, save=True)

    doc1, expected_doc1 = _create_doc(title=prefix + 'doc1',
                                      product=p, topic=t1)
    doc2, expected_doc2 = _create_doc(title=prefix + 'doc2',
                                      product=p, topic=t2)

    expected_locale_doc = {
        'key': u'en-US',
        'name': u'English',
        'products': [{
            'slug': p.slug,
            'name': p.title
        }]
    }

    expected_topic1 = {
        'key': 'en-US~' + p.slug + '~' + t1.slug,
        'name': t1.title,
        'docs': [doc1.slug],
        'product': p.slug,
        'slug': t1.slug,
        'children': []
    }

    expected_topic2 = {
        'key': 'en-US~' + p.slug + '~' + t2.slug,
        'name': t2.title,
        'docs': [doc2.slug],
        'product': p.slug,
        'slug': t2.slug,
        'children': []
    }

    return p, {
        'doc1': expected_doc1,
        'doc2': expected_doc2,
        'locale': expected_locale_doc,
        'topic1': expected_topic1,
        'topic2': expected_topic2
    }


class OfflineWikiDataGenerationTest(TestCase):
    def test_serialize_document(self):
        doc, expected = _create_doc()
        serialized = utils.serialize_document_for_offline(doc)
        eq_(expected, serialized)

    def test_serialized_archived_document(self):
        doc, expected = _create_doc(is_archived=True)
        serialized = utils.serialize_document_for_offline(doc)
        eq_(expected, serialized)

    def test_bundle_for_product(self):
        p, expected_bundle = _create_product_bundle()

        bundle = utils.bundle_for_product(p, 'en-US')

        assert 'locales' in bundle
        eq_(1, len(bundle['locales']))
        eq_(expected_bundle['locale'], bundle['locales'].values()[0])

        assert 'topics' in bundle
        eq_(2, len(bundle['topics']))
        topics = sorted(bundle['topics'].values(), key=lambda t: t['slug'])
        eq_(expected_bundle['topic1'], topics[0])
        eq_(expected_bundle['topic2'], topics[1])

        assert 'docs' in bundle
        docs = sorted(bundle['docs'].values(), key=lambda d: d['title'])
        eq_(expected_bundle['doc1'], docs[0])
        eq_(expected_bundle['doc2'], docs[1])

        assert 'indexes' in bundle
        eq_(1, len(bundle['indexes']))
        assert 'en-US~moofirefox' in bundle['indexes']
        assert 'index' in bundle['indexes']['en-US~moofirefox']
        eq_(u'en-US~moofirefox', bundle['indexes']['en-US~moofirefox']['key'])

    def test_merge_bundles(self):
        p1, expected_bundle1 = _create_product_bundle()
        p2, expected_bundle2 = _create_product_bundle('yay')

        bundle1 = utils.bundle_for_product(p1, 'en-US')
        bundle2 = utils.bundle_for_product(p2, 'en-US')

        merged = utils.merge_bundles(bundle1, bundle2)

        assert 'locales' in merged
        eq_(1, len(merged['locales']))

        expected_locale = expected_bundle1['locale']
        expected_locale['products'] += expected_bundle2['locale']['products']

        eq_(expected_locale, merged['locales'][0])

        assert 'topics' in merged
        eq_(4, len(merged['topics']))

        merged['topics'].sort(key=lambda t: t['slug'])

        eq_(expected_bundle1['topic1'], merged['topics'][0])
        eq_(expected_bundle1['topic2'], merged['topics'][1])
        eq_(expected_bundle2['topic1'], merged['topics'][2])
        eq_(expected_bundle2['topic2'], merged['topics'][3])

        assert 'docs' in merged
        eq_(4, len(merged['docs']))

        merged['docs'].sort(key=lambda d: d['title'])

        eq_(expected_bundle1['doc1'], merged['docs'][0])
        eq_(expected_bundle1['doc2'], merged['docs'][1])
        eq_(expected_bundle2['doc1'], merged['docs'][2])
        eq_(expected_bundle2['doc2'], merged['docs'][3])

        eq_(2, len(merged['indexes']))
        merged['indexes'].sort(key=lambda i: i['key'])
        eq_('en-US~moofirefox', merged['indexes'][0]['key'])
        eq_('en-US~yayfirefox', merged['indexes'][1]['key'])

    def test_index_generation(self):
        p = product(title='firefox', save=True)
        t = topic(title='topic1', product=p, save=True)

        doc = document(title='firefox bookmarks',
                       locale='en-US', save=True)

        revision(is_approved=True,
                 summary='this is an article about firefox bookmarks',
                 document=doc, save=True)

        doc.products.add(p)
        doc.topics.add(t)

        doc2 = document(title='private browsing',
                        locale='en-US', save=True)

        revision(is_approved=True,
                 summary='this is an article about private browsing',
                 document=doc2, save=True)

        doc2.products.add(p)
        doc2.topics.add(t)

        bundle = utils.bundle_for_product(p, 'en-US')
        index = bundle['indexes']['en-US~firefox']['index']

        words_in_both = ('this', 'is', 'an', 'article', 'about')

        for word in words_in_both:
            assert word in index
            eq_(2, len(index[word]))
            eq_(2, len(index[word][0]))
            eq_(2, len(index[word][1]))

        assert 'firefox' in index
        eq_(1, len(index['firefox']))
        # 'firefox' should score higher than 'this' in this corpus.
        assert index['firefox'][0][1] > index['this'][0][1]

        assert 'bookmarks' in index
        eq_(1, len(index['bookmarks']))
        assert index['bookmarks'][0][1] > index['this'][0][1]

        assert 'private' in index
        eq_(1, len(index['private']))
        assert index['private'][0][1] > index['this'][0][1]

        assert 'browsing' in index
        eq_(1, len(index['browsing']))
        assert index['browsing'][0][1] > index['this'][0][1]

    def test_archived_articles_in_bundle(self):
        p = product(title='firefox', save=True)
        t1 = topic(title='topic1', product=p, save=True)

        doc = document(title='test', is_archived=True,
                       locale='en-US', save=True)
        revision(is_approved=True, document=doc, save=True)
        doc.products.add(p)
        doc.topics.add(t1)

        bundle = utils.bundle_for_product(p, 'en-US')
        eq_(1, len(bundle['docs']))
        doc = bundle['docs'].values()[0]
        eq_(True, doc['archived'])
        assert 'html' not in doc
        eq_(1, len(bundle['topics']))

    def test_redirect_articles_in_bundle(self):
        p = product(title='firefox', save=True)
        t1 = topic(title='topic1', product=p, save=True)

        doc = document(title='test2', locale='en-US', save=True)
        revision(is_approved=True,
                 document=doc,
                 save=True)

        doc.products.add(p)
        doc.topics.add(t1)

        doc = document(title='test', locale='en-US', save=True)
        revision(is_approved=True, document=doc, content=u'REDIRECT [[doc2]]',
                 save=True)

        doc.products.add(p)
        doc.topics.add(t1)

        bundle = utils.bundle_for_product(p, 'en-US')
        eq_(1, len(bundle['docs']))
        doc = bundle['docs'].values()[0]
        eq_('test2', doc['title'])

    def test_bogus_articles_in_bundle(self):
        p = product(title='firefox', save=True)
        topic(title='topic1', product=p, save=True)

        # A document with no revision should be fine.
        doc = document(title='test2', locale='en-US', save=True)

        bundle = utils.bundle_for_product(p, 'en-US')
        eq_(0, len(bundle['docs']))
        eq_(0, len(bundle['topics']))

        # An article with no html.
        revision(document=doc, content='', save=True)
        bundle = utils.bundle_for_product(p, 'en-US')
        eq_(0, len(bundle['docs']))
        eq_(0, len(bundle['topics']))

    def test_other_languages(self):
        p = product(title='firefox', save=True)
        t1 = topic(title='topic1', product=p, save=True)

        doc = document(title='test', locale='en-US', save=True)
        revision(is_approved=True, document=doc, save=True)

        doc.products.add(p)
        doc.topics.add(t1)

        translated_doc = document(title=u'测试', locale='zh-CN', parent=doc,
                                  save=True)
        revision(is_approved=True, document=translated_doc, save=True)

        bundle = utils.bundle_for_product(p, 'zh-CN')
        eq_(1, len(bundle['docs']))

        doc = bundle['docs'].values()[0]
        eq_(u'测试', doc['title'])

@ -0,0 +1,132 @@
import json

from nose import SkipTest
from nose.tools import eq_

from django.conf import settings

from kitsune.offline.cron import build_kb_bundles
from kitsune.products.tests import product, topic
from kitsune.sumo.tests import TestCase
from kitsune.sumo.urlresolvers import reverse
from kitsune.sumo.redis_utils import RedisError, redis_client
from kitsune.wiki.models import Document
from kitsune.wiki.tests import document, revision


class OfflineViewTests(TestCase):

    def _create_bundle(self, prod, locale=settings.WIKI_DEFAULT_LANGUAGE):
        p = product(title=prod, save=True)
        t = topic(title='topic1', product=p, save=True)

        if locale == settings.WIKI_DEFAULT_LANGUAGE:
            parent = lambda i: None
        else:
            def parent(i):
                d = document(title='test {0} {1}'.format(locale, i),
                             locale=settings.WIKI_DEFAULT_LANGUAGE,
                             save=True)

                d.products.add(p)
                d.topics.add(t)
                d.save()

                revision(summary='test article {0}'.format(i),
                         document=d,
                         is_approved=True,
                         save=True)
                return d

        for i in xrange(5):
            d = document(title='test {0} {1}'.format(locale, i),
                         locale=locale, save=True)
            revision(summary='test article {0}'.format(i),
                     document=d,
                     is_approved=True,
                     save=True)

            d.products.add(p)
            d.topics.add(t)
            d.parent = parent(i)
            d.save()

        try:
            build_kb_bundles((prod, ))
        except RedisError:
            pass  # Do nothing, as we should gracefully fall back.

    def test_get_single_bundle(self):
        self._create_bundle('firefox', 'en-US')

        url = reverse('offline.get_bundle') + '?locale=en-US&product=firefox'
        resp = self.client.get(url, follow=True)
        data = json.loads(resp.content)

        assert 'locales' in data
        eq_(1, len(data['locales']))
        eq_([{u'slug': u'firefox', u'name': u'firefox'}],
            data['locales'][0]['products'])
        eq_('en-US', data['locales'][0]['key'])

        assert 'topics' in data
        eq_(1, len(data['topics']))
        eq_('en-US~firefox~topic1', data['topics'][0]['key'])
        eq_(5, len(data['topics'][0]['docs']))

        assert 'docs' in data
        eq_(5, len(data['docs']))

        assert 'indexes' in data

    def test_get_bundle_bad_request(self):
        url = reverse('offline.get_bundle')
        resp = self.client.get(url, follow=True)
        eq_(400, resp.status_code)
        data = json.loads(resp.content)
        eq_('bad request', data['error'])

    def test_get_bundle_not_found(self):
        self._create_bundle('firefox', 'en-US')
        url = reverse('offline.get_bundle') + '?locale=fr&product=redpanda'
        resp = self.client.get(url, follow=True)
        eq_(404, resp.status_code)
        data = json.loads(resp.content)
        eq_('not found', data['error'])

    def test_get_bundle_meta(self):
        self._create_bundle('firefox', 'en-US')
        url = (reverse('offline.bundle_meta') +
               '?locale=en-US&product=firefox')

        try:
            redis_client('default')
        except RedisError:
            raise SkipTest

        resp = self.client.get(url, follow=True)

        meta = json.loads(resp.content)
        hash1 = meta['hash']
        assert resp['Content-Type'] == 'application/json'

        assert len(hash1) == 40  # A sha1 hexdigest should be 40 chars long.

        doc = Document.objects.all()[0]  # Getting one document is okay.
        doc.title = 'some different title!'
        doc.save()

        # Rebuild the bundle, as the content is different now.
        build_kb_bundles(('firefox', ))

        # Test that the hash has changed.
        resp = self.client.get(url, follow=True)
        assert hash1 != json.loads(resp.content)['hash']

    def test_get_language(self):
        self._create_bundle('firefox', 'en-US')

        resp = self.client.get(reverse('offline.get_languages'))
        meta = json.loads(resp.content)

        assert {'id': 'en-US', 'name': 'English'} in meta['languages']

@ -0,0 +1,9 @@
from django.conf.urls import patterns, url

# Note that these URLs are not run through the locale middleware.
# http://<base>/offline/get-bundle ... etc.
urlpatterns = patterns('kitsune.offline.views',
    url(r'^/get-bundle$', 'get_bundle', name='offline.get_bundle'),
    url(r'^/bundle-meta$', 'bundle_meta', name='offline.bundle_meta'),
    url(r'^/get-languages$', 'get_languages', name='offline.get_languages')
)

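With the (r'^offline', ...) include added to the main urls.py below, these patterns surface as /offline/get-bundle, /offline/bundle-meta and /offline/get-languages. A minimal sketch of a client fetching a bundle, using Python 2's urllib2; the host name is an assumption for illustration:

    import json
    import urllib2

    base = 'https://support.mozilla.org/offline'  # hypothetical host
    bundle = json.load(urllib2.urlopen(
        base + '/get-bundle?locale=en-US&product=firefox'))
    # The payload mirrors merge_bundles' output: lists of locales,
    # topics, docs and indexes.
    print len(bundle['docs'])
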
@ -0,0 +1,214 @@
from hashlib import sha1
import json
import re
import time

from tower import ugettext as _

from django.conf import settings

from kitsune.offline.index import (
    TFIDFIndex,
    find_word_locations_with_spaces,
    find_word_locations_without_spaces
)
from kitsune.wiki.config import TROUBLESHOOTING_CATEGORY, HOW_TO_CATEGORY
from kitsune.wiki.models import Document


_noscript_regex = re.compile(r'<noscript>.*?</noscript>', flags=re.DOTALL)


def bundle_key(locale, product_slug):
    """The key for a bundle as stored in the client side's IndexedDB.

    The arguments to this function must be strings. This key is used
    for the index.
    """
    return locale + '~' + product_slug


def doc_key(locale, doc_slug):
    """The key for a document as stored in the client side's IndexedDB.

    The arguments to this function must be strings.
    """
    return locale + '~' + doc_slug


def topic_key(locale, product_slug, topic_slug):
    """The key for a topic as stored in the client side's IndexedDB.

    The arguments to this function must be strings.
    """
    return locale + '~' + product_slug + '~' + topic_slug


def redis_bundle_name(locale, product_slug):
    return 'osumo:' + bundle_key(locale.lower(), product_slug.lower())


def transform_html(dochtml):
    """Transforms the HTML into something we want to serve in the app.

    Strips out of the document HTML the things the offline app does not
    need. We could also do this in WikiParser, but this is probably
    easier for now.
    """
    # Strip out all the <noscript> images.
    dochtml = _noscript_regex.sub('', dochtml)

    return dochtml


def serialize_document_for_offline(doc):
    """Grabs the document as a dictionary.

    This method returns a document that is ready to be inserted into
    the client-side database.
    """

    # To save some space, the document HTML and summary are not returned
    # for archived articles, as they are already out of date.
    if doc.is_archived:
        return {
            'key': doc_key(doc.locale, doc.slug),
            'title': doc.title,
            'archived': True,
            'slug': doc.slug
        }
    else:
        updated = int(time.mktime(doc.current_revision.created.timetuple()))
        return {
            'key': doc_key(doc.locale, doc.slug),
            'title': doc.title,
            'html': transform_html(doc.html),
            'updated': updated,
            'slug': doc.slug,
            'id': doc.id,
            'archived': False
        }


def bundle_for_product(product, locale):
    """Gets an entire bundle for a product in a locale."""
    bundle = {}

    # Put a new locale into the database.
    bundle['locales'] = {}
    bundle['locales'][locale] = {
        'key': locale,
        'name': settings.LANGUAGES[locale.lower()],
        'products': [{'slug': product.slug, 'name': product.title}]
    }

    # We need dictionaries, as we need to merge everything together.
    bundle['topics'] = topics = {}
    bundle['docs'] = docs_bundle = {}
    bundle['indexes'] = {}

    index_builder = TFIDFIndex()

    docs = Document.objects.filter(
        locale=locale,
        is_template=False,
        category__in=(TROUBLESHOOTING_CATEGORY, HOW_TO_CATEGORY)
    )

    # Since any language derived from English will not have a product,
    # we must look at its parent's product.
    if locale == settings.WIKI_DEFAULT_LANGUAGE:
        docs = docs.filter(products__id=product.id)
    else:
        docs = docs.filter(parent__products__id=product.id)

    if locale in settings.LANGUAGES_WITHOUT_SPACES:
        find_word_locations = find_word_locations_without_spaces
    else:
        find_word_locations = find_word_locations_with_spaces

    for doc in docs:
        if not doc.current_revision or not doc.html or doc.redirect_url():
            # These documents have no approved revision, or are redirects.
            # Skip them.
            continue

        serialized_doc = serialize_document_for_offline(doc)

        # Only non-archived documents need to be indexed.
        if not doc.is_archived:
            # We only index the title and the summary, as otherwise the
            # corpus is too big. We also boost the score of the title.
            texts = [(doc.title, 1.2), (doc.current_revision.summary, 1)]
            index_builder.feed(doc.id, texts, find_word_locations)

        docs_bundle[serialized_doc['key']] = serialized_doc

        # Now we need to populate the topics for this locale.
        for t in doc.get_topics():
            if t.product.id == product.id:
                topic = topics.setdefault(t.slug, {})
                if not topic:  # The topic has not been set up yet.
                    topic['key'] = topic_key(locale, product.slug, t.slug)
                    # The topic title is not translated, so we must use
                    # gettext to get the translation for it.
                    topic['name'] = _(t.title)
                    topic['children'] = [st.slug for st in t.subtopics.all()]
                    topic['docs'] = []
                    topic['product'] = product.slug
                    topic['slug'] = t.slug
                topic['docs'].append(doc.slug)

    # The bundle needs an index!
    bundlekey = bundle_key(locale, product.slug)
    bundle['indexes'][bundlekey] = {}
    bundle['indexes'][bundlekey]['key'] = bundlekey
    # The client side will search through this index.
    bundle['indexes'][bundlekey]['index'] = index_builder.offline_index()

    return bundle


def merge_bundles(*bundles):
    """Merges multiple bundles generated by bundle_for_product into one."""
    merged_bundle = {}
    for bundle in bundles:
        if 'locales' in bundle:
            merged_locales = merged_bundle.setdefault('locales', {})
            for k, locale in bundle['locales'].iteritems():
                merged_locale = merged_locales.setdefault(k, {})
                if merged_locale:
                    merged_locale['products'].extend(locale['products'])
                else:
                    merged_locale.update(locale)

        for key in ('topics', 'docs', 'indexes'):
            if key in bundle:
                merged_bundle.setdefault(key, {}).update(bundle[key])

    # The client-side database format expects all of these as lists.
    for key in ('locales', 'topics', 'docs', 'indexes'):
        if key in merged_bundle:
            merged_bundle[key] = merged_bundle[key].values()

    return merged_bundle


def insert_bundle_into_redis(redis, product, locale, bundle):
    """Puts a bundle into Redis.

    This is used in both the cron job and the view.
    """
    bundle = json.dumps(bundle)
    # Track the version with a hash rather than a timestamp, as there may
    # be instances when nothing was updated between the last generation
    # and now.
    bundle_hash = sha1(bundle).hexdigest()

    name = redis_bundle_name(locale.lower(), product.lower())
    redis.hset(name, 'hash', bundle_hash)
    redis.hset(name, 'bundle', bundle)
    redis.hset(name, 'updated', time.time())

    return bundle, bundle_hash

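Each bundle therefore lives in Redis as one hash with three fields. A minimal sketch of inspecting what insert_bundle_into_redis stored for English Firefox, using the same redis_client helper as the rest of this commit:

    from kitsune.sumo.redis_utils import redis_client

    redis = redis_client('default')
    # Keys follow redis_bundle_name: 'osumo:<locale>~<product>', lowercased.
    print redis.hget('osumo:en-us~firefox', 'hash')     # sha1 of the JSON
    print redis.hget('osumo:en-us~firefox', 'updated')  # unix timestamp
    # The 'bundle' field holds the JSON payload itself.
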
@ -0,0 +1,84 @@
import json

from django.conf import settings
from django.http import (HttpResponse,
                         HttpResponseBadRequest,
                         HttpResponseNotFound)

from kitsune.offline.utils import redis_bundle_name
from kitsune.sumo.decorators import cors_enabled
from kitsune.sumo.redis_utils import redis_client, RedisError


INVALID_LOCALE = '{"error": "not found", "reason": "invalid locale"}'
NOT_FOUND = '{"error": "not found", "reason": "unknown"}'
BAD_REQUEST = '{"error": "bad request", "reason": "incomplete request"}'


@cors_enabled('*')
def get_bundle(request):
    if 'locale' not in request.GET or 'product' not in request.GET:
        return HttpResponseBadRequest(BAD_REQUEST, mimetype='application/json')

    locale = request.GET['locale']
    product = request.GET['product']
    if locale.lower() not in settings.LANGUAGES:
        return HttpResponseNotFound(INVALID_LOCALE,
                                    mimetype='application/json')

    name = redis_bundle_name(locale, product)
    try:
        redis = redis_client('default')
    except RedisError:
        return HttpResponse('not available yet', status=503)
    else:
        bundle = redis.hget(name, 'bundle')
        bundle_hash = redis.hget(name, 'hash')

    if bundle is None:
        return HttpResponseNotFound(NOT_FOUND, mimetype='application/json')

    response = HttpResponse(bundle, mimetype='application/json')
    response['Content-Length'] = len(bundle)
    response['X-Content-Hash'] = bundle_hash
    response['Access-Control-Expose-Headers'] = \
        'Content-Length, X-Content-Hash'

    return response


@cors_enabled('*')
def bundle_meta(request):
    """This view is responsible for update checking."""
    if 'locale' not in request.GET or 'product' not in request.GET:
        return HttpResponseBadRequest(BAD_REQUEST, mimetype='application/json')

    locale = request.GET['locale']
    product = request.GET['product']

    name = redis_bundle_name(locale, product)
    try:
        redis = redis_client('default')
    except RedisError:
        return HttpResponse('{"error": "no bundles available"}',
                            mimetype='application/json',
                            status=503)

    bundle_hash = redis.hget(name, 'hash')

    if bundle_hash:
        u = {'hash': bundle_hash}
        return HttpResponse(json.dumps(u), mimetype='application/json')
    else:
        return HttpResponseNotFound(NOT_FOUND, mimetype='application/json')


@cors_enabled('*')
def get_languages(request):
    """Returns the supported languages."""
    languages = []
    for code, name in settings.LANGUAGE_CHOICES:
        languages.append({'id': code, 'name': name})

    return HttpResponse(json.dumps({'languages': languages}),
                        mimetype='application/json')

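bundle_meta is the cheap update check: a client remembers the hash of the bundle it downloaded (get_bundle also exposes it as the X-Content-Hash header) and re-downloads only when the hash changes. A minimal sketch of that comparison; stored_hash is a hypothetical value the client persisted:

    def bundle_needs_update(stored_hash, meta):
        # meta is the decoded JSON body of /offline/bundle-meta, e.g.
        # {'hash': '<40-char sha1>'}. A hash is compared instead of a
        # timestamp because a regeneration can produce identical content.
        return meta['hash'] != stored_hash
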
@ -251,6 +251,15 @@ ES_PLUGIN_ANALYZERS = [
 ES_USE_PLUGINS = False

+# These are for the indexer for the offline SUMO app.
+LANGUAGES_WITHOUT_SPACES = (
+    'zh-CN',
+    'zh-TW',
+    'ja',
+    'ko',
+    'my'
+)
+
 TEXT_DOMAIN = 'messages'

 SITE_ID = 1

@ -309,6 +318,7 @@ SUPPORTED_NONLOCALES = (
     'api',
     'favicon.ico',
     'media',
+    'offline',
     'postcrash',
     'robots.txt',
     'services',

@ -486,6 +496,7 @@ INSTALLED_APPS = (
     'kitsune.karma',
     'kitsune.tags',
     'kitsune.kpi',
+    'kitsune.offline',
     'kitsune.products',
     'rest_framework',

@ -66,3 +66,33 @@ def json_view(f):
             })
         return http.HttpResponseServerError(blob, content_type=JSON)
     return _wrapped
+
+
+def cors_enabled(origin, methods=['GET']):
+    """A simple decorator to enable CORS."""
+    def decorator(f):
+        @wraps(f)
+        def decorated_func(request, *args, **kwargs):
+            if request.method == 'OPTIONS':
+                # Preflight.
+                if ('HTTP_ACCESS_CONTROL_REQUEST_METHOD' in request.META and
+                        'HTTP_ACCESS_CONTROL_REQUEST_HEADERS' in request.META):
+
+                    response = http.HttpResponse()
+                    response['Access-Control-Allow-Methods'] = ", ".join(
+                        methods)
+
+                    # TODO: We might need to change this.
+                    response['Access-Control-Allow-Headers'] = \
+                        request.META['HTTP_ACCESS_CONTROL_REQUEST_HEADERS']
+                else:
+                    return http.HttpResponseBadRequest()
+            elif request.method in methods:
+                response = f(request, *args, **kwargs)
+            else:
+                return http.HttpResponseBadRequest()
+
+            response['Access-Control-Allow-Origin'] = origin
+            return response
+        return decorated_func
+    return decorator

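A minimal usage sketch of the decorator, mirroring how the offline views above apply it; my_view is hypothetical:

    from django import http
    from kitsune.sumo.decorators import cors_enabled

    @cors_enabled('*', methods=['GET'])
    def my_view(request):
        # Any origin may read this response; requests with methods outside
        # `methods` (other than the CORS preflight OPTIONS) get a 400 back.
        return http.HttpResponse('{"ok": true}', mimetype='application/json')
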
@ -35,6 +35,7 @@ urlpatterns = patterns('',
     (r'^products', include('kitsune.products.urls')),
     (r'^announcements', include('kitsune.announcements.urls')),
     (r'^badges/', include('kitsune.kbadge.urls')),
+    (r'^offline', include('kitsune.offline.urls')),

     # Kitsune admin (not Django admin).
     (r'^admin/', include(admin.site.urls)),

@ -34,6 +34,7 @@ HOME = /tmp
 0 5 * * * {{ cron }} reindex_kb
 0 6 * * * {{ cron }} process_exit_surveys
 0 1 * * * {{ cron }} update_l10n_coverage_metrics
+45 4 * * * {{ cron }} build_kb_bundles

 # Twice per week.
 #05 01 * * 1,4 {{ cron }} update_weekly_votes