[bug 538285] Sphinx integration complete with commands for starting/stopping server, reindexing, tests, etc.

This commit is contained in:
Dave Dash 2010-01-22 16:52:41 -08:00
Родитель b98decc1c1
Коммит a7460f7a1b
13 изменённых файлов: 533 добавлений и 1 удалений

Просмотреть файл

@ -44,3 +44,16 @@ COLLECTION_AUTHOR_CHOICES = {
COLLECTION_ROLE_PUBLISHER: 'Publisher',
COLLECTION_ROLE_ADMIN: 'Admin',
}
# Addon types: integer ids, one per kind of addon.
# NOTE(review): ADDON_ANY (-1) looks like a "match any type" wildcard rather
# than a real type id -- confirm against callers.
ADDON_ANY = -1
ADDON_EXTENSION = 1
ADDON_THEME = 2
ADDON_DICT = 3
ADDON_SEARCH = 4
ADDON_LPAPP = 5
ADDON_LPADDON = 6
ADDON_PLUGIN = 7
ADDON_API = 8 # not actually a type but used to identify extensions + themes
ADDON_PERSONA = 9

0
apps/search/__init__.py Normal file
Просмотреть файл

154
apps/search/client.py Normal file
Просмотреть файл

@ -0,0 +1,154 @@
import re
from django.conf import settings
from django.utils import translation
import amo
from .sphinxapi import SphinxClient
import sphinxapi as sphinx
from .utils import convert_version, crc32
# Matches a bare "major.minor" version string (e.g. "3.7") and nothing else.
# Client.restrict_version uses it to decide when the lower bound should be
# widened to include pre-releases.
m_dot_n_re = re.compile(r'^\d+\.\d+$')
# Application id that Client.query adds to every 'app' filter, because search
# engines work on all apps.
SEARCH_ENGINE_APP = 99
class Client(object):
    """
    A search client that queries sphinx for addons.

    One SphinxClient is created per instance and stored on ``self.sphinx``.

    NOTE(review): every filter set by ``query``/``restrict_version`` is
    applied to that shared SphinxClient and is never reset here, so reusing
    one Client for several queries presumably stacks filters -- confirm
    against the sphinxapi behaviour before reusing instances.
    """

    def __init__(self):
        """Create the SphinxClient and point it at the configured server
        (settings.SPHINX_HOST / settings.SPHINX_PORT)."""
        self.sphinx = SphinxClient()
        self.sphinx.SetServer(settings.SPHINX_HOST, settings.SPHINX_PORT)

    def restrict_version(self, version):
        """
        Restrict a search to a specific version.

        We can make the search a little fuzzy so that 3.7 includes
        pre-releases.
        This is done by using a high_int and a low_int.  For alpha/pre-release
        searches we assume the search needs to be specific.

        ``version`` is a version string understood by ``convert_version``.
        If it cannot be converted (``convert_version`` returns a falsy
        value) no version filter is applied.
        """
        sc = self.sphinx

        high_int = convert_version(version)
        low_int = high_int

        # A bare "m.n" version also accepts that release's pre-releases, so
        # the lower bound drops to the "m.napre" value.
        if m_dot_n_re.match(version):
            low_int = convert_version(version + "apre")

        # SetFilterRange requires a max and min even if you just want a
        # lower-bound.  To work-around this limitation we set max_ver's
        # upperbound to be ridiculously large (10x the high_int).
        if high_int:
            sc.SetFilterRange('max_ver', low_int, 10 * high_int)
            sc.SetFilterRange('min_ver', 0, high_int)

    def query(self, term, **kwargs):
        """
        Queries sphinx for a term, and parses specific options.

        The following kwargs will do things:

        limit: limits the number of results.  Default is 2000.
        admin: if present we are in "admin" mode which lets you find addons
            without files and overrides any 'xenophobia' settings.
        type: specifies an addon_type by id
        sort: specifies a specific sort mode.  Acceptable values are
            'newest', 'updated', 'name', 'averagerating' (or its alias
            'bayesianrating') and 'weeklydownloads'.  If no sort mode is
            specified we use relevance.
        app: specifies which application_id to limit searches by
        version: specifies which version of an app (as specified) that
            addons need to be compatible with
        xenophobia: restricts addons to the user's own locale
        locale: restricts addons to the specified locale

        A ``version:X.Y`` token inside ``term`` is stripped from the term
        and used as the version restriction; it takes precedence over the
        ``version`` kwarg.

        Returns ``result['matches']`` from the sphinx query, or None when
        the query yields a falsy result.
        """
        sc = self.sphinx

        # Setup some default parameters for the search.
        fields = "addon_id, app, category"
        limit = kwargs.get('limit', 2000)

        sc.SetSelect(fields)
        sc.SetFieldWeights({'name': 4})
        sc.SetLimits(0, limit)
        sc.SetFilter('inactive', (0,))

        # STATUS_DISABLED and 0 (which likely means null) are filtered from
        # search
        sc.SetFilter('status', (0, amo.STATUS_DISABLED), True)

        # Unless we're in admin mode, or we're looking at stub entries,
        # everything must have a file.
        if (('admin' not in kwargs) and
            ('type' not in kwargs or kwargs['type'] != amo.ADDON_PERSONA)):
            sc.SetFilter('num_files', (0,), True)

        # Sorting
        if 'sort' in kwargs:
            sort = kwargs['sort']
            if sort == 'newest':
                sc.SetSortMode(sphinx.SPH_SORT_ATTR_DESC, 'created')
            elif sort == 'updated':
                sc.SetSortMode(sphinx.SPH_SORT_ATTR_DESC, 'modified')
            elif sort == 'name':
                sc.SetSortMode(sphinx.SPH_SORT_ATTR_ASC, 'name_ord')
            elif sort in ('averagerating', 'bayesianrating'):
                sc.SetSortMode(sphinx.SPH_SORT_ATTR_DESC, 'averagerating')
            elif sort == 'weeklydownloads':
                sc.SetSortMode(sphinx.SPH_SORT_ATTR_DESC, 'weeklydownloads')
        else:
            # We want to boost public addons, and addons in your native
            # language.
            expr = ("@weight + IF(status=%d, 30, 0) + "
                    "IF(locale_ord=%d, 29, 0)") % (
                        amo.STATUS_PUBLIC,
                        crc32(translation.get_language()))
            sc.SetSortMode(sphinx.SPH_SORT_EXPR, expr)

        # We should always have an 'app' except for the admin.
        if 'app' in kwargs:
            # We add SEARCH_ENGINE_APP since search engines work on all apps.
            sc.SetFilter('app', (kwargs['app'], SEARCH_ENGINE_APP))

        # Version filtering.
        # The pattern must be a raw string: in a normal string literal '\b'
        # is a backspace character, not a word boundary.  re.search is used
        # so the token is found anywhere in the term, not only at its start.
        match = re.search(r'\bversion:([0-9.]+)', term)

        if match:
            term = term.replace(match.group(0), '')
            self.restrict_version(match.group(1))
        elif 'version' in kwargs:
            self.restrict_version(kwargs['version'])

        # Xenophobia - restrict to just my language.
        if 'xenophobia' in kwargs and 'admin' not in kwargs:
            kwargs['locale'] = translation.get_language()

        # Locale filtering
        if 'locale' in kwargs:
            sc.SetFilter('locale_ord', (crc32(kwargs['locale']),))

        # XXX - Todo:
        # In the interest of having working code sooner than later, we're
        # skipping the following... for now:
        #   * Type filter
        #   * Platform filter
        #   * Date filter
        #   * GUID filter
        #   * Category filter
        #   * Tag filter
        #   * Num apps filter
        #   * Logging

        result = sc.Query(term)

        if result:
            return result['matches']
        return None

Просмотреть файл

@ -0,0 +1,110 @@
[
{
"pk": 1,
"model": "files.platform",
"fields": {
"icontype": "",
"modified": "2008-04-07 08:16:55",
"created": "2007-03-05 13:09:27"
}
},
{
"pk": 1,
"model": "addons.addontype",
"fields": {
"modified": "2009-02-06 08:42:28",
"created": "2006-08-21 23:53:19"
}
},
{
"pk": 15000,
"model": "translations.translation",
"fields": {
"locale": "en-US",
"created": "2007-03-05 13:10:42",
"id": 15000,
"modified": "2009-03-26 07:41:10",
"localized_string": "Delicious Bookmarks"
}
},
{
"pk": 29034,
"model": "translations.translation",
"fields": {
"locale": "en-US",
"created": "2007-03-05 13:10:42",
"id": 15002,
"modified": "2009-04-29 11:20:57",
"localized_string": "This extension integrates your browser with Delicious (http://delicious.com), the leading social bookmarking service on the Web. It does this by augmenting the bookmarking functionality in Firefox with an enhanced experience that offers the following advantages:\r\n\r\n- Search and browse your Delicious bookmarks\r\n- Keep up to date on your Network and Links For You\r\n- Access your bookmarks from any computer at any time\r\n- Keep your bookmarks organized using tags\r\n- Share your bookmarks with friends or anyone on the Web\r\n- Import your existing Firefox bookmarks\r\n\r\nWe've also included a Classic Mode for those who want the basic Delicious buttons without any sync or search features. This option is available when you install the add-on, and can be accessed later from the Delicious menu in Firefox.\r\n\r\nFor a product tour and more details about this extension, please visit:\r\nhttp://delicious.com/help/quicktour/firefox\r\n\r\nFor product feedback, commentary & enhancement requests, please post to our discussion group, found here:\r\nhttp://tech.groups.yahoo.com/group/delicious-firefox-extension/\r\n\r\nor try the Delicious support forums:\r\n\r\nhttp://support.delicious.com/\r\n"
}
},
{
"pk": 3615,
"model": "addons.addon",
"fields": {
"dev_agreement": 1,
"publicstats": 0,
"modified": "2009-10-21 09:58:52",
"weeklydownloads": 20178,
"sharecount": 5,
"adminreview": 0,
"average_daily_downloads": 5928,
"show_beta": 1,
"trusted": 1,
"averagerating": "3.02",
"binary": 0,
"totalreviews": 389,
"viewsource": 0,
"externalsoftware": 0,
"average_daily_users": 493241,
"totaldownloads": 5175276,
"icontype": "image/png",
"status": 4,
"description": 15002,
"sitespecific": 1,
"nominationdate": "2009-03-26 07:41:12",
"wants_contributions": 0,
"prerelease": 0,
"guid": "{2fa4ed95-0317-4c6a-a74c-5f3e3912c1f9}",
"bayesianrating": 3.06941,
"name": 15000,
"created": "2006-10-23 12:57:41",
"paypal_id": "",
"annoying": 0,
"inactive": 0,
"addontype": 1,
"higheststatus": 4,
"defaultlocale": "en-US"
}
},
{
"pk": 24007,
"model": "versions.version",
"fields": {
"license": null,
"created": "2006-10-23 12:57:41",
"approvalnotes": "",
"modified": "2006-10-23 19:22:18",
"version": "1.0.43",
"addon": 3615
}
},
{
"pk": 11993,
"model": "files.file",
"fields": {
"status": 4,
"codereview": 0,
"hash": "sha256:5b5aaf7b38e332cc95d92ba759c01c3076b53a840f6c16e01dc272eefcb29566",
"created": "2007-03-05 13:19:15",
"modified": "2007-04-04 12:30:11",
"filename": "del.icio.us_bookmarks-1.0.43-fx.xpi",
"platform": 1,
"version": 24007,
"datestatuschanged": null,
"size": 169
}
}
]

Просмотреть файл

Просмотреть файл

Просмотреть файл

@ -0,0 +1,21 @@
import os
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
class Command(BaseCommand):
    """Management command that runs the sphinx indexer over all indexes."""
    help = ("Runs the indexer script for sphinx as defined in "
            " settings.SPHINX_INDEXER")
    requires_model_validation = False

    def handle(self, **options):
        """
        Replace the current process with the sphinx indexer.

        The indexer is invoked with ``--all --rotate --config <path>``,
        where the path is settings.SPHINX_CONFIG_PATH (the same setting the
        search utils use).  os.execvp does not return on success.

        Raises CommandError if the indexer program cannot be executed.
        """
        try:
            os.execvp(settings.SPHINX_INDEXER,
                      (settings.SPHINX_INDEXER, '--all', '--rotate',
                       '--config', settings.SPHINX_CONFIG_PATH))
        except OSError:
            raise CommandError('You appear not to have the %r program '
                               'installed or on your path' %
                               settings.SPHINX_INDEXER)

3
apps/search/models.py Normal file
Просмотреть файл

@ -0,0 +1,3 @@
from django.db import models
# Create your models here.

71
apps/search/tests.py Normal file
Просмотреть файл

@ -0,0 +1,71 @@
"""
Tests for the search (sphinx) app.
"""
import os
import shutil
import time
from django.test import TransactionTestCase
from nose.tools import eq_
from .utils import start_sphinx, stop_sphinx, reindex, convert_version
from .client import Client as SearchClient
def test_convert_version():
    """Check that convert_version orders neighbouring versions as expected."""
    def compare(left, right):
        # Three-way comparison of the converted values: 1, -1 or 0.
        a = convert_version(left)
        b = convert_version(right)
        return (a > b) - (a < b)

    versions = ['1.9.0a1pre', '1.9.0a1', '1.9.1.b5', '1.9.1.b5', '1.9.1pre',
                '1.9.1', '1.9.0']
    # expected[i] is the comparison of versions[i] against versions[i + 1].
    expected = [-1, -1, 0, -1, -1, 1]

    for index, outcome in enumerate(expected):
        eq_(compare(versions[index], versions[index + 1]), outcome)
class SphinxTest(TransactionTestCase):
    """
    End-to-end test: index the fixture with sphinx, start searchd and query
    it through the search client.
    """

    fixtures = ["search/sphinx.json"]
    sphinx = True

    def setUp(self):
        """Recreate the scratch directories, reindex and start searchd."""
        os.environ['DJANGO_ENVIRONMENT'] = 'test'

        scratch_dirs = ('/tmp/data/sphinx', '/tmp/log/searchd')

        # Wipe whatever a previous run left behind, then start from empty
        # directories.
        for directory in scratch_dirs:
            if os.path.exists(directory):
                shutil.rmtree(directory)
        for directory in scratch_dirs:
            os.makedirs(directory)

        reindex()
        start_sphinx()
        # Pause for a second after launching searchd before tests query it.
        time.sleep(1)

    def tearDown(self):
        """Stop the searchd started in setUp."""
        stop_sphinx()

    def test_sphinx_indexer(self):
        """
        This tests that sphinx will properly index an addon.
        """
        # we have to specify to sphinx to look at test_ dbs
        client = SearchClient()
        hits = client.query('Delicious')
        assert hits[0]['attrs']['addon_id'] == 3615, \
            "Didn't get the addon ID I wanted."

139
apps/search/utils.py Normal file
Просмотреть файл

@ -0,0 +1,139 @@
import subprocess
import zlib
import re
from django.conf import settings
from amo import constants as const
def reindex():
    """
    Run the sphinx indexer over all indexes and wait for it to finish.

    Note this is only to be used in dev and test environments.
    """
    indexer_cmd = [
        settings.SPHINX_INDEXER,
        '--all',
        '--rotate',
        '--config',
        settings.SPHINX_CONFIG_PATH,
    ]
    subprocess.call(indexer_cmd)
def start_sphinx():
    """
    Launch the sphinx search daemon without waiting for it to exit.

    Note this is only to be used in dev and test environments.
    """
    searchd_cmd = [
        settings.SPHINX_SEARCHD,
        '--config',
        settings.SPHINX_CONFIG_PATH,
    ]
    subprocess.Popen(searchd_cmd)
def stop_sphinx():
    """
    Ask the sphinx search daemon to stop and wait for the command to return.

    Note this is only to be used in dev and test environments.
    """
    stop_cmd = [
        settings.SPHINX_SEARCHD,
        '--stop',
        '--config',
        settings.SPHINX_CONFIG_PATH,
    ]
    subprocess.call(stop_cmd)
# Splits a version string into its components for convert_version.
# The alpha/beta group uses the character class [ab]; the previous [a|b]
# also accepted a literal '|' character.
pattern = re.compile(r"""(\d+)     # major (x in x.y)
                         \.(\d+)   # minor1 (y in x.y)
                         \.?(\d+)? # minor2 (z in x.y.z)
                         \.?(\d+)? # minor3 (w in x.y.z.w)
                         ([ab]?)   # alpha/beta
                         (\d*)     # alpha/beta version
                         (pre)?    # pre release
                         (\d)?     # pre release version""", re.VERBOSE)
# Finds a "<digits>+" component (e.g. the "1+" in "2.1+"); group 1 is the
# whole component including the plus sign, group 2 is just the digits.
pattern_plus = re.compile(r'((\d+)\+)')
def convert_type(type):
    """
    Map an addon type name (singular or plural) to its addon type constant.

    Recognised names are 'extension(s)', 'theme(s)', 'dict(s)',
    'language(s)' and 'plugin(s)'.  Returns None for any other value.

    The parameter name shadows the ``type`` builtin; it is kept so existing
    keyword callers continue to work.
    """
    if type in ('extension', 'extensions'):
        # The constant is ADDON_EXTENSION (singular); ADDON_EXTENSIONS does
        # not exist in the constants module.
        return const.ADDON_EXTENSION
    elif type in ('theme', 'themes'):
        return const.ADDON_THEME
    elif type in ('dict', 'dicts'):
        return const.ADDON_DICT
    elif type in ('language', 'languages'):
        return const.ADDON_LPAPP
    elif type in ('plugin', 'plugins'):
        return const.ADDON_PLUGIN
    return None
def convert_version(version_string):
    """
    Turn a version string into a single integer usable for comparisons and
    indexing.

    Returns None when the string does not start with something that looks
    like a "major.minor" version.
    """
    # '.x' and '.*' are wildcards; both are treated as '.99'.
    normalized = version_string.replace('.x', '.99').replace('.*', '.99')

    # A trailing-plus component means "the next one, pre-release 0":
    # e.g. 2.1+ => 2.2pre0.
    plus = pattern_plus.search(normalized)
    if plus:
        old, ver = plus.groups()
        normalized = normalized.replace(old, "%dpre0" % (int(ver) + 1))

    # Break the version into components.
    #
    # e.g. 3.7.2.1b3pre3 becomes:
    #     major   => 3
    #     minor1  => 7
    #     minor2  => 2
    #     minor3  => 1
    #     alpha   => b => 1
    #     alpha_n => 3
    #     pre     => 0
    #     pre_n   => 3
    parts = pattern.match(normalized)
    if not parts:
        return None

    (major, minor1, minor2, minor3, alpha, alpha_n, pre,
     pre_n) = parts.groups()

    # Alpha sorts before beta, which sorts before a plain release; a
    # pre-release sorts before the corresponding non-pre version.  The
    # numbers are chosen for sorting only, they carry no deeper meaning.
    alpha_rank = {'a': 0, 'b': 1}.get(alpha, 2)
    pre_rank = 0 if pre == 'pre' else 1

    components = (
        int(major),
        int(minor1),
        int(minor2) if minor2 else 0,
        int(minor3) if minor3 else 0,
        alpha_rank,
        int(alpha_n) if alpha_n else 0,
        pre_rank,
        int(pre_n) if pre_n else 0,
    )

    # Recombine everything into a single large integer.
    return int("%02d%02d%02d%02d%d%02d%d%02d" % components)
def crc32(x):
    """Return the CRC-32 checksum of x masked to an unsigned 32-bit value."""
    checksum = zlib.crc32(x)
    return checksum & 0xffffffff

1
apps/search/views.py Normal file
Просмотреть файл

@ -0,0 +1 @@
# Create your views here.

Просмотреть файл

@ -2,8 +2,10 @@ from django.db import models
import amo
from addons.models import Addon
from users.models import UserProfile
from applications.models import Application
from applications.models import Application, AppVersion
from translations.fields import TranslatedField
@ -47,3 +49,14 @@ class VersionSummary(amo.ModelBase):
class Meta(amo.ModelBase.Meta):
db_table = 'versions_summary'
class ApplicationsVersions(models.Model):
    """
    Row of the applications_versions table: links a Version to an
    Application together with a min and a max AppVersion.
    """

    application = models.ForeignKey(Application)
    version = models.ForeignKey(Version)
    min = models.ForeignKey(
        AppVersion,
        db_column='min',
        related_name='min_set',
    )
    max = models.ForeignKey(
        AppVersion,
        db_column='max',
        related_name='max_set',
    )

    class Meta:
        db_table = u'applications_versions'

Просмотреть файл

@ -160,6 +160,7 @@ INSTALLED_APPS = (
'editors',
'files',
'reviews',
'search',
'tags',
'translations',
'users',
@ -203,3 +204,9 @@ DEFAULT_APP = 'firefox'
CACHE_DURATION = 60 # seconds
AUTH_PROFILE_MODULE = 'users.UserProfile'
# Sphinx search settings.
# Program names of the sphinx indexer and search daemon; the search app
# launches them by name, so they must be resolvable on the PATH.
SPHINX_INDEXER = 'indexer'
SPHINX_SEARCHD = 'searchd'
# Config file passed to both programs via --config.
SPHINX_CONFIG_PATH = path('configs/sphinx/sphinx.conf')
# Address of the searchd server that the search client connects to.
SPHINX_HOST = '127.0.0.1'
SPHINX_PORT = 3312