m_dot_n_re = re.compile(r'^\d+\.\d+$')
SEARCH_ENGINE_APP = 99


class Client(object):
    """
    A search client that queries sphinx for addons.
    """

    # Inline "version:<number>" filter that may appear anywhere in a search
    # term.  This has to be a raw string: in a plain string literal '\b' is a
    # backspace character, not a word boundary.
    _version_filter_re = re.compile(r'\bversion:([0-9.]+)')

    def __init__(self):
        """Create the sphinx client and point it at the configured server."""
        self.sphinx = SphinxClient()
        self.sphinx.SetServer(settings.SPHINX_HOST, settings.SPHINX_PORT)

    @classmethod
    def _split_version_filter(cls, term):
        """
        Pull an inline ``version:x.y`` filter out of a search term.

        Returns a ``(term, version)`` tuple.  ``term`` has the filter text
        removed; ``version`` is the version string, or None when the term
        contains no inline filter (in which case ``term`` is unchanged).
        """
        match = cls._version_filter_re.search(term)
        if not match:
            return term, None
        return term.replace(match.group(0), ''), match.group(1)

    def restrict_version(self, version):
        """
        Restrict a search to a specific version.

        We can make the search a little fuzzy so that 3.7 includes
        pre-releases.
        This is done by using a high_int and a low_int.  For alpha/pre-release
        searches we assume the search needs to be specific.

        Nothing is filtered when ``version`` cannot be converted to an
        integer by convert_version().
        """
        high_int = convert_version(version)
        if not high_int:
            return

        # A plain "major.minor" version also accepts its pre-releases, so the
        # lower bound drops to the "<version>apre" integer.
        if m_dot_n_re.match(version):
            low_int = convert_version(version + "apre")
        else:
            low_int = high_int

        sc = self.sphinx

        # SetFilterRange requires a max and min even if you just want a
        # lower-bound.  To work-around this limitation we set max_ver's
        # upperbound to be ridiculously large (10x the high_int).
        sc.SetFilterRange('max_ver', low_int, 10 * high_int)
        sc.SetFilterRange('min_ver', 0, high_int)

    def query(self, term, **kwargs):
        """
        Queries sphinx for a term, and parses specific options.

        The following kwargs will do things:

        limit: limits the number of results.  Default is 2000.
        admin: if present we are in "admin" mode which lets you find addons
            without files and overrides any 'xenophobia' settings.
        type: specifies an addon_type by id
        sort: specifies a specific sort mode.  Acceptable values are 'newest',
            'updated', 'name', 'averagerating' ('bayesianrating' is treated
            the same way) or 'weeklydownloads'.  If no sort mode is specified
            we use relevance.
        app: specifies which application_id to limit searches by
        version: specifies which version of an app (as specified) that
            addons need to be compatible with.  A ``version:x.y`` filter
            typed into the term itself takes precedence.
        xenophobia: restricts addons to the user's own locale
        locale: restricts addons to the specified locale

        Returns the 'matches' entry of the sphinx result, or None when
        sphinx returns no result.
        """
        sc = self.sphinx
        # NOTE(review): every filter below is added to the client shared by
        # all calls on this instance -- confirm whether a Client is ever
        # reused for a second query and, if so, whether filters need to be
        # reset first.

        # Setup some default parameters for the search.
        sc.SetSelect("addon_id, app, category")
        sc.SetFieldWeights({'name': 4})
        sc.SetLimits(0, kwargs.get('limit', 2000))
        sc.SetFilter('inactive', (0,))

        # STATUS_DISABLED and 0 (which likely means null) are filtered from
        # search.
        sc.SetFilter('status', (0, amo.STATUS_DISABLED), True)

        # Unless we're in admin mode, or we're looking at stub entries,
        # everything must have a file.
        if 'admin' not in kwargs and kwargs.get('type') != amo.ADDON_PERSONA:
            sc.SetFilter('num_files', (0,), True)

        # Sorting
        if 'sort' in kwargs:
            sort_modes = {
                'newest': (sphinx.SPH_SORT_ATTR_DESC, 'created'),
                'updated': (sphinx.SPH_SORT_ATTR_DESC, 'modified'),
                'name': (sphinx.SPH_SORT_ATTR_ASC, 'name_ord'),
                'averagerating': (sphinx.SPH_SORT_ATTR_DESC, 'averagerating'),
                'bayesianrating': (sphinx.SPH_SORT_ATTR_DESC,
                                   'averagerating'),
                'weeklydownloads': (sphinx.SPH_SORT_ATTR_DESC,
                                    'weeklydownloads'),
            }
            sort_mode = sort_modes.get(kwargs['sort'])
            # An unrecognised sort name leaves the sort mode untouched.
            if sort_mode:
                sc.SetSortMode(*sort_mode)
        else:
            # We want to boost public addons, and addons in your native
            # language.
            expr = ("@weight + IF(status=%d, 30, 0) + "
                    "IF(locale_ord=%d, 29, 0)") % (
                        amo.STATUS_PUBLIC, crc32(translation.get_language()))
            sc.SetSortMode(sphinx.SPH_SORT_EXPR, expr)

        # We should always have an 'app' except for the admin.
        if 'app' in kwargs:
            # We add SEARCH_ENGINE_APP since search engines work on all apps.
            sc.SetFilter('app', (kwargs['app'], SEARCH_ENGINE_APP))

        # Version filtering.
        term, inline_version = self._split_version_filter(term)
        if inline_version:
            self.restrict_version(inline_version)
        elif 'version' in kwargs:
            self.restrict_version(kwargs['version'])

        # Xenophobia - restrict to just my language.
        if 'xenophobia' in kwargs and 'admin' not in kwargs:
            kwargs['locale'] = translation.get_language()

        # Locale filtering
        if 'locale' in kwargs:
            sc.SetFilter('locale_ord', (crc32(kwargs['locale']),))

        # XXX - Todo:
        # In the interest of having working code sooner than later, we're
        # skipping the following... for now:
        #   * Type filter
        #   * Platform filter
        #   * Date filter
        #   * GUID filter
        #   * Category filter
        #   * Tag filter
        #   * Num apps filter
        #   * Logging

        result = sc.Query(term)
        if not result:
            return None
        return result['matches']
This option is available when you install the add-on, and can be accessed later from the Delicious menu in Firefox.\r\n\r\nFor a product tour and more details about this extension, please visit:\r\nhttp://delicious.com/help/quicktour/firefox\r\n\r\nFor product feedback, commentary & enhancement requests, please post to our discussion group, found here:\r\nhttp://tech.groups.yahoo.com/group/delicious-firefox-extension/\r\n\r\nor try the Delicious support forums:\r\n\r\nhttp://support.delicious.com/\r\n" + } + }, + + { + "pk": 3615, + "model": "addons.addon", + "fields": { + "dev_agreement": 1, + "publicstats": 0, + "modified": "2009-10-21 09:58:52", + "weeklydownloads": 20178, + "sharecount": 5, + "adminreview": 0, + "average_daily_downloads": 5928, + "show_beta": 1, + "trusted": 1, + "averagerating": "3.02", + "binary": 0, + "totalreviews": 389, + "viewsource": 0, + "externalsoftware": 0, + "average_daily_users": 493241, + "totaldownloads": 5175276, + "icontype": "image/png", + "status": 4, + "description": 15002, + "sitespecific": 1, + "nominationdate": "2009-03-26 07:41:12", + "wants_contributions": 0, + "prerelease": 0, + "guid": "{2fa4ed95-0317-4c6a-a74c-5f3e3912c1f9}", + "bayesianrating": 3.06941, + "name": 15000, + "created": "2006-10-23 12:57:41", + "paypal_id": "", + "annoying": 0, + "inactive": 0, + "addontype": 1, + "higheststatus": 4, + "defaultlocale": "en-US" + } + }, + { + "pk": 24007, + "model": "versions.version", + "fields": { + "license": null, + "created": "2006-10-23 12:57:41", + "approvalnotes": "", + "modified": "2006-10-23 19:22:18", + "version": "1.0.43", + "addon": 3615 + } + }, + { + "pk": 11993, + "model": "files.file", + "fields": { + "status": 4, + "codereview": 0, + "hash": "sha256:5b5aaf7b38e332cc95d92ba759c01c3076b53a840f6c16e01dc272eefcb29566", + "created": "2007-03-05 13:19:15", + "modified": "2007-04-04 12:30:11", + "filename": "del.icio.us_bookmarks-1.0.43-fx.xpi", + "platform": 1, + "version": 24007, + "datestatuschanged": null, 
class Command(BaseCommand):
    """Management command that hands the process over to the sphinx indexer."""

    help = ("Runs the indexer script for sphinx as defined in "
            " settings.SPHINX_INDEXER")

    requires_model_validation = False

    def handle(self, *args, **options):
        """
        Run settings.SPHINX_INDEXER over every index.

        The indexer is started with os.execvp, so on success this call does
        not return.  Positional arguments and options are accepted and
        ignored.

        Raises CommandError when the indexer program cannot be executed.
        """
        indexer = settings.SPHINX_INDEXER
        # The config location setting is SPHINX_CONFIG_PATH; reading a
        # setting that does not exist raises AttributeError, which the
        # OSError handler below would not catch.
        argv = (indexer, '--all', '--rotate', '--config',
                settings.SPHINX_CONFIG_PATH)

        try:
            os.execvp(indexer, argv)
        except OSError:
            raise CommandError('You appear not to have the %r program '
                               'installed or on your path' % indexer)
def test_convert_version():
    """Check that convert_version() orders a series of version strings."""

    def compare(left, right):
        # Three-way comparison of the converted versions: 1, -1 or 0.
        left_int = convert_version(left)
        right_int = convert_version(right)
        return (left_int > right_int) - (left_int < right_int)

    versions = ['1.9.0a1pre', '1.9.0a1', '1.9.1.b5', '1.9.1.b5', '1.9.1pre',
                '1.9.1', '1.9.0']
    # Expected result of comparing each version with the one after it.
    expected = [-1, -1, 0, -1, -1, 1]

    for index, outcome in enumerate(expected):
        eq_(compare(versions[index], versions[index + 1]), outcome)


class SphinxTest(TransactionTestCase):
    """Runs a search against a freshly indexed, freshly started sphinx."""

    fixtures = ["search/sphinx.json"]
    sphinx = True

    def setUp(self):
        """Recreate the scratch directories, reindex and start sphinx."""
        os.environ['DJANGO_ENVIRONMENT'] = 'test'

        scratch_dirs = ('/tmp/data/sphinx', '/tmp/log/searchd')

        # Remove whatever a previous run left behind before recreating.
        for scratch_dir in scratch_dirs:
            if os.path.exists(scratch_dir):
                shutil.rmtree(scratch_dir)

        for scratch_dir in scratch_dirs:
            os.makedirs(scratch_dir)

        reindex()
        start_sphinx()
        # Presumably gives searchd a moment to come up before the test
        # queries it.
        time.sleep(1)

    def tearDown(self):
        """Stop the sphinx daemon started in setUp."""
        stop_sphinx()

    def test_sphinx_indexer(self):
        """
        This tests that sphinx will properly index an addon.
        """

        # we have to specify to sphinx to look at test_ dbs
        client = SearchClient()
        matches = client.query('Delicious')
        first_match = matches[0]
        assert first_match['attrs']['addon_id'] == 3615, \
            "Didn't get the addon ID I wanted."
def stop_sphinx():
    """
    Stops sphinx.  Note this is only to be used in dev and test environments.
    """

    subprocess.call([settings.SPHINX_SEARCHD, '--stop', '--config',
                     settings.SPHINX_CONFIG_PATH])


# Splits a version string such as 3.7.2.1b3pre3 into its components.
pattern = re.compile(r"""(\d+)     # major (x in x.y)
                         \.(\d+)   # minor1 (y in x.y)
                         \.?(\d+)? # minor2 (z in x.y.z)
                         \.?(\d+)? # minor3 (w in x.y.z.w)
                         ([ab]?)   # alpha/beta
                         (\d*)     # alpha/beta version
                         (pre)?    # pre release
                         (\d)?     # pre release version""", re.VERBOSE)
# Finds a "<digits>+" component, e.g. the "1+" in "2.1+".
pattern_plus = re.compile(r'((\d+)\+)')


def convert_type(type):
    """
    Map an addon type name (singular or plural) to its ADDON_* constant.

    Returns None when the name is not recognised.
    """
    if type in ('extension', 'extensions'):
        return const.ADDON_EXTENSION
    elif type in ('theme', 'themes'):
        return const.ADDON_THEME
    elif type in ('dict', 'dicts'):
        return const.ADDON_DICT
    elif type in ('language', 'languages'):
        return const.ADDON_LPAPP
    elif type in ('plugin', 'plugins'):
        return const.ADDON_PLUGIN
    return None


def convert_version(version_string):
    """
    This will enumerate a version so that it can be used for comparisons and
    indexing.

    Returns the version as a single integer, or None when version_string is
    empty, None, or does not start with a "major.minor" version number.
    """
    if not version_string:
        return None

    # Replace .x or .* with .99 since these are equivalent.
    version_string = version_string.replace('.x', '.99')
    version_string = version_string.replace('.*', '.99')

    # Replace "<n>+" with "<n + 1>pre0" (e.g. 2.1+ => 2.2pre0).
    plus_match = pattern_plus.search(version_string)
    if plus_match:
        (old, ver) = plus_match.groups()
        replacement = "%dpre0" % (int(ver) + 1)
        version_string = version_string.replace(old, replacement)

    # Now we break up a version into components.
    #
    # e.g. 3.7.2.1b3pre3
    # we break into:
    #   major   => 3
    #   minor1  => 7
    #   minor2  => 2
    #   minor3  => 1
    #   alpha   => b => 1
    #   alpha_n => 3
    #   pre     => 0
    #   pre_n   => 3
    #
    # Alpha is 0,1,2 based on whether a version is alpha, beta or a release.
    # Pre is 0 or 1.  0 indicates that this is a pre-release.
    #
    # The numbers are chosen based on sorting rules, not for any deep meaning.
    match = pattern.match(version_string)
    if not match:
        return None

    (major, minor1, minor2, minor3, alpha, alpha_n, pre,
     pre_n) = match.groups()

    # Normalize data: missing components count as 0.
    major = int(major)
    minor1 = int(minor1)
    minor2 = int(minor2) if minor2 else 0
    minor3 = int(minor3) if minor3 else 0
    alpha = {'a': 0, 'b': 1}.get(alpha, 2)
    alpha_n = int(alpha_n) if alpha_n else 0
    pre = 0 if pre == 'pre' else 1
    pre_n = int(pre_n) if pre_n else 0

    # We recombine everything into a single large integer.
    int_str = ("%02d%02d%02d%02d%d%02d%d%02d"
               % (major, minor1, minor2, minor3, alpha, alpha_n, pre, pre_n))

    return int(int_str)


def crc32(x):
    """
    Return the CRC-32 checksum of x as an unsigned 32-bit integer.

    Text that is not already a byte string is encoded as UTF-8 first.
    """
    if not isinstance(x, bytes):
        x = x.encode('utf-8')
    return zlib.crc32(x) & 0xffffffff
class ApplicationsVersions(models.Model):
    """
    Row of the applications_versions table.

    Ties a Version to an Application together with two AppVersion
    references, min and max (presumably the range of application versions
    the addon version supports -- confirm against callers).
    """

    application = models.ForeignKey(Application)
    version = models.ForeignKey(Version)
    # Both bounds point at AppVersion, so each needs its own related_name to
    # keep the reverse accessors distinct.
    min = models.ForeignKey(
        AppVersion,
        db_column='min',
        related_name='min_set',
    )
    max = models.ForeignKey(
        AppVersion,
        db_column='max',
        related_name='max_set',
    )

    class Meta:
        db_table = u'applications_versions'