diff --git a/.gitmodules b/.gitmodules index a467ba8118..ba13100d5c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "media/js"] path = media/js url = git://github.com/davedash/amo-js.git +[submodule "configs"] + path = configs + url = git://github.com/davedash/amo-configs diff --git a/apps/addons/admin.py b/apps/addons/admin.py index cb2eef15a5..d29c3271dc 100644 --- a/apps/addons/admin.py +++ b/apps/addons/admin.py @@ -1,22 +1,9 @@ -from django import forms from django.contrib import admin from .models import Addon, BlacklistedGuid, Feature, Category -class AddonForm(forms.ModelForm): - - class Meta: - model = Addon - - def clean(self): - # Override clean so we ignore uniqueness checks on the django side, - # since they don't handle translations correctly. - return self.cleaned_data - - class AddonAdmin(admin.ModelAdmin): - form = AddonForm exclude = ('users',) list_display = ('__unicode__', 'addontype', 'status', 'averagerating') list_filter = ('addontype', 'status') diff --git a/apps/addons/models.py b/apps/addons/models.py index 3e6f313a89..01400dc5e6 100644 --- a/apps/addons/models.py +++ b/apps/addons/models.py @@ -15,7 +15,7 @@ class Addon(amo.ModelBase): (3, 'Roadblock; User shown splash screen before download'), ) - guid = models.CharField(max_length=255, unique=True) + guid = models.CharField(max_length=255, unique=True, null=True) name = TranslatedField() defaultlocale = models.CharField(max_length=10, default=settings.LANGUAGE_CODE) diff --git a/apps/admin/fixtures/admin/tests/flagged.json b/apps/admin/fixtures/admin/tests/flagged.json index c649816547..13c566843b 100644 --- a/apps/admin/fixtures/admin/tests/flagged.json +++ b/apps/admin/fixtures/admin/tests/flagged.json @@ -189,5 +189,54 @@ "addon": 3, "file": 2 } + }, + { + "pk": 4043307, + "model": "auth.user", + "fields": { + "username": "jbalogh", + "first_name": "Jeff", + "last_name": "Balogh", + "is_active": 1, + "is_superuser": 1, + "is_staff": 1, + "last_login": "2010-01-13 17:17:23", + "groups": [], + "user_permissions": [], + "password": "sha512$7b5436061f8c0902088c292c057be69fdb17312e2f71607c9c51641f5d876522$08d1d370d89e2ae92755fd03464a7276ca607c431d04a52d659f7a184f3f9918073637d82fc88981c7099c7c46a1137b9fdeb675304eb98801038905a9ee0600", + "email": "jbalogh@mozilla.com", + "date_joined": "2009-02-02 11:50:31" + } + }, + { + "pk": 4043307, + "model": "users.userprofile", + "fields": { + "sandboxshown": 1, + "display_collections_fav": 1, + "display_collections": 1, + "occupation": "", + "confirmationcode": "", + "location": "", + "picture_type": "", + "averagerating": "", + "homepage": "http://jeffbalogh.org", + "email": "jbalogh@mozilla.com", + "notifycompat": 1, + "bio": null, + "firstname": "Jeff", + "deleted": 0, + "lastname": "Balogh", + "emailhidden": 0, + "user": 4043307, + "password": "sha512$7b5436061f8c0902088c292c057be69fdb17312e2f71607c9c51641f5d876522$08d1d370d89e2ae92755fd03464a7276ca607c431d04a52d659f7a184f3f9918073637d82fc88981c7099c7c46a1137b9fdeb675304eb98801038905a9ee0600", + "nickname": "jbalogh", + "resetcode_expires": "2010-01-12 15:28:07", + "resetcode": "", + "created": "2009-02-02 11:50:31", + "notes": "", + "modified": "2010-01-12 17:01:41", + "notifyevents": 1 + } } ] diff --git a/apps/admin/templates/admin/base.html b/apps/admin/templates/admin/base.html index 121a8fd003..93f4a5d844 100644 --- a/apps/admin/templates/admin/base.html +++ b/apps/admin/templates/admin/base.html @@ -8,9 +8,16 @@ {% block extrahead %} + href="{{ MEDIA_URL }}css/zamboni/admin-mozilla.css"> + + 
+{% endblock %} + +{% block js %} + + {% endblock %} {% block navbar %} diff --git a/apps/admin/templates/admin/base_site.html b/apps/admin/templates/admin/base_site.html index d801d30b9f..946db2cd8e 100644 --- a/apps/admin/templates/admin/base_site.html +++ b/apps/admin/templates/admin/base_site.html @@ -14,7 +14,7 @@ them. See admin.django_to_jinja for more details. {% templatetag openblock %} block extrahead {% templatetag closeblock %} {% templatetag openvariable %} super() {% templatetag closevariable %} - + {% block extrastyle %}{% endblock %} {% block extrahead %}{% endblock %} {% templatetag openblock %} endblock {% templatetag closeblock %} diff --git a/apps/admin/tests/test_views.py b/apps/admin/tests/test_views.py index 30401de5fe..4b43ece6df 100644 --- a/apps/admin/tests/test_views.py +++ b/apps/admin/tests/test_views.py @@ -14,6 +14,7 @@ class TestFlagged(test_utils.TestCase): fixtures = ['admin/tests/flagged'] def setUp(self): + self.client.login(username='jbalogh@mozilla.com', password='password') cache.clear() def test_get(self): diff --git a/apps/admin/views.py b/apps/admin/views.py index 93335de79b..9d49f5114f 100644 --- a/apps/admin/views.py +++ b/apps/admin/views.py @@ -1,3 +1,4 @@ +from django.contrib import admin from django.db.models import Q from django.shortcuts import redirect @@ -9,6 +10,7 @@ from files.models import Approval from versions.models import Version +@admin.site.admin_view def flagged(request): addons = Addon.objects.filter(adminreview=True).order_by('-created') diff --git a/apps/amo/constants.py b/apps/amo/constants.py index fc246d79c8..39e2f6584f 100644 --- a/apps/amo/constants.py +++ b/apps/amo/constants.py @@ -44,3 +44,16 @@ COLLECTION_AUTHOR_CHOICES = { COLLECTION_ROLE_PUBLISHER: 'Publisher', COLLECTION_ROLE_ADMIN: 'Admin', } + +# Addon types +ADDON_ANY = -1 +ADDON_EXTENSION = 1 +ADDON_THEME = 2 +ADDON_DICT = 3 +ADDON_SEARCH = 4 +ADDON_LPAPP = 5 +ADDON_LPADDON = 6 +ADDON_PLUGIN = 7 +ADDON_API = 8 # not actually a type but used to identify extensions + themes +ADDON_PERSONA = 9 + diff --git a/apps/amo/management/__init__.py b/apps/amo/management/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/apps/amo/management/commands/__init__.py b/apps/amo/management/commands/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/apps/amo/management/commands/fix_charfields.py b/apps/amo/management/commands/fix_charfields.py new file mode 100644 index 0000000000..3dba866e02 --- /dev/null +++ b/apps/amo/management/commands/fix_charfields.py @@ -0,0 +1,50 @@ +import itertools + +from django.core.management.base import BaseCommand +from django.db import models, connection + +qn = connection.ops.quote_name + + +def fix(table_name, field): + d = {'table': table_name, 'field': qn(field.column), 'sql': sql(field)} + update = "UPDATE {table} SET {field}='' WHERE {field} IS NULL".format(**d) + alter = "MODIFY {sql}".format(**d) + return update, alter + + +def sql(field): + o = ['%s' % qn(field.column), field.db_type()] + if not field.null: + o.append('NOT NULL') + if field.primary_key: + o.append('PRIMARY KEY') + if field.default is not models.fields.NOT_PROVIDED: + o.append('default %r' % field.default) + return ' '.join(o) + + +class Command(BaseCommand): + help = 'Print SQL to change CharFields to be non-null.' 
+ args = '[appname ...]' + + def handle(self, *app_labels, **options): + if app_labels: + modules = [models.loading.get_app(app) for app in app_labels] + models_ = itertools.chain(*[models.loading.get_models(mod) + for mod in modules]) + else: + models_ = models.loading.get_models() + + updates, alters = [], [] + for model in models_: + model_alters = [] + table = model._meta.db_table + for field in model._meta.fields: + if isinstance(field, models.CharField) and not field.null: + update, alter = fix(table, field) + updates.append(update) + model_alters.append(alter) + if model_alters: + alters.append('ALTER TABLE %s\n\t%s' % (table, ',\n\t'.join(model_alters))) + print ';\n'.join(updates + alters) + ';' diff --git a/apps/files/models.py b/apps/files/models.py index f820bf9272..777b9c9a88 100644 --- a/apps/files/models.py +++ b/apps/files/models.py @@ -90,7 +90,7 @@ class TestResultCache(amo.ModelBase): """When a file is checked the results are stored here in JSON. This is temporary storage and removed with the garbage cleanup cron.""" date = models.DateTimeField() - key = models.CharField(max_length=255) + key = models.CharField(max_length=255, db_index=True) test_case = models.ForeignKey(TestCase) message = models.TextField(blank=True) diff --git a/apps/search/__init__.py b/apps/search/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/apps/search/client.py b/apps/search/client.py new file mode 100644 index 0000000000..367010538f --- /dev/null +++ b/apps/search/client.py @@ -0,0 +1,154 @@ +import re + +from django.conf import settings +from django.utils import translation + +import amo +from .sphinxapi import SphinxClient +import sphinxapi as sphinx +from .utils import convert_version, crc32 + +m_dot_n_re = re.compile(r'^\d+\.\d+$') +SEARCH_ENGINE_APP = 99 + + +class Client(object): + """ + A search client that queries sphinx for addons. + """ + + def __init__(self): + self.sphinx = SphinxClient() + self.sphinx.SetServer(settings.SPHINX_HOST, settings.SPHINX_PORT) + + def restrict_version(self, version): + """ + Restrict a search to a specific version. + + We can make the search a little fuzzy so that 3.7 includes + pre-releases. + This is done by using a high_int and a low_int. For alpha/pre-release + searches we assume the search needs to be specific. + """ + + sc = self.sphinx + + high_int = convert_version(version) + low_int = high_int + + + if m_dot_n_re.match(version): + low_int = convert_version(version + "apre") + + # SetFilterRange requires a max and min even if you just want a + # lower-bound. To work-around this limitation we set max_ver's + # upperbound to be ridiculously large (10x the high_int). + + if high_int: + sc.SetFilterRange('max_ver', low_int, 10 * high_int) + sc.SetFilterRange('min_ver', 0, high_int) + + def query(self, term, **kwargs): + """ + Queries sphinx for a term, and parses specific options. + + The following kwargs will do things: + + limit: limits the number of results. Default is 2000. + admin: if present we are in "admin" mode which lets you find addons + without files and overrides any 'xenophobia' settings. + type: specifies an addon_type by id + sort: specifies a specific sort mode. acceptable values are 'newest', + 'updated, 'name', 'averagerating' or 'weeklydownloads'. If no + sort mode is specified we use relevance. 
+ 'app': specifies which application_id to limit searches by + 'version': specifies which version of an app (as specified) that + addons need to be compatble + 'xenophobia': restricts addons to the users own locale + 'locale': restricts addons to the specified locale + + """ + + sc = self.sphinx + + # Setup some default parameters for the search. + fields = "addon_id, app, category" + + limit = kwargs.get('limit', 2000) + + sc.SetSelect(fields) + sc.SetFieldWeights({'name': 4}) + sc.SetLimits(0, limit) + sc.SetFilter('inactive', (0,)) + + # STATUS_DISABLED and 0 (which likely means null) are filtered from + # search + + sc.SetFilter('status', (0, amo.STATUS_DISABLED), True) + + # Unless we're in admin mode, or we're looking at stub entries, + # everything must have a file. + if (('admin' not in kwargs) and + ('type' not in kwargs or kwargs['type'] != amo.ADDON_PERSONA)): + sc.SetFilter('num_files', (0,), True) + + # Sorting + if 'sort' in kwargs: + if kwargs['sort'] == 'newest': + sc.SetSortMode(sphinx.SPH_SORT_ATTR_DESC, 'created') + elif kwargs['sort'] == 'updated': + sc.SetSortMode(sphinx.SPH_SORT_ATTR_DESC, 'modified') + elif kwargs['sort'] == 'name': + sc.SetSortMode(sphinx.SPH_SORT_ATTR_ASC, 'name_ord') + elif (kwargs['sort'] == 'averagerating' or + kwargs['sort'] == 'bayesianrating'): + sc.SetSortMode(sphinx.SPH_SORT_ATTR_DESC, 'averagerating') + elif kwargs['sort'] == 'weeklydownloads': + sc.SetSortMode(sphinx.SPH_SORT_ATTR_DESC, 'weeklydownloads') + + else: + # We want to boost public addons, and addons in your native + # language. + expr = ("@weight + IF(status=%d, 30, 0) + " + "IF(locale_ord=%d, 29, 0)") % (amo.STATUS_PUBLIC, + crc32(translation.get_language())) + sc.SetSortMode(sphinx.SPH_SORT_EXPR, expr) + + # We should always have an 'app' except for the admin. + if 'app' in kwargs: + # We add SEARCH_ENGINE_APP since search engines work on all apps. + sc.SetFilter('app', (kwargs['app'], SEARCH_ENGINE_APP)) + + # Version filtering. + match = re.match('\bversion:([0-9\.]+)/', term) + + if match: + term = term.replace(match.group(0), '') + self.restrict_version(match.group(1)) + elif 'version' in kwargs: + self.restrict_version(kwargs['version']) + + # Xenophobia - restrict to just my language. + if 'xenophobia' in kwargs and 'admin' not in kwargs: + kwargs['locale'] = translation.get_language() + + # Locale filtering + if 'locale' in kwargs: + sc.SetFilter('locale_ord', (crc32(kwargs['locale']),)) + + # XXX - Todo: + # In the interest of having working code sooner than later, we're + # skipping the following... 
for now: + # * Type filter + # * Platform filter + # * Date filter + # * GUID filter + # * Category filter + # * Tag filter + # * Num apps filter + # * Logging + + result = sc.Query(term) + + if result: + return result['matches'] diff --git a/apps/search/fixtures/search/sphinx.json b/apps/search/fixtures/search/sphinx.json new file mode 100644 index 0000000000..7aefdf7c15 --- /dev/null +++ b/apps/search/fixtures/search/sphinx.json @@ -0,0 +1,110 @@ +[ + { + "pk": 1, + "model": "files.platform", + "fields": { + "icontype": "", + "modified": "2008-04-07 08:16:55", + "created": "2007-03-05 13:09:27" + } + }, + { + "pk": 1, + "model": "addons.addontype", + "fields": { + "modified": "2009-02-06 08:42:28", + "created": "2006-08-21 23:53:19" + } + }, + { + "pk": 15000, + "model": "translations.translation", + "fields": { + "locale": "en-US", + "created": "2007-03-05 13:10:42", + "id": 15000, + "modified": "2009-03-26 07:41:10", + "localized_string": "Delicious Bookmarks" + } + }, + + { + "pk": 29034, + "model": "translations.translation", + "fields": { + "locale": "en-US", + "created": "2007-03-05 13:10:42", + "id": 15002, + "modified": "2009-04-29 11:20:57", + "localized_string": "This extension integrates your browser with Delicious (http://delicious.com), the leading social bookmarking service on the Web. It does this by augmenting the bookmarking functionality in Firefox with an enhanced experience that offers the following advantages:\r\n\r\n- Search and browse your Delicious bookmarks\r\n- Keep up to date on your Network and Links For You\r\n- Access your bookmarks from any computer at any time\r\n- Keep your bookmarks organized using tags\r\n- Share your bookmarks with friends or anyone on the Web\r\n- Import your existing Firefox bookmarks\r\n\r\nWe've also included a Classic Mode for those who want the basic Delicious buttons without any sync or search features. 
This option is available when you install the add-on, and can be accessed later from the Delicious menu in Firefox.\r\n\r\nFor a product tour and more details about this extension, please visit:\r\nhttp://delicious.com/help/quicktour/firefox\r\n\r\nFor product feedback, commentary & enhancement requests, please post to our discussion group, found here:\r\nhttp://tech.groups.yahoo.com/group/delicious-firefox-extension/\r\n\r\nor try the Delicious support forums:\r\n\r\nhttp://support.delicious.com/\r\n" + } + }, + + { + "pk": 3615, + "model": "addons.addon", + "fields": { + "dev_agreement": 1, + "publicstats": 0, + "modified": "2009-10-21 09:58:52", + "weeklydownloads": 20178, + "sharecount": 5, + "adminreview": 0, + "average_daily_downloads": 5928, + "show_beta": 1, + "trusted": 1, + "averagerating": "3.02", + "binary": 0, + "totalreviews": 389, + "viewsource": 0, + "externalsoftware": 0, + "average_daily_users": 493241, + "totaldownloads": 5175276, + "icontype": "image/png", + "status": 4, + "description": 15002, + "sitespecific": 1, + "nominationdate": "2009-03-26 07:41:12", + "wants_contributions": 0, + "prerelease": 0, + "guid": "{2fa4ed95-0317-4c6a-a74c-5f3e3912c1f9}", + "bayesianrating": 3.06941, + "name": 15000, + "created": "2006-10-23 12:57:41", + "paypal_id": "", + "annoying": 0, + "inactive": 0, + "addontype": 1, + "higheststatus": 4, + "defaultlocale": "en-US" + } + }, + { + "pk": 24007, + "model": "versions.version", + "fields": { + "license": null, + "created": "2006-10-23 12:57:41", + "approvalnotes": "", + "modified": "2006-10-23 19:22:18", + "version": "1.0.43", + "addon": 3615 + } + }, + { + "pk": 11993, + "model": "files.file", + "fields": { + "status": 4, + "codereview": 0, + "hash": "sha256:5b5aaf7b38e332cc95d92ba759c01c3076b53a840f6c16e01dc272eefcb29566", + "created": "2007-03-05 13:19:15", + "modified": "2007-04-04 12:30:11", + "filename": "del.icio.us_bookmarks-1.0.43-fx.xpi", + "platform": 1, + "version": 24007, + "datestatuschanged": null, + "size": 169 + } + } +] diff --git a/apps/search/management/__init__.py b/apps/search/management/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/apps/search/management/commands/__init__.py b/apps/search/management/commands/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/apps/search/management/commands/sphinxreindex.py b/apps/search/management/commands/sphinxreindex.py new file mode 100644 index 0000000000..e490fd2fe5 --- /dev/null +++ b/apps/search/management/commands/sphinxreindex.py @@ -0,0 +1,21 @@ +import os + +from django.conf import settings +from django.core.management.base import BaseCommand, CommandError + + +class Command(BaseCommand): + help = ("Runs the indexer script for sphinx as defined in " + " settings.SPHINX_INDEXER") + + requires_model_validation = False + + def handle(self, **options): + try: + os.execvp(settings.SPHINX_INDEXER, + (settings.SPHINX_INDEXER, '--all', '--rotate', '--config', + settings.SPHINX_CONFIG_FILE)) + + except OSError: + raise CommandError('You appear not to have the %r program ' + 'installed or on your path' % settings.SPHINX_INDEXER) diff --git a/apps/search/models.py b/apps/search/models.py new file mode 100644 index 0000000000..71a8362390 --- /dev/null +++ b/apps/search/models.py @@ -0,0 +1,3 @@ +from django.db import models + +# Create your models here. 
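A quick usage sketch for the new search client above — not part of the patch. It assumes a running searchd reachable via settings.SPHINX_HOST/SPHINX_PORT, and the app id 1 (Firefox on AMO) is an illustrative assumption, not taken from this diff:

    # Python 2, matching the codebase.
    from search.client import Client

    c = Client()
    # Plain term query, newest first, restricted to one application.
    results = c.query('delicious', sort='newest', app=1)
    # query() returns sphinx's result['matches'], or None on failure,
    # so callers must guard against None.
    if results:
        print results[0]['attrs']['addon_id']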
diff --git a/apps/search/sphinxapi.py b/apps/search/sphinxapi.py new file mode 100644 index 0000000000..fa80d35694 --- /dev/null +++ b/apps/search/sphinxapi.py @@ -0,0 +1,972 @@ +# +# $Id$ +# +# Python version of Sphinx searchd client (Python API) +# +# Copyright (c) 2006-2008, Andrew Aksyonoff +# Copyright (c) 2006, Mike Osadnik +# All rights reserved +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License. You should have +# received a copy of the GPL license along with this program; if you +# did not, you can find it at http://www.gnu.org/ +# + +import sys +import select +import socket +import re +from struct import * + + +# known searchd commands +SEARCHD_COMMAND_SEARCH = 0 +SEARCHD_COMMAND_EXCERPT = 1 +SEARCHD_COMMAND_UPDATE = 2 +SEARCHD_COMMAND_KEYWORDS= 3 +SEARCHD_COMMAND_PERSIST = 4 + +# current client-side command implementation versions +VER_COMMAND_SEARCH = 0x116 +VER_COMMAND_EXCERPT = 0x100 +VER_COMMAND_UPDATE = 0x101 +VER_COMMAND_KEYWORDS = 0x100 + +# known searchd status codes +SEARCHD_OK = 0 +SEARCHD_ERROR = 1 +SEARCHD_RETRY = 2 +SEARCHD_WARNING = 3 + +# known match modes +SPH_MATCH_ALL = 0 +SPH_MATCH_ANY = 1 +SPH_MATCH_PHRASE = 2 +SPH_MATCH_BOOLEAN = 3 +SPH_MATCH_EXTENDED = 4 +SPH_MATCH_FULLSCAN = 5 +SPH_MATCH_EXTENDED2 = 6 + +# known ranking modes (extended2 mode only) +SPH_RANK_PROXIMITY_BM25 = 0 # default mode, phrase proximity major factor and BM25 minor one +SPH_RANK_BM25 = 1 # statistical mode, BM25 ranking only (faster but worse quality) +SPH_RANK_NONE = 2 # no ranking, all matches get a weight of 1 +SPH_RANK_WORDCOUNT = 3 # simple word-count weighting, rank is a weighted sum of per-field keyword occurence counts + +# known sort modes +SPH_SORT_RELEVANCE = 0 +SPH_SORT_ATTR_DESC = 1 +SPH_SORT_ATTR_ASC = 2 +SPH_SORT_TIME_SEGMENTS = 3 +SPH_SORT_EXTENDED = 4 +SPH_SORT_EXPR = 5 + +# known filter types +SPH_FILTER_VALUES = 0 +SPH_FILTER_RANGE = 1 +SPH_FILTER_FLOATRANGE = 2 + +# known attribute types +SPH_ATTR_NONE = 0 +SPH_ATTR_INTEGER = 1 +SPH_ATTR_TIMESTAMP = 2 +SPH_ATTR_ORDINAL = 3 +SPH_ATTR_BOOL = 4 +SPH_ATTR_FLOAT = 5 +SPH_ATTR_BIGINT = 6 +SPH_ATTR_MULTI = 0X40000000L + +SPH_ATTR_TYPES = (SPH_ATTR_NONE, + SPH_ATTR_INTEGER, + SPH_ATTR_TIMESTAMP, + SPH_ATTR_ORDINAL, + SPH_ATTR_BOOL, + SPH_ATTR_FLOAT, + SPH_ATTR_BIGINT, + SPH_ATTR_MULTI) + +# known grouping functions +SPH_GROUPBY_DAY = 0 +SPH_GROUPBY_WEEK = 1 +SPH_GROUPBY_MONTH = 2 +SPH_GROUPBY_YEAR = 3 +SPH_GROUPBY_ATTR = 4 +SPH_GROUPBY_ATTRPAIR = 5 + + +class SphinxClient: + def __init__ (self): + """ + Create a new client object, and fill defaults. 
+ """ + self._host = 'localhost' # searchd host (default is "localhost") + self._port = 9312 # searchd port (default is 9312) + self._path = None # searchd unix-domain socket path + self._socket = None + self._offset = 0 # how much records to seek from result-set start (default is 0) + self._limit = 20 # how much records to return from result-set starting at offset (default is 20) + self._mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL) + self._weights = [] # per-field weights (default is 1 for all fields) + self._sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE) + self._sortby = '' # attribute to sort by (defualt is "") + self._min_id = 0 # min ID to match (default is 0) + self._max_id = 0 # max ID to match (default is UINT_MAX) + self._filters = [] # search filters + self._groupby = '' # group-by attribute name + self._groupfunc = SPH_GROUPBY_DAY # group-by function (to pre-process group-by attribute value with) + self._groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with) + self._groupdistinct = '' # group-by count-distinct attribute + self._maxmatches = 1000 # max matches to retrieve + self._cutoff = 0 # cutoff to stop searching at + self._retrycount = 0 # distributed retry count + self._retrydelay = 0 # distributed retry delay + self._anchor = {} # geographical anchor point + self._indexweights = {} # per-index weights + self._ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode + self._maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit) + self._fieldweights = {} # per-field-name weights + self._overrides = {} # per-query attribute values overrides + self._select = '*' # select-list (attributes or expressions, with optional aliases) + + self._error = '' # last error message + self._warning = '' # last warning message + self._reqs = [] # requests array for multi-query + + def __del__ (self): + if self._socket: + self._socket.close() + + + def GetLastError (self): + """ + Get last error message (string). + """ + return self._error + + + def GetLastWarning (self): + """ + Get last warning message (string). + """ + return self._warning + + + def SetServer (self, host, port = None): + """ + Set searchd server host and port. + """ + assert(isinstance(host, str)) + if host.startswith('/'): + self._path = host + return + elif host.startswith('unix://'): + self._path = host[7:] + return + assert(isinstance(port, int)) + self._host = host + self._port = port + self._path = None + + + def _Connect (self): + """ + INTERNAL METHOD, DO NOT CALL. Connects to searchd server. + """ + if self._socket: + # we have a socket, but is it still alive? 
+            sr, sw, _ = select.select ( [self._socket], [self._socket], [], 0 )
+
+            # this is how alive socket should look
+            if len(sr)==0 and len(sw)==1:
+                return self._socket
+
+            # oops, looks like it was closed, lets reopen
+            self._socket.close()
+            self._socket = None
+
+        try:
+            if self._path:
+                af = socket.AF_UNIX
+                addr = self._path
+                desc = self._path
+            else:
+                af = socket.AF_INET
+                addr = ( self._host, self._port )
+                desc = '%s;%s' % addr
+            sock = socket.socket ( af, socket.SOCK_STREAM )
+            sock.connect ( addr )
+        except socket.error, msg:
+            if sock:
+                sock.close()
+            self._error = 'connection to %s failed (%s)' % ( desc, msg )
+            return
+
+        v = unpack('>L', sock.recv(4))
+        if v<1:
+            sock.close()
+            self._error = 'expected searchd protocol version, got %s' % v
+            return
+
+        # all ok, send my version
+        sock.send(pack('>L', 1))
+        return sock
+
+
+    def _GetResponse (self, sock, client_ver):
+        """
+        INTERNAL METHOD, DO NOT CALL. Gets and checks response packet from searchd server.
+        """
+        (status, ver, length) = unpack('>2HL', sock.recv(8))
+        response = ''
+        left = length
+        while left>0:
+            chunk = sock.recv(left)
+            if chunk:
+                response += chunk
+                left -= len(chunk)
+            else:
+                break
+
+        if not self._socket:
+            sock.close()
+
+        # check response
+        read = len(response)
+        if not response or read!=length:
+            if length:
+                self._error = 'failed to read searchd response (status=%s, ver=%s, len=%s, read=%s)' \
+                    % (status, ver, length, read)
+            else:
+                self._error = 'received zero-sized searchd response'
+            return None
+
+        # check status
+        if status==SEARCHD_WARNING:
+            wend = 4 + unpack ( '>L', response[0:4] )[0]
+            self._warning = response[4:wend]
+            return response[wend:]
+
+        if status==SEARCHD_ERROR:
+            self._error = 'searchd error: '+response[4:]
+            return None
+
+        if status==SEARCHD_RETRY:
+            self._error = 'temporary searchd error: '+response[4:]
+            return None
+
+        if status!=SEARCHD_OK:
+            self._error = 'unknown status code %d' % status
+            return None
+
+        # check version
+        if ver<client_ver:
+            self._warning = 'searchd command v.%d.%d older than client\'s v.%d.%d, some options might not work' \
+                % (ver>>8, ver&0xff, client_ver>>8, client_ver&0xff)
+
+        return response
+
+
+    def SetLimits (self, offset, limit, maxmatches=0, cutoff=0):
+        """
+        Set offset and count into result set, and optionally set max-matches and cutoff limits.
+        """
+        assert ( type(offset) in [int,long] and 0<=offset<16777216 )
+        assert ( type(limit) in [int,long] and 0<limit<16777216 )
+        assert(isinstance(maxmatches,int) and maxmatches>=0)
+        self._offset = offset
+        self._limit = limit
+        if maxmatches>0:
+            self._maxmatches = maxmatches
+        if cutoff>=0:
+            self._cutoff = cutoff
+
+
+    def SetMaxQueryTime (self, maxquerytime):
+        """
+        Set maximum query time, in milliseconds, per-index. 0 means 'do not limit'.
+        """
+        assert(isinstance(maxquerytime,int) and maxquerytime>0)
+        self._maxquerytime = maxquerytime
+
+
+    def SetMatchMode (self, mode):
+        """
+        Set matching mode.
+        """
+        assert(mode in [SPH_MATCH_ALL, SPH_MATCH_ANY, SPH_MATCH_PHRASE, SPH_MATCH_BOOLEAN, SPH_MATCH_EXTENDED, SPH_MATCH_FULLSCAN, SPH_MATCH_EXTENDED2])
+        self._mode = mode
+
+
+    def SetRankingMode (self, ranker):
+        """
+        Set ranking mode.
+        """
+        assert(ranker in [SPH_RANK_PROXIMITY_BM25, SPH_RANK_BM25, SPH_RANK_NONE, SPH_RANK_WORDCOUNT])
+        self._ranker = ranker
+
+
+    def SetSortMode ( self, mode, clause='' ):
+        """
+        Set sorting mode.
+        """
+        assert ( mode in [SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, SPH_SORT_ATTR_ASC, SPH_SORT_TIME_SEGMENTS, SPH_SORT_EXTENDED, SPH_SORT_EXPR] )
+        assert ( isinstance ( clause, str ) )
+        self._sort = mode
+        self._sortby = clause
+
+
+    def SetWeights (self, weights):
+        """
+        Set per-field weights.
+        WARNING, DEPRECATED; do not use it!
use SetFieldWeights() instead + """ + assert(isinstance(weights, list)) + for w in weights: + assert(isinstance(w, int)) + self._weights = weights + + + def SetFieldWeights (self, weights): + """ + Bind per-field weights by name; expects (name,field_weight) dictionary as argument. + """ + assert(isinstance(weights,dict)) + for key,val in weights.items(): + assert(isinstance(key,str)) + assert(isinstance(val,int)) + self._fieldweights = weights + + + def SetIndexWeights (self, weights): + """ + Bind per-index weights by name; expects (name,index_weight) dictionary as argument. + """ + assert(isinstance(weights,dict)) + for key,val in weights.items(): + assert(isinstance(key,str)) + assert(isinstance(val,int)) + self._indexweights = weights + + + def SetIDRange (self, minid, maxid): + """ + Set IDs range to match. + Only match records if document ID is beetwen $min and $max (inclusive). + """ + assert(isinstance(minid, (int, long))) + assert(isinstance(maxid, (int, long))) + assert(minid<=maxid) + self._min_id = minid + self._max_id = maxid + + + def SetFilter ( self, attribute, values, exclude=0 ): + """ + Set values set filter. + Only match records where 'attribute' value is in given 'values' set. + """ + assert(isinstance(attribute, str)) + assert iter(values) + + for value in values: + assert(isinstance(value, int)) + + self._filters.append ( { 'type':SPH_FILTER_VALUES, 'attr':attribute, 'exclude':exclude, 'values':values } ) + + + def SetFilterRange (self, attribute, min_, max_, exclude=0 ): + """ + Set range filter. + Only match records if 'attribute' value is beetwen 'min_' and 'max_' (inclusive). + """ + assert(isinstance(attribute, str)) + assert(isinstance(min_, int)) + assert(isinstance(max_, int)) + assert(min_<=max_) + + self._filters.append ( { 'type':SPH_FILTER_RANGE, 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_ } ) + + + def SetFilterFloatRange (self, attribute, min_, max_, exclude=0 ): + assert(isinstance(attribute,str)) + assert(isinstance(min_,float)) + assert(isinstance(max_,float)) + assert(min_ <= max_) + self._filters.append ( {'type':SPH_FILTER_FLOATRANGE, 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_} ) + + + def SetGeoAnchor (self, attrlat, attrlong, latitude, longitude): + assert(isinstance(attrlat,str)) + assert(isinstance(attrlong,str)) + assert(isinstance(latitude,float)) + assert(isinstance(longitude,float)) + self._anchor['attrlat'] = attrlat + self._anchor['attrlong'] = attrlong + self._anchor['lat'] = latitude + self._anchor['long'] = longitude + + + def SetGroupBy ( self, attribute, func, groupsort='@group desc' ): + """ + Set grouping attribute and function. 
+ """ + assert(isinstance(attribute, str)) + assert(func in [SPH_GROUPBY_DAY, SPH_GROUPBY_WEEK, SPH_GROUPBY_MONTH, SPH_GROUPBY_YEAR, SPH_GROUPBY_ATTR, SPH_GROUPBY_ATTRPAIR] ) + assert(isinstance(groupsort, str)) + + self._groupby = attribute + self._groupfunc = func + self._groupsort = groupsort + + + def SetGroupDistinct (self, attribute): + assert(isinstance(attribute,str)) + self._groupdistinct = attribute + + + def SetRetries (self, count, delay=0): + assert(isinstance(count,int) and count>=0) + assert(isinstance(delay,int) and delay>=0) + self._retrycount = count + self._retrydelay = delay + + + def SetOverride (self, name, type, values): + assert(isinstance(name, str)) + assert(type in SPH_ATTR_TYPES) + assert(isinstance(values, dict)) + + self._overrides[name] = {'name': name, 'type': type, 'values': values} + + def SetSelect (self, select): + assert(isinstance(select, str)) + self._select = select + + + def ResetOverrides (self): + self._overrides = {} + + + def ResetFilters (self): + """ + Clear all filters (for multi-queries). + """ + self._filters = [] + self._anchor = {} + + + def ResetGroupBy (self): + """ + Clear groupby settings (for multi-queries). + """ + self._groupby = '' + self._groupfunc = SPH_GROUPBY_DAY + self._groupsort = '@group desc' + self._groupdistinct = '' + + + def Query (self, query, index='*', comment=''): + """ + Connect to searchd server and run given search query. + Returns None on failure; result set hash on success (see documentation for details). + """ + assert(len(self._reqs)==0) + self.AddQuery(query,index,comment) + results = self.RunQueries() + + if not results or len(results)==0: + return None + self._error = results[0]['error'] + self._warning = results[0]['warning'] + if results[0]['status'] == SEARCHD_ERROR: + return None + return results[0] + + + def AddQuery (self, query, index='*', comment=''): + """ + Add query to batch. 
+ """ + # build request + req = [pack('>5L', self._offset, self._limit, self._mode, self._ranker, self._sort)] + req.append(pack('>L', len(self._sortby))) + req.append(self._sortby) + + if isinstance(query,unicode): + query = query.encode('utf-8') + assert(isinstance(query,str)) + + req.append(pack('>L', len(query))) + req.append(query) + + req.append(pack('>L', len(self._weights))) + for w in self._weights: + req.append(pack('>L', w)) + req.append(pack('>L', len(index))) + req.append(index) + req.append(pack('>L',1)) # id64 range marker + req.append(pack('>Q', self._min_id)) + req.append(pack('>Q', self._max_id)) + + # filters + req.append ( pack ( '>L', len(self._filters) ) ) + for f in self._filters: + req.append ( pack ( '>L', len(f['attr'])) + f['attr']) + filtertype = f['type'] + req.append ( pack ( '>L', filtertype)) + if filtertype == SPH_FILTER_VALUES: + req.append ( pack ('>L', len(f['values']))) + for val in f['values']: + req.append ( pack ('>q', val)) + elif filtertype == SPH_FILTER_RANGE: + req.append ( pack ('>2q', f['min'], f['max'])) + elif filtertype == SPH_FILTER_FLOATRANGE: + req.append ( pack ('>2f', f['min'], f['max'])) + req.append ( pack ( '>L', f['exclude'] ) ) + + # group-by, max-matches, group-sort + req.append ( pack ( '>2L', self._groupfunc, len(self._groupby) ) ) + req.append ( self._groupby ) + req.append ( pack ( '>2L', self._maxmatches, len(self._groupsort) ) ) + req.append ( self._groupsort ) + req.append ( pack ( '>LLL', self._cutoff, self._retrycount, self._retrydelay)) + req.append ( pack ( '>L', len(self._groupdistinct))) + req.append ( self._groupdistinct) + + # anchor point + if len(self._anchor) == 0: + req.append ( pack ('>L', 0)) + else: + attrlat, attrlong = self._anchor['attrlat'], self._anchor['attrlong'] + latitude, longitude = self._anchor['lat'], self._anchor['long'] + req.append ( pack ('>L', 1)) + req.append ( pack ('>L', len(attrlat)) + attrlat) + req.append ( pack ('>L', len(attrlong)) + attrlong) + req.append ( pack ('>f', latitude) + pack ('>f', longitude)) + + # per-index weights + req.append ( pack ('>L',len(self._indexweights))) + for indx,weight in self._indexweights.items(): + req.append ( pack ('>L',len(indx)) + indx + pack ('>L',weight)) + + # max query time + req.append ( pack ('>L', self._maxquerytime) ) + + # per-field weights + req.append ( pack ('>L',len(self._fieldweights) ) ) + for field,weight in self._fieldweights.items(): + req.append ( pack ('>L',len(field)) + field + pack ('>L',weight) ) + + # comment + req.append ( pack('>L',len(comment)) + comment ) + + # attribute overrides + req.append ( pack('>L', len(self._overrides)) ) + for v in self._overrides.values(): + req.extend ( ( pack('>L', len(v['name'])), v['name'] ) ) + req.append ( pack('>LL', v['type'], len(v['values'])) ) + for id, value in v['values'].iteritems(): + req.append ( pack('>Q', id) ) + if v['type'] == SPH_ATTR_FLOAT: + req.append ( pack('>f', value) ) + elif v['type'] == SPH_ATTR_BIGINT: + req.append ( pack('>q', value) ) + else: + req.append ( pack('>l', value) ) + + # select-list + req.append ( pack('>L', len(self._select)) ) + req.append ( self._select ) + + # send query, get response + req = ''.join(req) + + self._reqs.append(req) + return + + + def RunQueries (self): + """ + Run queries batch. + Returns None on network IO failure; or an array of result set hashes on success. 
+ """ + if len(self._reqs)==0: + self._error = 'no queries defined, issue AddQuery() first' + return None + + sock = self._Connect() + if not sock: + return None + + req = ''.join(self._reqs) + length = len(req)+4 + req = pack('>HHLL', SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, length, len(self._reqs))+req + sock.send(req) + + response = self._GetResponse(sock, VER_COMMAND_SEARCH) + if not response: + return None + + nreqs = len(self._reqs) + + # parse response + max_ = len(response) + p = 0 + + results = [] + for i in range(0,nreqs,1): + result = {} + results.append(result) + + result['error'] = '' + result['warning'] = '' + status = unpack('>L', response[p:p+4])[0] + p += 4 + result['status'] = status + if status != SEARCHD_OK: + length = unpack('>L', response[p:p+4])[0] + p += 4 + message = response[p:p+length] + p += length + + if status == SEARCHD_WARNING: + result['warning'] = message + else: + result['error'] = message + continue + + # read schema + fields = [] + attrs = [] + + nfields = unpack('>L', response[p:p+4])[0] + p += 4 + while nfields>0 and pL', response[p:p+4])[0] + p += 4 + fields.append(response[p:p+length]) + p += length + + result['fields'] = fields + + nattrs = unpack('>L', response[p:p+4])[0] + p += 4 + while nattrs>0 and pL', response[p:p+4])[0] + p += 4 + attr = response[p:p+length] + p += length + type_ = unpack('>L', response[p:p+4])[0] + p += 4 + attrs.append([attr,type_]) + + result['attrs'] = attrs + + # read match count + count = unpack('>L', response[p:p+4])[0] + p += 4 + id64 = unpack('>L', response[p:p+4])[0] + p += 4 + + # read matches + result['matches'] = [] + while count>0 and pQL', response[p:p+12]) + p += 12 + else: + doc, weight = unpack('>2L', response[p:p+8]) + p += 8 + + match = { 'id':doc, 'weight':weight, 'attrs':{} } + for i in range(len(attrs)): + if attrs[i][1] == SPH_ATTR_FLOAT: + match['attrs'][attrs[i][0]] = unpack('>f', response[p:p+4])[0] + elif attrs[i][1] == SPH_ATTR_BIGINT: + match['attrs'][attrs[i][0]] = unpack('>q', response[p:p+8])[0] + p += 4 + elif attrs[i][1] == (SPH_ATTR_MULTI | SPH_ATTR_INTEGER): + match['attrs'][attrs[i][0]] = [] + nvals = unpack('>L', response[p:p+4])[0] + p += 4 + for n in range(0,nvals,1): + match['attrs'][attrs[i][0]].append(unpack('>L', response[p:p+4])[0]) + p += 4 + p -= 4 + else: + match['attrs'][attrs[i][0]] = unpack('>L', response[p:p+4])[0] + p += 4 + + result['matches'].append ( match ) + + result['total'], result['total_found'], result['time'], words = unpack('>4L', response[p:p+16]) + + result['time'] = '%.3f' % (result['time']/1000.0) + p += 16 + + result['words'] = [] + while words>0: + words -= 1 + length = unpack('>L', response[p:p+4])[0] + p += 4 + word = response[p:p+length] + p += length + docs, hits = unpack('>2L', response[p:p+8]) + p += 8 + + result['words'].append({'word':word, 'docs':docs, 'hits':hits}) + + self._reqs = [] + return results + + + def BuildExcerpts (self, docs, index, words, opts=None): + """ + Connect to searchd server and generate exceprts from given documents. + """ + if not opts: + opts = {} + if isinstance(words,unicode): + words = words.encode('utf-8') + + assert(isinstance(docs, list)) + assert(isinstance(index, str)) + assert(isinstance(words, str)) + assert(isinstance(opts, dict)) + + sock = self._Connect() + + if not sock: + return None + + # fixup options + opts.setdefault('before_match', '') + opts.setdefault('after_match', '') + opts.setdefault('chunk_separator', ' ... 
') + opts.setdefault('limit', 256) + opts.setdefault('around', 5) + + # build request + # v.1.0 req + + flags = 1 # (remove spaces) + if opts.get('exact_phrase'): flags |= 2 + if opts.get('single_passage'): flags |= 4 + if opts.get('use_boundaries'): flags |= 8 + if opts.get('weight_order'): flags |= 16 + + # mode=0, flags + req = [pack('>2L', 0, flags)] + + # req index + req.append(pack('>L', len(index))) + req.append(index) + + # req words + req.append(pack('>L', len(words))) + req.append(words) + + # options + req.append(pack('>L', len(opts['before_match']))) + req.append(opts['before_match']) + + req.append(pack('>L', len(opts['after_match']))) + req.append(opts['after_match']) + + req.append(pack('>L', len(opts['chunk_separator']))) + req.append(opts['chunk_separator']) + + req.append(pack('>L', int(opts['limit']))) + req.append(pack('>L', int(opts['around']))) + + # documents + req.append(pack('>L', len(docs))) + for doc in docs: + if isinstance(doc,unicode): + doc = doc.encode('utf-8') + assert(isinstance(doc, str)) + req.append(pack('>L', len(doc))) + req.append(doc) + + req = ''.join(req) + + # send query, get response + length = len(req) + + # add header + req = pack('>2HL', SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, length)+req + wrote = sock.send(req) + + response = self._GetResponse(sock, VER_COMMAND_EXCERPT ) + if not response: + return [] + + # parse response + pos = 0 + res = [] + rlen = len(response) + + for i in range(len(docs)): + length = unpack('>L', response[pos:pos+4])[0] + pos += 4 + + if pos+length > rlen: + self._error = 'incomplete reply' + return [] + + res.append(response[pos:pos+length]) + pos += length + + return res + + + def UpdateAttributes ( self, index, attrs, values ): + """ + Update given attribute values on given documents in given indexes. + Returns amount of updated documents (0 or more) on success, or -1 on failure. + + 'attrs' must be a list of strings. + 'values' must be a dict with int key (document ID) and list of int values (new attribute values). + + Example: + res = cl.UpdateAttributes ( 'test1', [ 'group_id', 'date_added' ], { 2:[123,1000000000], 4:[456,1234567890] } ) + """ + assert ( isinstance ( index, str ) ) + assert ( isinstance ( attrs, list ) ) + assert ( isinstance ( values, dict ) ) + for attr in attrs: + assert ( isinstance ( attr, str ) ) + for docid, entry in values.items(): + assert ( isinstance ( docid, int ) ) + assert ( isinstance ( entry, list ) ) + assert ( len(attrs)==len(entry) ) + for val in entry: + assert ( isinstance ( val, int ) ) + + # build request + req = [ pack('>L',len(index)), index ] + + req.append ( pack('>L',len(attrs)) ) + for attr in attrs: + req.append ( pack('>L',len(attr)) + attr ) + + req.append ( pack('>L',len(values)) ) + for docid, entry in values.items(): + req.append ( pack('>Q',docid) ) + for val in entry: + req.append ( pack('>L',val) ) + + # connect, send query, get response + sock = self._Connect() + if not sock: + return None + + req = ''.join(req) + length = len(req) + req = pack ( '>2HL', SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, length ) + req + wrote = sock.send ( req ) + + response = self._GetResponse ( sock, VER_COMMAND_UPDATE ) + if not response: + return -1 + + # parse response + updated = unpack ( '>L', response[0:4] )[0] + return updated + + + def BuildKeywords ( self, query, index, hits ): + """ + Connect to searchd server, and generate keywords list for a given query. + Returns None on failure, or a list of keywords on success. 
+ """ + assert ( isinstance ( query, str ) ) + assert ( isinstance ( index, str ) ) + assert ( isinstance ( hits, int ) ) + + # build request + req = [ pack ( '>L', len(query) ) + query ] + req.append ( pack ( '>L', len(index) ) + index ) + req.append ( pack ( '>L', hits ) ) + + # connect, send query, get response + sock = self._Connect() + if not sock: + return None + + req = ''.join(req) + length = len(req) + req = pack ( '>2HL', SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, length ) + req + wrote = sock.send ( req ) + + response = self._GetResponse ( sock, VER_COMMAND_KEYWORDS ) + if not response: + return None + + # parse response + res = [] + + nwords = unpack ( '>L', response[0:4] )[0] + p = 4 + max_ = len(response) + + while nwords>0 and pL', response[p:p+4] )[0] + p += 4 + tokenized = response[p:p+length] + p += length + + length = unpack ( '>L', response[p:p+4] )[0] + p += 4 + normalized = response[p:p+length] + p += length + + entry = { 'tokenized':tokenized, 'normalized':normalized } + if hits: + entry['docs'], entry['hits'] = unpack ( '>2L', response[p:p+8] ) + p += 8 + + res.append ( entry ) + + if nwords>0 or p>max_: + self._error = 'incomplete reply' + return None + + return res + + ### persistent connections + + def Open(self): + if self._socket: + self._error = 'already connected' + return + + server = self._Connect() + if not server: + return + + # command, command version = 0, body length = 4, body = 1 + request = pack ( '>hhII', SEARCHD_COMMAND_PERSIST, 0, 4, 1 ) + server.send ( request ) + + self._socket = server + + def Close(self): + if not self._socket: + self._error = 'not connected' + return + self._socket.close() + self._socket = None + + def EscapeString(self, string): + return re.sub(r"([=\(\)|\-!@~\"&/\\\^\$\=])", r"\\\1", string) + +# +# $Id$ +# diff --git a/apps/search/tests.py b/apps/search/tests.py new file mode 100644 index 0000000000..3e8dace6ff --- /dev/null +++ b/apps/search/tests.py @@ -0,0 +1,71 @@ +""" +Tests for the search (sphinx) app. +""" +import os +import shutil +import time + +from django.test import TransactionTestCase + +from nose.tools import eq_ + +from .utils import start_sphinx, stop_sphinx, reindex, convert_version +from .client import Client as SearchClient + + +def test_convert_version(): + def c(x, y): + x = convert_version(x) + y = convert_version(y) + + if (x > y): + return 1 + elif (x < y): + return - 1 + + return 0 + + v = ['1.9.0a1pre', '1.9.0a1', '1.9.1.b5', '1.9.1.b5', '1.9.1pre', \ + '1.9.1', '1.9.0'] + + eq_(c(v[0],v[1]), -1) + eq_(c(v[1],v[2]), -1) + eq_(c(v[2],v[3]), 0) + eq_(c(v[3],v[4]), -1) + eq_(c(v[4],v[5]), -1) + eq_(c(v[5],v[6]), 1) + + +class SphinxTest(TransactionTestCase): + + fixtures = ["search/sphinx.json"] + sphinx = True + + def setUp(self): + os.environ['DJANGO_ENVIRONMENT'] = 'test' + + if os.path.exists('/tmp/data/sphinx'): + shutil.rmtree('/tmp/data/sphinx') + if os.path.exists('/tmp/log/searchd'): + shutil.rmtree('/tmp/log/searchd') + + os.makedirs('/tmp/data/sphinx') + os.makedirs('/tmp/log/searchd') + reindex() + start_sphinx() + time.sleep(1) + + + def tearDown(self): + stop_sphinx() + + def test_sphinx_indexer(self): + """ + This tests that sphinx will properly index an addon. + """ + + # we have to specify to sphinx to look at test_ dbs + c = SearchClient() + results = c.query('Delicious') + assert results[0]['attrs']['addon_id'] == 3615, \ + "Didn't get the addon ID I wanted." 
diff --git a/apps/search/utils.py b/apps/search/utils.py
new file mode 100644
index 0000000000..a0ded2ebb9
--- /dev/null
+++ b/apps/search/utils.py
@@ -0,0 +1,139 @@
+import subprocess
+import zlib
+import re
+
+from django.conf import settings
+
+from amo import constants as const
+
+
+def reindex():
+    """
+    Reindexes sphinx.  Note this is only to be used in dev and test
+    environments.
+    """
+
+    subprocess.call([settings.SPHINX_INDEXER, '--all', '--rotate',
+                     '--config', settings.SPHINX_CONFIG_PATH])
+
+
+def start_sphinx():
+    """
+    Starts sphinx.  Note this is only to be used in dev and test environments.
+    """
+
+    subprocess.Popen([settings.SPHINX_SEARCHD, '--config',
+                      settings.SPHINX_CONFIG_PATH])
+
+
+def stop_sphinx():
+    """
+    Stops sphinx.  Note this is only to be used in dev and test environments.
+    """
+
+    subprocess.call([settings.SPHINX_SEARCHD, '--stop', '--config',
+                     settings.SPHINX_CONFIG_PATH])
+
+pattern = re.compile(r"""(\d+)      # major (x in x.y)
+                         \.(\d+)   # minor1 (y in x.y)
+                         \.?(\d+)? # minor2 (z in x.y.z)
+                         \.?(\d+)? # minor3 (w in x.y.z.w)
+                         ([a|b]?)  # alpha/beta
+                         (\d*)     # alpha/beta version
+                         (pre)?    # pre release
+                         (\d)?     # pre release version""", re.VERBOSE)
+pattern_plus = re.compile(r'((\d+)\+)')
+
+
+def convert_type(type):
+    if type == 'extension' or type == 'extensions':
+        return const.ADDON_EXTENSION
+    elif type == 'theme' or type == 'themes':
+        return const.ADDON_THEME
+    elif type == 'dict' or type == 'dicts':
+        return const.ADDON_DICT
+    elif type == 'language' or type == 'languages':
+        return const.ADDON_LPAPP
+    elif type == 'plugin' or type == 'plugins':
+        return const.ADDON_PLUGIN
+
+
+def convert_version(version_string):
+    """
+    This will enumerate a version so that it can be used for comparisons and
+    indexing.
+    """
+
+    # Replace .x or .* with .99 since these are equivalent.
+    version_string = version_string.replace('.x', '.99')
+    version_string = version_string.replace('.*', '.99')
+
+    # Replace \d+\+ with $1++pre0 (e.g. 2.1+ => 2.2pre0).
+
+    match = re.search(pattern_plus, version_string)
+
+    if match:
+        (old, ver) = match.groups()
+        replacement = "%dpre0" % (int(ver) + 1)
+        version_string = version_string.replace(old, replacement)
+
+    # Now we break up a version into components.
+    #
+    # e.g. 3.7.2.1b3pre3
+    # we break into:
+    # major => 3
+    # minor1 => 7
+    # minor2 => 2
+    # minor3 => 1
+    # alpha => b => 1
+    # alpha_n => 3
+    # pre => 0
+    # pre_n => 3
+    #
+    # Alpha is 0,1,2 based on whether a version is alpha, beta or a release.
+    # Pre is 0 or 1.  0 indicates that this is a pre-release.
+    #
+    # The numbers are chosen based on sorting rules, not for any deep meaning.
+
+    match = re.match(pattern, version_string)
+
+    if match:
+        (major, minor1, minor2, minor3, alpha, alpha_n, pre,
+         pre_n) = match.groups()
+
+        # normalize data
+        major = int(major)
+        minor1 = int(minor1)
+        minor2 = int(minor2) if minor2 else 0
+        minor3 = int(minor3) if minor3 else 0
+
+        if alpha == 'a':
+            alpha = 0
+        elif alpha == 'b':
+            alpha = 1
+        else:
+            alpha = 2
+
+        if alpha_n:
+            alpha_n = int(alpha_n)
+        else:
+            alpha_n = 0
+
+        if pre == 'pre':
+            pre = 0
+        else:
+            pre = 1
+
+        if pre_n:
+            pre_n = int(pre_n)
+        else:
+            pre_n = 0
+
+        # We recombine everything into a single large integer.
+ int_str = ("%02d%02d%02d%02d%d%02d%d%02d" + % (major, minor1, minor2, minor3, alpha, alpha_n, pre, pre_n) ) + + return int(int_str) + + +crc32 = lambda x: zlib.crc32(x) & 0xffffffff diff --git a/apps/search/views.py b/apps/search/views.py new file mode 100644 index 0000000000..60f00ef0ef --- /dev/null +++ b/apps/search/views.py @@ -0,0 +1 @@ +# Create your views here. diff --git a/apps/translations/fields.py b/apps/translations/fields.py index 6f314b623c..ed7eb5eb52 100644 --- a/apps/translations/fields.py +++ b/apps/translations/fields.py @@ -3,8 +3,10 @@ from django.conf import settings from django.db import models from django.db.models.fields import related from django.utils import translation as translation_utils +from django.utils.translation.trans_real import to_language from .models import Translation +from .widgets import TranslationWidget class TranslatedField(models.ForeignKey): @@ -50,6 +52,11 @@ class TranslatedField(models.ForeignKey): defaults.update(kw) return super(TranslatedField, self).formfield(**defaults) + def validate(self, value, model_instance): + # Skip ForeignKey.validate since that expects only one Translation when + # doing .get(id=id) + return models.Field.validate(self, value, model_instance) + class TranslationDescriptor(related.ReverseSingleRelatedObjectDescriptor): """ @@ -70,32 +77,60 @@ class TranslationDescriptor(related.ReverseSingleRelatedObjectDescriptor): return None def __set__(self, instance, value): + lang = translation_utils.get_language() if isinstance(value, basestring): - lang = translation_utils.get_language() - try: - trans = getattr(instance, self.field.name) - trans_id = getattr(instance, self.field.attname) - if trans is None and trans_id is not None: - # This locale doesn't have a translation set, but there are - # translations in another locale, so we have an id already. - trans = Translation.new(value, lang, id=trans_id) - elif trans.locale.lower() == lang.lower(): - # Replace the translation in the current language. - trans.localized_string = value - trans.save() - else: - # We already have a translation in a different language. - trans = Translation.new(value, lang, id=trans.id) - except AttributeError: - # Create a brand new translation. - trans = Translation.new(value, lang) - value = trans + value = self.translation_from_string(instance, lang, value) + elif hasattr(value, 'items'): + value = self.translation_from_dict(instance, lang, value) + # Don't let this be set to None, because Django will then blank out the # foreign key for this object. That's incorrect for translations. if value is not None: super(TranslationDescriptor, self).__set__(instance, value) + def translation_from_string(self, instance, lang, string): + """Create, save, and return a Translation from a string.""" + try: + trans = getattr(instance, self.field.name) + trans_id = getattr(instance, self.field.attname) + if trans is None and trans_id is not None: + # This locale doesn't have a translation set, but there are + # translations in another locale, so we have an id already. + return Translation.new(string, lang, id=trans_id) + elif to_language(trans.locale) == lang.lower(): + # Replace the translation in the current language. + trans.localized_string = string + trans.save() + return trans + else: + # We already have a translation in a different language. + return Translation.new(string, lang, id=trans.id) + except AttributeError: + # Create a brand new translation. 
+ return Translation.new(string, lang) + + def translation_from_dict(self, instance, lang, dict_): + """ + Create Translations from a {'locale': 'string'} mapping. + + If one of the locales matches lang, that Translation will be returned. + """ + rv = None + for locale, string in dict_.items(): + # The Translation is created and saved in here. + trans = self.translation_from_string(instance, locale, string) + + # Set the Translation on the object because translation_from_string + # doesn't expect Translations to be created but not attached. + self.__set__(instance, trans) + + # If we're setting the current locale, set it to the object so + # callers see the expected effect. + if to_language(locale) == lang: + rv = trans + return rv + class TranslatedFieldMixin(object): """Mixin that fetches all ``TranslatedFields`` after instantiation.""" @@ -138,7 +173,8 @@ def translations_with_fallback(ids, lang, default): if not ids: return [] - fetched = Translation.objects.filter(id__in=ids, locale=lang) + fetched = Translation.objects.filter(id__in=ids, locale=lang, + localized_string__isnull=False) # Try to find any missing translations in the default locale. missing = set(ids).difference(t.id for t in fetched) @@ -149,25 +185,6 @@ def translations_with_fallback(ids, lang, default): return fetched -class TranslationWidget(forms.widgets.Textarea): - - # Django expects ForeignKey widgets to have a choices attribute. - choices = None - - def render(self, name, value, attrs=None): - lang = translation_utils.get_language() - try: - trans_id = int(value) - try: - trans = Translation.objects.get(id=trans_id, locale=lang) - value = trans.localized_string - except Translation.DoesNotExist: - value = '' - except (TypeError, ValueError): - pass - return super(TranslationWidget, self).render(name, value, attrs) - - class TranslationFormField(forms.Field): widget = TranslationWidget @@ -175,3 +192,6 @@ class TranslationFormField(forms.Field): del kwargs['queryset'] del kwargs['to_field_name'] super(TranslationFormField, self).__init__(*args, **kwargs) + + def clean(self, value): + return dict(value) diff --git a/apps/translations/models.py b/apps/translations/models.py index 9d18edcff5..2edd6e52bc 100644 --- a/apps/translations/models.py +++ b/apps/translations/models.py @@ -14,7 +14,7 @@ class Translation(caching.CachingMixin, models.Model): autoid = models.AutoField(primary_key=True) id = models.IntegerField() locale = models.CharField(max_length=10) - localized_string = models.TextField() + localized_string = models.TextField(null=True) # These are normally from amo.ModelBase, but we don't want to have weird # circular dependencies between ModelBase and Translations. @@ -56,8 +56,16 @@ class Translation(caching.CachingMixin, models.Model): cursor.execute('SELECT LAST_INSERT_ID() FROM translations_seq') id = cursor.fetchone()[0] - return Translation.objects.create(id=id, localized_string=string, - locale=locale) + # Update if one exists, otherwise create a new one. 
+ q = {'id': id, 'locale': locale} + try: + trans = Translation.objects.get(**q) + trans.localized_string = string + trans.save(force_update=True) + except Translation.DoesNotExist: + trans = Translation.objects.create(localized_string=string, **q) + + return trans class TranslationSequence(models.Model): diff --git a/apps/translations/templates/translations/transbox.html b/apps/translations/templates/translations/transbox.html new file mode 100644 index 0000000000..29fbe08188 --- /dev/null +++ b/apps/translations/templates/translations/transbox.html @@ -0,0 +1,25 @@ +
+{# Avoid flash-of-unstyled-content #}
+{% for lang, widget in widgets|dictsort %}
+  {{ lang }}
+{% endfor %}
+{% for lang, widget in widgets|dictsort %}
+  {{ widget|safe }}
+{% endfor %}
+{{ _('Select a locale to add a new {0}.')|f(name) }}
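The transbox template above renders one entry per locale; on submission, TranslationWidget.value_from_datadict in apps/translations/widgets.py (later in this patch) folds the {name}_{locale} fields back into a single dict, mapping a *_delete field to None. A sketch of that convention, mirroring test_widget_value_from_datadict below:

    from translations.widgets import TranslationWidget

    data = {'name_en-US': 'Hello', 'name_de': 'Hallo', 'name_fr_delete': ''}
    TranslationWidget().value_from_datadict(data, [], 'name')
    # => {'en-US': 'Hello', 'de': 'Hallo', 'fr': None}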
diff --git a/apps/translations/tests/test_models.py b/apps/translations/tests/test_models.py index 63e1432432..c985be671e 100644 --- a/apps/translations/tests/test_models.py +++ b/apps/translations/tests/test_models.py @@ -4,14 +4,19 @@ from nose.tools import eq_ from test_utils import ExtraAppTestCase, trans_eq +from caching import cache from testapp.models import TranslatedModel, UntranslatedModel -from translations.models import Translation, TranslationSequence +from translations.models import Translation +from translations import widgets class TranslationTestCase(ExtraAppTestCase): fixtures = ['testapp/test_models.json'] extra_apps = ['translations.tests.testapp'] + def setUp(self): + cache.clear() + def test_fetch_translations(self): """Basic check of fetching translations in the current locale.""" o = TranslatedModel.objects.get(id=1) @@ -89,6 +94,58 @@ class TranslationTestCase(ExtraAppTestCase): # Make sure it was an update, not an insert. eq_(o.name.autoid, translation_id) + def test_create_with_dict(self): + # Set translations with a dict. + strings = {'en-US': 'right language', 'de': 'wrong language'} + o = TranslatedModel.objects.create(name=strings) + + # Make sure we get the English text since we're in en-US. + trans_eq(o.name, 'right language', 'en-US') + + # Check that de was set. + translation.activate('de') + o = TranslatedModel.objects.get(id=o.id) + trans_eq(o.name, 'wrong language', 'de') + + # We're in de scope, so we should see the de text. + de = TranslatedModel.objects.create(name=strings) + trans_eq(o.name, 'wrong language', 'de') + + # Make sure en-US was still set. + translation.deactivate() + o = TranslatedModel.objects.get(id=de.id) + trans_eq(o.name, 'right language', 'en-US') + + def test_update_with_dict(self): + # There's existing en-US and de strings. + strings = {'de': None, 'fr': 'oui'} + get_model = lambda: TranslatedModel.objects.get(id=1) + + # Don't try checking that the model's name value is en-US. It will be + # one of the other locales, but we don't know which one. You just set + # the name to a dict, deal with it. + get_model().name = strings + + # en-US was not touched. + trans_eq(get_model().name, 'some name', 'en-US') + + # de was updated to NULL, so it falls back to en-US. + translation.activate('de') + trans_eq(get_model().name, 'some name', 'en-US') + + # fr was added. + translation.activate('fr') + trans_eq(get_model().name, 'oui', 'fr') + + def test_widget(self): + strings = {'de': None, 'fr': 'oui'} + o = TranslatedModel.objects.get(id=1) + o.name = strings + + # Shouldn't see de since that's NULL now. 
+        ws = widgets.trans_widgets(o.name_id, lambda *args: None)
+        eq_(sorted(dict(ws).keys()), ['en-us', 'fr'])
+
 
 
 def test_translation_bool():
     t = lambda s: Translation(localized_string=s)
@@ -96,3 +153,10 @@
     assert bool(t('text')) is True
     assert bool(t(' ')) is False
     assert bool(t('')) is False
+
+
+def test_widget_value_from_datadict():
+    data = {'f_en-US': 'woo', 'f_de': 'herr', 'f_fr_delete': ''}
+    actual = widgets.TranslationWidget().value_from_datadict(data, [], 'f')
+    expected = {'en-US': 'woo', 'de': 'herr', 'fr': None}
+    eq_(actual, expected)
diff --git a/apps/translations/widgets.py b/apps/translations/widgets.py
new file mode 100644
index 0000000000..9cabf6f457
--- /dev/null
+++ b/apps/translations/widgets.py
@@ -0,0 +1,78 @@
+from django import forms
+from django.conf import settings
+from django.forms.util import flatatt
+from django.utils import translation
+from django.utils.translation.trans_real import to_language
+
+import jinja2
+
+import jingo
+
+from .models import Translation
+
+
+attrs = 'name="{name}_{locale}" data-locale="{locale}" {attrs}'
+input = u'<input %s value="{value}">' % attrs
+textarea = u'<textarea %s>{value}</textarea>' % attrs
+
+
+class TranslationWidget(forms.widgets.Textarea):
+
+    # Django expects ForeignKey widgets to have a choices attribute.
+    choices = None
+
+    def render(self, name, value, attrs=None):
+        attrs = self.build_attrs(attrs)
+        widget = widget_builder(name, attrs)
+        id = attrs.pop('id')
+
+        lang = translation.get_language()
+        widgets = {}
+        widgets[lang] = widget(lang, value='')
+
+        try:
+            trans_id = int(value)
+            widgets.update(trans_widgets(trans_id, widget))
+        except (TypeError, ValueError):
+            pass
+
+        languages = dict((to_language(k), v)
+                         for k, v in settings.LANGUAGES.items())
+
+        template = jingo.env.get_template('translations/transbox.html')
+        return template.render(id=id, name=name, widgets=widgets,
+                               languages=languages)
+
+    def value_from_datadict(self, data, files, name):
+        # All the translations for this field are called {name}_{locale}, so
+        # pull out everything that starts with name.
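+        # A plain '{name}_{locale}' key carries the submitted string; a
+        # '{name}_{locale}_delete' key maps to None, so that locale's string
+        # gets NULLed out and reads fall back to the default locale.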
+        rv = {}
+        prefix = '%s_' % name
+        locale = lambda s: s[len(prefix):]
+        delete_locale = lambda s: s[len(prefix):-len('_delete')]
+        for key in data:
+            if key.startswith(prefix):
+                if key.endswith('_delete'):
+                    rv[delete_locale(key)] = None
+                else:
+                    rv[locale(key)] = data[key]
+        return rv
+
+
+def trans_widgets(trans_id, widget):
+    translations = (Translation.objects.filter(id=trans_id)
+                    .filter(localized_string__isnull=False)
+                    .values_list('locale', 'localized_string'))
+    return [(to_language(locale), widget(locale, val))
+            for locale, val in translations if val is not None]
+
+
+def widget_builder(name, attrs):
+    def widget(locale, value):
+        locale = to_language(locale)
+        value = jinja2.escape(value)
+        attrs_ = dict(id='trans_%s_%s' % (name, locale), **attrs)
+        return textarea.format(name=name, locale=locale,
+                               attrs=flatatt(attrs_), value=value)
+    return widget
diff --git a/apps/versions/models.py b/apps/versions/models.py
index 050ec7c07b..6db5c2fcee 100644
--- a/apps/versions/models.py
+++ b/apps/versions/models.py
@@ -2,14 +2,16 @@ from django.db import models
 
 import amo
 from addons.models import Addon
+
 from users.models import UserProfile
-from applications.models import Application
+from applications.models import Application, AppVersion
+
 from translations.fields import TranslatedField
 
 
 class Version(amo.ModelBase):
     addon = models.ForeignKey(Addon)
-    license = models.ForeignKey('License')
+    license = models.ForeignKey('License', null=True)
     releasenotes = TranslatedField()
     approvalnotes = models.TextField()
     version = models.CharField(max_length=255, default=0)
@@ -47,3 +49,14 @@ class VersionSummary(amo.ModelBase):
 
     class Meta(amo.ModelBase.Meta):
         db_table = 'versions_summary'
+
+
+class ApplicationsVersions(models.Model):
+
+    application = models.ForeignKey(Application)
+    version = models.ForeignKey(Version)
+    min = models.ForeignKey(AppVersion, db_column='min', related_name='min_set')
+    max = models.ForeignKey(AppVersion, db_column='max', related_name='max_set')
+
+    class Meta:
+        db_table = u'applications_versions'
diff --git a/configs b/configs
new file mode 160000
index 0000000000..d9ba389ed4
--- /dev/null
+++ b/configs
@@ -0,0 +1 @@
+Subproject commit d9ba389ed4011f745d878967c5a39cd1fa236106
diff --git a/fabfile.py b/fabfile.py
index 1860c2d805..f820d12ab2 100644
--- a/fabfile.py
+++ b/fabfile.py
@@ -3,5 +3,9 @@ from fabric.api import local
 def pep8():
     local("pep8 --repeat --ignore E221"
           " --exclude *.sh,*.html,*.json,*.txt,*.pyc,.DS_Store,README,"
-          "migrations"
+          "migrations,sphinxapi.py"
           " apps", capture=False)
+
+
+def test():
+    local("python manage.py test --noinput --logging-clear-handlers")
diff --git a/lib/test_utils/__init__.py b/lib/test_utils/__init__.py
index cde7e20774..9ef3db242c 100644
--- a/lib/test_utils/__init__.py
+++ b/lib/test_utils/__init__.py
@@ -3,13 +3,13 @@ from django.conf import settings
 from django.core import management
 from django.db.models import loading
 from django.utils.encoding import smart_unicode as unicode
+from django.utils.translation.trans_real import to_language
 
 from nose.tools import eq_
 from nose import SkipTest
 from selenium import selenium
 
 import jinja2
-
 # We only want to run through setup_test_environment once.
 IS_SETUP = False
@@ -89,7 +89,7 @@ class SeleniumTestCase(TestCase):
 
 # Comparisons
 def locale_eq(a, b):
-    eq_(a.lower(), b.lower())
+    eq_(*map(to_language, [a, b]))
 
 
 def trans_eq(translation, string, locale=None):
diff --git a/media/css b/media/css
index 994913f364..e28d567487 160000
--- a/media/css
+++ b/media/css
@@ -1 +1 @@
-Subproject commit 994913f36473d6fd9f6bfb9e06507512582970e7
+Subproject commit e28d567487396ee0ee17a1737396a027528b4260
diff --git a/media/js b/media/js
index 2696899cf6..94a9385fae 160000
--- a/media/js
+++ b/media/js
@@ -1 +1 @@
-Subproject commit 2696899cf67c0074b7b0b01892f7c921904735eb
+Subproject commit 94a9385fae59a16225468b91384f033f7dd4ad51
diff --git a/settings.py b/settings.py
index 5d85c71620..062254d426 100644
--- a/settings.py
+++ b/settings.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 # Django settings for zamboni project.
 
 import os
@@ -46,7 +47,41 @@ TIME_ZONE = 'America/Los_Angeles'
 
 LANGUAGE_CODE = 'en-US'
 
 # Accepted locales and apps
-LANGUAGES = {'en-US': 'English (US)', 'ja': 'Japanese'}
+LANGUAGES = {
+    'ar': u'عربي',
+    'ca': u'català',
+    'cs': u'Čeština',
+    'da': u'Dansk',
+    'de': u'Deutsch',
+    'el': u'Ελληνικά',
+    'en-US': u'English (US)',
+    'es-ES': u'Español (de España)',
+    'eu': u'Euskara',
+    'fa': u'فارسی',
+    'fi': u'suomi',
+    'fr': u'Français',
+    'ga-IE': u'Gaeilge',
+    'he': u'עברית',
+    'hu': u'Magyar',
+    'id': u'Bahasa Indonesia',
+    'it': u'Italiano',
+    'ja': u'日本語',
+    'ko': u'한국어',
+    'mn': u'Монгол',
+    'nl': u'Nederlands',
+    'pl': u'Polski',
+    'pt-BR': u'Português (do Brasil)',
+    'pt-PT': u'Português (Europeu)',
+    'ro': u'română',
+    'ru': u'Русский',
+    'sk': u'slovenčina',
+    'sq': u'Shqip',
+    'sv-SE': u'Svenska',
+    'uk': u'Українська',
+    'vi': u'Tiếng Việt',
+    'zh-CN': u'中文 (简体)',
+    'zh-TW': u'正體中文 (繁體)',
+}
 
 SITE_ID = 1
@@ -125,6 +160,7 @@ INSTALLED_APPS = (
     'editors',
     'files',
     'reviews',
+    'search',
     'tags',
     'translations',
     'users',
@@ -168,3 +204,9 @@ DEFAULT_APP = 'firefox'
 CACHE_DURATION = 60 # seconds
 
 AUTH_PROFILE_MODULE = 'users.UserProfile'
+
+SPHINX_INDEXER = 'indexer'
+SPHINX_SEARCHD = 'searchd'
+SPHINX_CONFIG_PATH = path('configs/sphinx/sphinx.conf')
+SPHINX_HOST = '127.0.0.1'
+SPHINX_PORT = 3312
diff --git a/templates/base.html b/templates/base.html
index f0f142d030..1711e1b510 100644
--- a/templates/base.html
+++ b/templates/base.html
@@ -54,37 +54,20 @@
 [hunk body lost in extraction: it replaces the old hard-coded <link> and
 <script> tags in the page, including an inline analytics snippet that called
 __utmSetVar("Loggedin") for logged-in users, with updated tags and adds a
 trailing {% block js %}{% endblock %} hook]
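Finally, a rough sketch, not from the patch, of how the new 'search' app might reach searchd using the SPHINX_HOST/SPHINX_PORT settings added above. It assumes the stock Sphinx Python client (the sphinxapi.py the pep8 task now excludes); the index name 'addons' is an assumption, since the real index names live in the sphinx.conf shipped in the configs submodule.

    # Query searchd via the classic sphinxapi client; host and port come
    # from the new settings. The 'addons' index name is hypothetical.
    import sphinxapi

    from django.conf import settings

    client = sphinxapi.SphinxClient()
    client.SetServer(settings.SPHINX_HOST, settings.SPHINX_PORT)
    result = client.Query('video', 'addons')
    if result:
        # Each match carries the document id the index was built from.
        ids = [match['id'] for match in result['matches']]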