import calendar
import json
import re
import time
import urllib2
from datetime import datetime, timedelta
from subprocess import Popen, PIPE

from django.conf import settings

from celeryutils import task
import commonware.log
import cronjobs
import phpserialize
import redisutils

import amo
from amo.utils import chunked
from addons.models import Addon, AddonCategory
from addons.utils import AdminActivityLogMigrationTracker, MigrationTracker
from applications.models import Application, AppVersion
from bandwagon.models import Collection
from cake.models import Session
from devhub.models import ActivityLog, LegacyAddonLog
from editors.models import EventLog
from files.models import TestResultCache
from reviews.models import Review
from sharing import SERVICES_LIST
from stats.models import AddonShareCount, Contribution
from users.models import UserProfile

log = commonware.log.getLogger('z.cron')


@task
def _trim_categories(results, app_id, **kw):
    """
    `results` is a list of dicts, e.g.:

        [{'addon_id': 138L, 'num_cats': 4}, ...]
    """
    log.info('[%s@%s] Trimming category-fat add-ons' %
             (len(results), _trim_categories.rate_limit))

    delete_me = []
    pks = [r['addon_id'] for r in results]

    for addon in Addon.objects.filter(pk__in=pks):
        # Keep the first two categories for this app; everything past the
        # slice gets deleted.
        qs = addon.addoncategory_set.filter(category__application=app_id)[2:]
        delete_me.extend(qs.values_list('id', flat=True))

    log.info('Deleting %d add-on categories.' % len(delete_me))
    AddonCategory.objects.filter(pk__in=delete_me).delete()


@cronjobs.register
def gc(test_result=True):
    """Site-wide garbage collections."""
    days_ago = lambda days: datetime.today() - timedelta(days=days)
    one_hour_ago = datetime.today() - timedelta(hours=1)

    log.debug('Collecting data to delete')

    logs = (ActivityLog.objects.filter(created__lt=days_ago(90))
            .exclude(action__in=amo.LOG_KEEP).values_list('id', flat=True))

    # PayPal only retries verification of a transaction for up to 3 days. If
    # we still have an unverified transaction after 6 days, we might as well
    # get rid of it.
    contributions_to_delete = (Contribution.objects
            .filter(transaction_id__isnull=True, created__lt=days_ago(6))
            .values_list('id', flat=True))

    collections_to_delete = (Collection.objects.filter(
            created__lt=days_ago(2), type=amo.COLLECTION_ANONYMOUS)
            .values_list('id', flat=True))

    # Remove incomplete add-ons older than 4 days.
    addons_to_delete = (Addon.objects.filter(
            highest_status=amo.STATUS_NULL, status=amo.STATUS_NULL,
            created__lt=days_ago(4))
            .values_list('id', flat=True))

    for chunk in chunked(logs, 100):
        _delete_logs.delay(chunk)
    for chunk in chunked(contributions_to_delete, 100):
        _delete_stale_contributions.delay(chunk)
    for chunk in chunked(collections_to_delete, 100):
        _delete_anonymous_collections.delay(chunk)
    for chunk in chunked(addons_to_delete, 100):
        _delete_incomplete_addons.delay(chunk)

    log.debug('Cleaning up sharing services.')
    AddonShareCount.objects.exclude(
        service__in=[s.shortname for s in SERVICES_LIST]).delete()

    log.debug('Cleaning up cake sessions.')
    # cake.Session stores its expiry as a Unix timestamp.
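    # For example, calendar.timegm(datetime(2011, 1, 1).utctimetuple())
    # returns 1293840000, the epoch seconds for that UTC date.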
    two_days_ago = calendar.timegm(days_ago(2).utctimetuple())
    Session.objects.filter(expires__lt=two_days_ago).delete()

    log.debug('Cleaning up test results cache.')
    TestResultCache.objects.filter(date__lt=one_hour_ago).delete()

    log.debug('Cleaning up test results extraction cache.')
    if settings.NETAPP_STORAGE and settings.NETAPP_STORAGE != '/':
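        # Shell equivalent of the command built below:
        #   find $NETAPP_STORAGE -maxdepth 1 -name 'validate-*' \
        #        -mtime +7 -type d -exec rm -rf {} ';'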
        cmd = ('find', settings.NETAPP_STORAGE, '-maxdepth', '1', '-name',
               'validate-*', '-mtime', '+7', '-type', 'd',
               '-exec', 'rm', '-rf', '{}', ';')
        output = Popen(cmd, stdout=PIPE).communicate()[0]

        for line in output.split("\n"):
            log.debug(line)
    else:
        log.warning('NETAPP_STORAGE not defined.')

    if settings.COLLECTIONS_ICON_PATH:
        log.debug('Cleaning up uncompressed icons.')

        cmd = ('find', settings.COLLECTIONS_ICON_PATH,
               '-name', '*__unconverted', '-mtime', '+1', '-type', 'f',
               '-exec', 'rm', '{}', ';')
        output = Popen(cmd, stdout=PIPE).communicate()[0]

        for line in output.split("\n"):
            log.debug(line)

    if settings.USERPICS_PATH:
        log.debug('Cleaning up uncompressed userpics.')

        cmd = ('find', settings.USERPICS_PATH,
               '-name', '*__unconverted', '-mtime', '+1', '-type', 'f',
               '-exec', 'rm', '{}', ';')
        output = Popen(cmd, stdout=PIPE).communicate()[0]

        for line in output.split("\n"):
            log.debug(line)


@task
def _delete_logs(items, **kw):
    log.info('[%s@%s] Deleting logs' % (len(items), _delete_logs.rate_limit))
    ActivityLog.objects.filter(pk__in=items).exclude(
        action__in=amo.LOG_KEEP).delete()


@task
def _delete_stale_contributions(items, **kw):
    log.info('[%s@%s] Deleting stale contributions' %
             (len(items), _delete_stale_contributions.rate_limit))
    Contribution.objects.filter(
        transaction_id__isnull=True, pk__in=items).delete()


@task
def _delete_anonymous_collections(items, **kw):
    log.info('[%s@%s] Deleting anonymous collections' %
             (len(items), _delete_anonymous_collections.rate_limit))
    Collection.objects.filter(type=amo.COLLECTION_ANONYMOUS,
                              pk__in=items).delete()


@task
def _delete_incomplete_addons(items, **kw):
    log.info('[%s@%s] Deleting incomplete add-ons' %
             (len(items), _delete_incomplete_addons.rate_limit))
    for addon in Addon.objects.filter(
            highest_status=amo.STATUS_NULL, status=amo.STATUS_NULL,
            pk__in=items):
        try:
            addon.delete('Deleted for incompleteness')
        except Exception as e:
            log.error("Couldn't delete add-on %s: %s" % (addon.id, e))
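

# The migration cron jobs below are resumable: each stores the highest pk it
# has dispatched in a tracker, so a re-run picks up where the last run
# stopped.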
@cronjobs.register
def migrate_admin_logs():
    # Get the highest id we've already migrated.
    tracker = AdminActivityLogMigrationTracker()
    last_id = tracker.get() or 0

    # Only ADD_APPVERSION entries need migrating here.
    items = LegacyAddonLog.objects.filter(
        type=amo.LOG.ADD_APPVERSION.id, pk__gt=last_id).values_list(
        'id', flat=True)
    for chunk in chunked(items, 100):
        _migrate_admin_logs.delay(chunk)
        tracker.set(chunk[-1])


@task
def _migrate_admin_logs(items, **kw):
    log.info('Processing: %d..%d' % (items[0], items[-1]))
    for item in LegacyAddonLog.objects.filter(pk__in=items):
        kw = dict(user=item.user, created=item.created)
        amo.log(amo.LOG.ADD_APPVERSION, (Application, item.object1_id),
                (AppVersion, item.object2_id), **kw)


# TODO(davedash): remove after /editors is on zamboni
@cronjobs.register
def migrate_editor_eventlog():
    tracker = MigrationTracker('eventlog')
    last_id = tracker.get() or 0

    items = EventLog.objects.filter(type='editor', pk__gt=last_id).values_list(
        'id', flat=True)

    for chunk in chunked(items, 100):
        # Run synchronously (no .delay()) so the tracker only advances once
        # the chunk has actually been migrated.
        _migrate_editor_eventlog(chunk)
        tracker.set(chunk[-1])


@task
def _migrate_editor_eventlog(items, **kw):
    log.info('[%s@%s] Migrating eventlog items' %
             (len(items), _migrate_editor_eventlog.rate_limit))
    for item in EventLog.objects.filter(pk__in=items):
        kw = dict(user=item.user, created=item.created)
        if item.action == 'review_delete':
            details = None
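            # item.notes holds PHP-serialized data from the legacy app; an
            # illustrative shape would be 'a:1:{s:6:"reason";s:4:"spam";}'.
            # Rows that fail to parse are migrated without details.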
            try:
                details = phpserialize.loads(item.notes)
            except ValueError:
                pass
            amo.log(amo.LOG.DELETE_REVIEW, item.changed_id, details=details,
                    **kw)
        elif item.action == 'review_approve':
            try:
                r = Review.objects.get(pk=item.changed_id)
                amo.log(amo.LOG.ADD_REVIEW, r, r.addon, **kw)
            except Review.DoesNotExist:
                log.warning("Couldn't find review for %d" % item.changed_id)


@cronjobs.register
def dissolve_outgoing_urls():
    """Over time, some outgoing.m.o URLs were encoded several times in the
    db. This strips the layers of encoding and sets each URL to its real
    value; the app takes care of routing things through outgoing.m.o at
    render time. See bug 608117."""

    needle = 'outgoing.mozilla.org'

    users = (UserProfile.objects.filter(homepage__contains=needle)
             .values_list('id', 'homepage'))

    if not users:
        print "Didn't find any users with messed-up homepages."
        return

    print 'Found %s users to fix. Sending them to celeryd.' % len(users)

    for chunk in chunked(users, 100):
        _dissolve_outgoing_urls.delay(chunk)


@task(rate_limit='60/h')
def _dissolve_outgoing_urls(items, **kw):
    log.info('[%s@%s] Dissolving outgoing urls' %
             (len(items), _dissolve_outgoing_urls.rate_limit))

    regex = re.compile(r'^http://outgoing\.mozilla\.org/v1/[0-9a-f]+/(.*?)$')
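    # Illustration with made-up hashes: a doubly-encoded homepage like
    #   http://outgoing.mozilla.org/v1/aa11/http%3A//outgoing.mozilla.org/v1/bb22/http%253A//example.com
    # unquotes one layer per peel_the_onion() call below, recursing until the
    # regex no longer matches and ending up as http://example.com.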

    def peel_the_onion(url):
        match = regex.match(url)

        if not match:
            return None

        new = urllib2.unquote(match.group(1))
        are_we_there_yet = peel_the_onion(new)  # That's right. You love it.

        if not are_we_there_yet:
            return new
        else:
            return are_we_there_yet

    for user in items:
        url = peel_the_onion(user[1])

        # 20 or so of these point at outgoing.m.o itself, so just whack them.
        if url == 'http://outgoing.mozilla.org':
            url = None

        UserProfile.objects.filter(pk=user[0]).update(homepage=url)


# TODO(davedash): Remove after 5.12.7 is pushed.
@cronjobs.register
def activity_log_scrubber():
    """
    Scan the activity log for ADD_TO_COLLECTION and REMOVE_FROM_COLLECTION
    entries, look for collections in their arguments, and queue any entry
    whose collection is not listed for deletion.
    """
    items = (ActivityLog.objects.filter(
             action__in=[amo.LOG.ADD_TO_COLLECTION.id,
                         amo.LOG.REMOVE_FROM_COLLECTION.id])
             .values('id', '_arguments'))
    ids = []
    count = 0
    # ~127K rows to scan.
    for item in items:
        count += 1
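        # _arguments is a JSON list of one-key dicts mapping a model path to
        # a pk, e.g. (illustrative): [{"bandwagon.collection": 123}].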
        for k in json.loads(item['_arguments']):
            if 'bandwagon.collection' not in k:
                continue
            if not all(Collection.objects.filter(pk=k.values()[0])
                       .values_list('listed', flat=True)):
                log.debug('%d items seen.' % count)
                ids.append(item['id'])
            if len(ids) > 100:
                _activity_log_scrubber.delay(ids)
                ids = []

    # Flush whatever is left over.
    if ids:
        _activity_log_scrubber.delay(ids)


@task(rate_limit='60/h')
def _activity_log_scrubber(items, **kw):
    log.info('[%s@%s] Deleting activity log items' %
             (len(items), _activity_log_scrubber.rate_limit))
    ActivityLog.objects.filter(id__in=items).delete()
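

# Queue health checking: check_queues() records a 'ping' timestamp for each
# queue in redis and sends a ping task down that queue; the worker records a
# 'pong' when it runs the task. A monitor elsewhere can compare the two
# timestamps against the per-queue thresholds from queues() to spot
# backed-up workers.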
class QueueCheck(object):
    key = 'cron:queuecheck:%s:%s'

    def __init__(self):
        self.redis = redisutils.connections['master']

    def queues(self):
        # Figure out all the queues we're using. celery is the default, with
        # a warning threshold of an hour.
        queues = {'celery': 60 * 60}
        others = set(r['queue'] for r in settings.CELERY_ROUTES.values())
        # 30 second threshold for the fast queues.
        queues.update((q, 30) for q in others)
        return queues

    def set(self, action, queue):
        self.redis.set(self.key % (action, queue), time.time())

    def get(self, action, queue):
        return self.redis.get(self.key % (action, queue))


@cronjobs.register
def check_queues():
    checker = QueueCheck()
    for queue in checker.queues():
        checker.set('ping', queue)
        ping.apply_async(queue=queue, routing_key=queue, exchange=queue)


@task
def ping(**kw):
    queue = kw['delivery_info']['routing_key']
    log.info('[1@None] Checking the %s queue' % queue)
    QueueCheck().set('pong', queue)