Remove recommendations cron (already broken, the table was dropped), refs #1174

This commit is contained in:
Mathieu Pillard 2016-01-04 23:13:02 +01:00
Родитель 4dcda19f80
Коммит 797390fb53
2 изменённых файлов: 0 добавлений и 82 удалений

Просмотреть файл

@ -3,7 +3,6 @@ import itertools
import logging
import operator
import os
import subprocess
import time
from datetime import datetime, timedelta
@ -13,7 +12,6 @@ from django.db.models import Q, F, Avg
import cronjobs
import multidb
from lib import recommend
from celery.task.sets import TaskSet
import waffle
@ -351,85 +349,6 @@ def deliver_hotness():
time.sleep(10)
@cronjobs.register
def recs():
start = time.time()
cursor = connections[multidb.get_slave()].cursor()
cursor.execute("""
SELECT addon_id, collection_id
FROM synced_addons_collections ac
INNER JOIN addons ON
(ac.addon_id=addons.id AND inactive=0 AND status=4
AND addontype_id <> 9 AND current_version IS NOT NULL)
ORDER BY addon_id, collection_id
""")
qs = cursor.fetchall()
recs_log.info('%.2fs (query) : %s rows' % (time.time() - start, len(qs)))
addons = _group_addons(qs)
recs_log.info('%.2fs (groupby) : %s addons' %
((time.time() - start), len(addons)))
if not len(addons):
return
# Check our memory usage.
try:
p = subprocess.Popen('%s -p%s -o rss' % (settings.PS_BIN, os.getpid()),
shell=True, stdout=subprocess.PIPE)
recs_log.info('%s bytes' % ' '.join(p.communicate()[0].split()))
except Exception:
log.error('Could not call ps', exc_info=True)
sim = recommend.similarity # Locals are faster.
sims, start, timers = {}, [time.time()], {'calc': [], 'sql': []}
def write_recs():
calc = time.time()
timers['calc'].append(calc - start[0])
try:
_dump_recs(sims)
except Exception:
recs_log.error('Error dumping recommendations. SQL issue.',
exc_info=True)
sims.clear()
timers['sql'].append(time.time() - calc)
start[0] = time.time()
for idx, (addon, collections) in enumerate(addons.iteritems(), 1):
xs = [(other, sim(collections, cs))
for other, cs in addons.iteritems()]
# Sort by similarity and keep the top N.
others = sorted(xs, key=operator.itemgetter(1), reverse=True)
sims[addon] = [(k, v) for k, v in others[:11] if k != addon]
if idx % 50 == 0:
write_recs()
else:
write_recs()
avg_len = sum(len(v) for v in addons.itervalues()) / float(len(addons))
recs_log.info('%s addons: average length: %.2f' % (len(addons), avg_len))
recs_log.info('Processing time: %.2fs' % sum(timers['calc']))
recs_log.info('SQL time: %.2fs' % sum(timers['sql']))
def _dump_recs(sims):
# Dump a dictionary of {addon: (other_addon, score)} into the
# addon_recommendations table.
cursor = connections['default'].cursor()
addons = sims.keys()
vals = [(addon, other, score)
for addon, others in sims.items()
for other, score in others]
cursor.execute('BEGIN')
cursor.execute('DELETE FROM addon_recommendations WHERE addon_id IN %s',
[addons])
cursor.executemany("""
INSERT INTO addon_recommendations (addon_id, other_addon_id, score)
VALUES (%s, %s, %s)""", vals)
cursor.execute('COMMIT')
def _group_addons(qs):
# qs is a list of (addon_id, collection_id) order by addon_id.
# Return a dict of {addon_id: [collection_id]}.

Просмотреть файл

@ -38,7 +38,6 @@ HOME=/tmp
30 14 * * * %(z_cron)s category_totals
30 15 * * * %(z_cron)s collection_subscribers
30 17 * * * %(z_cron)s share_count_totals
30 18 * * * %(z_cron)s recs
0 22 * * * %(z_cron)s gc
30 6 * * * %(z_cron)s deliver_hotness
45 7 * * * %(django)s dump_apps