Merge pull request #2837 from pmclanahan/update-external-files-cron-1142850

Fix bug 1142850: Add cron job to update external files hourly.
This commit is contained in:
Josh Mize 2015-03-18 14:49:47 -05:00
Родитель e2a7657738 5435b548ea
Коммит 6d87486a3b
11 изменённых файлов: 82 добавлений и 21 удалений

Просмотреть файл

@ -5,7 +5,6 @@ python:
env:
global:
- PIP_DOWNLOAD_CACHE="pip_cache"
- CRON_LOG_DIR="/tmp/bedrock-cron-log"
branches:
only:
- master

Просмотреть файл

@ -9,6 +9,7 @@ from os.path import abspath, basename, dirname, exists, join
from datetime import datetime
from django.conf import settings
from django.core.cache import get_cache, InvalidCacheBackendError
from django.utils.functional import cached_property
from django.utils.http import parse_http_date_safe
@ -21,12 +22,19 @@ UPDATED_FILE = '{0}.updated.txt'
class ExternalFile(object):
cache_key = None
def __init__(self, file_id):
try:
fileinfo = settings.EXTERNAL_FILES[file_id]
except KeyError:
raise ValueError('No external file with the {0} ID.'.format(file_id))
try:
self._cache = get_cache('externalfiles')
except InvalidCacheBackendError:
self._cache = get_cache('default')
self.file_id = file_id
self.url = fileinfo['url']
self.name = fileinfo.get('name', basename(self.url))
@ -131,3 +139,7 @@ class ExternalFile(object):
log.info('Successfully updated {0}.'.format(self.name))
return True
# Delete the entry stored under ``self.cache_key`` from ``self._cache``.
# Does nothing when ``cache_key`` is falsy, so a class that leaves
# ``cache_key`` unset can call this safely.
def clear_cache(self):
if self.cache_key:
self._cache.delete(self.cache_key)

Просмотреть файл

@ -22,13 +22,19 @@ class Command(BaseCommand):
dest='quiet',
default=False,
help='Do not print output to stdout.'),
make_option('--status',
action='store_true',
dest='status',
default=False,
help='Print only a final status to stdout. Mostly for scripts.')
)
def handle(self, *args, **options):
file_ids = args or settings.EXTERNAL_FILES.keys()
updated = False
def printout(msg, ending=None):
if not options['quiet']:
if not (options['quiet'] or options['status']):
self.stdout.write(msg, ending=ending)
for fid in file_ids:
@ -42,4 +48,11 @@ class Command(BaseCommand):
if result is None:
printout('already up-to-date')
else:
updated = True
printout('done')
if options['status']:
if updated:
self.stdout.write('updated')
else:
self.stdout.write('up-to-date')

Просмотреть файл

@ -5,14 +5,14 @@
import csv
from operator import itemgetter
from django.utils.functional import cached_property
from ordereddict import OrderedDict
from bedrock.externalfiles import ExternalFile
class CreditsFile(ExternalFile):
cache_key = 'credits-file-sorted-names'
def validate_content(self, content):
rows = list(csv.reader(content.strip().encode('utf8').split('\n')))
if len(rows) < 2200: # it's 2273 as of now
@ -23,7 +23,7 @@ class CreditsFile(ExternalFile):
return content
@cached_property
@property
def ordered(self):
"""
Returns an OrderedDict of sorted lists of names by first letter of sortkey.
@ -49,15 +49,20 @@ class CreditsFile(ExternalFile):
:param credits_data: any iterable of CSV formatted strings.
:return: list of lists
"""
names = []
for row in csv.reader(self.readlines()):
if len(row) == 1:
name = sortkey = row[0]
elif len(row) == 2:
name, sortkey = row
else:
continue
sorted_names = self._cache.get(self.cache_key)
if sorted_names is None:
names = []
for row in csv.reader(self.readlines()):
if len(row) == 1:
name = sortkey = row[0]
elif len(row) == 2:
name, sortkey = row
else:
continue
names.append([name.decode('utf8'), sortkey.upper()])
names.append([name.decode('utf8'), sortkey.upper()])
return sorted(names, key=itemgetter(1))
sorted_names = sorted(names, key=itemgetter(1))
self._cache.set(self.cache_key, 3600) # 1 hour
return sorted_names

Просмотреть файл

@ -13,6 +13,7 @@ from bedrock.mozorg.tests import TestCase
class TestCredits(TestCase):
def setUp(self):
self.credits_file = credits.CreditsFile('credits')
self.credits_file.clear_cache()
def test_credits_list(self):
self.credits_file.readlines = Mock(return_value=[

Просмотреть файл

@ -7,7 +7,6 @@ from jinja2 import Template
HEADER = '!!AUTO-GENERATED!! Edit {template}.tmpl instead.'
TEMPLATE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'etc', 'cron.d'))
LOG_DIR = os.getenv('CRON_LOG_DIR', '/var/log/bedrock')
def main():
@ -23,6 +22,8 @@ def main():
'Only define for cron.d style crontabs.'))
parser.add_option('-p', '--python', default='python2.6',
help='Python interpreter to use.')
parser.add_option('-l', '--logdir',
help='Directory in which to store logs from cron jobs.')
(opts, args) = parser.parse_args()
@ -33,11 +34,11 @@ def main():
parser.error('-t must be defined')
# ensure log path exists
if not os.path.isdir(LOG_DIR):
if opts.logdir and not os.path.isdir(opts.logdir):
try:
os.mkdir(LOG_DIR)
os.mkdir(opts.logdir)
except OSError:
parser.error('failed to create log directory: ' + LOG_DIR)
parser.error('failed to create log directory: ' + opts.logdir)
log_file = 'cron-{0}.log'.format(opts.template.split('-')[1])
django_manage = 'cd {{dir}} && {py} manage.py'.format(py=opts.python)
@ -53,7 +54,7 @@ def main():
ctx[k] = '%s %s' % (opts.user, v)
# Needs to stay below the opts.user injection.
ctx['log'] = '>> {0}/{1} 2>&1'.format(LOG_DIR, log_file)
ctx['log'] = '>> {0}/{1} 2>&1'.format(opts.logdir, log_file) if opts.logdir else ''
ctx['user'] = opts.user
ctx['webapp'] = opts.webapp
ctx['source'] = opts.source

Просмотреть файл

@ -0,0 +1,11 @@
#!/bin/bash
# Cron helper: refresh the external files and, when any of them changed,
# deploy the refreshed files cache.

# Stop if the checkout is missing; otherwise the relative paths below would
# be resolved against whatever directory cron started this script in.
cd /data/bedrock/src/www.mozilla.org-django || exit 1

# With --status the management command prints only a final status word to
# stdout: "updated" or "up-to-date".
return_value=$(venv/bin/python bedrock/manage.py update_externalfiles --status)

if [ "$return_value" = "updated" ]; then
    # file was updated, deploy
    /data/bedrock/deploy www.mozilla.org-django/bedrock/bedrock/externalfiles/files_cache > /dev/null
    echo "Successfully updated externalfiles."
fi

Просмотреть файл

@ -0,0 +1,11 @@
#!/bin/bash
# Cron helper (stage): refresh the external files and, when any of them
# changed, deploy the refreshed files cache.

# Stop if the checkout is missing; otherwise the relative paths below would
# be resolved against whatever directory cron started this script in.
cd /data/bedrock-stage/src/www.allizom.org-django || exit 1

# With --status the management command prints only a final status word to
# stdout: "updated" or "up-to-date".
return_value=$(venv/bin/python bedrock/manage.py update_externalfiles --status)

if [ "$return_value" = "updated" ]; then
    # file was updated, deploy
    # NOTE(review): this calls /data/bedrock/deploy although the script works
    # in the /data/bedrock-stage tree -- confirm whether
    # /data/bedrock-stage/deploy was intended.
    /data/bedrock/deploy www.allizom.org-django/bedrock/bedrock/externalfiles/files_cache > /dev/null
    echo "Successfully updated externalfiles."
fi

Просмотреть файл

@ -19,6 +19,7 @@ NEW_RELIC_API_KEY = getattr(settings, 'NEW_RELIC_API_KEY', None)
NEW_RELIC_APP_ID = getattr(settings, 'NEW_RELIC_APP_ID', None)
NEW_RELIC_URL = 'https://rpm.newrelic.com/deployments.xml'
GITHUB_URL = 'https://github.com/mozilla/bedrock/compare/{oldrev}...{newrev}'
CRON_LOG_DIR = '/var/log/bedrock'
# ########## Commands run by chief ##############
@ -212,8 +213,9 @@ def generate_desc(from_commit, to_commit, changelog):
def generate_cron_file(ctx, tmpl_name):
with ctx.lcd(settings.WWW_DIR):
ctx.local("{python} bin/gen-crons.py -p {python} -s {src_dir} -w {www_dir} "
# Adjacent string literals are concatenated as-is, so the first literal must
# end with a space. Without it the command contained
# "-l /var/log/bedrock-t <template>": the log dir gained a "-t" suffix and
# the -t option was never seen by gen-crons.py.
ctx.local("{python} bin/gen-crons.py -p {python} -s {src_dir} -w {www_dir} -l {log_dir} "
"-t {template}".format(python=PYTHON,
src_dir=settings.SRC_DIR,
www_dir=settings.WWW_DIR,
log_dir=CRON_LOG_DIR,
template=tmpl_name))

Просмотреть файл

@ -10,6 +10,9 @@ MAILTO="webops-cron@mozilla.com,cron-bedrock@mozilla.com"
*/10 * * * * {{ user }} {{ source }}/bin/update-scripts/prod/update-prod-php.sh {{ log }}
*/15 * * * * {{ user }} {{ source }}/bin/update-scripts/prod/update-prod-locale-cron.sh {{ log }}
# bug 1142850
0 * * * * {{ user }} {{ source }}/bin/update-scripts/prod/update-externalfiles.sh {{ log }}
*/5 * * * * {{ django_manage }} rnasync {{ log }}
# bug 996144 & 1014586

Просмотреть файл

@ -10,6 +10,9 @@ MAILTO="webops-cron@mozilla.com,cron-bedrock@mozilla.com"
*/10 * * * * {{ user }} {{ source }}/bin/update-scripts/stage/update-stage-php.sh {{ log }}
*/15 * * * * {{ user }} {{ source }}/bin/update-scripts/stage/update-stage-locale.sh {{ log }}
# bug 1142850
0 * * * * {{ user }} {{ source }}/bin/update-scripts/stage/update-externalfiles.sh {{ log }}
*/5 * * * * {{ django_manage }} rnasync {{ log }}
# bug 996144