This commit is contained in:
Wil Clouser 2010-04-22 09:36:17 -07:00
Родитель fe34c97b77
Коммит 6421dd5427
6 изменённых файлов: 98 добавлений и 1 удалений

Просмотреть файл

@ -1,3 +1,4 @@
import logging
import sys
from django.conf import settings
@ -5,6 +6,8 @@ from django.core.management.base import BaseCommand
import cronjobs
log = logging.getLogger('z.cron')
class Command(BaseCommand):
help = 'Run a script, often a cronjob'
@ -21,12 +24,16 @@ class Command(BaseCommand):
registered = cronjobs.registered
if not args:
log.error("Cron called but doesn't know what to do.")
print 'Try one of these: %s' % ', '.join(registered)
sys.exit(1)
script, args = args[0], args[1:]
if script not in registered:
log.error("Cron called with unrecognized command: %s %s" % (script, args))
print 'Unrecognized name: %s' % script
sys.exit(1)
log.debug("Beginning job: %s %s" % (script, args))
registered[script](*args)
log.debug("Ending job: %s %s" % (script, args))

64
apps/discovery/cron.py Normal file
Просмотреть файл

@ -0,0 +1,64 @@
import logging
import time
import urllib2
from pyquery import PyQuery as pq
import cronjobs
from .models import BlogCacheRyf
log = logging.getLogger('z.cron')
@cronjobs.register
def fetch_ryf_blog():
    """Cache the newest rockyourfirefox.com post in ``BlogCacheRyf``.

    Currently used in the discovery pane from the API.  The job reads the
    latest entry from the site's RSS feed, then requests the post itself to
    locate its featured image, and stores everything in the single cached
    row (creating that row on first run).

    Nothing is saved when either HTTP request fails or when the post has no
    usable featured image; each of those cases is logged as an error and the
    previously cached row is left untouched.
    """
    url = "http://rockyourfirefox.com/feed/"

    try:
        p = pq(url=url)
    except urllib2.URLError as e:
        log.error("Couldn't open (%s): %s", url, e)
        return

    item = p('item:first')

    # There should only be one row in this table, ever.
    try:
        page = BlogCacheRyf.objects.all()[0]
    except IndexError:
        page = BlogCacheRyf()

    page.title = item('title').text()
    page.excerpt = item('description').text()
    page.permalink = item('link').text()

    # The feed's pubDate is RFC 2822 with a literal +0000 offset; convert it
    # to the "YYYY-MM-DD HH:MM:SS" form stored in date_posted.
    rfc_2822_format = "%a, %d %b %Y %H:%M:%S +0000"
    t = time.strptime(item('pubDate').text(), rfc_2822_format)
    page.date_posted = time.strftime("%Y-%m-%d %H:%M:%S", t)

    # Another request because we have to get the image URL from the page. :-/
    try:
        p = pq(url=page.permalink)
    except urllib2.URLError as e:
        # Report the URL that actually failed (the post), not the feed URL.
        log.error("Couldn't open (%s): %s", page.permalink, e)
        return

    # attr() yields nothing when no element matched, so test the value
    # before calling string methods on it.
    image = p('.main-image img').attr('src')
    offset = image.find('/uploads') if image else -1

    if offset == -1:
        log.error("Couldn't find a featured image for blog post (%s). "
                  "Fligtar said this would never happen.", page.permalink)
        # Without this return, image[-1:] (a single character) would be
        # saved as the image path.
        return

    # Image sources look like this:
    #    http://rockyourfirefox.com/rockyourfirefox_content/
    #                        uploads/2010/04/Nature-SprinG-Persona1-672x367.jpg
    # Everything before "/uploads" is dropped.  This is a pretty specific
    # job and I don't know how we'd do it better.  This turns the above
    # example into:
    #    /uploads/2010/04/Nature-SprinG-Persona1-672x367.jpg
    # which we'll load off of static.amo; bug 561160
    page.image = image[offset:]

    page.save()

17
apps/discovery/models.py Normal file
Просмотреть файл

@ -0,0 +1,17 @@
from datetime import datetime
from django.db import models
class BlogCacheRyf(models.Model):
    """Cached copy of a rockyourfirefox.com blog post.

    Rows live in the ``blog_cache_ryf`` table.
    """

    # Every column is optional at the form level (blank=True); the string
    # columns fall back to an empty string.
    title = models.CharField(blank=True, default='', max_length=255)
    excerpt = models.TextField(blank=True)
    permalink = models.CharField(blank=True, default='', max_length=255)
    # The callable is passed (not called) so the default is evaluated each
    # time a row is created.
    date_posted = models.DateTimeField(blank=True, default=datetime.now)
    image = models.CharField(blank=True, default='', max_length=255)

    class Meta:
        db_table = 'blog_cache_ryf'

    def __unicode__(self):
        """Represent the cached post by its title."""
        return self.title

Просмотреть файл

@ -0,0 +1,9 @@
-- Cache table for the rockyourfirefox.com blog post shown in the
-- discovery pane (the BlogCacheRyf model maps to this table).
CREATE TABLE `blog_cache_ryf` (
    `id` INT(11) UNSIGNED NOT NULL AUTO_INCREMENT,
    `title` VARCHAR(255) NOT NULL DEFAULT '',
    `excerpt` TEXT,
    `permalink` VARCHAR(255) NOT NULL DEFAULT '',
    `image` VARCHAR(255) NOT NULL DEFAULT '',
    `date_posted` DATETIME,
    PRIMARY KEY (`id`)
) DEFAULT CHARSET=utf8;

Просмотреть файл

@ -4,6 +4,7 @@ MySQL-python==1.2.3c1
python-memcached==1.45
Babel==0.9.5
phpserialize==1.2
pyquery==0.4
South==0.7
pytz==2010e
GitPython==0.1.7

Просмотреть файл

@ -24,7 +24,6 @@ nose==0.11.1
coverage==3.2b4
selenium==1.0.1
mock==0.6.0
pyquery==0.4
translate-toolkit==1.6.0
pylint