Add ryf cron job; bug 560753

2010-04-22 09:36:17 -07:00 · 2010-04-22 09:36:17 -07:00 · 6421dd5427
--- a/apps/cronjobs/management/commands/cron.py
+++ b/apps/cronjobs/management/commands/cron.py
@ -1,3 +1,4 @@
+import logging
 import sys

 from django.conf import settings
@ -5,6 +6,8 @@ from django.core.management.base import BaseCommand

 import cronjobs

+log = logging.getLogger('z.cron')
+

 class Command(BaseCommand):
    help = 'Run a script, often a cronjob'
@ -21,12 +24,16 @@ class Command(BaseCommand):
        registered = cronjobs.registered

        if not args:
+            log.error("Cron called but doesn't know what to do.")
            print 'Try one of these: %s' % ', '.join(registered)
            sys.exit(1)

        script, args = args[0], args[1:]
        if script not in registered:
+            log.error("Cron called with unrecognized command: %s %s" % (script, args))
            print 'Unrecognized name: %s' % script
            sys.exit(1)

+        log.debug("Beginning job: %s %s" % (script, args))
        registered[script](*args)
+        log.debug("Ending job: %s %s" % (script, args))
--- a/apps/discovery/cron.py
+++ b/apps/discovery/cron.py
@ -0,0 +1,64 @@
+import logging
+import time
+import urllib2
+
+from pyquery import PyQuery as pq
+
+import cronjobs
+
+from .models import BlogCacheRyf
+
+log = logging.getLogger('z.cron')
+
+
+@cronjobs.register
+def fetch_ryf_blog():
+    """Currently used in the discovery pane from the API.  This job queries
+    rockyourfirefox.com and pulls the latest entry from the RSS feed. """
+    url = "http://rockyourfirefox.com/feed/"
+    try:
+        p = pq(url=url)
+    except urllib2.URLError, e:
+        log.error("Couldn't open (%s): %s" % (url, e))
+        return
+
+    item = p('item:first')
+
+    # There should only be one row in this table, ever.
+    try:
+        page = BlogCacheRyf.objects.all()[0]
+    except IndexError:
+        page = BlogCacheRyf()
+    page.title = item('title').text()
+    page.excerpt = item('description').text()
+    page.permalink = item('link').text()
+
+    rfc_2822_format = "%a, %d %b %Y %H:%M:%S +0000"
+    t = time.strptime(item('pubDate').text(), rfc_2822_format)
+    page.date_posted = time.strftime("%Y-%m-%d %H:%M:%S", t)
+
+    # Another request because we have to get the image URL from the page. :-/
+    try:
+        p = pq(url=page.permalink)
+    except urllib2.URLError, e:
+        log.error("Couldn't open (%s): %s" % (url, e))
+        return
+    image = p('.main-image img').attr('src')
+
+    offset = image.find('/uploads')
+
+    if not image or offset == -1:
+        log.error("Couldn't find a featured image for blog post (%s). "
+                  "Fligtar said this would never happen." % page.permalink)
+
+    # Image sources look like this:
+    #    http://rockyourfirefox.com/rockyourfirefox_content/
+    #                       uploads/2010/04/Nature-SprinG-Persona1-672x367.jpg
+    # Hardcoding the length we're stripping doesn't seem great, but this is a
+    # pretty specific job and I don't know how we'd do it better.  This turns
+    # the above example into:
+    #    /uploads/2010/04/Nature-SprinG-Persona1-672x367.jpg
+    # which we'll load off of static.amo; bug 561160
+    page.image = image[offset:]
+
+    page.save()
--- a/apps/discovery/models.py
+++ b/apps/discovery/models.py
@ -0,0 +1,17 @@
+from datetime import datetime
+from django.db import models
+
+
+class BlogCacheRyf(models.Model):
+
+    title = models.CharField(max_length=255, default='', blank=True)
+    excerpt = models.TextField(blank=True)
+    permalink = models.CharField(max_length=255, default='', blank=True)
+    date_posted = models.DateTimeField(default=datetime.now, blank=True)
+    image = models.CharField(max_length=255, default='', blank=True)
+
+    class Meta:
+        db_table = 'blog_cache_ryf'
+
+    def __unicode__(self):
+        return self.title
--- a/migrations/24-ryf-blog.sql
+++ b/migrations/24-ryf-blog.sql
@ -0,0 +1,9 @@
+-- Cache table for the rockyourfirefox.com blog entry (discovery pane).
+CREATE TABLE `blog_cache_ryf` (
+    `id` INT(11) UNSIGNED NOT NULL AUTO_INCREMENT,
+    `title` VARCHAR(255) NOT NULL DEFAULT '',
+    `excerpt` TEXT,
+    `permalink` VARCHAR(255) NOT NULL DEFAULT '',
+    `image` VARCHAR(255) NOT NULL DEFAULT '',
+    `date_posted` DATETIME,
+    PRIMARY KEY (`id`)
+) DEFAULT CHARSET=utf8;
--- a/requirements-prod.txt
+++ b/requirements-prod.txt
@ -4,6 +4,7 @@ MySQL-python==1.2.3c1
 python-memcached==1.45
 Babel==0.9.5
 phpserialize==1.2
+pyquery==0.4
 South==0.7
 pytz==2010e
 GitPython==0.1.7
--- a/requirements.txt
+++ b/requirements.txt
@ -24,7 +24,6 @@ nose==0.11.1
 coverage==3.2b4
 selenium==1.0.1
 mock==0.6.0
-pyquery==0.4
 translate-toolkit==1.6.0

 pylint