diff --git a/bedrock/settings/base.py b/bedrock/settings/base.py index 68ab337c73..6e88bb1290 100644 --- a/bedrock/settings/base.py +++ b/bedrock/settings/base.py @@ -430,6 +430,7 @@ INSTALLED_APPS = ( 'bedrock.thunderbird', 'bedrock.shapeoftheweb', 'bedrock.utils', + 'bedrock.wordpress', # last so that redirects here will be last 'bedrock.redirects', @@ -543,7 +544,18 @@ BLOG_FEEDS = { }, 'internetcitizen': { 'url': 'https://blog.mozilla.org/internetcitizen/', - 'name': 'Internet Citizen' + 'name': 'Internet Citizen', + }, +} +# same as above, but uses the JSON REST API and thus gets +# more data (e.g. tags and featured image urls). +# TODO: Once this rolls out for the Firefox blog, we should +# convert the others and remove the old XML feed blogs above. +WP_BLOGS = { + 'firefox': { + 'url': 'https://blog.mozilla.org/firefox/', + 'name': 'The Firefox Frontier', + 'num_posts': 20, }, } diff --git a/bedrock/wordpress/__init__.py b/bedrock/wordpress/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bedrock/wordpress/api.py b/bedrock/wordpress/api.py new file mode 100644 index 0000000000..c2b07cb8dd --- /dev/null +++ b/bedrock/wordpress/api.py @@ -0,0 +1,67 @@ +from __future__ import print_function, unicode_literals +from django.conf import settings + +import requests +from raven.contrib.django.raven_compat.models import client as sentry_client + + +def _request(api_url, limit=None, page=1): + # 100 is max per page from WP + per_page = limit or 100 + resp = requests.get(api_url, params={'per_page': per_page, 'page': page}, timeout=5) + resp.raise_for_status() + data = resp.json() + if limit is None and page == 1: + num_pages = int(resp.headers.get('x-wp-totalpages', 1)) + if num_pages > 1: + for i in range(2, num_pages + 1): + data.extend(_request(api_url, page=i)) + + return data + + +def _api_url(feed_url, data_type, data_id): + api_url = '{}/wp-json/wp/v2/{}/'.format(feed_url.rstrip('/'), data_type) + if data_id: + api_url += str(data_id) + return api_url + + +def get_wp_data(feed_id, data_type, data_id=None, limit=None): + try: + feed_config = settings.WP_BLOGS[feed_id] + if data_type == 'posts' and limit is None: + limit = feed_config.get('num_posts', 20) + api_url = _api_url(feed_config['url'], data_type, data_id) + if data_id: + data = _request(api_url, limit=1) + else: + data = _request(api_url, limit=limit) + + return data + except Exception: + sentry_client.captureException() + return None + + +def get_posts_data(feed_id, num_posts=None): + posts = get_wp_data(feed_id, 'posts', limit=num_posts) + tags = get_wp_data(feed_id, 'tags') + if not (posts and tags): + return None + + tags = {t['id']: t['slug'] for t in tags} + for post in posts: + post['tags'] = [tags[t] for t in post['tags']] + # some blogs set featured_media to 0 when none is set + if 'featured_media' in post and post['featured_media']: + media = get_wp_data(feed_id, 'media', post['featured_media']) + if media: + post['featured_media'] = media + else: + post['featured_media'] = {} + + return { + 'posts': posts, + 'wp_blog_slug': feed_id, + } diff --git a/bedrock/wordpress/management/__init__.py b/bedrock/wordpress/management/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bedrock/wordpress/management/commands/__init__.py b/bedrock/wordpress/management/commands/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bedrock/wordpress/management/commands/update_wordpress.py b/bedrock/wordpress/management/commands/update_wordpress.py new file mode 100644 index 0000000000..8fb0f023c3 --- /dev/null +++ b/bedrock/wordpress/management/commands/update_wordpress.py @@ -0,0 +1,28 @@ +from __future__ import print_function + +from django.conf import settings +from django.core.management.base import BaseCommand, CommandError + +from bedrock.wordpress.models import BlogPost + + +class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument('-q', '--quiet', action='store_true', dest='quiet', default=False, + help='If no error occurs, swallow all output.'), + parser.add_argument('--database', default='default', + help=('Specifies the database to use. ' + 'Defaults to "default".')), + + def handle(self, *args, **options): + errors = [] + for feed_id in settings.WP_BLOGS: + updated = BlogPost.objects.refresh(feed_id, options['database']) + if updated and not options['quiet']: + print('Refreshed %s posts from the %s blog' % (updated, feed_id)) + + if not updated: + errors.append('Something has gone wrong with refreshing the %s blog' % feed_id) + + if errors: + raise CommandError('\n'.join(errors)) diff --git a/bedrock/wordpress/migrations/0001_initial.py b/bedrock/wordpress/migrations/0001_initial.py new file mode 100644 index 0000000000..80c383d749 --- /dev/null +++ b/bedrock/wordpress/migrations/0001_initial.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models +import django_extensions.db.fields.json + + +class Migration(migrations.Migration): + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='BlogPost', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('wp_id', models.IntegerField()), + ('wp_blog_slug', models.CharField(max_length=50)), + ('date', models.DateTimeField()), + ('modified', models.DateTimeField()), + ('title', models.CharField(max_length=255)), + ('excerpt', models.TextField()), + ('link', models.URLField()), + ('featured_media', django_extensions.db.fields.json.JSONField()), + ('tags', django_extensions.db.fields.json.JSONField()), + ], + options={ + 'ordering': ['-date'], + 'get_latest_by': 'date', + }, + ), + ] diff --git a/bedrock/wordpress/migrations/__init__.py b/bedrock/wordpress/migrations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bedrock/wordpress/models.py b/bedrock/wordpress/models.py new file mode 100644 index 0000000000..cf18194298 --- /dev/null +++ b/bedrock/wordpress/models.py @@ -0,0 +1,118 @@ +from __future__ import print_function, unicode_literals + +import operator +from django.conf import settings +from django.db import models, transaction +from django.db.models import Q +from django.db.utils import DatabaseError +from django.utils.html import strip_tags + +import bleach +from django_extensions.db.fields.json import JSONField +from jinja2 import Markup +from raven.contrib.django.raven_compat.models import client as sentry_client + +from bedrock.wordpress.api import get_posts_data + + +class BlogPostQuerySet(models.QuerySet): + def filter_by_blog(self, blog_slug): + return self.filter(wp_blog_slug=blog_slug) + + def filter_by_tags(self, *tags): + tag_qs = [Q(tags__contains='"{}"'.format(t)) for t in tags] + return self.filter(reduce(operator.or_, tag_qs)) + + +class BlogPostManager(models.Manager): + def get_queryset(self): + return BlogPostQuerySet(self.model, using=self._db) + + def filter_by_blog(self, blog_slug): + return self.get_queryset().filter_by_blog(blog_slug) + + def filter_by_tags(self, *tags): + return self.get_queryset().filter_by_tags(*tags) + + def update_posts(self, data, database='default'): + with transaction.atomic(using=database): + count = 0 + posts = data['posts'] + self.filter_by_blog(data['wp_blog_slug']).delete() + for post in posts: + try: + self.create( + wp_id=post['id'], + wp_blog_slug=data['wp_blog_slug'], + date=post['date_gmt'], + modified=post['modified_gmt'], + title=bleach.clean(post['title']['rendered']), + excerpt=bleach.clean(post['excerpt']['rendered']), + link=post['link'], + featured_media=post['featured_media'], + tags=post['tags'], + ) + count += 1 + except (DatabaseError, KeyError): + sentry_client.captureException() + raise + + return count + + def refresh(self, feed_id, database='default', num_posts=None): + data = get_posts_data(feed_id, num_posts) + if data: + return self.update_posts(data, database) + + # no data returned. something wrong. + return 0 + + +class BlogPost(models.Model): + wp_id = models.IntegerField() + wp_blog_slug = models.CharField(max_length=50) + date = models.DateTimeField() + modified = models.DateTimeField() + title = models.CharField(max_length=255) + excerpt = models.TextField() + link = models.URLField() + featured_media = JSONField() + tags = JSONField() + + objects = BlogPostManager() + + class Meta: + get_latest_by = 'date' + ordering = ['-date'] + + def __unicode__(self): + return '%s: %s' % (self.blog_name, self.title) + + def get_absolute_url(self): + return self.link + + def htmlify(self): + summary = strip_tags(self.excerpt).strip() + if summary.lower().endswith('continue reading'): + summary = summary[:-16] + + return Markup(summary) + + @property + def blog_title(self): + title = strip_tags(self.title).strip() + return Markup(title).unescape() + + @property + def blog_link(self): + return settings.WP_BLOGS[self.wp_blog_slug]['url'] + + @property + def blog_name(self): + return settings.WP_BLOGS[self.wp_blog_slug]['name'] + + def get_featured_image_url(self, size='large'): + try: + return self.featured_media['media_details']['sizes'][size]['source_url'] + except KeyError: + return None diff --git a/bedrock/wordpress/tests/__init__.py b/bedrock/wordpress/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bedrock/wordpress/tests/test_api.py b/bedrock/wordpress/tests/test_api.py new file mode 100644 index 0000000000..fab98a308d --- /dev/null +++ b/bedrock/wordpress/tests/test_api.py @@ -0,0 +1,72 @@ +from django.test import override_settings + +from bedrock.wordpress import api + +import responses +from mock import patch + + +TEST_WP_BLOGS = { + 'firefox': { + 'url': 'https://blog.mozilla.org/firefox/', + 'name': 'The Firefox Frontier', + 'num_posts': 10, + }, +} + + +@patch.object(api, 'requests') +def test_limited_request(req_mock): + api._request('some_url', limit=10) + req_mock.get.assert_called_once_with('some_url', + params={'per_page': 10, 'page': 1}, + timeout=5) + + +@responses.activate +def test_unlimited_request(): + api_url = api._api_url(TEST_WP_BLOGS['firefox']['url'], 'tags', None) + responses.add(responses.GET, + api_url + '?per_page=100&page=1', + match_querystring=True, + json=[1], + adding_headers={'X-WP-TotalPages': '3'}) + responses.add(responses.GET, + api_url + '?per_page=100&page=2', + json=[2, 2], + match_querystring=True, + adding_headers={'X-WP-TotalPages': '3'}) + responses.add(responses.GET, + api_url + '?per_page=100&page=3', + json=[3, 3, 3], + match_querystring=True, + adding_headers={'X-WP-TotalPages': '3'}) + + data = api._request(api_url, limit=None) + assert data == [1, 2, 2, 3, 3, 3] + assert len(responses.calls) == 3 + + +def test_api_url(): + assert (api._api_url('https://moz.blog/', 'posts', 4) == + 'https://moz.blog/wp-json/wp/v2/posts/4') + assert (api._api_url('https://moz.blog/', 'tags', None) == + 'https://moz.blog/wp-json/wp/v2/tags/') + assert (api._api_url('https://moz.blog', 'media', 55) == + 'https://moz.blog/wp-json/wp/v2/media/55') + + +@override_settings(WP_BLOGS=TEST_WP_BLOGS) +@patch.object(api, '_request') +def test_get_wp_data(req_mock): + api.get_wp_data('firefox', 'posts') + api_url = api._api_url(TEST_WP_BLOGS['firefox']['url'], 'posts', None) + req_mock.assert_called_with(api_url, limit=10) + + api.get_wp_data('firefox', 'media', 75) + api_url = api._api_url(TEST_WP_BLOGS['firefox']['url'], 'media', 75) + req_mock.assert_called_with(api_url, limit=1) + + api.get_wp_data('firefox', 'tags') + api_url = api._api_url(TEST_WP_BLOGS['firefox']['url'], 'tags', None) + req_mock.assert_called_with(api_url, limit=None) diff --git a/bedrock/wordpress/tests/test_data/media_75.json b/bedrock/wordpress/tests/test_data/media_75.json new file mode 100644 index 0000000000..c02156698d --- /dev/null +++ b/bedrock/wordpress/tests/test_data/media_75.json @@ -0,0 +1,139 @@ +{ + "id": 75, + "date": "2017-03-14T14:13:24", + "date_gmt": "2017-03-14T21:13:24", + "guid": { + "rendered": "https:\/\/blog.mozilla.org\/firefox\/files\/2017\/03\/Put-Your-Trust-in-Rust.png" + }, + "modified": "2017-03-14T14:14:55", + "modified_gmt": "2017-03-14T21:14:55", + "slug": "put-your-trust-in-rust", + "status": "inherit", + "type": "attachment", + "link": "https:\/\/blog.mozilla.org\/firefox\/put-trust-rust-shipping-now-firefox\/put-your-trust-in-rust\/", + "title": { + "rendered": "Put-Your-Trust-in-Rust" + }, + "author": 1431, + "comment_status": "closed", + "ping_status": "closed", + "template": "", + "meta": [], + "description": { + "rendered": "