Creates image proxy view (closes #104). (#142)

* Creates image proxy view (closes #104).

* Passes .env to environment when creating locally.

* Use image proxy URLs for served images.
This commit is contained in:
Chuck Harmston 2016-05-26 12:28:03 -06:00
Родитель ed501488af
Коммит 3cdd926046
16 изменённых файлов: 228 добавлений и 24 удалений

Просмотреть файл

@ -1,3 +1,3 @@
#!/usr/bin/env bash #!/usr/bin/env bash
exec uwsgi --http :${PORT:-8000} --wsgi-file /app/recommendation/wsgi.py --master exec uwsgi --wsgi-disable-file-wrapper --http :${PORT:-8000} --wsgi-file /app/recommendation/wsgi.py --master

Просмотреть файл

@ -5,9 +5,11 @@ DEBUG = env.get('RECOMMENDATION_ENV', 'development') == 'development'
TESTING = env.get('RECOMMENDATION_TESTING', None) == 'true' TESTING = env.get('RECOMMENDATION_TESTING', None) == 'true'
KEY_PREFIX = env.get('RECOMMENDATION_KEY_PREFIX', 'query_') KEY_PREFIX = env.get('RECOMMENDATION_KEY_PREFIX', 'query_')
SERVER_NAME = env.get('RECOMMENDATION_SERVER_NAME', 'universal-search.dev')
CACHE_TTL = int(env.get('RECOMMENDATION_CACHE_TTL', 7 * 24 * 60 * 60)) CACHE_TTL = int(env.get('RECOMMENDATION_CACHE_TTL', 7 * 24 * 60 * 60))
MEMCACHED_TTL = int(env.get('RECOMMENDATION_MEMCACHED_TTL', CACHE_TTL)) MEMCACHED_TTL = int(env.get('RECOMMENDATION_MEMCACHED_TTL', CACHE_TTL))
IMAGEPROXY_TTL = int(env.get('IMAGEPROXY_TTL', CACHE_TTL))
BING_ACCOUNT_KEY = env.get('BING_ACCOUNT_KEY', '') BING_ACCOUNT_KEY = env.get('BING_ACCOUNT_KEY', '')
EMBEDLY_API_KEY = env.get('EMBEDLY_API_KEY', '') EMBEDLY_API_KEY = env.get('EMBEDLY_API_KEY', '')

Просмотреть файл

@ -10,6 +10,7 @@ from recommendation.mozlog.formatter import MozLogFormatter
from recommendation.mozlog.middleware import request_timer, request_summary from recommendation.mozlog.middleware import request_timer, request_summary
from recommendation.views.debug import debug from recommendation.views.debug import debug
from recommendation.views.dummy import dummy from recommendation.views.dummy import dummy
from recommendation.views.images import images
from recommendation.views.main import main from recommendation.views.main import main
from recommendation.views.static import static from recommendation.views.static import static
from recommendation.views.status import status from recommendation.views.status import status
@ -24,6 +25,7 @@ def create_app():
# Register views. # Register views.
app.register_blueprint(main) app.register_blueprint(main)
app.register_blueprint(debug) app.register_blueprint(debug)
app.register_blueprint(images)
app.register_blueprint(static) app.register_blueprint(static)
app.register_blueprint(status) app.register_blueprint(status)
@ -45,7 +47,8 @@ def create_app():
app.config.update( app.config.update(
CELERY_BROKER_URL=conf.CELERY_BROKER_URL, CELERY_BROKER_URL=conf.CELERY_BROKER_URL,
DEBUG=conf.DEBUG DEBUG=conf.DEBUG,
SERVER_NAME=conf.SERVER_NAME
) )
return app return app

Просмотреть файл

@ -12,7 +12,8 @@ LOG_PATH_BLACKLIST = [
'/__heartbeat__', '/__heartbeat__',
'/__lbheartbeat__', '/__lbheartbeat__',
'/nginx_status', '/nginx_status',
'/robots.txt' '/robots.txt',
'/images'
] ]
@ -42,20 +43,20 @@ def request_summary(response):
log = {} log = {}
query = request.args.get('q') query = request.args.get('q')
data = response.get_data(as_text=True)
try:
body = json.loads(data)
except json.decoder.JSONDecodeError:
body = {}
log['agent'] = request.headers.get('User-Agent') log['agent'] = request.headers.get('User-Agent')
log['errno'] = 0 if response.status_code < 400 else response.status_code log['errno'] = 0 if response.status_code < 400 else response.status_code
log['lang'] = request.headers.get('Accept-Language') log['lang'] = request.headers.get('Accept-Language')
log['method'] = request.method log['method'] = request.method
log['path'] = request.path log['path'] = request.path
log['t'] = (request.finish_time - request.start_time) * 1000 # in ms log['t'] = (request.finish_time - request.start_time) * 1000 # in ms
if query: if query:
data = response.get_data(as_text=True)
try:
body = json.loads(data)
except json.decoder.JSONDecodeError:
body = {}
query = query.lower() query = query.lower()
log['predicates.query_length'] = len(query) > 20 log['predicates.query_length'] = len(query) > 20
log['predicates.is_protocol'] = (re.match(IS_PROTOCOL, query) is not log['predicates.is_protocol'] = (re.match(IS_PROTOCOL, query) is not

Просмотреть файл

@ -5,6 +5,7 @@ import requests
from recommendation import conf from recommendation import conf
from recommendation.memorize import memorize from recommendation.memorize import memorize
from recommendation.search.classification.base import BaseClassifier from recommendation.search.classification.base import BaseClassifier
from recommendation.util import image_url
class BaseEmbedlyClassifier(BaseClassifier): class BaseEmbedlyClassifier(BaseClassifier):
@ -54,7 +55,7 @@ class FaviconClassifier(BaseEmbedlyClassifier):
return {} return {}
return { return {
'color': self._get_color(api_data), 'color': self._get_color(api_data),
'url': favicon_url, 'url': image_url(favicon_url, width=32, height=32),
} }
@ -128,8 +129,10 @@ class WikipediaClassifier(BaseEmbedlyClassifier):
try: try:
image_data = self._get_image(api_data) image_data = self._get_image(api_data)
image = {k: image_data.get(k) for k in ['url', 'height', 'width']} image = {k: image_data.get(k) for k in ['url', 'height', 'width']}
image['url'] = image_url(image['url'])
except (KeyError, IndexError): except (KeyError, IndexError):
image = {} image = {}
return { return {
'image': image, 'image': image,
'title': self._get_title(api_data), 'title': self._get_title(api_data),

Просмотреть файл

@ -5,6 +5,7 @@ import requests
from recommendation.memorize import memorize from recommendation.memorize import memorize
from recommendation.search.classification.base import BaseClassifier from recommendation.search.classification.base import BaseClassifier
from recommendation.util import image_url
class MovieClassifier(BaseClassifier): class MovieClassifier(BaseClassifier):
@ -93,7 +94,7 @@ class MovieClassifier(BaseClassifier):
'title': data.get('Title'), 'title': data.get('Title'),
'year': data.get('Year'), 'year': data.get('Year'),
'plot': data.get('Plot'), 'plot': data.get('Plot'),
'poster': data.get('Poster'), 'poster': image_url(data.get('Poster')),
'rating': { 'rating': {
'imdb': self._score(data.get('imdbRating'), 10), 'imdb': self._score(data.get('imdbRating'), 10),
'metacritic': self._score(data.get('Metascore'), 100) 'metacritic': self._score(data.get('Metascore'), 100)

Просмотреть файл

@ -1,4 +1,3 @@
from unittest import TestCase
from unittest.mock import patch from unittest.mock import patch
from urllib.parse import parse_qs, urlparse from urllib.parse import parse_qs, urlparse
@ -8,6 +7,8 @@ from nose.tools import eq_, ok_
from recommendation.search.classification.embedly import ( from recommendation.search.classification.embedly import (
BaseEmbedlyClassifier, FaviconClassifier, WikipediaClassifier) BaseEmbedlyClassifier, FaviconClassifier, WikipediaClassifier)
from recommendation.tests.memcached import mock_memcached from recommendation.tests.memcached import mock_memcached
from recommendation.tests.util import AppTestCase
from recommendation.util import image_url
MOCK_API_KEY = '0123456789abcdef' MOCK_API_KEY = '0123456789abcdef'
@ -80,7 +81,7 @@ MOCK_WIKIPEDIA_RESPONSE = {
} }
class TestBaseEmbedlyClassifier(TestCase): class TestBaseEmbedlyClassifier(AppTestCase):
classifier_class = BaseEmbedlyClassifier classifier_class = BaseEmbedlyClassifier
def tearDown(self): def tearDown(self):
@ -149,7 +150,8 @@ class TestFaviconClassifier(TestBaseEmbedlyClassifier):
status=200) status=200)
enhanced = self._classifier(MOCK_RESULT_URL).enhance() enhanced = self._classifier(MOCK_RESULT_URL).enhance()
eq_(enhanced['color'], MOCK_RESPONSE['favicon_colors'][0]['color']) eq_(enhanced['color'], MOCK_RESPONSE['favicon_colors'][0]['color'])
eq_(enhanced['url'], MOCK_RESPONSE['favicon_url']) eq_(enhanced['url'], image_url(
MOCK_RESPONSE['favicon_url'], width=32, height=32))
@patch('recommendation.search.classification.embedly.FaviconClassifier' @patch('recommendation.search.classification.embedly.FaviconClassifier'
'._api_response') '._api_response')
@ -223,6 +225,8 @@ class TestWikipediaClassifier(TestBaseEmbedlyClassifier):
responses.add(responses.GET, MOCK_API_URL, json=MOCK_RESPONSE, responses.add(responses.GET, MOCK_API_URL, json=MOCK_RESPONSE,
status=200) status=200)
enhanced = self._classifier(MOCK_WIKIPEDIA_URL).enhance() enhanced = self._classifier(MOCK_WIKIPEDIA_URL).enhance()
MOCK_WIKIPEDIA_RESPONSE['image']['url'] = (
image_url(MOCK_WIKIPEDIA_RESPONSE['image']['url']))
eq_(enhanced, MOCK_WIKIPEDIA_RESPONSE) eq_(enhanced, MOCK_WIKIPEDIA_RESPONSE)
@patch('recommendation.search.classification.embedly.WikipediaClassifier' @patch('recommendation.search.classification.embedly.WikipediaClassifier'

Просмотреть файл

@ -1,5 +1,4 @@
from copy import copy from copy import copy
from unittest import TestCase
from unittest.mock import patch from unittest.mock import patch
from urllib.parse import parse_qs, urlparse from urllib.parse import parse_qs, urlparse
@ -8,6 +7,8 @@ from nose.tools import eq_, ok_
from recommendation.search.classification.movies import MovieClassifier from recommendation.search.classification.movies import MovieClassifier
from recommendation.tests.memcached import mock_memcached from recommendation.tests.memcached import mock_memcached
from recommendation.tests.util import AppTestCase
from recommendation.util import image_url
IMDB_ID = 'tt0116756' IMDB_ID = 'tt0116756'
@ -54,7 +55,7 @@ MOCK_RESPONSE = {
} }
class TestMovieClassifier(TestCase): class TestMovieClassifier(AppTestCase):
def setUp(self): def setUp(self):
self.classifier = MovieClassifier(RESULT_IMDB, []) self.classifier = MovieClassifier(RESULT_IMDB, [])
@ -125,7 +126,7 @@ class TestMovieClassifier(TestCase):
eq_(enhanced['title'], MOCK_RESPONSE['Title']) eq_(enhanced['title'], MOCK_RESPONSE['Title'])
eq_(enhanced['year'], MOCK_RESPONSE['Year']) eq_(enhanced['year'], MOCK_RESPONSE['Year'])
eq_(enhanced['plot'], MOCK_RESPONSE['Plot']) eq_(enhanced['plot'], MOCK_RESPONSE['Plot'])
eq_(enhanced['poster'], MOCK_RESPONSE['Poster']) eq_(enhanced['poster'], image_url(MOCK_RESPONSE['Poster']))
eq_(enhanced['rating']['imdb']['stars'], 1.4) eq_(enhanced['rating']['imdb']['stars'], 1.4)
eq_(enhanced['rating']['imdb']['raw'], 2.8) eq_(enhanced['rating']['imdb']['raw'], 2.8)
eq_(enhanced['rating']['metacritic']['stars'], 1.2) eq_(enhanced['rating']['metacritic']['stars'], 1.2)

Просмотреть файл

@ -1,4 +1,3 @@
from unittest import TestCase
from unittest.mock import patch from unittest.mock import patch
from urllib.parse import ParseResult from urllib.parse import ParseResult
@ -6,6 +5,8 @@ import responses
from nose.tools import eq_, ok_ from nose.tools import eq_, ok_
from recommendation.search.classification.tld import TLDClassifier from recommendation.search.classification.tld import TLDClassifier
from recommendation.tests.util import AppTestCase
from recommendation.util import image_url
DOMAIN = 'www.mozilla.com' DOMAIN = 'www.mozilla.com'
@ -13,7 +14,7 @@ URL = 'http://%s/' % DOMAIN
LOGO = 'https://logo.clearbit.com/%s' % DOMAIN LOGO = 'https://logo.clearbit.com/%s' % DOMAIN
class TestTLDClassifier(TestCase): class TestTLDClassifier(AppTestCase):
def _result(self, url): def _result(self, url):
return { return {
'url': url 'url': url
@ -71,4 +72,4 @@ class TestTLDClassifier(TestCase):
mock_logo_exists.return_value = False mock_logo_exists.return_value = False
eq_(self._enhance(URL), None) eq_(self._enhance(URL), None)
mock_logo_exists.return_value = True mock_logo_exists.return_value = True
eq_(self._enhance(URL), LOGO) eq_(self._enhance(URL), image_url(LOGO, width=64, height=64))

Просмотреть файл

@ -1,6 +1,7 @@
import requests import requests
from recommendation.search.classification.base import BaseClassifier from recommendation.search.classification.base import BaseClassifier
from recommendation.util import image_url
class TLDClassifier(BaseClassifier): class TLDClassifier(BaseClassifier):
@ -24,4 +25,4 @@ class TLDClassifier(BaseClassifier):
logo = self._get_logo() logo = self._get_logo()
if not self._logo_exists(logo): if not self._logo_exists(logo):
return None return None
return logo return image_url(logo, width=64, height=64)

Просмотреть файл

@ -1,4 +1,3 @@
from unittest import TestCase
from unittest.mock import patch from unittest.mock import patch
from nose.tools import eq_, ok_ from nose.tools import eq_, ok_
@ -16,6 +15,7 @@ from recommendation.search.query.yahoo import YahooQueryEngine
from recommendation.search.query.tests.test_yahoo import ( from recommendation.search.query.tests.test_yahoo import (
QUERY as YAHOO_QUERY, MOCK_RESPONSE as YAHOO_RESPONSE) QUERY as YAHOO_QUERY, MOCK_RESPONSE as YAHOO_RESPONSE)
from recommendation.tests.memcached import mock_memcached from recommendation.tests.memcached import mock_memcached
from recommendation.tests.util import AppTestCase
QUERY = 'Cubs' QUERY = 'Cubs'
@ -25,7 +25,7 @@ RESULT = {
SUGGESTIONS = ['a', 'b', 'c'] SUGGESTIONS = ['a', 'b', 'c']
class TestSearchRecommendation(TestCase): class TestSearchRecommendation(AppTestCase):
def setUp(self): def setUp(self):
self.instance = SearchRecommendation('') self.instance = SearchRecommendation('')

Просмотреть файл

@ -0,0 +1,47 @@
from urllib.parse import parse_qs, quote, urlparse
from flask import current_app
from nose.tools import eq_
from recommendation.tests.util import AppTestCase
from recommendation.util import image_url
DIMENSION = '64'
IMAGE = 'https://foo.bar/image.jpg'
EMBEDLY_BASE = 'https://i.embed.ly/'
EMBEDLY_IMAGE = '{}?url={}'.format(EMBEDLY_BASE, quote(IMAGE))
class TestImageUrl(AppTestCase):
    """Tests for `image_url`, which builds URLs for the image proxy view."""

    def _image_url(self, url, **kwargs):
        """
        Calls `image_url` inside an application context and returns a
        `(url, parsed, qs)` tuple. `parsed` and `qs` are None when
        `image_url` returns a falsy value.
        """
        with current_app.app_context():
            proxied = image_url(url, **kwargs)
        if not proxied:
            return proxied, None, None
        pieces = urlparse(proxied)
        return proxied, pieces, parse_qs(pieces.query)

    def test_none(self):
        # A missing source URL yields no proxy URL at all.
        proxied, _parsed, _qs = self._image_url(None)
        eq_(proxied, None)

    def test_formed(self):
        # The source URL and both dimensions are carried in the querystring.
        _url, _parsed, params = self._image_url(IMAGE, width=DIMENSION,
                                                height=DIMENSION)
        eq_(IMAGE, params['url'][0])
        eq_(DIMENSION, params['width'][0])
        eq_(DIMENSION, params['height'][0])

    def test_embedly(self):
        # An Embedly-wrapped image resolves to the same proxy URL as the
        # bare image it wraps.
        plain = self._image_url(IMAGE)
        wrapped = self._image_url(EMBEDLY_IMAGE)
        eq_(plain, wrapped)

    def test_embedly_no_url(self):
        # Without a `url` param the Embedly URL itself is proxied.
        _url, _parsed, params = self._image_url(EMBEDLY_BASE)
        eq_(params['url'][0], EMBEDLY_BASE)

    def test_embedly_empty_url(self):
        # An empty `url` param is treated the same as a missing one.
        source = '{}?url='.format(EMBEDLY_BASE)
        _url, _parsed, params = self._image_url(source)
        eq_(params['url'][0], source)

22
recommendation/util.py Normal file
Просмотреть файл

@ -0,0 +1,22 @@
from urllib.parse import parse_qs, urlparse
from flask import current_app, url_for
def image_url(url, **kwargs):
    """
    Passed the URL to an image, returns the URL of the `images.proxy` view
    for that image, or None if `url` is falsy. Any keyword arguments (e.g.
    `width`, `height`) are forwarded to `url_for` along with `url`.
    """
    if not url:
        return None

    kwargs['url'] = url
    parsed = urlparse(url)

    # If the image is already being proxied by Embedly, pull the `url`
    # querystring param out and use that instead to prevent double-billing.
    # `hostname` is compared rather than `netloc` because it is lower-cased
    # and excludes any port or userinfo, so `I.EMBED.LY` and
    # `i.embed.ly:443` are recognized too.
    if parsed.hostname == 'i.embed.ly':
        try:
            kwargs['url'] = parse_qs(parsed.query)['url'][0]
        except (IndexError, KeyError):
            # No usable `url` param; fall back to proxying the URL as given.
            pass

    with current_app.app_context():
        return url_for('images.proxy', **kwargs)

Просмотреть файл

@ -0,0 +1,44 @@
from urllib.parse import urlencode
import requests
from flask import abort, Blueprint, request, Response, stream_with_context
from recommendation import conf
EMBEDLY_RESIZE = 'https://i.embed.ly/1/display/resize'
images = Blueprint('images', __name__)
def make_embedly_url(url, **kwargs):
    """
    Passed the URL to an image, returns a string to the Embedly resize URL for
    that image. Accepts optional `width` and `height` keyword arguments; any
    other keyword arguments are ignored.

    `url`, `width` and `height` may each be either a plain value or a
    list/tuple of values (as produced by querystring parsers), in which case
    the first item is used.
    """
    def first(value):
        # Indexing a plain string would silently truncate it ('64' -> '6'),
        # so only sequences of values are unwrapped.
        if isinstance(value, (list, tuple)):
            return value[0]
        return value

    qs = {}
    for param in ['width', 'height']:
        if param in kwargs:
            qs[param] = first(kwargs[param])
    qs['animate'] = 'false'
    qs['compresspng'] = 'true'
    qs['key'] = conf.EMBEDLY_API_KEY
    # The image to resize; without it the request does not name an image.
    qs['url'] = first(url)
    return '{}?{}'.format(EMBEDLY_RESIZE, urlencode(qs))
@images.route('/images')
def proxy():
    """
    Streams an image back to the client by way of the URL returned from
    `make_embedly_url` for the request's querystring arguments.

    Responds with a 400 if the URL cannot be built from those arguments, if
    the upstream request raises, or if the upstream response is not a 200.
    Successful responses carry a `Cache-Control: max-age` header set from
    `conf.IMAGEPROXY_TTL`.
    """
    try:
        url = make_embedly_url(**request.args)
    except TypeError:
        # Raised when the required `url` argument is absent.
        abort(400)

    try:
        req = requests.get(url, stream=True, timeout=10)
    except requests.RequestException:
        abort(400)

    if req.status_code != 200:
        # With stream=True the connection is held until the body is consumed
        # or the response is closed, so release it before bailing out.
        req.close()
        abort(400)

    # iter_content() defaults to one-byte chunks; use a sensible block size.
    body = stream_with_context(req.iter_content(chunk_size=8192))
    content_type = req.headers.get('content-type',
                                   'application/octet-stream')
    response = Response(body, content_type=content_type)
    response.headers['Cache-Control'] = 'max-age=%d' % conf.IMAGEPROXY_TTL
    return response

Просмотреть файл

@ -0,0 +1,74 @@
from unittest.mock import patch
from urllib.parse import parse_qs, urlencode, urlparse
import responses
from nose.tools import eq_, ok_
from requests import RequestException
from recommendation.tests.util import AppTestCase
from recommendation.views.images import EMBEDLY_RESIZE, make_embedly_url
MOCK_API_KEY = 'hifrend'
MOCK_CONTENT_TYPE = 'image/jpeg'
MOCK_FILE_CONTENTS = ['Hello', 'World']
MOCK_URL = 'https://foo.bar/image.jpg'
def MOCK_FILE():
    """Generator yielding each item of MOCK_FILE_CONTENTS in order."""
    yield from MOCK_FILE_CONTENTS
class TestImageViews(AppTestCase):
    """Tests for `make_embedly_url` and the `/images` proxy view."""

    def _url(self, url, **kwargs):
        """
        Returns a `(url, parsed, qs)` tuple for the result of
        `make_embedly_url`. `parsed` and `qs` are None if it is falsy.
        """
        built = make_embedly_url(url, **kwargs)
        if not built:
            return built, None, None
        pieces = urlparse(built)
        return built, pieces, parse_qs(pieces.query)

    def _proxy(self, **kwargs):
        """
        GETs `/images` with `kwargs` as the querystring and returns the
        response.
        """
        querystring = urlencode(kwargs)
        return self.client.get('/images?{}'.format(querystring))

    @patch('recommendation.views.images.conf.EMBEDLY_API_KEY', MOCK_API_KEY)
    def test_embedly_url(self):
        built, _parsed, params = self._url(MOCK_URL)
        ok_(built.startswith(EMBEDLY_RESIZE))
        ok_(MOCK_API_KEY in params['key'])
        ok_('true' in params['compresspng'])
        ok_('false' in params['animate'])

    def test_embedly_url_kwargs(self):
        width = ['64']  # Mimicking how parse_qs parses `width=64`
        _built, _parsed, params = self._url(MOCK_URL, width=width, foo='bar')
        eq_(width, params['width'])
        # Unrecognized keyword arguments must not leak into the URL.
        ok_('foo' not in params)

    def test_no_url(self):
        eq_(self._proxy().status_code, 400)

    @patch('recommendation.views.images.requests.get')
    def test_request_exception(self, mock_get):
        mock_get.side_effect = RequestException
        eq_(self._proxy(url=MOCK_URL).status_code, 400)

    @responses.activate
    def test_request_bad_response(self):
        responses.add(responses.GET, EMBEDLY_RESIZE, status=500)
        eq_(self._proxy(url=MOCK_URL).status_code, 400)

    @patch('recommendation.views.images.stream_with_context')
    @responses.activate
    def test_ok(self, mock_get_stream):
        responses.add(responses.GET, EMBEDLY_RESIZE, status=200,
                      content_type=MOCK_CONTENT_TYPE)
        # Replace the streamed body with a known generator.
        mock_get_stream.return_value = MOCK_FILE()
        response = self._proxy(url=MOCK_URL)
        received = dict(response.headers)
        eq_(received['Content-Type'], MOCK_CONTENT_TYPE)
        ok_('Cache-Control' in received)
        eq_(response.data.decode('ASCII'), ''.join(MOCK_FILE_CONTENTS))

2
server
Просмотреть файл

@ -84,7 +84,7 @@ case "$1" in
docker build -t $APPLICATION . docker build -t $APPLICATION .
docker run -d \ docker run -d \
-e "RECOMMENDATION_SERVICES=`docker-machine ip $MACHINE`" \ -e "RECOMMENDATION_SERVICES=`docker-machine ip $MACHINE`" \
-p 80:8000/tcp --name="$APPLICATION" $APPLICATION -p 80:8000/tcp --env-file=.env --name="$APPLICATION" $APPLICATION
;; ;;
# Builds and starts the services supporting the application container. # Builds and starts the services supporting the application container.