Fix the utility to grab S3 JSON files

This additionally adds new test coverage and
works around https://github.com/travis-ci/travis-ci/issues/7940.
This commit is contained in:
Alessio Placitelli 2017-09-06 15:47:11 +02:00
Родитель 8dcc454f7a
Коммит 188b0824fb
4 изменённых файлов: 72 добавлений и 11 удалений

Просмотреть файл

@ -15,6 +15,8 @@ install:
- pip install -e . --process-dependency-links
- pip install -r test_requirements.txt
script:
# The line below is to work around https://github.com/travis-ci/travis-ci/issues/7940
- export BOTO_CONFIG=/dev/null
- flake8 taar tests
- py.test --cov taar tests
after_success:

Просмотреть файл

@ -1,10 +1,14 @@
import json
import os
from tempfile import gettempdir
import boto3
from botocore.exceptions import ClientError
import json
import logging
import os
import requests
import shutil
from botocore.exceptions import ClientError
from tempfile import gettempdir, NamedTemporaryFile
logger = logging.getLogger(__name__)
def fetch_json(uri):
@ -24,21 +28,44 @@ def fetch_json(uri):
return r.json()
def get_s3_cache_filename(s3_bucket, s3_key):
    """Return the local cache path for the given S3 bucket/key pair.

    The bucket and key are flattened into a single file name (slashes
    become underscores) located in the system temporary directory.
    """
    flat_name = "{}_{}".format(s3_bucket, s3_key).replace('/', '_')
    return os.path.join(gettempdir(), flat_name)
def get_s3_json_content(s3_bucket, s3_key):
    """Download and parse a JSON file stored on AWS S3.

    The file is downloaded to a local cache file (see
    get_s3_cache_filename) and re-used on subsequent calls.

    :param s3_bucket: the name of the S3 bucket.
    :param s3_key: the key of the object within the bucket.
    :return: the parsed JSON content, or None if the download failed
             or the cached file contained invalid JSON.
    """
    local_path = get_s3_cache_filename(s3_bucket, s3_key)

    if not os.path.exists(local_path):
        # Download into a NamedTemporaryFile first, so that a failed or
        # partial download never leaves a broken file at |local_path|;
        # the temp file is removed automatically when the block exits.
        with NamedTemporaryFile() as temp_file:
            try:
                s3 = boto3.client('s3')
                s3.download_fileobj(s3_bucket, s3_key, temp_file)
                # Flush the file so every byte is on disk before copying.
                temp_file.flush()
            except ClientError:
                logger.exception("Failed to download from S3", extra={
                    "bucket": s3_bucket,
                    "key": s3_key})
                return None

            # Only now publish the fully-downloaded content to the cache.
            with open(local_path, 'wb') as data:
                temp_file.seek(0)
                shutil.copyfileobj(temp_file, data)

    # It can happen to have corrupted files. Account for the
    # sad reality of life.
    try:
        with open(local_path, 'r') as data:
            return json.loads(data.read())
    except ValueError:
        # Remove the corrupted cache file so the next call re-downloads it.
        # Use the module logger (not the logging module) so the message
        # carries this module's name and configuration.
        logger.error("Removing corrupted S3 cache", extra={"cache_path": local_path})
        os.remove(local_path)
        return None

Просмотреть файл

@ -1,5 +1,6 @@
pytest
pytest-cov
flake8
moto
responses
coveralls

31
tests/test_utils.py Normal file
Просмотреть файл

@ -0,0 +1,31 @@
import boto3
import os
import taar.recommenders.utils as utils
from moto import mock_s3
@mock_s3
def test_get_non_existing():
    """Fetching a missing key must return None and leave no cache file."""
    bucket_name = 'test-bucket'
    missing_key = 'non-existing.json'

    s3 = boto3.resource('s3', region_name='us-west-2')
    s3.create_bucket(Bucket=bucket_name)

    assert utils.get_s3_json_content(bucket_name, missing_key) is None
    assert os.path.exists(utils.get_s3_cache_filename(bucket_name, missing_key)) is False
@mock_s3
def test_get_corrupted():
    """A non-JSON object must yield None and the bad cache file must be removed."""
    bucket_name = 'test-bucket'
    bad_key = 'corrupted.json'

    s3 = boto3.resource('s3', region_name='us-west-2')
    s3.create_bucket(Bucket=bucket_name)
    # Write a corrupted file to the mocked S3.
    s3.Object(bucket_name, bad_key).put(Body='This is invalid JSON.')

    assert utils.get_s3_json_content(bucket_name, bad_key) is None
    assert os.path.exists(utils.get_s3_cache_filename(bucket_name, bad_key)) is False