Mirror of https://github.com/mozilla/taar.git
Fix the utility to grab S3 JSON files
This additionally adds new test coverage and works around a travis-ci/issues/7940 .
This commit is contained in:
Parent
8dcc454f7a
Commit
188b0824fb
|
@ -15,6 +15,8 @@ install:
|
|||
- pip install -e . --process-dependency-links
|
||||
- pip install -r test_requirements.txt
|
||||
script:
|
||||
# The line below is to work around https://github.com/travis-ci/travis-ci/issues/7940
|
||||
- export BOTO_CONFIG=/dev/null
|
||||
- flake8 taar tests
|
||||
- py.test --cov taar tests
|
||||
after_success:
|
||||
|
|
|
@ -1,10 +1,14 @@
|
|||
import json
|
||||
import os
|
||||
from tempfile import gettempdir
|
||||
|
||||
import boto3
|
||||
from botocore.exceptions import ClientError
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import requests
|
||||
import shutil
|
||||
from botocore.exceptions import ClientError
|
||||
from tempfile import gettempdir, NamedTemporaryFile
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def fetch_json(uri):
|
||||
|
@ -24,21 +28,44 @@ def fetch_json(uri):
|
|||
return r.json()
|
||||
|
||||
|
||||
def get_s3_cache_filename(s3_bucket, s3_key):
    """Return the local cache path for an S3 object.

    The bucket and key are joined with underscores and any '/' in the
    key is flattened, producing a single flat filename inside the
    system temporary directory.
    """
    flat_name = '_'.join([s3_bucket, s3_key]).replace('/', '_')
    return os.path.join(gettempdir(), flat_name)
|
||||
|
||||
|
||||
def get_s3_json_content(s3_bucket, s3_key):
    """Download and parse a json file stored on AWS S3.

    The file is downloaded and then cached for future use.

    :param s3_bucket: name of the S3 bucket holding the object.
    :param s3_key: key of the JSON object within the bucket.
    :return: the parsed JSON content, or None when the download fails
             or the cached file does not contain valid JSON.
    """
    local_path = get_s3_cache_filename(s3_bucket, s3_key)

    if not os.path.exists(local_path):
        # Download into a NamedTemporaryFile first so a failed or partial
        # download never leaves a broken file at local_path; the temp
        # file is removed automatically when the context exits.
        with NamedTemporaryFile() as temp_file:
            try:
                s3 = boto3.client('s3')
                s3.download_fileobj(s3_bucket, s3_key, temp_file)
                # Flush the file so the copy below sees all the bytes.
                temp_file.flush()
            except ClientError:
                logger.exception("Failed to download from S3", extra={
                    "bucket": s3_bucket,
                    "key": s3_key})
                return None

            # Publish the cache file only after a complete download.
            with open(local_path, 'wb') as data:
                temp_file.seek(0)
                shutil.copyfileobj(temp_file, data)

    # It can happen to have corrupted files. Account for the
    # sad reality of life.
    try:
        with open(local_path, 'r') as data:
            return json.loads(data.read())
    except ValueError:
        # Remove the corrupted cache file so the next call re-downloads it.
        # Use the module-level logger (was logging.error, which bypasses
        # any handlers configured for this module's logger).
        logger.error("Removing corrupted S3 cache", extra={"cache_path": local_path})
        os.remove(local_path)

    return None
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
pytest
|
||||
pytest-cov
|
||||
flake8
|
||||
moto
|
||||
responses
|
||||
coveralls
|
|
@ -0,0 +1,31 @@
|
|||
import boto3
|
||||
import os
|
||||
import taar.recommenders.utils as utils
|
||||
from moto import mock_s3
|
||||
|
||||
|
||||
@mock_s3
def test_get_non_existing():
    """A missing S3 key yields None and leaves no cache file behind."""
    bucket_name = 'test-bucket'
    missing_key = 'non-existing.json'

    s3 = boto3.resource('s3', region_name='us-west-2')
    s3.create_bucket(Bucket=bucket_name)

    assert utils.get_s3_json_content(bucket_name, missing_key) is None
    assert not os.path.exists(utils.get_s3_cache_filename(bucket_name, missing_key))
|
||||
|
||||
|
||||
@mock_s3
def test_get_corrupted():
    """Invalid JSON on S3 yields None and the cache file is cleaned up."""
    bucket_name = 'test-bucket'
    bad_key = 'corrupted.json'

    s3 = boto3.resource('s3', region_name='us-west-2')
    s3.create_bucket(Bucket=bucket_name)

    # Write a corrupted file to the mocked S3.
    s3.Object(bucket_name, bad_key).put(Body='This is invalid JSON.')

    assert utils.get_s3_json_content(bucket_name, bad_key) is None
    assert not os.path.exists(utils.get_s3_cache_filename(bucket_name, bad_key))
|
Loading…
Link in new issue