2014-03-06 14:56:30 +04:00
|
|
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
|
|
|
|
# Helpers for storage access (local directory and S3)
|
|
|
|
|
2014-04-23 11:24:41 +04:00
|
|
|
import errno
|
2014-03-06 14:56:30 +04:00
|
|
|
import httplib
|
|
|
|
import os
|
|
|
|
import time
|
|
|
|
import urllib2
|
|
|
|
|
|
|
|
|
|
|
|
class Storage(object):
    '''
    Abstract class defining the interface for Storage classes.

    Subclasses are expected to override get() and put(). The
    from_environment() static method builds the instance selected by the
    environment and caches it on the class.
    '''
    def get(self, key):
        '''
        Return the data stored under the given key.
        Raises NotImplementedError unless overridden by a subclass.
        '''
        raise NotImplementedError('%s.get is not implemented' %
                                  self.__class__.__name__)

    def put(self, key, data):
        '''
        Store the given data under the given key.
        Raises NotImplementedError unless overridden by a subclass.
        '''
        raise NotImplementedError('%s.put is not implemented' %
                                  self.__class__.__name__)

    # Instance created by the first successful from_environment() call,
    # returned as-is by subsequent calls.
    _storage = None

    @staticmethod
    def from_environment():
        '''
        Return a Storage instance matching the configuration in the
        environment.
        SCCACHE_BUCKET sets the S3 bucket,
        SCCACHE_NO_HTTPS defines whether put shall use HTTP instead of
        HTTPS,
        SCCACHE_NAMESERVER defines a DNS server to use instead of using
        getaddrinfo,
        SCCACHE_DIR defines a directory where to store a local cache.
        Defining SCCACHE_DIR makes all of the above variables ignored.

        The instance is created once and cached; later calls return the
        cached instance without looking at the environment again.
        Raises RuntimeError when neither SCCACHE_DIR nor SCCACHE_BUCKET is
        set to a non-empty value.
        '''
        if Storage._storage is not None:
            return Storage._storage

        directory = os.environ.get('SCCACHE_DIR')
        bucket_name = os.environ.get('SCCACHE_BUCKET')
        if directory:
            # The local directory takes precedence, as documented above, so
            # the S3 settings are not even looked at in that case.
            storage = LocalStorage(directory)
        elif bucket_name:
            storage = S3Storage(bucket_name,
                                os.environ.get('SCCACHE_NO_HTTPS') != '1',
                                os.environ.get('SCCACHE_NAMESERVER'))
        else:
            raise RuntimeError('Cannot configure storage')

        Storage._storage = storage
        return storage
|
|
|
|
|
|
|
|
|
2014-04-23 11:24:41 +04:00
|
|
|
class LocalStorage(Storage):
    '''
    Storage class for a local directory.

    Entries are stored as files under the given directory, in two levels of
    subdirectories named after the first two characters of the key.
    '''
    def __init__(self, directory):
        '''
        Use the given directory as cache location, creating it if necessary.
        Raises RuntimeError if the path exists but is not a directory.
        '''
        self._ensure_dir(directory)

        self._directory = directory
        self.last_stats = {}

    def _ensure_dir(self, path):
        '''
        Make sure the given path is a directory, creating it (and its
        parents) when it doesn't exist.
        Raises RuntimeError if the path ends up not being a directory.
        '''
        if not os.path.exists(path):
            try:
                os.makedirs(path)
            except OSError as e:
                # Something else may have created the directory between the
                # existence check and makedirs; that is not an error.
                if e.errno != errno.EEXIST:
                    raise
        if not os.path.isdir(path):
            raise RuntimeError('%s is not a directory' % path)

    def _normalize_key(self, key):
        '''
        Return the path, relative to the cache directory, for the given key,
        e.g. 'abcdef' is stored as 'a/b/abcdef'.
        '''
        return '%s/%s/%s' % (key[0], key[1], key)

    def get(self, key):
        '''
        Return the content of the file storing the given key, or None when
        there is no such file.
        '''
        path = os.path.join(self._directory, self._normalize_key(key))
        if os.path.isfile(path):
            with open(path, 'rb') as data:
                return data.read()
        return None

    def put(self, key, data):
        '''
        Write the given data to the file storing the given key, creating
        the intermediate directories as needed.
        '''
        path = os.path.join(self._directory, self._normalize_key(key))
        parent = os.path.dirname(path)
        self._ensure_dir(parent)
        with open(path, 'wb') as out:
            out.write(data)
|
|
|
|
|
|
|
|
|
|
|
|
class S3Storage(Storage):
    '''
    Storage class for S3.

    Entries are uploaded through boto with a public-read ACL, and fetched
    back with plain HTTP GET requests. Both operations are best effort:
    failures are not reported to the caller.
    '''
    def __init__(self, bucket_name, store_with_https=True, dns_server=None):
        '''
        bucket_name is the name of the S3 bucket to use.
        store_with_https tells whether put() goes through HTTPS (default) or
        plain HTTP.
        dns_server, when given, is handed to dns_query_function to choose how
        host names are resolved.
        '''
        assert bucket_name
        self._bucket_name = bucket_name
        self._store_with_https = store_with_https

        from boto.s3.connection import S3Connection
        from boto.utils import find_class
        # The boto config can override the default calling format, and since
        # we don't use boto for get(), we need to use the right calling format.
        self._calling_format = find_class(S3Connection.DefaultCallingFormat)()
        self._host = self._calling_format.build_host(S3Connection.DefaultHost,
                                                     self._bucket_name)

        # Prepare the wrapper classes to use for urllib and boto.
        dns_query = dns_query_function(dns_server)
        self._http_connection_class = ConnectionWrapperFactory(
            httplib.HTTPConnection, dns_query)
        self._https_connection_class = ConnectionWrapperFactory(
            httplib.HTTPSConnection, dns_query)
        self._url_opener = OpenerFactory(
            self._http_connection_class, self._https_connection_class)

        # Get the boto S3 bucket instance
        if store_with_https:
            s3_connection = S3Connection(
                https_connection_factory=(self._https_connection_class, ()))
        else:
            s3_connection = S3Connection(
                port=80, is_secure=False,
                https_connection_factory=(self._http_connection_class, ()))

        self._bucket = s3_connection.get_bucket(self._bucket_name,
                                                validate=False)

        self.last_stats = {}

    def _normalize_key(self, key):
        '''
        Return the S3 object name for the given key, e.g. 'abcdef' is stored
        as 'a/b/c/abcdef'.
        '''
        return '%s/%s/%s/%s' % (key[0], key[1], key[2], key)

    def get(self, key):
        '''
        Return the data stored under the given key, or None if it could not
        be fetched for any reason.
        When TINDERBOX_OUTPUT is set in the environment, self.last_stats is
        updated with a copy of the statistics gathered for this request.
        '''
        # Don't use boto here, because it can't do simple GET requests, and those
        # are actually significantly faster.
        url = 'http://%s%s' % (
            self._host,
            self._calling_format.build_path_base(self._bucket_name,
                                                 self._normalize_key(key)))
        _last_stats.clear()
        try:
            response = self._url_opener.open(url)
            try:
                data = response.read()
            finally:
                # Release the connection even when reading fails.
                response.close()
            _last_stats['size'] = len(data)
            return data
        except Exception:
            # Best effort: any failure is treated as a cache miss, but
            # KeyboardInterrupt and SystemExit are left to propagate.
            return None
        finally:
            if 'TINDERBOX_OUTPUT' in os.environ:
                self.last_stats = dict(_last_stats)

    def put(self, key, data):
        '''
        Store the given data under the given key. Failures are ignored.
        When TINDERBOX_OUTPUT is set in the environment, self.last_stats is
        updated with a copy of the statistics gathered for this request.
        '''
        # Store the given data on S3, and set an acl at the same time to allow
        # public HTTP GETs later on (which we use in get())
        _last_stats.clear()
        _last_stats['size'] = len(data)
        try:
            k = self._bucket.new_key(self._normalize_key(key))
            k.set_contents_from_string(data, headers={
                'x-amz-acl': 'public-read',
            })
        except Exception:
            # Best effort: a failed upload only means the entry is not
            # cached, but KeyboardInterrupt and SystemExit are left to
            # propagate.
            pass
        finally:
            if 'TINDERBOX_OUTPUT' in os.environ:
                self.last_stats = dict(_last_stats)
|
|
|
|
|
|
|
|
|
|
|
|
# Statistics about the S3 request in progress, shared at module level:
# S3Storage.get/put clear it and record 'size', and the connection wrappers
# record the 'dns', 'connect' and 'response' timings in milliseconds.
_last_stats = {}
|
|
|
|
|
|
|
|
|
|
|
|
def ConnectionWrapperFactory(parent_class, dns_query):
    '''
    Build and return a subclass of parent_class, which is meant to be
    httplib.HTTPConnection or httplib.HTTPSConnection. When connecting, the
    subclass resolves the connection host name through the dns_query
    function. It also records, in milliseconds, the time spent on DNS
    resolution, connection and response in _last_stats.
    '''
    def elapsed_ms(begin, end):
        return (end - begin) * 1000

    class ConnectionWrapper(parent_class):
        def connect(self):
            # Resolve the host name ourselves, timing the lookup.
            lookup_start = time.time()
            self.host = dns_query(self.host)
            lookup_end = time.time()
            _last_stats['dns'] = elapsed_ms(lookup_start, lookup_end)

            # Let the parent class connect to the resolved address.
            parent_class.connect(self)
            self._connect_time = time.time()
            _last_stats['connect'] = elapsed_ms(lookup_end,
                                                self._connect_time)

        def getresponse(self, buffering=False):
            response = parent_class.getresponse(self, buffering)
            # Time elapsed since the connection was established.
            _last_stats['response'] = elapsed_ms(self._connect_time,
                                                 time.time())
            return response

    return ConnectionWrapper
|
|
|
|
|
|
|
|
|
|
|
|
def OpenerFactory(HTTPConnection, HTTPSConnection):
|
|
|
|
'''
|
|
|
|
Create an OpenerDirector instance with handlers using the given
|
|
|
|
HTTPConnection and HTTPSConnection classes.
|
|
|
|
'''
|
|
|
|
class HTTPHandler(urllib2.HTTPHandler):
|
|
|
|
def http_open(self, req):
|
|
|
|
return self.do_open(HTTPConnection, req)
|
|
|
|
|
|
|
|
class HTTPSHandler(urllib2.HTTPSHandler):
|
|
|
|
def https_open(self, req):
|
|
|
|
return self.do_open(HTTPSConnection, req)
|
|
|
|
|
|
|
|
return urllib2.build_opener(HTTPHandler, HTTPSHandler)
|
|
|
|
|
|
|
|
|
|
|
|
def dns_query_function(server=None):
    '''
    Return a function resolving a host name to an address. Resolution goes
    through the DNS server at the given address when there is one, and
    through getaddrinfo otherwise.
    '''
    if not server:
        import socket

        def dns_query(host):
            # The first result wins; its sockaddr starts with the address.
            for info in socket.getaddrinfo(host, 0):
                sockaddr = info[4]
                return sockaddr[0]

        return dns_query

    from dns.resolver import Resolver, Cache
    # Start from an empty configuration, so that only the given server is
    # ever queried, and keep the answers in a cache.
    resolver = Resolver(configure=False)
    resolver.cache = Cache()
    resolver.nameservers.append(server)

    def dns_query(host):
        # The first record in the answer wins.
        for record in resolver.query(host):
            return record.address

    return dns_query
|