Bug 1561582 - tooltool.py now supports python 3 and 2 r=rail CLOSED TREE

Differential Revision: https://phabricator.services.mozilla.com/D36010

--HG--
extra : source : 7e05642566d001eb7b6900e80788212310243c3c
extra : histedit_source : 361d6b5974f9a4708f12f2c747645df6c7d706ee
Rok Garbas 2019-06-27 12:54:07 +00:00
Parent b8e6490b77
Commit d8e57f535e
2 changed files with 194 additions and 87 deletions

View file

@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python
 # tooltool is a lookaside cache implemented in Python
 # Copyright (C) 2011 John H. Ford <john@johnford.info>
@@ -17,8 +17,6 @@
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 # 02110-1301, USA.
-from __future__ import absolute_import, print_function
 # A manifest file specifies files in that directory that are stored
 # elsewhere. This file should only list files in the same directory
 # in which the manifest file resides and it should be called
@@ -26,12 +24,10 @@ from __future__ import absolute_import, print_function
+from __future__ import print_function
-import StringIO
 import base64
 import calendar
 import hashlib
 import hmac
-import httplib
 import json
 import logging
 import math
@@ -45,10 +41,10 @@ import tarfile
 import tempfile
 import threading
 import time
-import urllib2
-import urlparse
 import zipfile
 from io import open
+from io import BytesIO
 from subprocess import PIPE
 from subprocess import Popen
@@ -64,15 +60,50 @@ HAWK_VER = 1
 PY3 = sys.version_info[0] == 3
 if PY3:
-    # TODO: py3 coverage
-    six_binary_type = bytes  # pragma: no cover
+    open_attrs = dict(mode='w', encoding='utf-8')
+    six_binary_type = bytes
+    six_text_type = str
+    unicode = str  # Silence `pyflakes` from reporting `undefined name 'unicode'` in Python 3.
+    import urllib.request as urllib2
+    from http.client import HTTPSConnection, HTTPConnection
+    from urllib.parse import urlparse, urljoin
+    from urllib.request import Request
+    from urllib.error import HTTPError, URLError
 else:
+    open_attrs = dict(mode='wb')
     six_binary_type = str
+    six_text_type = unicode
+    import urllib2
+    from httplib import HTTPSConnection, HTTPConnection
+    from urllib2 import Request, HTTPError, URLError
+    from urlparse import urlparse, urljoin
 
 log = logging.getLogger(__name__)
 
 
+def request_has_data(req):
+    if PY3:
+        return req.data is not None
+    return req.has_data()
+
+
+def to_binary(val):
+    if isinstance(val, six_text_type):
+        return val.encode('utf-8')
+    return val
+
+
+def to_text(val):
+    if isinstance(val, six_binary_type):
+        return val.decode('utf-8')
+    return val
+
+
+def get_hexdigest(val):
+    return hashlib.sha512(to_binary(val)).hexdigest()
+
+
 class FileRecordJSONEncoderException(Exception):
     pass
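
With the compatibility shim above, the rest of the file can treat text/bytes handling as settled. A quick sketch of how the new helpers behave (illustrative only, not part of the patch; it assumes the definitions above are in scope):

    # On Python 3: six_text_type is str, six_binary_type is bytes.
    # On Python 2: six_text_type is unicode, six_binary_type is str.
    assert to_binary(u'abc') == b'abc'   # text is UTF-8 encoded
    assert to_binary(b'abc') == b'abc'   # bytes pass through unchanged
    assert to_text(b'abc') == u'abc'     # bytes are UTF-8 decoded
    assert to_text(u'abc') == u'abc'     # text passes through unchanged

    # get_hexdigest normalizes to bytes first; hashlib.sha512 rejects
    # str on Python 3, so hashing text directly would raise TypeError.
    assert get_hexdigest(u'abc') == get_hexdigest(b'abc')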
@@ -109,7 +140,7 @@ class BadHeaderValue(Exception):
 def parse_url(url):
-    url_parts = urlparse.urlparse(url)
+    url_parts = urlparse(url)
     url_dict = {
         'scheme': url_parts.scheme,
         'hostname': url_parts.hostname,
@@ -260,7 +291,7 @@ def make_taskcluster_header(credentials, req):
     url_parts = parse_url(url)
 
     content_hash = None
-    if req.has_data():
+    if request_has_data(req):
         content_hash = calculate_payload_hash(  # pragma: no cover
             algorithm,
             req.get_data(),
@@ -526,16 +557,20 @@ class Manifest(object):
     def dump(self, output_file, fmt='json'):
         assert fmt in self.valid_formats
         if fmt == 'json':
-            rv = json.dump(
-                self.file_records, output_file, indent=2, cls=FileRecordJSONEncoder,
-                separators=(',', ': '))
-            print('', file=output_file)
-            return rv
+            return json.dump(
+                self.file_records, output_file,
+                indent=2, separators=(',', ': '),
+                cls=FileRecordJSONEncoder,
+            )
 
     def dumps(self, fmt='json'):
         assert fmt in self.valid_formats
         if fmt == 'json':
-            return json.dumps(self.file_records, cls=FileRecordJSONEncoder)
+            return json.dumps(
+                self.file_records,
+                indent=2, separators=(',', ': '),
+                cls=FileRecordJSONEncoder,
+            )
 
 
 def digest_file(f, a):
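
Note that dump now returns the json.dump result directly instead of printing a trailing newline, and dumps gains the same indent and separators as dump. The explicit separators matter because Python 2's json module emits a trailing space after each comma when indent is set; passing (',', ': ') makes the two interpreters render separators identically. In isolation:

    import json

    record = {'algorithm': 'sha512', 'size': 1024}
    # Without separators=, Python 2 renders ",\n" as ", \n" (trailing
    # space); with it, both interpreters agree.
    print(json.dumps(record, indent=2, separators=(',', ': ')))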
@@ -588,9 +623,9 @@ def list_manifest(manifest_file):
         ))
         return False
     for f in manifest.file_records:
-        print("%s\t%s\t%s" % ("P" if f.present() else "-",
-                              "V" if f.present() and f.validate() else "-",
-                              f.filename))
+        print("{}\t{}\t{}".format("P" if f.present() else "-",
+                                  "V" if f.present() and f.validate() else "-",
+                                  f.filename))
     return True
@@ -660,7 +695,7 @@ def add_files(manifest_file, algorithm, filenames, version, visibility, unpack):
     for old_fr in old_manifest.file_records:
         if old_fr.filename not in new_filenames:
             new_manifest.file_records.append(old_fr)
-    with open(manifest_file, 'wb') as output:
+    with open(manifest_file, **open_attrs) as output:
         new_manifest.dump(output, fmt='json')
     return all_files_added
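
The open_attrs switch from the compatibility block is what makes this call portable: json.dump writes str on Python 3, which needs a text-mode file with an explicit encoding, but writes byte strings on Python 2, where 'wb' is correct. The same pattern in isolation, with a hypothetical records list:

    import json
    import sys

    PY3 = sys.version_info[0] == 3
    open_attrs = dict(mode='w', encoding='utf-8') if PY3 else dict(mode='wb')
    records = [{'filename': 'a.tar', 'size': 1}]  # hypothetical data
    with open('manifest.tt', **open_attrs) as output:
        json.dump(records, output, indent=2)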
@@ -682,8 +717,8 @@ def fetch_file(base_urls, file_record, grabchunk=1024 * 4, auth_file=None, region=None):
     fetched_path = None
     for base_url in base_urls:
         # Generate the URL for the file on the server side
-        url = urlparse.urljoin(base_url,
-                               '%s/%s' % (file_record.algorithm, file_record.digest))
+        url = urljoin(base_url,
+                      '%s/%s' % (file_record.algorithm, file_record.digest))
         if region is not None:
             url += '?region=' + region
@@ -691,26 +726,28 @@
         # Well, the file doesn't exist locally. Let's fetch it.
         try:
-            req = urllib2.Request(url)
+            req = Request(url)
             _authorize(req, auth_file)
             f = urllib2.urlopen(req)
             log.debug("opened %s for reading" % url)
-            with open(temp_path, 'wb') as out:
+            with open(temp_path, **open_attrs) as out:
                 k = True
                 size = 0
                 while k:
                     # TODO: print statistics as file transfers happen both for info and to stop
                     # buildbot timeouts
                     indata = f.read(grabchunk)
+                    if PY3:
+                        indata = to_text(indata)
                     out.write(indata)
                     size += len(indata)
-                    if indata == '':
+                    if len(indata) == 0:
                         k = False
             log.info("File %s fetched from %s as %s" %
                      (file_record.filename, base_url, temp_path))
             fetched_path = temp_path
             break
-        except (urllib2.URLError, urllib2.HTTPError, ValueError):
+        except (URLError, HTTPError, ValueError):
             log.info("...failed to fetch '%s' from %s" %
                      (file_record.filename, base_url), exc_info=True)
         except IOError:  # pragma: no cover
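
Two Python 3 pitfalls are handled in this loop. First, read() on the response returns bytes on Python 3, so the old sentinel `indata == ''` would never match; `len(indata) == 0` works for bytes and str alike. Second, because open_attrs opens the output in text mode on Python 3, each chunk is decoded with to_text before writing, which assumes the payload decodes as UTF-8. The loop skeleton, using the shim above and an illustrative URL:

    req = Request('https://tooltool.example/sha512/abc123')  # illustrative
    f = urllib2.urlopen(req)
    with open('out.tmp', **open_attrs) as out:
        while True:
            indata = f.read(4096)
            if PY3:
                indata = to_text(indata)  # bytes -> str for a text-mode file
            if len(indata) == 0:          # '' (py2) and b'' (py3) both pass
                break
            out.write(indata)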
@@ -760,7 +797,7 @@ def unpack_file(filename):
         stdout, stderr = process.communicate()
         if process.returncode != 0:
             return False
-        fileobj = StringIO.StringIO()
+        fileobj = BytesIO()
         fileobj.write(stdout)
         fileobj.seek(0)
         tar = tarfile.open(fileobj=fileobj, mode='r|')
@@ -994,14 +1031,18 @@ def _authorize(req, auth_file):
 def _send_batch(base_url, auth_file, batch, region):
-    url = urlparse.urljoin(base_url, 'upload')
+    url = urljoin(base_url, 'upload')
     if region is not None:
         url += "?region=" + region
-    req = urllib2.Request(url, json.dumps(batch), {'Content-Type': 'application/json'})
+    if PY3:
+        data = to_binary(json.dumps(batch))
+    else:
+        data = json.dumps(batch)
+    req = Request(url, data, {'Content-Type': 'application/json'})
     _authorize(req, auth_file)
     try:
         resp = urllib2.urlopen(req)
-    except (urllib2.URLError, urllib2.HTTPError) as e:
+    except (URLError, HTTPError) as e:
         _log_api_error(e)
         return None
     return json.load(resp)['result']
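
On Python 3, urllib request bodies must be bytes, while Python 2 accepted str; hence the explicit encode of the JSON payload before building the Request. The same idea in miniature (the endpoint is illustrative):

    import json

    batch = {'message': 'upload', 'files': {}}
    data = json.dumps(batch)
    if PY3:
        data = to_binary(data)  # Request bodies must be bytes on Python 3
    req = Request('https://tooltool.example/upload', data,
                  {'Content-Type': 'application/json'})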
@@ -1009,18 +1050,29 @@ def _send_batch(base_url, auth_file, batch, region):
 def _s3_upload(filename, file):
     # urllib2 does not support streaming, so we fall back to good old httplib
-    url = urlparse.urlparse(file['put_url'])
-    cls = httplib.HTTPSConnection if url.scheme == 'https' else httplib.HTTPConnection
+    url = urlparse(file['put_url'])
+    cls = HTTPSConnection if url.scheme == 'https' else HTTPConnection
     host, port = url.netloc.split(':') if ':' in url.netloc else (url.netloc, 443)
     port = int(port)
     conn = cls(host, port)
     try:
         req_path = "%s?%s" % (url.path, url.query) if url.query else url.path
-        conn.request('PUT', req_path, open(filename, "rb"),
-                     {'Content-Type': 'application/octet-stream'})
-        resp = conn.getresponse()
-        resp_body = resp.read()
-        conn.close()
+        with open(filename, 'rb') as f:
+            content = f.read()
+            content_length = len(content)
+            f.seek(0)
+            conn.request(
+                'PUT',
+                req_path,
+                f,
+                {
+                    'Content-Type': 'application/octet-stream',
+                    'Content-Length': str(content_length),
+                },
+            )
+        resp = conn.getresponse()
+        resp_body = resp.read()
+        conn.close()
         if resp.status != 200:
             raise RuntimeError("Non-200 return from AWS: %s %s\n%s" %
                                (resp.status, resp.reason, resp_body))
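
Reading the file once for its size, rewinding, and then handing the open handle to conn.request keeps the PUT body streaming from disk while guaranteeing a Content-Length header on both interpreters, rather than relying on httplib/http.client to infer the size from a file object. A trimmed sketch (host and path are illustrative):

    conn = HTTPSConnection('bucket.s3.example.com', 443)  # illustrative
    with open('artifact.tar', 'rb') as f:
        content_length = len(f.read())  # read once just to learn the size
        f.seek(0)                       # rewind so request() streams from 0
        conn.request('PUT', '/sha512/abc123', f,
                     {'Content-Type': 'application/octet-stream',
                      'Content-Length': str(content_length)})
    resp = conn.getresponse()
    resp.read()
    conn.close()

For large artifacts, os.fstat(f.fileno()).st_size would yield the same header without buffering the whole file in memory.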
@@ -1032,14 +1084,14 @@ def _s3_upload(filename, file):
 def _notify_upload_complete(base_url, auth_file, file):
-    req = urllib2.Request(
-        urlparse.urljoin(
+    req = Request(
+        urljoin(
             base_url,
             'upload/complete/%(algorithm)s/%(digest)s' % file))
     _authorize(req, auth_file)
 
     try:
         urllib2.urlopen(req)
-    except urllib2.HTTPError as e:
+    except HTTPError as e:
         if e.code != 409:
             _log_api_error(e)
             return
@@ -1090,7 +1142,7 @@ def upload(manifest, message, base_urls, auth_file, region):
     # Upload the files, each in a thread. This allows us to start all of the
     # uploads before any of the URLs expire.
     threads = {}
-    for filename, file in files.iteritems():
+    for filename, file in files.items():
         if 'put_url' in file:
             log.info("%s: starting upload" % (filename,))
             thd = threading.Thread(target=_s3_upload,
@@ -1104,7 +1156,7 @@ def upload(manifest, message, base_urls, auth_file, region):
     # re-join all of those threads as they exit
     success = True
     while threads:
-        for filename, thread in threads.items():
+        for filename, thread in list(threads.items()):
             if not thread.is_alive():
                 # _s3_upload has annotated file with result information
                 file = files[filename]
@@ -1120,7 +1172,7 @@ def upload(manifest, message, base_urls, auth_file, region):
     # notify the server that the uploads are completed. If the notification
     # fails, we don't consider that an error (the server will notice
     # eventually)
-    for filename, file in files.iteritems():
+    for filename, file in files.items():
         if 'put_url' in file and file['upload_ok']:
             log.info("notifying server of upload completion for %s" % (filename,))
             _notify_upload_complete(base_urls[0], auth_file, file)
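
dict.iteritems() is gone in Python 3, and items() there returns a live view, which is why the reaping loop two hunks up also wraps threads.items() in list() before deleting entries. The failure mode in isolation:

    threads = {'a.tar': None, 'b.tar': None}  # stand-ins for Thread objects
    for name, thd in list(threads.items()):   # snapshot; safe to mutate
        del threads[name]
    # Without list(), Python 3 raises:
    # RuntimeError: dictionary changed size during iteration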

View file

@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python
 # tooltool is a lookaside cache implemented in Python
 # Copyright (C) 2011 John H. Ford <john@johnford.info>
@@ -24,12 +24,10 @@
 from __future__ import print_function
-import StringIO
 import base64
 import calendar
 import hashlib
 import hmac
-import httplib
 import json
 import logging
 import math
@@ -43,10 +41,10 @@ import tarfile
 import tempfile
 import threading
 import time
-import urllib2
-import urlparse
 import zipfile
 from io import open
+from io import BytesIO
 from subprocess import PIPE
 from subprocess import Popen
@@ -62,15 +60,50 @@ HAWK_VER = 1
 PY3 = sys.version_info[0] == 3
 if PY3:
-    # TODO: py3 coverage
-    six_binary_type = bytes  # pragma: no cover
+    open_attrs = dict(mode='w', encoding='utf-8')
+    six_binary_type = bytes
+    six_text_type = str
+    unicode = str  # Silence `pyflakes` from reporting `undefined name 'unicode'` in Python 3.
+    import urllib.request as urllib2
+    from http.client import HTTPSConnection, HTTPConnection
+    from urllib.parse import urlparse, urljoin
+    from urllib.request import Request
+    from urllib.error import HTTPError, URLError
 else:
+    open_attrs = dict(mode='wb')
     six_binary_type = str
+    six_text_type = unicode
+    import urllib2
+    from httplib import HTTPSConnection, HTTPConnection
+    from urllib2 import Request, HTTPError, URLError
+    from urlparse import urlparse, urljoin
 
 log = logging.getLogger(__name__)
 
 
+def request_has_data(req):
+    if PY3:
+        return req.data is not None
+    return req.has_data()
+
+
+def to_binary(val):
+    if isinstance(val, six_text_type):
+        return val.encode('utf-8')
+    return val
+
+
+def to_text(val):
+    if isinstance(val, six_binary_type):
+        return val.decode('utf-8')
+    return val
+
+
+def get_hexdigest(val):
+    return hashlib.sha512(to_binary(val)).hexdigest()
+
+
 class FileRecordJSONEncoderException(Exception):
     pass
@@ -107,7 +140,7 @@ class BadHeaderValue(Exception):
 def parse_url(url):
-    url_parts = urlparse.urlparse(url)
+    url_parts = urlparse(url)
     url_dict = {
         'scheme': url_parts.scheme,
         'hostname': url_parts.hostname,
@@ -258,7 +291,7 @@ def make_taskcluster_header(credentials, req):
     url_parts = parse_url(url)
 
     content_hash = None
-    if req.has_data():
+    if request_has_data(req):
         content_hash = calculate_payload_hash(  # pragma: no cover
             algorithm,
             req.get_data(),
@@ -524,16 +557,20 @@ class Manifest(object):
     def dump(self, output_file, fmt='json'):
         assert fmt in self.valid_formats
         if fmt == 'json':
-            rv = json.dump(
-                self.file_records, output_file, indent=2, cls=FileRecordJSONEncoder,
-                separators=(',', ': '))
-            print('', file=output_file)
-            return rv
+            return json.dump(
+                self.file_records, output_file,
+                indent=2, separators=(',', ': '),
+                cls=FileRecordJSONEncoder,
+            )
 
     def dumps(self, fmt='json'):
         assert fmt in self.valid_formats
         if fmt == 'json':
-            return json.dumps(self.file_records, cls=FileRecordJSONEncoder)
+            return json.dumps(
+                self.file_records,
+                indent=2, separators=(',', ': '),
+                cls=FileRecordJSONEncoder,
+            )
 
 
 def digest_file(f, a):
@@ -586,9 +623,9 @@ def list_manifest(manifest_file):
         ))
         return False
     for f in manifest.file_records:
-        print("%s\t%s\t%s" % ("P" if f.present() else "-",
-                              "V" if f.present() and f.validate() else "-",
-                              f.filename))
+        print("{}\t{}\t{}".format("P" if f.present() else "-",
+                                  "V" if f.present() and f.validate() else "-",
+                                  f.filename))
     return True
@@ -658,7 +695,7 @@ def add_files(manifest_file, algorithm, filenames, version, visibility, unpack):
     for old_fr in old_manifest.file_records:
         if old_fr.filename not in new_filenames:
             new_manifest.file_records.append(old_fr)
-    with open(manifest_file, 'wb') as output:
+    with open(manifest_file, **open_attrs) as output:
         new_manifest.dump(output, fmt='json')
     return all_files_added
@@ -680,8 +717,8 @@ def fetch_file(base_urls, file_record, grabchunk=1024 * 4, auth_file=None, region=None):
     fetched_path = None
     for base_url in base_urls:
         # Generate the URL for the file on the server side
-        url = urlparse.urljoin(base_url,
-                               '%s/%s' % (file_record.algorithm, file_record.digest))
+        url = urljoin(base_url,
+                      '%s/%s' % (file_record.algorithm, file_record.digest))
         if region is not None:
             url += '?region=' + region
@@ -689,26 +726,28 @@
         # Well, the file doesn't exist locally. Let's fetch it.
         try:
-            req = urllib2.Request(url)
+            req = Request(url)
             _authorize(req, auth_file)
             f = urllib2.urlopen(req)
             log.debug("opened %s for reading" % url)
-            with open(temp_path, 'wb') as out:
+            with open(temp_path, **open_attrs) as out:
                 k = True
                 size = 0
                 while k:
                     # TODO: print statistics as file transfers happen both for info and to stop
                     # buildbot timeouts
                     indata = f.read(grabchunk)
+                    if PY3:
+                        indata = to_text(indata)
                     out.write(indata)
                     size += len(indata)
-                    if indata == '':
+                    if len(indata) == 0:
                         k = False
             log.info("File %s fetched from %s as %s" %
                      (file_record.filename, base_url, temp_path))
             fetched_path = temp_path
             break
-        except (urllib2.URLError, urllib2.HTTPError, ValueError):
+        except (URLError, HTTPError, ValueError):
             log.info("...failed to fetch '%s' from %s" %
                      (file_record.filename, base_url), exc_info=True)
         except IOError:  # pragma: no cover
@@ -758,7 +797,7 @@ def unpack_file(filename):
         stdout, stderr = process.communicate()
         if process.returncode != 0:
             return False
-        fileobj = StringIO.StringIO()
+        fileobj = BytesIO()
         fileobj.write(stdout)
         fileobj.seek(0)
         tar = tarfile.open(fileobj=fileobj, mode='r|')
@@ -979,7 +1018,7 @@ def _authorize(req, auth_file):
     try:
         auth_file_content = json.loads(auth_file_content)
         is_taskcluster_auth = True
-    except:
+    except Exception:
         pass
 
     if is_taskcluster_auth:
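
A bare `except:` also swallows SystemExit and KeyboardInterrupt; narrowing to `except Exception:` still treats any parse failure as "not Taskcluster credentials" while letting those control-flow exceptions propagate. A sketch of the same guard (the function name is hypothetical):

    import json

    def looks_like_taskcluster_auth(text):
        try:
            json.loads(text)
            return True
        except Exception:  # ValueError on bad JSON, but not Ctrl-C
            return False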
@@ -992,14 +1031,18 @@ def _authorize(req, auth_file):
 def _send_batch(base_url, auth_file, batch, region):
-    url = urlparse.urljoin(base_url, 'upload')
+    url = urljoin(base_url, 'upload')
     if region is not None:
         url += "?region=" + region
-    req = urllib2.Request(url, json.dumps(batch), {'Content-Type': 'application/json'})
+    if PY3:
+        data = to_binary(json.dumps(batch))
+    else:
+        data = json.dumps(batch)
+    req = Request(url, data, {'Content-Type': 'application/json'})
     _authorize(req, auth_file)
     try:
         resp = urllib2.urlopen(req)
-    except (urllib2.URLError, urllib2.HTTPError) as e:
+    except (URLError, HTTPError) as e:
         _log_api_error(e)
         return None
     return json.load(resp)['result']
@@ -1007,18 +1050,29 @@ def _send_batch(base_url, auth_file, batch, region):
 def _s3_upload(filename, file):
     # urllib2 does not support streaming, so we fall back to good old httplib
-    url = urlparse.urlparse(file['put_url'])
-    cls = httplib.HTTPSConnection if url.scheme == 'https' else httplib.HTTPConnection
+    url = urlparse(file['put_url'])
+    cls = HTTPSConnection if url.scheme == 'https' else HTTPConnection
     host, port = url.netloc.split(':') if ':' in url.netloc else (url.netloc, 443)
     port = int(port)
     conn = cls(host, port)
     try:
         req_path = "%s?%s" % (url.path, url.query) if url.query else url.path
-        conn.request('PUT', req_path, open(filename, "rb"),
-                     {'Content-Type': 'application/octet-stream'})
-        resp = conn.getresponse()
-        resp_body = resp.read()
-        conn.close()
+        with open(filename, 'rb') as f:
+            content = f.read()
+            content_length = len(content)
+            f.seek(0)
+            conn.request(
+                'PUT',
+                req_path,
+                f,
+                {
+                    'Content-Type': 'application/octet-stream',
+                    'Content-Length': str(content_length),
+                },
+            )
+        resp = conn.getresponse()
+        resp_body = resp.read()
+        conn.close()
         if resp.status != 200:
             raise RuntimeError("Non-200 return from AWS: %s %s\n%s" %
                                (resp.status, resp.reason, resp_body))
@@ -1030,14 +1084,14 @@ def _s3_upload(filename, file):
 def _notify_upload_complete(base_url, auth_file, file):
-    req = urllib2.Request(
-        urlparse.urljoin(
+    req = Request(
+        urljoin(
             base_url,
             'upload/complete/%(algorithm)s/%(digest)s' % file))
     _authorize(req, auth_file)
 
     try:
         urllib2.urlopen(req)
-    except urllib2.HTTPError as e:
+    except HTTPError as e:
         if e.code != 409:
             _log_api_error(e)
             return
@@ -1088,7 +1142,7 @@ def upload(manifest, message, base_urls, auth_file, region):
     # Upload the files, each in a thread. This allows us to start all of the
     # uploads before any of the URLs expire.
    threads = {}
-    for filename, file in files.iteritems():
+    for filename, file in files.items():
         if 'put_url' in file:
             log.info("%s: starting upload" % (filename,))
             thd = threading.Thread(target=_s3_upload,
@@ -1102,7 +1156,7 @@ def upload(manifest, message, base_urls, auth_file, region):
     # re-join all of those threads as they exit
     success = True
     while threads:
-        for filename, thread in threads.items():
+        for filename, thread in list(threads.items()):
             if not thread.is_alive():
                 # _s3_upload has annotated file with result information
                 file = files[filename]
@@ -1118,7 +1172,7 @@ def upload(manifest, message, base_urls, auth_file, region):
     # notify the server that the uploads are completed. If the notification
     # fails, we don't consider that an error (the server will notice
     # eventually)
-    for filename, file in files.iteritems():
+    for filename, file in files.items():
         if 'put_url' in file and file['upload_ok']:
             log.info("notifying server of upload completion for %s" % (filename,))
             _notify_upload_complete(base_urls[0], auth_file, file)
@@ -1257,5 +1311,6 @@ def main(argv, _skip_logging=False):
     return 0 if process_command(options, args) else 1
 
+
 if __name__ == "__main__":  # pragma: no cover
     sys.exit(main(sys.argv))