Backed out changeset d99a232c3dd4 (bug 1533330) for causing build bustages. CLOSED TREE

Cosmin Sabou 2019-04-24 13:20:12 +03:00
Parent 1ed971fccf
Commit 706f99c3db
2 changed files with 63 additions and 517 deletions

View file

@@ -22,20 +22,12 @@
# in which the manifest file resides and it should be called
# 'manifest.tt'
from __future__ import print_function
import base64
import calendar
import hashlib
import httplib
import json
import hmac
import logging
import math
import optparse
import os
import pprint
import re
import shutil
import sys
import tarfile
@@ -51,20 +43,8 @@ from subprocess import Popen
__version__ = '1'
# Allowed request header characters:
# !#$%&'()*+,-./:;<=>?@[]^_`{|}~ and space, a-z, A-Z, 0-9, \, "
REQUEST_HEADER_ATTRIBUTE_CHARS = re.compile(
r"^[ a-zA-Z0-9_\!#\$%&'\(\)\*\+,\-\./\:;<\=>\?@\[\]\^`\{\|\}~]*$")
DEFAULT_MANIFEST_NAME = 'manifest.tt'
TOOLTOOL_PACKAGE_SUFFIX = '.TOOLTOOL-PACKAGE'
HAWK_VER = 1
PY3 = sys.version_info[0] == 3
if PY3:
# TODO: py3 coverage
six_binary_type = bytes # pragma: no cover
else:
six_binary_type = str
log = logging.getLogger(__name__)
@@ -97,204 +77,6 @@ class MissingFileException(ExceptionWithFilename):
pass
class InvalidCredentials(Exception):
pass
class BadHeaderValue(Exception):
pass
def parse_url(url):
url_parts = urlparse.urlparse(url)
url_dict = {
'scheme': url_parts.scheme,
'hostname': url_parts.hostname,
'port': url_parts.port,
'path': url_parts.path,
'resource': url_parts.path,
'query': url_parts.query,
}
if len(url_dict['query']) > 0:
url_dict['resource'] = '%s?%s' % (url_dict['resource'], # pragma: no cover
url_dict['query'])
if url_parts.port is None:
if url_parts.scheme == 'http':
url_dict['port'] = 80
elif url_parts.scheme == 'https': # pragma: no cover
url_dict['port'] = 443
return url_dict
def utc_now(offset_in_seconds=0.0):
return int(math.floor(calendar.timegm(time.gmtime()) + float(offset_in_seconds)))
def random_string(length):
return base64.urlsafe_b64encode(os.urandom(length))[:length]
def prepare_header_val(val):
if isinstance(val, six_binary_type):
val = val.decode('utf-8')
if not REQUEST_HEADER_ATTRIBUTE_CHARS.match(val):
raise BadHeaderValue( # pragma: no cover
'header value value={val} contained an illegal character'.format(val=repr(val)))
return val
def parse_content_type(content_type): # pragma: no cover
if content_type:
return content_type.split(';')[0].strip().lower()
else:
return ''
def calculate_payload_hash(algorithm, payload, content_type): # pragma: no cover
parts = [
part if isinstance(part, six_binary_type) else part.encode('utf8')
for part in ['hawk.' + str(HAWK_VER) + '.payload\n',
parse_content_type(content_type) + '\n',
payload or '',
'\n',
]
]
p_hash = hashlib.new(algorithm)
p_hash.update(''.join(parts))
log.debug('calculating payload hash from:\n{parts}'.format(parts=pprint.pformat(parts)))
return base64.b64encode(p_hash.digest())
def validate_taskcluster_credentials(credentials):
if not hasattr(credentials, '__getitem__'):
raise InvalidCredentials('credentials must be a dict-like object') # pragma: no cover
try:
credentials['clientId']
credentials['accessToken']
except KeyError: # pragma: no cover
etype, val, tb = sys.exc_info()
raise InvalidCredentials('{etype}: {val}'.format(etype=etype, val=val))
def normalize_header_attr(val):
if isinstance(val, six_binary_type):
return val.decode('utf-8')
return val # pragma: no cover
def normalize_string(mac_type,
timestamp,
nonce,
method,
name,
host,
port,
content_hash,
):
return '\n'.join([
normalize_header_attr(header)
# The blank lines are important. They follow what the Node Hawk lib does.
for header in ['hawk.' + str(HAWK_VER) + '.' + mac_type,
timestamp,
nonce,
method or '',
name or '',
host,
port,
content_hash or '',
'', # for ext which is empty in this case
'', # Add trailing new line.
]
])
def calculate_mac(mac_type,
access_token,
algorithm,
timestamp,
nonce,
method,
name,
host,
port,
content_hash,
):
normalized = normalize_string(mac_type,
timestamp,
nonce,
method,
name,
host,
port,
content_hash)
log.debug(u'normalized resource for mac calc: {norm}'.format(norm=normalized))
digestmod = getattr(hashlib, algorithm)
# Make sure we are about to hash binary strings.
if not isinstance(normalized, six_binary_type):
normalized = normalized.encode('utf8')
if not isinstance(access_token, six_binary_type):
access_token = access_token.encode('ascii')
result = hmac.new(access_token, normalized, digestmod)
return base64.b64encode(result.digest())
def make_taskcluster_header(credentials, req):
validate_taskcluster_credentials(credentials)
url = req.get_full_url()
method = req.get_method()
algorithm = 'sha256'
timestamp = str(utc_now())
nonce = random_string(6)
url_parts = parse_url(url)
content_hash = None
if req.has_data():
content_hash = calculate_payload_hash( # pragma: no cover
algorithm,
req.get_data(),
req.get_method(),
)
mac = calculate_mac('header',
credentials['accessToken'],
algorithm,
timestamp,
nonce,
method,
url_parts['resource'],
url_parts['hostname'],
str(url_parts['port']),
content_hash,
)
header = u'Hawk mac="{}"'.format(prepare_header_val(mac))
if content_hash: # pragma: no cover
header = u'{}, hash="{}"'.format(header, prepare_header_val(content_hash))
header = u'{header}, id="{id}", ts="{ts}", nonce="{nonce}"'.format(
header=header,
id=prepare_header_val(credentials['clientId']),
ts=prepare_header_val(timestamp),
nonce=prepare_header_val(nonce),
)
log.debug('Hawk header for URL={} method={}: {}'.format(url, method, header))
return header
class FileRecord(object):
def __init__(self, filename, size, digest, algorithm, unpack=False,
@@ -313,14 +95,17 @@ class FileRecord(object):
self.visibility = visibility
def __eq__(self, other):
return self is other or (
self.filename == other.filename and
self.size == other.size and
self.digest == other.digest and
self.algorithm == other.algorithm and
self.version == other.version and
self.visibility == other.visibility
)
if self is other:
return True
if self.filename == other.filename and \
self.size == other.size and \
self.digest == other.digest and \
self.algorithm == other.algorithm and \
self.version == other.version and \
self.visibility == other.visibility:
return True
else:
return False
def __ne__(self, other):
return not self.__eq__(other)
@@ -355,7 +140,10 @@ class FileRecord(object):
raise MissingFileException(filename=self.filename)
def validate(self):
return self.validate_size() and self.validate_digest()
if self.size is None or self.validate_size():
if self.validate_digest():
return True
return False
def describe(self):
if self.present() and self.validate():
@@ -367,11 +155,12 @@
def create_file_record(filename, algorithm):
with open(filename, 'rb') as fo:
stored_filename = os.path.split(filename)[1]
fr = FileRecord(stored_filename, os.path.getsize(
filename), digest_file(fo, algorithm), algorithm)
return fr
fo = open(filename, 'rb')
stored_filename = os.path.split(filename)[1]
fr = FileRecord(stored_filename, os.path.getsize(
filename), digest_file(fo, algorithm), algorithm)
fo.close()
return fr
class FileRecordJSONEncoder(json.JSONEncoder):
@@ -399,10 +188,10 @@ class FileRecordJSONEncoder(json.JSONEncoder):
def default(self, f):
if issubclass(type(f), list):
return [
self.encode_file_record(i)
for i in f
]
record_list = []
for i in f:
record_list.append(self.encode_file_record(i))
return record_list
else:
return self.encode_file_record(f)
@@ -900,7 +689,7 @@ def fetch_files(manifest_file, base_urls, filenames=[], cache_folder=None,
try:
if not os.path.exists(cache_folder):
log.info("Creating cache in %s..." % cache_folder)
os.makedirs(cache_folder, 0o700)
os.makedirs(cache_folder, 0o0700)
shutil.copy(os.path.join(os.getcwd(), localfile.filename),
os.path.join(cache_folder, localfile.digest))
log.info("Local cache %s updated with %s" % (cache_folder,
@@ -984,25 +773,10 @@ def _log_api_error(e):
def _authorize(req, auth_file):
if not auth_file:
return
is_taskcluster_auth = False
with open(auth_file) as f:
auth_file_content = f.read().strip()
try:
auth_file_content = json.loads(auth_file_content)
is_taskcluster_auth = True
except:
pass
if is_taskcluster_auth:
taskcluster_header = make_taskcluster_header(auth_file_content, req)
log.debug("Using taskcluster credentials in %s" % auth_file)
req.add_unredirected_header('Authorization', taskcluster_header)
else:
log.debug("Using Bearer token in %s" % auth_file)
req.add_unredirected_header('Authorization', 'Bearer %s' % auth_file_content)
if auth_file:
log.debug("using bearer token in %s" % auth_file)
req.add_unredirected_header('Authorization',
'Bearer %s' % (open(auth_file, "rb").read().strip()))
def _send_batch(base_url, auth_file, batch, region):
@@ -1029,7 +803,7 @@ def _s3_upload(filename, file):
try:
req_path = "%s?%s" % (url.path, url.query) if url.query else url.path
conn.request('PUT', req_path, open(filename, "rb"),
{'Content-Type': 'application/octet-stream'})
{'Content-type': 'application/octet-stream'})
resp = conn.getresponse()
resp_body = resp.read()
conn.close()

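For reference, the Hawk signing scheme this backout removes works in two steps: build a normalized multi-line string from the request parameters (field order and trailing blank lines follow the Node Hawk library, as the removed comment notes), then HMAC it with the Taskcluster access token and base64-encode the result. Below is a minimal standard-library sketch of the happy path with no request payload; the function names mirror the diff above, but this is an illustration, not the full tooltool implementation.

import base64
import hashlib
import hmac

HAWK_VER = 1

def normalize_string(mac_type, timestamp, nonce, method, name, host, port):
    # Field order and the trailing blank lines follow the Node Hawk lib.
    return '\n'.join([
        'hawk.%d.%s' % (HAWK_VER, mac_type),
        timestamp,
        nonce,
        method,
        name,   # resource: URL path, plus '?query' if present
        host,
        port,
        '',     # payload hash (empty here: no request body)
        '',     # ext (unused in this case)
        '',     # trailing newline
    ])

def calculate_mac(access_token, timestamp, nonce, method, name, host, port):
    normalized = normalize_string('header', timestamp, nonce,
                                  method, name, host, port)
    digest = hmac.new(access_token.encode('ascii'),
                      normalized.encode('utf-8'),
                      hashlib.sha256).digest()
    return base64.b64encode(digest).decode('ascii')

make_taskcluster_header above then wraps this MAC, the clientId, the timestamp, and the nonce into the final header: Hawk mac="...", id="...", ts="...", nonce="...".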
View file

@@ -22,20 +22,12 @@
# in which the manifest file resides and it should be called
# 'manifest.tt'
from __future__ import print_function
import base64
import calendar
import hashlib
import httplib
import json
import hmac
import logging
import math
import optparse
import os
import pprint
import re
import shutil
import sys
import tarfile
@@ -51,20 +43,8 @@ from subprocess import Popen
__version__ = '1'
# Allowed request header characters:
# !#$%&'()*+,-./:;<=>?@[]^_`{|}~ and space, a-z, A-Z, 0-9, \, "
REQUEST_HEADER_ATTRIBUTE_CHARS = re.compile(
r"^[ a-zA-Z0-9_\!#\$%&'\(\)\*\+,\-\./\:;<\=>\?@\[\]\^`\{\|\}~]*$")
DEFAULT_MANIFEST_NAME = 'manifest.tt'
TOOLTOOL_PACKAGE_SUFFIX = '.TOOLTOOL-PACKAGE'
HAWK_VER = 1
PY3 = sys.version_info[0] == 3
if PY3:
# TODO: py3 coverage
six_binary_type = bytes # pragma: no cover
else:
six_binary_type = str
log = logging.getLogger(__name__)
@@ -97,204 +77,6 @@ class MissingFileException(ExceptionWithFilename):
pass
class InvalidCredentials(Exception):
pass
class BadHeaderValue(Exception):
pass
def parse_url(url):
url_parts = urlparse.urlparse(url)
url_dict = {
'scheme': url_parts.scheme,
'hostname': url_parts.hostname,
'port': url_parts.port,
'path': url_parts.path,
'resource': url_parts.path,
'query': url_parts.query,
}
if len(url_dict['query']) > 0:
url_dict['resource'] = '%s?%s' % (url_dict['resource'], # pragma: no cover
url_dict['query'])
if url_parts.port is None:
if url_parts.scheme == 'http':
url_dict['port'] = 80
elif url_parts.scheme == 'https': # pragma: no cover
url_dict['port'] = 443
return url_dict
def utc_now(offset_in_seconds=0.0):
return int(math.floor(calendar.timegm(time.gmtime()) + float(offset_in_seconds)))
def random_string(length):
return base64.urlsafe_b64encode(os.urandom(length))[:length]
def prepare_header_val(val):
if isinstance(val, six_binary_type):
val = val.decode('utf-8')
if not REQUEST_HEADER_ATTRIBUTE_CHARS.match(val):
raise BadHeaderValue( # pragma: no cover
'header value value={val} contained an illegal character'.format(val=repr(val)))
return val
def parse_content_type(content_type): # pragma: no cover
if content_type:
return content_type.split(';')[0].strip().lower()
else:
return ''
def calculate_payload_hash(algorithm, payload, content_type): # pragma: no cover
parts = [
part if isinstance(part, six_binary_type) else part.encode('utf8')
for part in ['hawk.' + str(HAWK_VER) + '.payload\n',
parse_content_type(content_type) + '\n',
payload or '',
'\n',
]
]
p_hash = hashlib.new(algorithm)
p_hash.update(''.join(parts))
log.debug('calculating payload hash from:\n{parts}'.format(parts=pprint.pformat(parts)))
return base64.b64encode(p_hash.digest())
def validate_taskcluster_credentials(credentials):
if not hasattr(credentials, '__getitem__'):
raise InvalidCredentials('credentials must be a dict-like object') # pragma: no cover
try:
credentials['clientId']
credentials['accessToken']
except KeyError: # pragma: no cover
etype, val, tb = sys.exc_info()
raise InvalidCredentials('{etype}: {val}'.format(etype=etype, val=val))
def normalize_header_attr(val):
if isinstance(val, six_binary_type):
return val.decode('utf-8')
return val # pragma: no cover
def normalize_string(mac_type,
timestamp,
nonce,
method,
name,
host,
port,
content_hash,
):
return '\n'.join([
normalize_header_attr(header)
# The blank lines are important. They follow what the Node Hawk lib does.
for header in ['hawk.' + str(HAWK_VER) + '.' + mac_type,
timestamp,
nonce,
method or '',
name or '',
host,
port,
content_hash or '',
'', # for ext which is empty in this case
'', # Add trailing new line.
]
])
def calculate_mac(mac_type,
access_token,
algorithm,
timestamp,
nonce,
method,
name,
host,
port,
content_hash,
):
normalized = normalize_string(mac_type,
timestamp,
nonce,
method,
name,
host,
port,
content_hash)
log.debug(u'normalized resource for mac calc: {norm}'.format(norm=normalized))
digestmod = getattr(hashlib, algorithm)
# Make sure we are about to hash binary strings.
if not isinstance(normalized, six_binary_type):
normalized = normalized.encode('utf8')
if not isinstance(access_token, six_binary_type):
access_token = access_token.encode('ascii')
result = hmac.new(access_token, normalized, digestmod)
return base64.b64encode(result.digest())
def make_taskcluster_header(credentials, req):
validate_taskcluster_credentials(credentials)
url = req.get_full_url()
method = req.get_method()
algorithm = 'sha256'
timestamp = str(utc_now())
nonce = random_string(6)
url_parts = parse_url(url)
content_hash = None
if req.has_data():
content_hash = calculate_payload_hash( # pragma: no cover
algorithm,
req.get_data(),
req.get_method(),
)
mac = calculate_mac('header',
credentials['accessToken'],
algorithm,
timestamp,
nonce,
method,
url_parts['resource'],
url_parts['hostname'],
str(url_parts['port']),
content_hash,
)
header = u'Hawk mac="{}"'.format(prepare_header_val(mac))
if content_hash: # pragma: no cover
header = u'{}, hash="{}"'.format(header, prepare_header_val(content_hash))
header = u'{header}, id="{id}", ts="{ts}", nonce="{nonce}"'.format(
header=header,
id=prepare_header_val(credentials['clientId']),
ts=prepare_header_val(timestamp),
nonce=prepare_header_val(nonce),
)
log.debug('Hawk header for URL={} method={}: {}'.format(url, method, header))
return header
class FileRecord(object):
def __init__(self, filename, size, digest, algorithm, unpack=False,
@@ -314,14 +96,17 @@ class FileRecord(object):
self.setup = setup
def __eq__(self, other):
return self is other or (
self.filename == other.filename and
self.size == other.size and
self.digest == other.digest and
self.algorithm == other.algorithm and
self.version == other.version and
self.visibility == other.visibility
)
if self is other:
return True
if self.filename == other.filename and \
self.size == other.size and \
self.digest == other.digest and \
self.algorithm == other.algorithm and \
self.version == other.version and \
self.visibility == other.visibility:
return True
else:
return False
def __ne__(self, other):
return not self.__eq__(other)
@@ -356,7 +141,10 @@ class FileRecord(object):
raise MissingFileException(filename=self.filename)
def validate(self):
return self.validate_size() and self.validate_digest()
if self.size is None or self.validate_size():
if self.validate_digest():
return True
return False
def describe(self):
if self.present() and self.validate():
@@ -368,11 +156,12 @@
def create_file_record(filename, algorithm):
with open(filename, 'rb') as fo:
stored_filename = os.path.split(filename)[1]
fr = FileRecord(stored_filename, os.path.getsize(
filename), digest_file(fo, algorithm), algorithm)
return fr
fo = open(filename, 'rb')
stored_filename = os.path.split(filename)[1]
fr = FileRecord(stored_filename, os.path.getsize(
filename), digest_file(fo, algorithm), algorithm)
fo.close()
return fr
class FileRecordJSONEncoder(json.JSONEncoder):
@@ -402,10 +191,10 @@ class FileRecordJSONEncoder(json.JSONEncoder):
def default(self, f):
if issubclass(type(f), list):
return [
self.encode_file_record(i)
for i in f
]
record_list = []
for i in f:
record_list.append(self.encode_file_record(i))
return record_list
else:
return self.encode_file_record(f)
@@ -549,7 +338,7 @@ def digest_file(f, a):
def execute(cmd):
"""Execute CMD, logging its stdout at the info level"""
process = Popen(cmd, shell=True, stdout=PIPE)
process = Popen(cmd, shell=True, stdout=PIPE, bufsize=0)
while True:
line = process.stdout.readline()
if not line:
@@ -694,8 +483,6 @@ def fetch_file(base_urls, file_record, grabchunk=1024 * 4, auth_file=None, regio
k = True
size = 0
while k:
# TODO: print statistics as file transfers happen both for info and to stop
# buildbot timeouts
indata = f.read(grabchunk)
out.write(indata)
size += len(indata)
@@ -884,7 +671,7 @@ def fetch_files(manifest_file, base_urls, filenames=[], cache_folder=None,
try:
if not os.path.exists(cache_folder):
log.info("Creating cache in %s..." % cache_folder)
os.makedirs(cache_folder, 0o700)
os.makedirs(cache_folder, 0o0700)
shutil.copy(os.path.join(os.getcwd(), localfile.filename),
os.path.join(cache_folder, localfile.digest))
log.info("Local cache %s updated with %s" % (cache_folder,
@@ -968,25 +755,10 @@ def _log_api_error(e):
def _authorize(req, auth_file):
if not auth_file:
return
is_taskcluster_auth = False
with open(auth_file) as f:
auth_file_content = f.read().strip()
try:
auth_file_content = json.loads(auth_file_content)
is_taskcluster_auth = True
except:
pass
if is_taskcluster_auth:
taskcluster_header = make_taskcluster_header(auth_file_content, req)
log.debug("Using taskcluster credentials in %s" % auth_file)
req.add_unredirected_header('Authorization', taskcluster_header)
else:
log.debug("Using Bearer token in %s" % auth_file)
req.add_unredirected_header('Authorization', 'Bearer %s' % auth_file_content)
if auth_file:
log.debug("using bearer token in %s" % auth_file)
req.add_unredirected_header('Authorization',
'Bearer %s' % (open(auth_file, "rb").read().strip()))
def _send_batch(base_url, auth_file, batch, region):
@@ -1013,7 +785,7 @@ def _s3_upload(filename, file):
try:
req_path = "%s?%s" % (url.path, url.query) if url.query else url.path
conn.request('PUT', req_path, open(filename, "rb"),
{'Content-Type': 'application/octet-stream'})
{'Content-type': 'application/octet-stream'})
resp = conn.getresponse()
resp_body = resp.read()
conn.close()
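The _authorize hunks are the heart of the backout. The removed version probed the auth file's contents: JSON meant Taskcluster Hawk credentials (signed via make_taskcluster_header), anything else a plain bearer token; the restored version always sends a bearer token. A minimal sketch of the removed detection step, assuming the same file layout (the helper name read_auth_file is mine):

import json

def read_auth_file(auth_file):
    """Return (content, is_taskcluster): the parsed credentials dict if
    the file holds JSON, otherwise the raw bearer-token string."""
    with open(auth_file) as f:
        content = f.read().strip()
    try:
        # Taskcluster credentials are a JSON object with clientId/accessToken.
        return json.loads(content), True
    except ValueError:
        return content, False

Note that the removed code used a bare except: for this probe; except ValueError is the narrower equivalent, since json.loads raises a ValueError subclass on non-JSON input.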