Backed out changeset d99a232c3dd4 (bug 1533330) for causing build bustages. CLOSED TREE

Cosmin Sabou 2019-04-24 13:20:12 +03:00
Parent 1ed971fccf
Commit 706f99c3db
2 changed files with 63 additions and 517 deletions

View file

@@ -22,20 +22,12 @@
# in which the manifest file resides and it should be called
# 'manifest.tt'
from __future__ import print_function
import base64
import calendar
import hashlib
import httplib
import json
import hmac
import logging
import math
import optparse
import os
import pprint
import re
import shutil
import sys
import tarfile
@@ -51,20 +43,8 @@ from subprocess import Popen
__version__ = '1'
# Allowed request header characters:
# !#$%&'()*+,-./:;<=>?@[]^_`{|}~ and space, a-z, A-Z, 0-9, \, "
REQUEST_HEADER_ATTRIBUTE_CHARS = re.compile(
r"^[ a-zA-Z0-9_\!#\$%&'\(\)\*\+,\-\./\:;<\=>\?@\[\]\^`\{\|\}~]*$")
DEFAULT_MANIFEST_NAME = 'manifest.tt'
TOOLTOOL_PACKAGE_SUFFIX = '.TOOLTOOL-PACKAGE'
HAWK_VER = 1
PY3 = sys.version_info[0] == 3
if PY3:
# TODO: py3 coverage
six_binary_type = bytes # pragma: no cover
else:
six_binary_type = str
log = logging.getLogger(__name__)
@@ -97,204 +77,6 @@ class MissingFileException(ExceptionWithFilename):
pass
class InvalidCredentials(Exception):
pass
class BadHeaderValue(Exception):
pass
def parse_url(url):
url_parts = urlparse.urlparse(url)
url_dict = {
'scheme': url_parts.scheme,
'hostname': url_parts.hostname,
'port': url_parts.port,
'path': url_parts.path,
'resource': url_parts.path,
'query': url_parts.query,
}
if len(url_dict['query']) > 0:
url_dict['resource'] = '%s?%s' % (url_dict['resource'], # pragma: no cover
url_dict['query'])
if url_parts.port is None:
if url_parts.scheme == 'http':
url_dict['port'] = 80
elif url_parts.scheme == 'https': # pragma: no cover
url_dict['port'] = 443
return url_dict
def utc_now(offset_in_seconds=0.0):
return int(math.floor(calendar.timegm(time.gmtime()) + float(offset_in_seconds)))
def random_string(length):
return base64.urlsafe_b64encode(os.urandom(length))[:length]
def prepare_header_val(val):
if isinstance(val, six_binary_type):
val = val.decode('utf-8')
if not REQUEST_HEADER_ATTRIBUTE_CHARS.match(val):
raise BadHeaderValue( # pragma: no cover
'header value value={val} contained an illegal character'.format(val=repr(val)))
return val
def parse_content_type(content_type): # pragma: no cover
if content_type:
return content_type.split(';')[0].strip().lower()
else:
return ''
def calculate_payload_hash(algorithm, payload, content_type): # pragma: no cover
parts = [
part if isinstance(part, six_binary_type) else part.encode('utf8')
for part in ['hawk.' + str(HAWK_VER) + '.payload\n',
parse_content_type(content_type) + '\n',
payload or '',
'\n',
]
]
p_hash = hashlib.new(algorithm)
p_hash.update(''.join(parts))
log.debug('calculating payload hash from:\n{parts}'.format(parts=pprint.pformat(parts)))
return base64.b64encode(p_hash.digest())
def validate_taskcluster_credentials(credentials):
if not hasattr(credentials, '__getitem__'):
raise InvalidCredentials('credentials must be a dict-like object') # pragma: no cover
try:
credentials['clientId']
credentials['accessToken']
except KeyError: # pragma: no cover
etype, val, tb = sys.exc_info()
raise InvalidCredentials('{etype}: {val}'.format(etype=etype, val=val))
def normalize_header_attr(val):
if isinstance(val, six_binary_type):
return val.decode('utf-8')
return val # pragma: no cover
def normalize_string(mac_type,
timestamp,
nonce,
method,
name,
host,
port,
content_hash,
):
return '\n'.join([
normalize_header_attr(header)
# The blank lines are important. They follow what the Node Hawk lib does.
for header in ['hawk.' + str(HAWK_VER) + '.' + mac_type,
timestamp,
nonce,
method or '',
name or '',
host,
port,
content_hash or '',
'', # for ext which is empty in this case
'', # Add trailing new line.
]
])
def calculate_mac(mac_type,
access_token,
algorithm,
timestamp,
nonce,
method,
name,
host,
port,
content_hash,
):
normalized = normalize_string(mac_type,
timestamp,
nonce,
method,
name,
host,
port,
content_hash)
log.debug(u'normalized resource for mac calc: {norm}'.format(norm=normalized))
digestmod = getattr(hashlib, algorithm)
# Make sure we are about to hash binary strings.
if not isinstance(normalized, six_binary_type):
normalized = normalized.encode('utf8')
if not isinstance(access_token, six_binary_type):
access_token = access_token.encode('ascii')
result = hmac.new(access_token, normalized, digestmod)
return base64.b64encode(result.digest())
def make_taskcluster_header(credentials, req):
validate_taskcluster_credentials(credentials)
url = req.get_full_url()
method = req.get_method()
algorithm = 'sha256'
timestamp = str(utc_now())
nonce = random_string(6)
url_parts = parse_url(url)
content_hash = None
if req.has_data():
content_hash = calculate_payload_hash( # pragma: no cover
algorithm,
req.get_data(),
req.get_method(),
)
mac = calculate_mac('header',
credentials['accessToken'],
algorithm,
timestamp,
nonce,
method,
url_parts['resource'],
url_parts['hostname'],
str(url_parts['port']),
content_hash,
)
header = u'Hawk mac="{}"'.format(prepare_header_val(mac))
if content_hash: # pragma: no cover
header = u'{}, hash="{}"'.format(header, prepare_header_val(content_hash))
header = u'{header}, id="{id}", ts="{ts}", nonce="{nonce}"'.format(
header=header,
id=prepare_header_val(credentials['clientId']),
ts=prepare_header_val(timestamp),
nonce=prepare_header_val(nonce),
)
log.debug('Hawk header for URL={} method={}: {}'.format(url, method, header))
return header
class FileRecord(object):
def __init__(self, filename, size, digest, algorithm, unpack=False,
@@ -313,14 +95,17 @@ class FileRecord(object):
self.visibility = visibility
def __eq__(self, other):
return self is other or (
self.filename == other.filename and
self.size == other.size and
self.digest == other.digest and
self.algorithm == other.algorithm and
self.version == other.version and
self.visibility == other.visibility
)
if self is other:
return True
if self.filename == other.filename and \
self.size == other.size and \
self.digest == other.digest and \
self.algorithm == other.algorithm and \
self.version == other.version and \
self.visibility == other.visibility:
return True
else:
return False
def __ne__(self, other):
return not self.__eq__(other)
@@ -355,7 +140,10 @@ class FileRecord(object):
raise MissingFileException(filename=self.filename)
def validate(self):
return self.validate_size() and self.validate_digest()
if self.size is None or self.validate_size():
if self.validate_digest():
return True
return False
def describe(self):
if self.present() and self.validate():
@@ -367,11 +155,12 @@
def create_file_record(filename, algorithm):
with open(filename, 'rb') as fo:
stored_filename = os.path.split(filename)[1]
fr = FileRecord(stored_filename, os.path.getsize(
filename), digest_file(fo, algorithm), algorithm)
return fr
fo = open(filename, 'rb')
stored_filename = os.path.split(filename)[1]
fr = FileRecord(stored_filename, os.path.getsize(
filename), digest_file(fo, algorithm), algorithm)
fo.close()
return fr
class FileRecordJSONEncoder(json.JSONEncoder):
@@ -399,10 +188,10 @@ class FileRecordJSONEncoder(json.JSONEncoder):
def default(self, f):
if issubclass(type(f), list):
return [
self.encode_file_record(i)
for i in f
]
record_list = []
for i in f:
record_list.append(self.encode_file_record(i))
return record_list
else:
return self.encode_file_record(f)
@@ -900,7 +689,7 @@ def fetch_files(manifest_file, base_urls, filenames=[], cache_folder=None,
try:
if not os.path.exists(cache_folder):
log.info("Creating cache in %s..." % cache_folder)
os.makedirs(cache_folder, 0o700)
os.makedirs(cache_folder, 0o0700)
shutil.copy(os.path.join(os.getcwd(), localfile.filename),
os.path.join(cache_folder, localfile.digest))
log.info("Local cache %s updated with %s" % (cache_folder,
@@ -984,25 +773,10 @@ def _log_api_error(e):
def _authorize(req, auth_file):
if not auth_file:
return
is_taskcluster_auth = False
with open(auth_file) as f:
auth_file_content = f.read().strip()
try:
auth_file_content = json.loads(auth_file_content)
is_taskcluster_auth = True
except:
pass
if is_taskcluster_auth:
taskcluster_header = make_taskcluster_header(auth_file_content, req)
log.debug("Using taskcluster credentials in %s" % auth_file)
req.add_unredirected_header('Authorization', taskcluster_header)
else:
log.debug("Using Bearer token in %s" % auth_file)
req.add_unredirected_header('Authorization', 'Bearer %s' % auth_file_content)
if auth_file:
log.debug("using bearer token in %s" % auth_file)
req.add_unredirected_header('Authorization',
'Bearer %s' % (open(auth_file, "rb").read().strip()))
def _send_batch(base_url, auth_file, batch, region):
@@ -1029,7 +803,7 @@ def _s3_upload(filename, file):
try:
req_path = "%s?%s" % (url.path, url.query) if url.query else url.path
conn.request('PUT', req_path, open(filename, "rb"),
{'Content-Type': 'application/octet-stream'})
{'Content-type': 'application/octet-stream'})
resp = conn.getresponse()
resp_body = resp.read()
conn.close()

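For reference, the Hawk signing scheme this backout removes works in two steps: build a normalized multi-line string from the request parameters (field order and trailing blank lines follow the Node Hawk library, as the removed comment notes), then HMAC it with the Taskcluster access token and base64-encode the result. Below is a minimal standard-library sketch of the happy path with no request payload; the function names mirror the diff above, but this is an illustration, not the full tooltool implementation.

import base64
import hashlib
import hmac

HAWK_VER = 1

def normalize_string(mac_type, timestamp, nonce, method, name, host, port):
    # Field order and the trailing blank lines follow the Node Hawk lib.
    return '\n'.join([
        'hawk.%d.%s' % (HAWK_VER, mac_type),
        timestamp,
        nonce,
        method,
        name,   # resource: URL path, plus '?query' if present
        host,
        port,
        '',     # payload hash (empty here: no request body)
        '',     # ext (unused in this case)
        '',     # trailing newline
    ])

def calculate_mac(access_token, timestamp, nonce, method, name, host, port):
    normalized = normalize_string('header', timestamp, nonce,
                                  method, name, host, port)
    digest = hmac.new(access_token.encode('ascii'),
                      normalized.encode('utf-8'),
                      hashlib.sha256).digest()
    return base64.b64encode(digest).decode('ascii')

make_taskcluster_header above then wraps this MAC, the clientId, the timestamp, and the nonce into the final header: Hawk mac="...", id="...", ts="...", nonce="...".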
View file

@@ -22,20 +22,12 @@
# in which the manifest file resides and it should be called
# 'manifest.tt'
from __future__ import print_function
import base64
import calendar
import hashlib
import httplib
import json
import hmac
import logging
import math
import optparse
import os
import pprint
import re
import shutil
import sys
import tarfile
@@ -51,20 +43,8 @@ from subprocess import Popen
__version__ = '1'
# Allowed request header characters:
# !#$%&'()*+,-./:;<=>?@[]^_`{|}~ and space, a-z, A-Z, 0-9, \, "
REQUEST_HEADER_ATTRIBUTE_CHARS = re.compile(
r"^[ a-zA-Z0-9_\!#\$%&'\(\)\*\+,\-\./\:;<\=>\?@\[\]\^`\{\|\}~]*$")
DEFAULT_MANIFEST_NAME = 'manifest.tt'
TOOLTOOL_PACKAGE_SUFFIX = '.TOOLTOOL-PACKAGE'
HAWK_VER = 1
PY3 = sys.version_info[0] == 3
if PY3:
# TODO: py3 coverage
six_binary_type = bytes # pragma: no cover
else:
six_binary_type = str
log = logging.getLogger(__name__)
@@ -97,204 +77,6 @@ class MissingFileException(ExceptionWithFilename):
pass
class InvalidCredentials(Exception):
pass
class BadHeaderValue(Exception):
pass
def parse_url(url):
url_parts = urlparse.urlparse(url)
url_dict = {
'scheme': url_parts.scheme,
'hostname': url_parts.hostname,
'port': url_parts.port,
'path': url_parts.path,
'resource': url_parts.path,
'query': url_parts.query,
}
if len(url_dict['query']) > 0:
url_dict['resource'] = '%s?%s' % (url_dict['resource'], # pragma: no cover
url_dict['query'])
if url_parts.port is None:
if url_parts.scheme == 'http':
url_dict['port'] = 80
elif url_parts.scheme == 'https': # pragma: no cover
url_dict['port'] = 443
return url_dict
def utc_now(offset_in_seconds=0.0):
return int(math.floor(calendar.timegm(time.gmtime()) + float(offset_in_seconds)))
def random_string(length):
return base64.urlsafe_b64encode(os.urandom(length))[:length]
def prepare_header_val(val):
if isinstance(val, six_binary_type):
val = val.decode('utf-8')
if not REQUEST_HEADER_ATTRIBUTE_CHARS.match(val):
raise BadHeaderValue( # pragma: no cover
'header value value={val} contained an illegal character'.format(val=repr(val)))
return val
def parse_content_type(content_type): # pragma: no cover
if content_type:
return content_type.split(';')[0].strip().lower()
else:
return ''
def calculate_payload_hash(algorithm, payload, content_type): # pragma: no cover
parts = [
part if isinstance(part, six_binary_type) else part.encode('utf8')
for part in ['hawk.' + str(HAWK_VER) + '.payload\n',
parse_content_type(content_type) + '\n',
payload or '',
'\n',
]
]
p_hash = hashlib.new(algorithm)
p_hash.update(''.join(parts))
log.debug('calculating payload hash from:\n{parts}'.format(parts=pprint.pformat(parts)))
return base64.b64encode(p_hash.digest())
def validate_taskcluster_credentials(credentials):
if not hasattr(credentials, '__getitem__'):
raise InvalidCredentials('credentials must be a dict-like object') # pragma: no cover
try:
credentials['clientId']
credentials['accessToken']
except KeyError: # pragma: no cover
etype, val, tb = sys.exc_info()
raise InvalidCredentials('{etype}: {val}'.format(etype=etype, val=val))
def normalize_header_attr(val):
if isinstance(val, six_binary_type):
return val.decode('utf-8')
return val # pragma: no cover
def normalize_string(mac_type,
timestamp,
nonce,
method,
name,
host,
port,
content_hash,
):
return '\n'.join([
normalize_header_attr(header)
# The blank lines are important. They follow what the Node Hawk lib does.
for header in ['hawk.' + str(HAWK_VER) + '.' + mac_type,
timestamp,
nonce,
method or '',
name or '',
host,
port,
content_hash or '',
'', # for ext which is empty in this case
'', # Add trailing new line.
]
])
def calculate_mac(mac_type,
access_token,
algorithm,
timestamp,
nonce,
method,
name,
host,
port,
content_hash,
):
normalized = normalize_string(mac_type,
timestamp,
nonce,
method,
name,
host,
port,
content_hash)
log.debug(u'normalized resource for mac calc: {norm}'.format(norm=normalized))
digestmod = getattr(hashlib, algorithm)
# Make sure we are about to hash binary strings.
if not isinstance(normalized, six_binary_type):
normalized = normalized.encode('utf8')
if not isinstance(access_token, six_binary_type):
access_token = access_token.encode('ascii')
result = hmac.new(access_token, normalized, digestmod)
return base64.b64encode(result.digest())
def make_taskcluster_header(credentials, req):
validate_taskcluster_credentials(credentials)
url = req.get_full_url()
method = req.get_method()
algorithm = 'sha256'
timestamp = str(utc_now())
nonce = random_string(6)
url_parts = parse_url(url)
content_hash = None
if req.has_data():
content_hash = calculate_payload_hash( # pragma: no cover
algorithm,
req.get_data(),
req.get_method(),
)
mac = calculate_mac('header',
credentials['accessToken'],
algorithm,
timestamp,
nonce,
method,
url_parts['resource'],
url_parts['hostname'],
str(url_parts['port']),
content_hash,
)
header = u'Hawk mac="{}"'.format(prepare_header_val(mac))
if content_hash: # pragma: no cover
header = u'{}, hash="{}"'.format(header, prepare_header_val(content_hash))
header = u'{header}, id="{id}", ts="{ts}", nonce="{nonce}"'.format(
header=header,
id=prepare_header_val(credentials['clientId']),
ts=prepare_header_val(timestamp),
nonce=prepare_header_val(nonce),
)
log.debug('Hawk header for URL={} method={}: {}'.format(url, method, header))
return header
class FileRecord(object):
def __init__(self, filename, size, digest, algorithm, unpack=False,
@@ -314,14 +96,17 @@ class FileRecord(object):
self.setup = setup
def __eq__(self, other):
return self is other or (
self.filename == other.filename and
self.size == other.size and
self.digest == other.digest and
self.algorithm == other.algorithm and
self.version == other.version and
self.visibility == other.visibility
)
if self is other:
return True
if self.filename == other.filename and \
self.size == other.size and \
self.digest == other.digest and \
self.algorithm == other.algorithm and \
self.version == other.version and \
self.visibility == other.visibility:
return True
else:
return False
def __ne__(self, other):
return not self.__eq__(other)
@@ -356,7 +141,10 @@ class FileRecord(object):
raise MissingFileException(filename=self.filename)
def validate(self):
return self.validate_size() and self.validate_digest()
if self.size is None or self.validate_size():
if self.validate_digest():
return True
return False
def describe(self):
if self.present() and self.validate():
@@ -368,11 +156,12 @@
def create_file_record(filename, algorithm):
with open(filename, 'rb') as fo:
stored_filename = os.path.split(filename)[1]
fr = FileRecord(stored_filename, os.path.getsize(
filename), digest_file(fo, algorithm), algorithm)
return fr
fo = open(filename, 'rb')
stored_filename = os.path.split(filename)[1]
fr = FileRecord(stored_filename, os.path.getsize(
filename), digest_file(fo, algorithm), algorithm)
fo.close()
return fr
class FileRecordJSONEncoder(json.JSONEncoder):
@@ -402,10 +191,10 @@ class FileRecordJSONEncoder(json.JSONEncoder):
def default(self, f):
if issubclass(type(f), list):
return [
self.encode_file_record(i)
for i in f
]
record_list = []
for i in f:
record_list.append(self.encode_file_record(i))
return record_list
else:
return self.encode_file_record(f)
@@ -549,7 +338,7 @@ def digest_file(f, a):
def execute(cmd):
"""Execute CMD, logging its stdout at the info level"""
process = Popen(cmd, shell=True, stdout=PIPE)
process = Popen(cmd, shell=True, stdout=PIPE, bufsize=0)
while True:
line = process.stdout.readline()
if not line:
@@ -694,8 +483,6 @@ def fetch_file(base_urls, file_record, grabchunk=1024 * 4, auth_file=None, regio
k = True
size = 0
while k:
# TODO: print statistics as file transfers happen both for info and to stop
# buildbot timeouts
indata = f.read(grabchunk)
out.write(indata)
size += len(indata)
@@ -884,7 +671,7 @@ def fetch_files(manifest_file, base_urls, filenames=[], cache_folder=None,
try:
if not os.path.exists(cache_folder):
log.info("Creating cache in %s..." % cache_folder)
os.makedirs(cache_folder, 0o700)
os.makedirs(cache_folder, 0o0700)
shutil.copy(os.path.join(os.getcwd(), localfile.filename),
os.path.join(cache_folder, localfile.digest))
log.info("Local cache %s updated with %s" % (cache_folder,
@@ -968,25 +755,10 @@ def _log_api_error(e):
def _authorize(req, auth_file):
if not auth_file:
return
is_taskcluster_auth = False
with open(auth_file) as f:
auth_file_content = f.read().strip()
try:
auth_file_content = json.loads(auth_file_content)
is_taskcluster_auth = True
except:
pass
if is_taskcluster_auth:
taskcluster_header = make_taskcluster_header(auth_file_content, req)
log.debug("Using taskcluster credentials in %s" % auth_file)
req.add_unredirected_header('Authorization', taskcluster_header)
else:
log.debug("Using Bearer token in %s" % auth_file)
req.add_unredirected_header('Authorization', 'Bearer %s' % auth_file_content)
if auth_file:
log.debug("using bearer token in %s" % auth_file)
req.add_unredirected_header('Authorization',
'Bearer %s' % (open(auth_file, "rb").read().strip()))
def _send_batch(base_url, auth_file, batch, region):
@@ -1013,7 +785,7 @@ def _s3_upload(filename, file):
try:
req_path = "%s?%s" % (url.path, url.query) if url.query else url.path
conn.request('PUT', req_path, open(filename, "rb"),
{'Content-Type': 'application/octet-stream'})
{'Content-type': 'application/octet-stream'})
resp = conn.getresponse()
resp_body = resp.read()
conn.close()
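The _authorize hunks are the heart of the backout. The removed version probed the auth file's contents: JSON meant Taskcluster Hawk credentials (signed via make_taskcluster_header), anything else a plain bearer token; the restored version always sends a bearer token. A minimal sketch of the removed detection step, assuming the same file layout (the helper name read_auth_file is mine):

import json

def read_auth_file(auth_file):
    """Return (content, is_taskcluster): the parsed credentials dict if
    the file holds JSON, otherwise the raw bearer-token string."""
    with open(auth_file) as f:
        content = f.read().strip()
    try:
        # Taskcluster credentials are a JSON object with clientId/accessToken.
        return json.loads(content), True
    except ValueError:
        return content, False

Note that the removed code used a bare except: for this probe; except ValueError is the narrower equivalent, since json.loads raises a ValueError subclass on non-JSON input.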