Bug 1390693 - Use thread pool for S3 uploads; r=dustin

This reduces the time taken to upload the Firefox docs from ~30s to
~5s (per invocation).

MozReview-Commit-ID: DxOrvxvVn42

--HG--
extra : rebase_source : b170fb728267c43484573e0a57ebc96512eb0e7c
Gregory Szorc 2017-08-24 12:38:01 -07:00
Parent: 10773f0d43
Commit: 3bc8ec26f0
1 changed file with 32 additions and 17 deletions


@@ -7,8 +7,11 @@ from __future__ import absolute_import, unicode_literals
 import io
 import mimetypes
 import os
+import sys
 
+import botocore
 import boto3
+import concurrent.futures as futures
 import requests
@@ -47,24 +50,36 @@ def s3_upload(files, key_prefix=None):
     else:
         print("Trying to use your AWS credentials..")
         session = boto3.session.Session(region_name=region)
-    s3 = session.client('s3')
 
-    for path, f in files:
-        content_type, content_encoding = mimetypes.guess_type(path)
-        extra_args = {}
-        if content_type:
-            extra_args['ContentType'] = content_type
-        if content_encoding:
-            extra_args['ContentEncoding'] = content_encoding
+    s3 = session.client('s3',
+                        config=botocore.client.Config(max_pool_connections=20))
 
-        if key_prefix:
-            key = '%s/%s' % (key_prefix, path)
-        else:
-            key = path
+    def upload(f, path, bucket, key, extra_args):
+        # Need to flush to avoid buffering/interleaving from multiple threads.
+        sys.stdout.write('uploading %s to %s\n' % (path, key))
+        sys.stdout.flush()
+        s3.upload_fileobj(f, bucket, key, ExtraArgs=extra_args)
 
-        print('uploading %s to %s' % (path, key))
+    fs = []
+    with futures.ThreadPoolExecutor(20) as e:
+        for path, f in files:
+            content_type, content_encoding = mimetypes.guess_type(path)
+            extra_args = {}
+            if content_type:
+                extra_args['ContentType'] = content_type
+            if content_encoding:
+                extra_args['ContentEncoding'] = content_encoding
 
-        # The file types returned by mozpack behave like file objects. But they
-        # don't accept an argument to read(). So we wrap in a BytesIO.
-        s3.upload_fileobj(io.BytesIO(f.read()), bucket, key,
-                          ExtraArgs=extra_args)
+            if key_prefix:
+                key = '%s/%s' % (key_prefix, path)
+            else:
+                key = path
+
+            # The file types returned by mozpack behave like file objects. But
+            # they don't accept an argument to read(). So we wrap in a BytesIO.
+            fs.append(e.submit(upload, io.BytesIO(f.read()), path, bucket, key,
+                               extra_args))
+
+    # Need to do this to catch any exceptions.
+    for f in fs:
+        f.result()
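
For reference, a minimal standalone sketch of the same pattern outside the mozilla tree: one shared boto3 client with its HTTP connection pool sized to match the thread pool, uploads fanned out via concurrent.futures, and worker exceptions surfaced through result(). The region, bucket name, and payloads here are hypothetical placeholders, not taken from this commit.

    import io
    import concurrent.futures as futures

    import boto3
    import botocore

    # Threads and HTTP connections, matched so no thread blocks on the pool.
    POOL_SIZE = 20

    # boto3 clients are thread-safe, so one client is shared by all workers.
    session = boto3.session.Session(region_name='us-west-2')  # hypothetical
    s3 = session.client(
        's3', config=botocore.client.Config(max_pool_connections=POOL_SIZE))

    def upload(data, bucket, key):
        # upload_fileobj() accepts any file-like object supporting read().
        s3.upload_fileobj(io.BytesIO(data), bucket, key)

    # Hypothetical payloads standing in for the generated docs.
    files = {
        'index.html': b'<html></html>',
        'main.css': b'body { margin: 0 }',
    }

    with futures.ThreadPoolExecutor(POOL_SIZE) as e:
        fs = [e.submit(upload, data, 'my-docs-bucket', key)
              for key, data in files.items()]

    # result() re-raises any exception from a worker, so failures aren't silent.
    for f in fs:
        f.result()

Matching max_pool_connections to the executor's worker count matters because botocore's default pool holds 10 connections; with 20 threads and the default pool, roughly half the threads would sit blocked waiting for a free connection instead of uploading.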