Update dependencies to fix ci (#354)

Parent: 5756722cd5
Commit: e42d0a9d23

@@ -16,7 +16,7 @@ install_dependencies: &install_dependencies
   command: |
     apt update
     apt install -y libsnappy-dev openjdk-8-jre-headless
-    pip install tox coverage
+    pip install tox coverage==5.3
 
 save_cache_settings: &save_cache_settings
   key: v1-python_mozetl-{{ checksum "setup.py" }}
@@ -66,20 +66,15 @@ test_settings: &test_settings
 
 version: 2
 jobs:
-  py27:
+  py37:
     <<: *test_settings
     parallelism: 4
     docker:
-      - image: python:2.7-stretch
-  py35:
-    <<: *test_settings
-    parallelism: 4
-    docker:
-      - image: python:3.5-stretch
+      - image: python:3.7-stretch
 
   lint:
     docker:
-      - image: python:3.6-stretch
+      - image: python:3.7-stretch
     working_directory: ~/python_mozetl
     steps:
       - checkout
@@ -93,7 +88,7 @@ jobs:
 
   docs:
     docker:
-      - image: python:2.7-stretch
+      - image: python:3.7-stretch
     working_directory: ~/python_mozetl
     steps:
       - checkout
@@ -137,8 +132,7 @@ workflows:
   version: 2
   build:
     jobs:
-      - py27
-      - py35
+      - py37
       - lint
       - docs
      - docs-deploy:

@@ -81,7 +81,7 @@ def extract_search_counts(frame):
     extracted = grouped.select(
         "did",
         F.col("sum(search_count_atom)").alias("search_count_all"),
-        *[F.col("sum({})".format(c)).alias(c) for c in SEARCH_ACCESS_COLUMNS]
+        *[F.col("sum({})".format(c)).alias(c) for c in SEARCH_ACCESS_COLUMNS],
     )
     # Create a homologous output row for each input row
     # where search_counts is NULL.
@@ -91,7 +91,7 @@ def extract_search_counts(frame):
         .select(
             "did",
             F.lit(0).alias("search_count_all"),
-            *[F.lit(0).alias(c) for c in SEARCH_ACCESS_COLUMNS]
+            *[F.lit(0).alias(c) for c in SEARCH_ACCESS_COLUMNS],
         )
     )
     intermediate = extracted.unionAll(nulls)

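Note: the trailing comma added after the final `*[...]` argument above (and after the `**kwargs` arguments later in this diff) is only valid syntax on Python 3; on Python 2.7 a trailing comma after a `*` or `**` unpacking is a SyntaxError. This is black's "magic trailing comma" style and presumably only became possible once the py27 environment was dropped. A minimal, generic illustration (names are placeholders, not from this repo):

    # Runs on Python 3; the same call is a SyntaxError on Python 2.7.
    def describe(*args, **kwargs):
        return len(args), sorted(kwargs)

    counts = describe(
        1,
        2,
        *[3, 4],
        **{"a": 5},  # trailing comma after a ** unpacking requires Python 3
    )
    assert counts == (4, ["a"])
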
@@ -155,8 +155,8 @@ class Prof(object):
 
 # Helpers.
 def fix_vendor(vendor_id):
-    if vendor_id == u"Intel Open Source Technology Center":
-        return u"0x8086"
+    if vendor_id == "Intel Open Source Technology Center":
+        return "0x8086"
     return vendor_id
 
 
@@ -427,7 +427,7 @@ class Trend(TrendBase):
 
         text = json.dumps(self.cache)
 
-        print("Writing file {0}".format(self.local_path, text))
+        print("Writing file {0}".format(self.local_path))
         with open(self.local_path, "w") as fp:
             fp.write(text)
 
@@ -621,9 +621,9 @@ if __name__ == "__main__":
                 WinArchTrend(),
                 WindowsVendorTrend(),
                 WindowsVistaPlusGroup([Direct2DTrend(), Direct3D11Trend()]),
-                DeviceGenTrend(u"0x8086", "intel"),
-                DeviceGenTrend(u"0x10de", "nvidia"),
-                DeviceGenTrend(u"0x1002", "amd"),
+                DeviceGenTrend("0x8086", "intel"),
+                DeviceGenTrend("0x10de", "nvidia"),
+                DeviceGenTrend("0x1002", "amd"),
             ]
         ),
     ]

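Note: two small cleanups in the file above. `str.format` silently ignores surplus positional arguments, so the old `print("Writing file {0}".format(self.local_path, text))` never actually printed `text`; the new call just drops the unused argument. The `u"..."` prefixes are redundant on Python 3, where every string literal is already unicode, and black 20.8b1 also normalizes them away. A quick sketch:

    # str.format ignores extra positional arguments rather than raising,
    # which is how the unused second argument went unnoticed:
    msg = "Writing file {0}".format("/tmp/out.json", "silently dropped")
    assert msg == "Writing file /tmp/out.json"

    # On Python 3 the u prefix is a no-op:
    assert u"0x8086" == "0x8086" and type(u"0x8086") is str
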
@@ -372,7 +372,7 @@ def search_aggregates_etl(submission_date, bucket, prefix, **kwargs):
         prefix,
         SEARCH_AGGREGATES_VERSION,
         search_aggregates,
-        **kwargs
+        **kwargs,
     )
 
 
@@ -384,7 +384,7 @@ def search_clients_daily_etl(submission_date, bucket, prefix, **kwargs):
         SEARCH_CLIENTS_DAILY_VERSION,
         search_clients_daily,
         orderBy=["sample_id"],
-        **kwargs
+        **kwargs,
     )
 
 
@@ -24,12 +24,12 @@ spark = SparkSession.builder.appName("modules-with-missing-symbols").getOrCreate
 
 sc.addPyFile("stemming-1.0.1/stemming/porter2.py")
 
-from crashcorrelations import (
+from crashcorrelations import (  # noqa E402
     utils,
     download_data,
     crash_deviations,
     comments,
-)  # noqa E402
+)
 
 
 # workaround airflow not able to different schedules for tasks in a dag

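Note: a `# noqa` comment only suppresses warnings reported on the physical line it sits on. E402 (module-level import not at top of file) is reported on the line containing `from crashcorrelations import (`, not on the closing parenthesis, so the comment is moved up to the line that triggers the warning. (Also worth knowing: flake8 only honours a code list after a colon, so `# noqa E402` without the colon acts as a blanket suppression for that line.) A self-contained sketch of the pattern, using a stdlib module as a stand-in:

    import sys

    sys.path.insert(0, ".")  # any executable statement before an import triggers E402

    from collections import (  # noqa: E402  <- must sit on the line flake8 reports
        OrderedDict,
        defaultdict,
    )

    print(OrderedDict(), defaultdict(int))
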
@@ -78,7 +78,7 @@ def get_df(spark, date_from):
 
 
 def get_addons_per_client(users_df, minimum_addons_count):
-    """ Extracts a DataFrame that contains one row
+    """Extracts a DataFrame that contains one row
     for each client along with the list of active add-on GUIDs.
     """
 
@@ -328,8 +328,7 @@ def today_minus_7_days():
 
 
 def verify_valid_coefs(coefs):
-    """ verify that the model has proper floating point values (> 0)
-    """
+    """verify that the model has proper floating point values (> 0)"""
 
     assert "ensemble_weights" in coefs
     weights = coefs["ensemble_weights"]
@@ -362,9 +361,9 @@ def verify_valid_coefs(coefs):
 
 
 class CostLLR:
-    """ based on Niko Brummer's original implementation:
-        Niko Brummer and Johan du Preez, Application-Independent Evaluation of Speaker Detection"
-        Computer Speech and Language, 2005
+    """based on Niko Brummer's original implementation:
+    Niko Brummer and Johan du Preez, Application-Independent Evaluation of Speaker Detection"
+    Computer Speech and Language, 2005
     """
 
     def __init__(self):
@@ -417,8 +416,8 @@ class CostLLR:
 
 def cross_validation_split(dataset, k_folds):
     """
-        Splits dataframe into k_folds, returning array of dataframes
-        """
+    Splits dataframe into k_folds, returning array of dataframes
+    """
     dataset_split = []
     h = 1.0 / k_folds
     df = dataset.select("*", rand().alias("rand"))

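Note: the docstring churn in this and the following files (no space after the opening `"""`, single-line docstrings collapsed onto one line, continuation lines re-indented) matches the docstring handling that black added in 20.8b1, which is the version pinned in the tox.ini change at the end of this diff. The two shapes are equivalent at runtime; only the literal text of `__doc__` changes slightly:

    def before():
        """ load some training data given a sparkContext
        """


    def after():
        """load some training data given a sparkContext"""


    print(repr(before.__doc__))
    print(repr(after.__doc__))
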
@@ -27,7 +27,7 @@ ONE_WEEK_AGO = (dt.datetime.now() - dt.timedelta(days=7)).strftime("%Y%m%d")
 
 
 def is_valid_addon(broadcast_amo_whitelist, guid, addon):
-    """ Filter individual addons out to exclude, system addons,
+    """Filter individual addons out to exclude, system addons,
     legacy addons, disabled addons, sideloaded addons.
     """
     return not (
@@ -47,7 +47,7 @@ def is_valid_addon(broadcast_amo_whitelist, guid, addon):
 
 
 def get_addons_per_client(broadcast_amo_whitelist, users_df):
-    """ Extracts a DataFrame that contains one row
+    """Extracts a DataFrame that contains one row
     for each client along with the list of active add-on GUIDs.
     """
 
@@ -71,7 +71,7 @@ def get_addons_per_client(broadcast_amo_whitelist, users_df):
 
 
 def get_initial_sample(spark):
-    """ Takes an initial sample from the longitudinal dataset
+    """Takes an initial sample from the longitudinal dataset
     (randomly sampled from main summary). Coarse filtering on:
     - number of installed addons (greater than 1)
     - corrupt and generally wierd telemetry entries
@@ -92,8 +92,7 @@ def get_initial_sample(spark):
 
 
 def extract_telemetry(spark):
-    """ load some training data from telemetry given a sparkContext
-    """
+    """load some training data from telemetry given a sparkContext"""
     sc = spark.sparkContext
 
     # Define the set of feature names to be used in the donor computations.

@@ -18,8 +18,7 @@ OUTPUT_BASE_FILENAME = "guid_install_ranking"
 
 
 def extract_telemetry(sparkSession):
-    """ Load some training data from telemetry given a sparkContext
-    """
+    """Load some training data from telemetry given a sparkContext"""
     frame = sparkSession.sql(
         """
         SELECT
@@ -42,7 +41,7 @@ def extract_telemetry(sparkSession):
 
 
 def transform(frame):
-    """ Convert the dataframe to JSON and augment each record to
+    """Convert the dataframe to JSON and augment each record to
     include the install count for each addon.
     """
 
@@ -86,7 +86,7 @@ def get_samples(spark, date_from):
 
 
 def get_addons_per_client(users_df, addon_whitelist, minimum_addons_count):
-    """ Extracts a DataFrame that contains one row
+    """Extracts a DataFrame that contains one row
     for each client along with the list of active add-on GUIDs.
     """
 
@@ -125,8 +125,7 @@ def get_addons_per_client(users_df, addon_whitelist, minimum_addons_count):
 
 
 def compute_clusters(addons_df, num_clusters, random_seed):
-    """ Performs user clustering by using add-on ids as features.
-    """
+    """Performs user clustering by using add-on ids as features."""
 
     # Build the stages of the pipeline. We need hashing to make the next
     # steps work.
@@ -145,8 +144,7 @@ def compute_clusters(addons_df, num_clusters, random_seed):
 
 
 def get_donor_pools(users_df, clusters_df, num_donors, random_seed=None):
-    """ Samples users from each cluster.
-    """
+    """Samples users from each cluster."""
     cluster_population = clusters_df.groupBy("prediction").count().collect()
     clusters_histogram = [(x["prediction"], x["count"]) for x in cluster_population]
 
@@ -216,7 +214,7 @@ def format_donors_dictionary(donors_df):
 
 
 def similarity_function(x, y):
-    """ Similarity function for comparing user features.
+    """Similarity function for comparing user features.
 
     This actually really should be implemented in taar.similarity_recommender
     and then imported here for consistency.
@@ -260,7 +258,7 @@ def generate_non_cartesian_pairs(first_rdd, second_rdd):
 def get_lr_curves(
     spark, features_df, cluster_ids, kernel_bandwidth, num_pdf_points, random_seed=None
 ):
-    """ Compute the likelihood ratio curves for clustered clients.
+    """Compute the likelihood ratio curves for clustered clients.
 
     Work-flow followed in this function is as follows:
 
@@ -88,7 +88,7 @@ def store_json_to_s3(json_data, base_filename, date, prefix, bucket):
 
 
 def load_amo_external_whitelist():
-    """ Download and parse the AMO add-on whitelist.
+    """Download and parse the AMO add-on whitelist.
 
     :raises RuntimeError: the AMO whitelist file cannot be downloaded or contains
     no valid add-ons.
@@ -133,8 +133,8 @@ def load_amo_curated_whitelist():
 
 def hash_telemetry_id(telemetry_id):
     """
-        This hashing function is a reference implementation based on :
-        https://phabricator.services.mozilla.com/D8311
+    This hashing function is a reference implementation based on :
+    https://phabricator.services.mozilla.com/D8311
 
     """
     return hashlib.sha256(telemetry_id.encode("utf8")).hexdigest()

@@ -44,7 +44,7 @@ def generate_filter_parameters(end_date, days_back):
 
 
 def write_csv(dataframe, path, header=True):
-    """ Write a dataframe to local disk.
+    """Write a dataframe to local disk.
 
     Disclaimer: Do not write csv files larger than driver memory. This
     is ~15GB for ec2 c3.xlarge (due to caching overhead).

setup.py (18 changed lines)

@@ -2,13 +2,13 @@
 from setuptools import setup, find_packages
 
 test_deps = [
-    'coverage==4.5.2',
+    'coverage==5.3',
     'pytest-cov==2.6.0',
     'pytest-timeout==1.3.3',
     'moto==1.3.16',
     'mock==2.0.0',
     'pytest==3.10.1',
-    'flake8==3.6.0'
+    'flake8==3.8.4'
 ]
 
 extras = {
@@ -30,17 +30,17 @@ setup(
         'boto==2.49.0',
         'boto3==1.16.20',
         'botocore==1.19.20',
-        'click==6.7',
+        'click==7.1.2',
         'click_datetime==0.2',
-        'numpy==1.13.3',
-        'pandas==0.23.4',
+        'numpy==1.19.4',
+        'pandas==1.1.4',
         'pyspark==2.3.2',
         'python_moztelemetry==0.10.2',
-        'requests-toolbelt==0.8.0',
-        'requests==2.20.1',
-        'scipy==1.0.0rc1',
+        'requests-toolbelt==0.9.1',
+        'requests==2.25.0',
+        'scipy==1.5.4',
         'typing==3.6.4',
-        'six==1.11.0',
+        'six==1.15.0',
     ],
     tests_require=test_deps,
     extras_require=extras,

@@ -66,44 +66,44 @@ def test_profile_creation_date_fields(clients_daily):
     # the TZ setting of the system on which the tests run.
     expected_back = set(
         [
-            u"2014-12-16",
-            u"2016-09-07",
-            u"2016-05-12",
-            u"2017-02-16",
-            u"2012-11-17",
-            u"2013-09-08",
-            u"2017-02-12",
-            u"2016-04-04",
-            u"2017-04-25",
-            u"2015-06-17",
+            "2014-12-16",
+            "2016-09-07",
+            "2016-05-12",
+            "2017-02-16",
+            "2012-11-17",
+            "2013-09-08",
+            "2017-02-12",
+            "2016-04-04",
+            "2017-04-25",
+            "2015-06-17",
         ]
     )
     expected_utc = set(
         [
-            u"2014-12-17",
-            u"2016-09-08",
-            u"2016-05-13",
-            u"2017-02-17",
-            u"2012-11-18",
-            u"2013-09-09",
-            u"2017-02-13",
-            u"2016-04-05",
-            u"2017-04-26",
-            u"2015-06-18",
+            "2014-12-17",
+            "2016-09-08",
+            "2016-05-13",
+            "2017-02-17",
+            "2012-11-18",
+            "2013-09-09",
+            "2017-02-13",
+            "2016-04-05",
+            "2017-04-26",
+            "2015-06-18",
         ]
     )
     expected_forward = set(
         [
-            u"2014-12-18",
-            u"2016-09-09",
-            u"2016-05-14",
-            u"2017-02-18",
-            u"2012-11-19",
-            u"2013-09-10",
-            u"2017-02-14",
-            u"2016-04-06",
-            u"2017-04-27",
-            u"2015-06-19",
+            "2014-12-18",
+            "2016-09-09",
+            "2016-05-14",
+            "2017-02-18",
+            "2012-11-19",
+            "2013-09-10",
+            "2017-02-14",
+            "2016-04-06",
+            "2017-04-27",
+            "2015-06-19",
         ]
     )
     ten_pcds = clients_daily.select("profile_creation_date").take(10)

@@ -15,19 +15,19 @@ def sample_document():
         # string before passing through to the sampler code.
         "content": {"payload": {"foo": "bar"}},
         "meta": {
-            u"Content-Length": u"7094",
-            u"Date": u"Sun, 19 Aug 2018 15:08:00 GMT",
-            u"Host": u"incoming.telemetry.mozilla.org",
-            "Hostname": u"ip-1.1.1.1",
+            "Content-Length": "7094",
+            "Date": "Sun, 19 Aug 2018 15:08:00 GMT",
+            "Host": "incoming.telemetry.mozilla.org",
+            "Hostname": "ip-1.1.1.1",
             "Timestamp": 1534691279765301222,
-            "Type": u"telemetry-raw",
-            u"User-Agent": u"pingsender/1.0",
-            u"X-Forwarded-For": u"127.0.0.1",
-            u"X-PingSender-Version": u"1.0",
-            u"args": u"v=4",
-            u"protocol": u"HTTP/1.1",
-            u"remote_addr": u"1.1.1.1",
-            u"uri": u"/submit/telemetry/doc-id/main/Firefox/61.0.2/release/20180807170231",
+            "Type": "telemetry-raw",
+            "User-Agent": "pingsender/1.0",
+            "X-Forwarded-For": "127.0.0.1",
+            "X-PingSender-Version": "1.0",
+            "args": "v=4",
+            "protocol": "HTTP/1.1",
+            "remote_addr": "1.1.1.1",
+            "uri": "/submit/telemetry/doc-id/main/Firefox/61.0.2/release/20180807170231",
         },
     }
 
@@ -10,7 +10,7 @@ from pyspark.sql.types import ArrayType, LongType, StringType, StructField, Stru
 
 @pytest.fixture()
 def sync_summary_schema():
-    """"Generate a schema for sync_summary. This subset contains enough
+    """Generate a schema for sync_summary. This subset contains enough
     structure for testing bookmark validation. The schema is derived from
     [`telemetry-batch-view`][1].
 
@@ -241,10 +241,16 @@ EXPECTED_FINAL_JDATA = {
 
 @pytest.yield_fixture(scope="function")
 def s3_fixture():
-    mock_s3().start()
+    s3 = mock_s3()
+    s3.start()
 
     conn = boto3.resource("s3", region_name="us-west-2")
-    conn.create_bucket(Bucket=taar_amowhitelist.AMO_DUMP_BUCKET)
+    conn.create_bucket(
+        Bucket=taar_amowhitelist.AMO_DUMP_BUCKET,
+        CreateBucketConfiguration={
+            "LocationConstraint": "us-west-2",
+        },
+    )
     taar_utils.store_json_to_s3(
         json.dumps(SAMPLE_DATA),
         taar_amowhitelist.AMO_DUMP_BASE_FILENAME,
@@ -253,7 +259,7 @@ def s3_fixture():
         taar_amowhitelist.AMO_DUMP_BUCKET,
     )
     yield conn, SAMPLE_DATA
-    mock_s3().stop()
+    s3.stop()
 
 
 def test_extract(s3_fixture):

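Note on the fixture changes above, which repeat throughout the tests below. First, each call to `mock_s3()` constructs a new mock object, so the old `mock_s3().start()` / `mock_s3().stop()` pair started one mock and "stopped" a different, never-started one; keeping the instance in `s3` stops the same mock that was started. Second, with the upgraded boto3/botocore (and moto 1.3.16), `create_bucket` against a region other than us-east-1 must pass a matching `CreateBucketConfiguration`, which is why every mocked bucket gains the `LocationConstraint` argument. A minimal sketch, assuming moto and boto3 are installed (the bucket name is a placeholder):

    import boto3
    from moto import mock_s3

    mock = mock_s3()  # keep the instance so start()/stop() act on the same mock
    mock.start()
    try:
        conn = boto3.resource("s3", region_name="us-west-2")
        conn.create_bucket(
            Bucket="example-bucket",
            CreateBucketConfiguration={"LocationConstraint": "us-west-2"},
        )
        assert [b.name for b in conn.buckets.all()] == ["example-bucket"]
    finally:
        mock.stop()
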
@@ -108,44 +108,35 @@ MOCK_KEYED_ADDONS = [
 
 EXPECTED_GUID_GUID_DATA = [
     Row(
-        key_addon=u"test-guid-2",
+        key_addon="test-guid-2",
         coinstallation_counts=[
-            Row(id=u"test-guid-6", n=1),
-            Row(id=u"test-guid-5", n=1),
-            Row(id=u"test-guid-3", n=1),
-            Row(id=u"test-guid-1", n=1),
+            Row(id="test-guid-6", n=1),
+            Row(id="test-guid-5", n=1),
+            Row(id="test-guid-3", n=1),
+            Row(id="test-guid-1", n=1),
         ],
     ),
-    Row(key_addon=u"test-guid-4", coinstallation_counts=[Row(id=u"test-guid-1", n=1)]),
+    Row(key_addon="test-guid-4", coinstallation_counts=[Row(id="test-guid-1", n=1)]),
     Row(
-        key_addon=u"test-guid-3",
-        coinstallation_counts=[
-            Row(id=u"test-guid-2", n=1),
-            Row(id=u"test-guid-1", n=2),
-        ],
-    ),
-    Row(
-        key_addon=u"test-guid-5",
-        coinstallation_counts=[
-            Row(id=u"test-guid-6", n=1),
-            Row(id=u"test-guid-2", n=1),
-        ],
-    ),
-    Row(
-        key_addon=u"test-guid-1",
-        coinstallation_counts=[
-            Row(id=u"test-guid-2", n=1),
-            Row(id=u"test-guid-1", n=2),
-            Row(id=u"test-guid-3", n=2),
-            Row(id=u"test-guid-4", n=1),
-        ],
-    ),
-    Row(
-        key_addon=u"test-guid-6",
-        coinstallation_counts=[
-            Row(id=u"test-guid-2", n=1),
-            Row(id=u"test-guid-5", n=1),
-        ],
+        key_addon="test-guid-3",
+        coinstallation_counts=[Row(id="test-guid-2", n=1), Row(id="test-guid-1", n=2)],
+    ),
+    Row(
+        key_addon="test-guid-5",
+        coinstallation_counts=[Row(id="test-guid-6", n=1), Row(id="test-guid-2", n=1)],
+    ),
+    Row(
+        key_addon="test-guid-1",
+        coinstallation_counts=[
+            Row(id="test-guid-2", n=1),
+            Row(id="test-guid-1", n=2),
+            Row(id="test-guid-3", n=2),
+            Row(id="test-guid-4", n=1),
+        ],
+    ),
+    Row(
+        key_addon="test-guid-6",
+        coinstallation_counts=[Row(id="test-guid-2", n=1), Row(id="test-guid-5", n=1)],
     ),
 ]
 
@@ -236,7 +227,12 @@ def test_load_s3(spark):
 
     # Create the bucket before we upload
     conn = boto3.resource("s3", region_name="us-west-2")
-    bucket_obj = conn.create_bucket(Bucket=BUCKET)
+    bucket_obj = conn.create_bucket(
+        Bucket=BUCKET,
+        CreateBucketConfiguration={
+            "LocationConstraint": "us-west-2",
+        },
+    )
 
     load_df = spark.createDataFrame(EXPECTED_GUID_GUID_DATA)
     taar_lite_guidguid.load_s3(load_df, "20180301", PREFIX, BUCKET)

@@ -55,10 +55,10 @@ MOCK_TELEMETRY_SAMPLE = [
 ]
 
 EXPECTED_ADDON_INSTALLATIONS = {
-    u"test-guid-1": 100,
-    u"test-guid-2": 200,
-    u"test-guid-3": 300,
-    u"test-guid-4": 400,
+    "test-guid-1": 100,
+    "test-guid-2": 200,
+    "test-guid-3": 300,
+    "test-guid-4": 400,
 }
 
 
@@ -74,12 +74,12 @@ def test_extract_phase(spark):
 
     output = dict(extract_df.rdd.map(lambda_func).collect())
     EXPECTED = {
-        u"test-guid-1": 1,
-        u"test-guid-2": 3,
-        u"test-guid-3": 3,
-        u"test-guid-4": 2,
-        u"test-guid-5": 2,
-        u"test-guid-6": 1,
+        "test-guid-1": 1,
+        "test-guid-2": 3,
+        "test-guid-3": 3,
+        "test-guid-4": 2,
+        "test-guid-5": 2,
+        "test-guid-6": 1,
     }
     assert EXPECTED == output
 
@@ -106,7 +106,12 @@ def test_load_s3(spark):
 
     # Create the bucket before we upload
     conn = boto3.resource("s3", region_name="us-west-2")
-    bucket_obj = conn.create_bucket(Bucket=BUCKET)
+    bucket_obj = conn.create_bucket(
+        Bucket=BUCKET,
+        CreateBucketConfiguration={
+            "LocationConstraint": "us-west-2",
+        },
+    )
 
     rdd = spark.createDataFrame(MOCK_TELEMETRY_SAMPLE)
     result_json = taar_lite_guidranking.transform(rdd)

@@ -85,7 +85,12 @@ def test_load(mock_transformed_data):
     date = "20190105"
 
     conn = boto3.resource("s3", region_name="us-west-2")
-    conn.create_bucket(Bucket=bucket)
+    conn.create_bucket(
+        Bucket=bucket,
+        CreateBucketConfiguration={
+            "LocationConstraint": "us-west-2",
+        },
+    )
 
     taar_update_whitelist.load_etl(mock_transformed_data, date, prefix, bucket)
 
@@ -50,7 +50,12 @@ def test_read_from_s3():
     s3_json_fname = "test.json"
 
     conn = boto3.resource("s3", region_name="us-west-2")
-    conn.create_bucket(Bucket=bucket)
+    conn.create_bucket(
+        Bucket=bucket,
+        CreateBucketConfiguration={
+            "LocationConstraint": "us-west-2",
+        },
+    )
 
     with NamedTemporaryFile("w") as json_file:
         json.dump(SAMPLE_DATA, json_file)
@@ -71,7 +76,12 @@ def test_write_to_s3():
     dest_filename = "test.json"
 
     conn = boto3.resource("s3", region_name="us-west-2")
-    bucket_obj = conn.create_bucket(Bucket=bucket)
+    bucket_obj = conn.create_bucket(
+        Bucket=bucket,
+        CreateBucketConfiguration={
+            "LocationConstraint": "us-west-2",
+        },
+    )
 
     with NamedTemporaryFile("w") as json_file:
         json.dump(SAMPLE_DATA, json_file)
@@ -102,7 +112,12 @@ def test_write_json_s3():
     content = {"it-IT": ["firefox@getpocket.com"]}
 
     conn = boto3.resource("s3", region_name="us-west-2")
-    bucket_obj = conn.create_bucket(Bucket=bucket)
+    bucket_obj = conn.create_bucket(
+        Bucket=bucket,
+        CreateBucketConfiguration={
+            "LocationConstraint": "us-west-2",
+        },
+    )
 
     # Store the data in the mocked bucket.
     taar_utils.store_json_to_s3(
@@ -123,7 +138,12 @@ def test_write_json_s3():
 @mock_s3
 def test_load_amo_external_whitelist():
     conn = boto3.resource("s3", region_name="us-west-2")
-    conn.create_bucket(Bucket=taar_utils.AMO_DUMP_BUCKET)
+    conn.create_bucket(
+        Bucket=taar_utils.AMO_DUMP_BUCKET,
+        CreateBucketConfiguration={
+            "LocationConstraint": "us-west-2",
+        },
+    )
 
     # Make sure that whitelist loading fails before mocking the S3 file.
     EXCEPTION_MSG = "Empty AMO whitelist detected"

@@ -31,7 +31,9 @@ def test_write_csv_ascii(generate_data, tmpdir):
     with open(path, "rb") as f:
         data = f.read()
 
-    assert [l.decode("utf-8") for l in data.rstrip().split(b"\r\n")[1:]] == test_data
+    assert [
+        line.decode("utf-8") for line in data.rstrip().split(b"\r\n")[1:]
+    ] == test_data
 
 
 def test_generate_filter_parameters():
@@ -58,7 +60,7 @@ def test_generate_filter_parameters():
 
 
 def test_write_csv_valid_unicode(generate_data, tmpdir):
-    test_data = [u"∆", u"∫", u"∬"]
+    test_data = ["∆", "∫", "∬"]
     df = generate_data(test_data)
 
     path = str(tmpdir.join("test_data.csv"))
@@ -67,7 +69,9 @@ def test_write_csv_valid_unicode(generate_data, tmpdir):
     with open(path, "rb") as f:
         data = f.read()
 
-    assert [l.decode("utf-8") for l in data.rstrip().split(b"\r\n")[1:]] == test_data
+    assert [
+        line.decode("utf-8") for line in data.rstrip().split(b"\r\n")[1:]
+    ] == test_data
 
 
 @mock_s3
@@ -76,7 +80,12 @@ def test_write_csv_to_s3(generate_data):
     key = "test.csv"
 
     conn = boto3.resource("s3", region_name="us-west-2")
-    conn.create_bucket(Bucket=bucket)
+    conn.create_bucket(
+        Bucket=bucket,
+        CreateBucketConfiguration={
+            "LocationConstraint": "us-west-2",
+        },
+    )
 
     utils.write_csv_to_s3(generate_data(["foo"]), bucket, key)
 
@@ -92,7 +101,12 @@ def test_write_csv_to_s3_no_header(generate_data):
     key = "test.csv"
 
     conn = boto3.resource("s3", region_name="us-west-2")
-    conn.create_bucket(Bucket=bucket)
+    conn.create_bucket(
+        Bucket=bucket,
+        CreateBucketConfiguration={
+            "LocationConstraint": "us-west-2",
+        },
+    )
 
     utils.write_csv_to_s3(generate_data(), bucket, key, header=False)
 
@@ -107,7 +121,12 @@ def test_write_csv_to_s3_existing(generate_data):
     key = "test.csv"
 
     conn = boto3.resource("s3", region_name="us-west-2")
-    conn.create_bucket(Bucket=bucket)
+    conn.create_bucket(
+        Bucket=bucket,
+        CreateBucketConfiguration={
+            "LocationConstraint": "us-west-2",
+        },
+    )
 
     utils.write_csv_to_s3(generate_data(["foo"]), bucket, key)
     utils.write_csv_to_s3(generate_data(["foo", "bar"]), bucket, key)

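Note: renaming the comprehension variable from `l` to `line` presumably satisfies pycodestyle's E741 ("ambiguous variable name") under the newer flake8 pin; the longer name then pushes the assert past black's default 88-character line length, which is why it gets wrapped across three lines. Behaviour is unchanged:

    data = b"header\r\nfoo\r\nbar"

    short = [l.decode("utf-8") for l in data.rstrip().split(b"\r\n")[1:]]  # noqa: E741
    renamed = [line.decode("utf-8") for line in data.rstrip().split(b"\r\n")[1:]]

    assert short == renamed == ["foo", "bar"]
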
tox.ini (8 changed lines)

@@ -4,7 +4,7 @@
 # and then run "tox" from this directory.
 
 [tox]
-envlist = py27, py35, flake8, black, docs
+envlist = py37, flake8, black, docs
 
 [pytest]
 addopts =
@@ -20,17 +20,17 @@ max-line-length = 100
 
 [testenv:flake8]
 deps =
-    flake8==3.6.0
+    flake8==3.8.4
 commands =
     flake8 mozetl tests
 
 [testenv:black]
-deps = black
+deps = black==20.8b1
 commands = black --check mozetl/ tests/
 
 [testenv:docs]
 description = invoke sphinx-build to build HTML docs
-basepython = python2.7
+basepython = python3.7
 deps =
     sphinx >= 1.7.5, < 2
     m2r