Use six.text_type instead of unicode

2018-12-21 15:03:36 -08:00 · 2018-12-21 15:03:36 -08:00 · 69e6720fdd
--- a/mozetl/engagement/churn_to_csv/job.py
+++ b/mozetl/engagement/churn_to_csv/job.py
@@ -7,10 +7,11 @@ import click
 from boto3.s3.transfer import S3Transfer
 from moztelemetry.standards import snap_to_beginning_of_week
 from pyspark.sql import SparkSession, functions as F
+from six import text_type


 def csv(f):
-    return ",".join([unicode(a) for a in f])
+    return ",".join([text_type(a) for a in f])


 def fmt(d, date_format="%Y%m%d"):
--- a/mozetl/hardware_report/summarize_json.py
+++ b/mozetl/hardware_report/summarize_json.py
@@ -11,6 +11,7 @@ import botocore
 import requests
 import logging
 import moztelemetry.standards as moz_std
+from six import text_type

 # Reasons why the data for a client can be discarded.
 REASON_INACTIVE = "inactive"
@@ -462,7 +463,7 @@ def finalize_data(data, sample_count, broken_ratio,
     for k, v in data.items():
         # The old key is a tuple (key, value). We translate the key part and concatenate the
         # value as a string.
-        new_key = keys_translation[k[0]] + unicode(k[1])
+        new_key = keys_translation[k[0]] + text_type(k[1])
         aggregated_percentages[new_key] = v / denom

     return aggregated_percentages
--- a/mozetl/taar/taar_amodump.py
+++ b/mozetl/taar/taar_amodump.py
@@ -6,6 +6,7 @@ import logging
 import logging.config
 import typing
 from six.moves import urllib
+from six import text_type
 import queue as queue

 from .taar_utils import store_json_to_s3
@@ -244,8 +245,8 @@ class Undefined:
 def marshal(value, name, type_def):
     serializers = {typing.List: list,
                    typing.Dict: dict,
-                   str: unicode,
-                   unicode: unicode,
+                   str: text_type,
+                   text_type: text_type,
                    int: int,
                    float: float,
                    bool: bool}
--- a/mozetl/utils.py
+++ b/mozetl/utils.py
@@ -10,6 +10,7 @@ from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText

 import boto3
+from six import text_type

 ACTIVITY_SUBMISSION_LAG = DT.timedelta(10)

@@ -60,7 +61,7 @@ def write_csv(dataframe, path, header=True):
             writer.writerow(dataframe.columns)

         for row in dataframe.collect():
-            row = [unicode(s).encode('utf-8') for s in row]
+            row = [text_type(s).encode('utf-8') for s in row]
             writer.writerow(row)


--- a/tests/test_topline_historical_backfill.py
+++ b/tests/test_topline_historical_backfill.py
@@ -6,6 +6,7 @@ from click.testing import CliRunner

 from mozetl.topline import historical_backfill as backfill
 from mozetl.topline.schema import historical_schema
+from six import text_type


 @pytest.fixture(autouse=True)
@@ -107,7 +108,7 @@ def test_cli_monthly(generate_data, tmpdir, monkeypatch):

     csv_data = ','.join(input_df.columns) + '\n'
     for row in input_df.collect():
-        csv_data += ','.join([unicode(x) for x in row]) + '\n'
+        csv_data += ','.join([text_type(x) for x in row]) + '\n'

     input_csv.write(csv_data)

--- a/tests/test_topline_summary.py
+++ b/tests/test_topline_summary.py
@@ -8,6 +8,7 @@ from pyspark.sql import functions as F, Row
 from pyspark.sql.types import (
     StructField, StructType, StringType, BooleanType, ArrayType, LongType
 )
+from six import text_type

 from mozetl.topline import topline_summary as topline

@@ -38,8 +39,8 @@ def generate_dates(submission_date_s3, ts_offset=0, creation_offset=0):

 def search_row(engine='hooli', count=1, source='searchbar'):
     return Row(
-        engine=unicode(engine),
-        source=unicode(source),
+        engine=text_type(engine),
+        source=text_type(source),
         count=count
     )