diff --git a/.isort.cfg b/.isort.cfg
new file mode 100644
index 0000000000..0ef160ad13
--- /dev/null
+++ b/.isort.cfg
@@ -0,0 +1,3 @@
+[settings]
+profile = black
+skip = dags,script/legacy,target,venv
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c7da1e857f..e21fa5247f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -18,3 +18,7 @@ repos:
   hooks:
   - id: yamllint
     args: [-c, .yamllint.yaml, .]
+- repo: https://github.com/PyCQA/isort
+  rev: 5.7.0
+  hooks:
+  - id: isort
diff --git a/bigquery_etl/broken_views.py b/bigquery_etl/broken_views.py
index f54ace5b12..e946769c45 100644
--- a/bigquery_etl/broken_views.py
+++ b/bigquery_etl/broken_views.py
@@ -5,7 +5,7 @@ from argparse import ArgumentParser
 from functools import partial
 from multiprocessing.pool import ThreadPool
 
-from google.api_core.exceptions import NotFound, Forbidden
+from google.api_core.exceptions import Forbidden, NotFound
 from google.cloud import bigquery
 
 from bigquery_etl.util import standard_args  # noqa E402
diff --git a/bigquery_etl/cli/__init__.py b/bigquery_etl/cli/__init__.py
index a34d738df9..c8cc37c921 100644
--- a/bigquery_etl/cli/__init__.py
+++ b/bigquery_etl/cli/__init__.py
@@ -14,9 +14,9 @@ from ..cli.format import format
 from ..cli.query import query
 from ..cli.routine import mozfun, routine
 from ..cli.view import view
+from ..dependency import dependency
 from ..glam.cli import glam
 from ..stripe import stripe_
-from ..dependency import dependency
 
 
 def cli(prog_name=None):
diff --git a/bigquery_etl/cli/query.py b/bigquery_etl/cli/query.py
index 99836ef0a1..e73360a199 100644
--- a/bigquery_etl/cli/query.py
+++ b/bigquery_etl/cli/query.py
@@ -13,16 +13,16 @@ import click
 from google.cloud import bigquery
 from google.cloud.exceptions import NotFound
 
-from ..cli.dryrun import dryrun, SKIP
+from ..cli.dryrun import SKIP, dryrun
 from ..cli.format import format
 from ..cli.utils import is_authenticated, is_valid_dir, is_valid_project
 from ..format_sql.formatter import reformat
 from ..metadata import validate_metadata
 from ..metadata.parse_metadata import METADATA_FILE, Metadata
-from ..query_scheduling.generate_airflow_dags import get_dags
 from ..query_scheduling.dag_collection import DagCollection
+from ..query_scheduling.generate_airflow_dags import get_dags
 from ..run_query import run
-from ..schema import Schema, SCHEMA_FILE
+from ..schema import SCHEMA_FILE, Schema
 
 QUERY_NAME_RE = re.compile(r"(?P<dataset>[a-zA-z0-9_]+)\.(?P<name>[a-zA-z0-9_]+)")
 SQL_FILE_RE = re.compile(
diff --git a/bigquery_etl/copy_deduplicate.py b/bigquery_etl/copy_deduplicate.py
index 6451cc272a..cacdf6e548 100644
--- a/bigquery_etl/copy_deduplicate.py
+++ b/bigquery_etl/copy_deduplicate.py
@@ -7,20 +7,19 @@ datasets. The script can be configured to exclude a list of tables
 or to process only a specific list of tables.
""" +import json +import logging from argparse import ArgumentParser from datetime import datetime, timedelta from itertools import groupby from multiprocessing.pool import ThreadPool -import json -import logging from google.api_core.exceptions import BadRequest from google.cloud import bigquery from bigquery_etl.util import standard_args -from bigquery_etl.util.client_queue import ClientQueue from bigquery_etl.util.bigquery_id import sql_table_id - +from bigquery_etl.util.client_queue import ClientQueue QUERY_TEMPLATE = """ WITH diff --git a/bigquery_etl/docs/derived_datasets/generate_derived_dataset_docs.py b/bigquery_etl/docs/derived_datasets/generate_derived_dataset_docs.py index 22eb940e83..d24d08f7e9 100644 --- a/bigquery_etl/docs/derived_datasets/generate_derived_dataset_docs.py +++ b/bigquery_etl/docs/derived_datasets/generate_derived_dataset_docs.py @@ -1,9 +1,11 @@ """Generate documentation for derived datasets.""" import os -import yaml from pathlib import Path + +import yaml from jinja2 import Environment, FileSystemLoader + from bigquery_etl.dryrun import DryRun VIEW_FILE = "view.sql" diff --git a/bigquery_etl/docs/generate_docs.py b/bigquery_etl/docs/generate_docs.py index e9b03246f9..f72ca400eb 100644 --- a/bigquery_etl/docs/generate_docs.py +++ b/bigquery_etl/docs/generate_docs.py @@ -1,14 +1,15 @@ """Generates documentations for provided projects.""" -from argparse import ArgumentParser import os -from pathlib import Path import re import shutil +from argparse import ArgumentParser +from pathlib import Path + import yaml -from bigquery_etl.util import standard_args from bigquery_etl.docs.derived_datasets import generate_derived_dataset_docs +from bigquery_etl.util import standard_args DEFAULT_PROJECTS_DIRS = ["sql/mozfun/", "sql/moz-fx-data-shared-prod/"] DOCS_FILE = "README.md" diff --git a/bigquery_etl/docs/validate_docs.py b/bigquery_etl/docs/validate_docs.py index 01ecc007bd..5438979b62 100644 --- a/bigquery_etl/docs/validate_docs.py +++ b/bigquery_etl/docs/validate_docs.py @@ -1,10 +1,10 @@ """Validates SQL examples in documentations.""" -from argparse import ArgumentParser import os -from pathlib import Path -import tempfile import sys +import tempfile +from argparse import ArgumentParser +from pathlib import Path from bigquery_etl.dryrun import DryRun from bigquery_etl.routine.parse_routine import read_routine_dir, sub_local_routines diff --git a/bigquery_etl/dryrun.py b/bigquery_etl/dryrun.py index 00764b4817..e5d5533db5 100644 --- a/bigquery_etl/dryrun.py +++ b/bigquery_etl/dryrun.py @@ -10,16 +10,16 @@ only dry runs can be performed. In order to reduce risk of CI or local users accidentally running queries during tests and overwriting production data, we proxy the queries through the dry run service endpoint. 
""" +import glob +import json +import re +import sys from argparse import ArgumentParser +from enum import Enum from functools import cached_property from multiprocessing.pool import Pool from os.path import basename, dirname, exists -from urllib.request import urlopen, Request -from enum import Enum -import glob -import json -import sys -import re +from urllib.request import Request, urlopen SKIP = { # Access Denied diff --git a/bigquery_etl/events_daily/generate_queries.py b/bigquery_etl/events_daily/generate_queries.py index 69927b52d8..5775a9b319 100755 --- a/bigquery_etl/events_daily/generate_queries.py +++ b/bigquery_etl/events_daily/generate_queries.py @@ -1,14 +1,14 @@ """Generate query directories.""" import os -import yaml - from argparse import ArgumentParser -from bigquery_etl.format_sql.formatter import reformat from dataclasses import dataclass -from jinja2 import Environment, FileSystemLoader from pathlib import Path from typing import List, Optional +import yaml +from jinja2 import Environment, FileSystemLoader + +from bigquery_etl.format_sql.formatter import reformat TEMPLATED_FILES = { "init.sql", diff --git a/bigquery_etl/format_sql/format.py b/bigquery_etl/format_sql/format.py index ee7b63f727..f52c50b83e 100644 --- a/bigquery_etl/format_sql/format.py +++ b/bigquery_etl/format_sql/format.py @@ -1,14 +1,13 @@ """Format SQL.""" -from argparse import ArgumentParser import glob import os import os.path import sys +from argparse import ArgumentParser from bigquery_etl.format_sql.formatter import reformat # noqa E402 - SKIP = { # files that existed before we started to enforce this script *glob.glob("bigquery_etl/glam/templates/*.sql"), diff --git a/bigquery_etl/format_sql/formatter.py b/bigquery_etl/format_sql/formatter.py index d034176b8e..8dc4978c90 100644 --- a/bigquery_etl/format_sql/formatter.py +++ b/bigquery_etl/format_sql/formatter.py @@ -1,13 +1,13 @@ """Format SQL.""" -from dataclasses import replace import re +from dataclasses import replace from .tokenizer import ( AliasSeparator, + BlockEndKeyword, BlockKeyword, BlockStartKeyword, - BlockEndKeyword, ClosingBracket, Comment, ExpressionSeparator, @@ -15,8 +15,8 @@ from .tokenizer import ( Identifier, Literal, NewlineKeyword, - Operator, OpeningBracket, + Operator, ReservedKeyword, SpaceBeforeBracketKeyword, StatementSeparator, diff --git a/bigquery_etl/format_sql/tokenizer.py b/bigquery_etl/format_sql/tokenizer.py index 3e5ba7b5b2..d0e054b1e8 100644 --- a/bigquery_etl/format_sql/tokenizer.py +++ b/bigquery_etl/format_sql/tokenizer.py @@ -1,8 +1,8 @@ """Tokenize SQL so that it can be formatted.""" -from dataclasses import dataclass, field import re import sys +from dataclasses import dataclass, field # These words get their own line followed by increased indent TOP_LEVEL_KEYWORDS = [ diff --git a/bigquery_etl/generate_incremental_table.py b/bigquery_etl/generate_incremental_table.py index 5042091ae9..a6b1b21716 100644 --- a/bigquery_etl/generate_incremental_table.py +++ b/bigquery_etl/generate_incremental_table.py @@ -1,12 +1,12 @@ """Run a query with a series of @submission_date values.""" +import os.path +import subprocess +import sys from argparse import ArgumentParser from datetime import datetime, timedelta from functools import partial from multiprocessing import Pool -import os.path -import subprocess -import sys def fromisoformat(string): diff --git a/bigquery_etl/glean_usage/baseline_clients_daily.py b/bigquery_etl/glean_usage/baseline_clients_daily.py index 002461e925..061114cf1c 100644 --- 
a/bigquery_etl/glean_usage/baseline_clients_daily.py +++ b/bigquery_etl/glean_usage/baseline_clients_daily.py @@ -6,14 +6,14 @@ from functools import partial from multiprocessing.pool import ThreadPool from google.cloud import bigquery -from google.cloud.bigquery import WriteDisposition, ScalarQueryParameter +from google.cloud.bigquery import ScalarQueryParameter, WriteDisposition from bigquery_etl.glean_usage.common import ( list_baseline_tables, + referenced_table_exists, render, table_names_from_baseline, write_sql, - referenced_table_exists, ) from bigquery_etl.util import standard_args # noqa E402 diff --git a/bigquery_etl/glean_usage/baseline_clients_last_seen.py b/bigquery_etl/glean_usage/baseline_clients_last_seen.py index 60d942b5c6..8532433c9c 100644 --- a/bigquery_etl/glean_usage/baseline_clients_last_seen.py +++ b/bigquery_etl/glean_usage/baseline_clients_last_seen.py @@ -7,14 +7,14 @@ from functools import partial from multiprocessing.pool import ThreadPool from google.cloud import bigquery -from google.cloud.bigquery import WriteDisposition, ScalarQueryParameter +from google.cloud.bigquery import ScalarQueryParameter, WriteDisposition from bigquery_etl.glean_usage.common import ( list_baseline_tables, + referenced_table_exists, render, table_names_from_baseline, write_sql, - referenced_table_exists, ) from bigquery_etl.util import standard_args # noqa E402 diff --git a/bigquery_etl/json_to_table_ddl.py b/bigquery_etl/json_to_table_ddl.py index 7fe3df9047..09e0c2e51f 100644 --- a/bigquery_etl/json_to_table_ddl.py +++ b/bigquery_etl/json_to_table_ddl.py @@ -17,7 +17,6 @@ import argparse import json import sys - TEMPLATE = """ {table_create_mode_string} `{project}:{dataset}.{table_name}` ( {columns_string} diff --git a/bigquery_etl/metadata/parse_metadata.py b/bigquery_etl/metadata/parse_metadata.py index 29e946fc01..e6b805abbc 100644 --- a/bigquery_etl/metadata/parse_metadata.py +++ b/bigquery_etl/metadata/parse_metadata.py @@ -1,13 +1,14 @@ """Parsing of metadata yaml files.""" -from google.cloud import bigquery import enum -import re -import yaml import os +import re +from typing import Dict, List, Optional + import attr import cattr -from typing import List, Optional, Dict +import yaml +from google.cloud import bigquery from bigquery_etl.query_scheduling.utils import is_email diff --git a/bigquery_etl/metadata/publish_metadata.py b/bigquery_etl/metadata/publish_metadata.py index 9da06c1b23..b1fe3aaad0 100755 --- a/bigquery_etl/metadata/publish_metadata.py +++ b/bigquery_etl/metadata/publish_metadata.py @@ -1,17 +1,16 @@ """Update metadata of BigQuery tables and views.""" -from argparse import ArgumentParser import logging import os -import yaml +from argparse import ArgumentParser +import yaml from google.cloud import bigquery -from .parse_metadata import Metadata from ..util import standard_args from ..util.bigquery_tables import get_tables_matching_patterns from ..util.common import project_dirs - +from .parse_metadata import Metadata METADATA_FILE = "metadata.yaml" DEFAULT_PATTERN = "moz-fx-data-shared-prod:*.*" diff --git a/bigquery_etl/metadata/validate_metadata.py b/bigquery_etl/metadata/validate_metadata.py index 12a92a0b40..69702bf6dc 100644 --- a/bigquery_etl/metadata/validate_metadata.py +++ b/bigquery_etl/metadata/validate_metadata.py @@ -1,13 +1,13 @@ """Validate metadata files.""" -from argparse import ArgumentParser import logging import os import sys +from argparse import ArgumentParser -from .parse_metadata import Metadata from ..util import standard_args 
 from ..util.common import project_dirs
+from .parse_metadata import Metadata
 
 
 parser = ArgumentParser(description=__doc__)
diff --git a/bigquery_etl/public_data/publish_gcs_metadata.py b/bigquery_etl/public_data/publish_gcs_metadata.py
index 6729b5200d..17a9d100f0 100755
--- a/bigquery_etl/public_data/publish_gcs_metadata.py
+++ b/bigquery_etl/public_data/publish_gcs_metadata.py
@@ -1,20 +1,19 @@
 """Generate and upload JSON metadata files for public datasets on GCS."""
-from argparse import ArgumentParser
 import json
 import logging
 import os
 import re
-import smart_open
-
-from google.cloud import storage
+from argparse import ArgumentParser
 from itertools import groupby
 
+import smart_open
+from google.cloud import storage
+
 from bigquery_etl.metadata.parse_metadata import Metadata
 from bigquery_etl.util import standard_args
 from bigquery_etl.util.common import project_dirs
-
 
 DEFAULT_BUCKET = "mozilla-public-data-http"
 DEFAULT_API_VERSION = "v1"
 DEFAULT_ENDPOINT = "https://public-data.telemetry.mozilla.org/"
diff --git a/bigquery_etl/public_data/publish_json.py b/bigquery_etl/public_data/publish_json.py
index ea0fa24af8..5e2236bba0 100644
--- a/bigquery_etl/public_data/publish_json.py
+++ b/bigquery_etl/public_data/publish_json.py
@@ -1,21 +1,20 @@
 """Machinery for exporting query results as JSON to Cloud storage."""
-from argparse import ArgumentParser
-from google.cloud import storage
-from google.cloud import bigquery
 import datetime
 import json
-import smart_open
 import logging
-import sys
-import re
 import random
+import re
 import string
+import sys
+from argparse import ArgumentParser
+
+import smart_open
+from google.cloud import bigquery, storage
 
 from bigquery_etl.metadata.parse_metadata import Metadata
 from bigquery_etl.metadata.validate_metadata import validate_public_data
-
 
 SUBMISSION_DATE_RE = re.compile(r"^submission_date:DATE:(\d\d\d\d-\d\d-\d\d)$")
 QUERY_FILE_RE = re.compile(r"^.*/([a-zA-Z0-9_]+)/([a-zA-Z0-9_]+)_(v[0-9]+)/query\.sql$")
 MAX_JSON_SIZE = 1 * 1024 * 1024 * 1024  # 1 GB as max. size of exported JSON files
diff --git a/bigquery_etl/public_data/publish_public_data_views.py b/bigquery_etl/public_data/publish_public_data_views.py
index d361e0f292..3b1f82caf4 100755
--- a/bigquery_etl/public_data/publish_public_data_views.py
+++ b/bigquery_etl/public_data/publish_public_data_views.py
@@ -11,8 +11,8 @@ from argparse import ArgumentParser
 
 from google.cloud import bigquery
 
-from ..util.bigquery_tables import get_tables_matching_patterns
 from ..util import standard_args
+from ..util.bigquery_tables import get_tables_matching_patterns
 
 DEFAULT_PATTERN = "mozilla-public-data:*.*"
diff --git a/bigquery_etl/publish_static.py b/bigquery_etl/publish_static.py
index 0e992e0a21..bd1b20a2b8 100644
--- a/bigquery_etl/publish_static.py
+++ b/bigquery_etl/publish_static.py
@@ -1,8 +1,9 @@
 """Publish csv files as BigQuery tables."""
-import os
 import json
+import os
 from argparse import ArgumentParser
+
 from google.cloud import bigquery
 
 from bigquery_etl.util.common import project_dirs
diff --git a/bigquery_etl/pytest_plugin/routine.py b/bigquery_etl/pytest_plugin/routine.py
index 33fb8bfbe9..2814e1b288 100644
--- a/bigquery_etl/pytest_plugin/routine.py
+++ b/bigquery_etl/pytest_plugin/routine.py
@@ -1,21 +1,21 @@
 """PyTest plugin for running udf tests."""
-from google.api_core.exceptions import BadRequest
-from google.cloud import bigquery
 import os
-import pytest
 import re
-from .sql_test import dataset
+import pytest
+from google.api_core.exceptions import BadRequest
+from google.cloud import bigquery
+
 from bigquery_etl.util.common import project_dirs
-
 from ..routine.parse_routine import (
-    UDF_FILE,
-    PROCEDURE_FILE,
-    parse_routines,
     GENERIC_DATASET,
+    PROCEDURE_FILE,
+    UDF_FILE,
+    parse_routines,
 )
+from .sql_test import dataset
 
 _parsed_routines = None
diff --git a/bigquery_etl/pytest_plugin/script_lint/docstyle.py b/bigquery_etl/pytest_plugin/script_lint/docstyle.py
index 4bb0dd41e5..9ef3de8b74 100644
--- a/bigquery_etl/pytest_plugin/script_lint/docstyle.py
+++ b/bigquery_etl/pytest_plugin/script_lint/docstyle.py
@@ -1,7 +1,7 @@
 """PyTest plugin for collecting docstyle tests on python scripts."""
-from pytest_pydocstyle import _patch_sys_argv, File
 import pydocstyle
+from pytest_pydocstyle import File, _patch_sys_argv
 
 from . import is_python_executable
diff --git a/bigquery_etl/pytest_plugin/script_lint/mypy.py b/bigquery_etl/pytest_plugin/script_lint/mypy.py
index ddc1878440..a7dc09ae17 100644
--- a/bigquery_etl/pytest_plugin/script_lint/mypy.py
+++ b/bigquery_etl/pytest_plugin/script_lint/mypy.py
@@ -1,6 +1,6 @@
 """PyTest plugin for collecting mypy tests on python scripts."""
-from pytest_mypy import mypy_argv, MypyFile
+from pytest_mypy import MypyFile, mypy_argv
 
 from . import is_python_executable
diff --git a/bigquery_etl/pytest_plugin/sql.py b/bigquery_etl/pytest_plugin/sql.py
index 60910026a2..73e90a6558 100644
--- a/bigquery_etl/pytest_plugin/sql.py
+++ b/bigquery_etl/pytest_plugin/sql.py
@@ -1,25 +1,25 @@
 """PyTest plugin for running sql tests."""
-from typing import Dict
 import json
 import os.path
+from typing import Dict
 
+import pytest
 from google.api_core.exceptions import BadRequest
 from google.cloud import bigquery
-import pytest
 
 from ..routine import parse_routine
 from .sql_test import (
+    TABLE_EXTENSIONS,
+    Table,
     coerce_result,
     dataset,
     get_query_params,
     load,
     load_tables,
     load_views,
-    read,
-    Table,
-    TABLE_EXTENSIONS,
     print_and_test,
+    read,
 )
 
 expect_names = {f"expect.{ext}" for ext in ("yaml", "json", "ndjson")}
diff --git a/bigquery_etl/pytest_plugin/sql_test.py b/bigquery_etl/pytest_plugin/sql_test.py
index 09c0189fc7..c2733b9ac9 100644
--- a/bigquery_etl/pytest_plugin/sql_test.py
+++ b/bigquery_etl/pytest_plugin/sql_test.py
@@ -3,12 +3,15 @@
 # file, you can obtain one at http://mozilla.org/MPL/2.0/.
 
 """Utilities."""
+import codecs
+import json
+import os
+import os.path
+import pprint
 from contextlib import contextmanager
 from dataclasses import dataclass
 from datetime import date, datetime
 from decimal import Decimal
-from google.api_core.exceptions import BadRequest, NotFound
-from google.cloud import bigquery
 from io import BytesIO, TextIOWrapper
 from typing import (
     Any,
@@ -22,12 +25,9 @@ from typing import (
     Union,
 )
 
-import codecs
-import json
-import os
-import os.path
-import pprint
 import yaml
+from google.api_core.exceptions import BadRequest, NotFound
+from google.cloud import bigquery
 
 QueryParameter = Union[
     bigquery.ArrayQueryParameter,
diff --git a/bigquery_etl/query_scheduling/dag.py b/bigquery_etl/query_scheduling/dag.py
index 657a6b855e..88e511c824 100644
--- a/bigquery_etl/query_scheduling/dag.py
+++ b/bigquery_etl/query_scheduling/dag.py
@@ -1,22 +1,22 @@
 """Represents an Airflow DAG."""
+from typing import List, Optional
+
 import attr
 import cattr
 from jinja2 import Environment, PackageLoader
-from typing import List, Optional
 
-from bigquery_etl.query_scheduling.task import Task, TaskRef
 from bigquery_etl.query_scheduling import formatters
+from bigquery_etl.query_scheduling.task import Task, TaskRef
 from bigquery_etl.query_scheduling.utils import (
-    is_timedelta_string,
     is_date_string,
     is_email,
     is_schedule_interval,
+    is_timedelta_string,
     is_valid_dag_name,
     schedule_interval_delta,
 )
-
 
 AIRFLOW_DAG_TEMPLATE = "airflow_dag.j2"
 PUBLIC_DATA_JSON_DAG_TEMPLATE = "public_data_json_airflow_dag.j2"
 PUBLIC_DATA_JSON_DAG = "bqetl_public_data_json"
diff --git a/bigquery_etl/query_scheduling/dag_collection.py b/bigquery_etl/query_scheduling/dag_collection.py
index 8dc718574c..470d2b39e7 100644
--- a/bigquery_etl/query_scheduling/dag_collection.py
+++ b/bigquery_etl/query_scheduling/dag_collection.py
@@ -1,14 +1,15 @@
 """Represents a collection of configured Airflow DAGs."""
-from black import format_file_contents, FileMode
+from functools import partial
 from itertools import groupby
+from multiprocessing.pool import ThreadPool
 from operator import attrgetter
 from pathlib import Path
+
 import yaml
+from black import FileMode, format_file_contents
 
 from bigquery_etl.query_scheduling.dag import Dag, InvalidDag, PublicDataJsonDag
-from functools import partial
-from multiprocessing.pool import ThreadPool
 
 
 class DagCollection:
diff --git a/bigquery_etl/query_scheduling/formatters.py b/bigquery_etl/query_scheduling/formatters.py
index 9f97e35c3d..50ff651d39 100644
--- a/bigquery_etl/query_scheduling/formatters.py
+++ b/bigquery_etl/query_scheduling/formatters.py
@@ -1,7 +1,7 @@
 """This file contains custom filters for formatting data types in Jinja templates."""
-from datetime import datetime, timedelta
 import re
+from datetime import datetime, timedelta
 
 from bigquery_etl import query_scheduling
diff --git a/bigquery_etl/query_scheduling/generate_airflow_dags.py b/bigquery_etl/query_scheduling/generate_airflow_dags.py
index 0693164059..746d147b2e 100644
--- a/bigquery_etl/query_scheduling/generate_airflow_dags.py
+++ b/bigquery_etl/query_scheduling/generate_airflow_dags.py
@@ -3,13 +3,13 @@ import logging
 import os
 from argparse import ArgumentParser
-from ..util import standard_args
 from pathlib import Path
 
 from bigquery_etl.query_scheduling.dag_collection import DagCollection
 from bigquery_etl.query_scheduling.task import Task, UnscheduledTask
 from bigquery_etl.util.common import project_dirs
+from ..util import standard_args
 
 DEFAULT_DAGS_FILE = "dags.yaml"
 QUERY_FILE = "query.sql"
diff --git a/bigquery_etl/query_scheduling/task.py b/bigquery_etl/query_scheduling/task.py
index e3c43b1cf4..cb0c979c52 100644
--- a/bigquery_etl/query_scheduling/task.py
+++ b/bigquery_etl/query_scheduling/task.py
@@ -1,27 +1,26 @@
 """Represents a scheduled Airflow task."""
-import attr
-import cattr
-from fnmatch import fnmatchcase
+import logging
 import os
 import re
-import logging
+from fnmatch import fnmatchcase
 from pathlib import Path
 from typing import List, Optional, Tuple
 
+import attr
+import cattr
+
 from bigquery_etl.dependency import extract_table_references_without_views
 from bigquery_etl.metadata.parse_metadata import Metadata
 from bigquery_etl.query_scheduling.utils import (
     is_date_string,
     is_email,
-    is_valid_dag_name,
-    is_timedelta_string,
-    schedule_interval_delta,
     is_schedule_interval,
+    is_timedelta_string,
+    is_valid_dag_name,
+    schedule_interval_delta,
 )
-
 
 AIRFLOW_TASK_TEMPLATE = "airflow_task.j2"
 QUERY_FILE_RE = re.compile(
     r"^(?:.*/)?([a-zA-Z0-9_-]+)/([a-zA-Z0-9_]+)/"
diff --git a/bigquery_etl/query_scheduling/utils.py b/bigquery_etl/query_scheduling/utils.py
index 6b094dbe79..d27608cc26 100644
--- a/bigquery_etl/query_scheduling/utils.py
+++ b/bigquery_etl/query_scheduling/utils.py
@@ -1,7 +1,7 @@
 """Utility functions for scheduling queries."""
-from datetime import datetime
 import re
+from datetime import datetime
 
 
 def is_timedelta_string(s):
diff --git a/bigquery_etl/routine/parse_routine.py b/bigquery_etl/routine/parse_routine.py
index 163b512c5c..b51a1e36c4 100644
--- a/bigquery_etl/routine/parse_routine.py
+++ b/bigquery_etl/routine/parse_routine.py
@@ -5,17 +5,17 @@ This should eventually be refactored to a more general library for parsing
 UDF dependencies in queries as well.
""" -import attr -import re import os +import re from pathlib import Path from typing import List + +import attr import sqlparse import yaml from bigquery_etl.metadata.parse_metadata import METADATA_FILE - UDF_CHAR = "[a-zA-z0-9_]" UDF_FILE = "udf.sql" PROCEDURE_FILE = "stored_procedure.sql" diff --git a/bigquery_etl/routine/publish_routines.py b/bigquery_etl/routine/publish_routines.py index a77628c139..f2ea837b7c 100644 --- a/bigquery_etl/routine/publish_routines.py +++ b/bigquery_etl/routine/publish_routines.py @@ -1,19 +1,15 @@ """Publish UDFs and resources to the public mozfun GCP project.""" -from argparse import ArgumentParser import json import os import re +from argparse import ArgumentParser -from google.cloud import bigquery -from google.cloud import storage +from google.cloud import bigquery, storage +from bigquery_etl.routine.parse_routine import accumulate_dependencies, read_routine_dir from bigquery_etl.util import standard_args from bigquery_etl.util.common import project_dirs -from bigquery_etl.routine.parse_routine import ( - read_routine_dir, - accumulate_dependencies, -) DEFAULT_UDF_DEPENDENCY_DIR = "udf_js_lib/" DEFAULT_GCS_BUCKET = "moz-fx-data-prod-bigquery-etl" diff --git a/bigquery_etl/run_multipart_query.py b/bigquery_etl/run_multipart_query.py index 757ca3be3b..b9fc60dc54 100644 --- a/bigquery_etl/run_multipart_query.py +++ b/bigquery_etl/run_multipart_query.py @@ -11,9 +11,9 @@ complexity limit, and then join those results to generate a final wide result. The query files must be in the same directory and all be prefixed with `part`. """ +import os.path from argparse import ArgumentParser from multiprocessing.pool import ThreadPool -import os.path from google.cloud import bigquery diff --git a/bigquery_etl/run_query.py b/bigquery_etl/run_query.py index 32697ac1dc..65e667be4c 100644 --- a/bigquery_etl/run_query.py +++ b/bigquery_etl/run_query.py @@ -5,12 +5,12 @@ When executing a query associated metadata is parsed to determine whether results should be written to a corresponding public dataset. 
""" -from argparse import ArgumentParser import re import subprocess import sys -import yaml +from argparse import ArgumentParser +import yaml from bigquery_etl.metadata.parse_metadata import Metadata from bigquery_etl.metadata.validate_metadata import validate_public_data diff --git a/bigquery_etl/schema/__init__.py b/bigquery_etl/schema/__init__.py index 16cdc37629..ae978128ea 100644 --- a/bigquery_etl/schema/__init__.py +++ b/bigquery_etl/schema/__init__.py @@ -1,12 +1,12 @@ """Query schema.""" -import attr import json -import yaml - from pathlib import Path from tempfile import NamedTemporaryFile -from typing import Dict, Any +from typing import Any, Dict + +import attr +import yaml from bigquery_etl.dryrun import DryRun diff --git a/bigquery_etl/shredder/amplitude.py b/bigquery_etl/shredder/amplitude.py index 798afa59e1..540779bc76 100644 --- a/bigquery_etl/shredder/amplitude.py +++ b/bigquery_etl/shredder/amplitude.py @@ -1,22 +1,21 @@ """Forward deletion requests from BigQuery to Amplitude.""" +import json +import logging +import warnings from argparse import ArgumentParser from datetime import datetime from multiprocessing.pool import ThreadPool -from time import time, sleep from os import environ -import warnings -import logging -import json +from time import sleep, time +import requests from google.cloud import bigquery from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry -import requests from ..util import standard_args - AMPLITUDE_API_KEY = "AMPLITUDE_API_KEY" AMPLITUDE_SECRET_KEY = "AMPLITUDE_SECRET_KEY" diff --git a/bigquery_etl/shredder/config.py b/bigquery_etl/shredder/config.py index acde16cd31..a53de13ea3 100644 --- a/bigquery_etl/shredder/config.py +++ b/bigquery_etl/shredder/config.py @@ -2,17 +2,16 @@ """Meta data about tables and ids for self serve deletion.""" +import logging +import re from dataclasses import dataclass from functools import partial from typing import Tuple, Union -import logging -import re from google.cloud import bigquery from ..util.bigquery_id import qualified_table_id - SHARED_PROD = "moz-fx-data-shared-prod" GLEAN_SCHEMA_ID = "glean_ping_1" diff --git a/bigquery_etl/shredder/cost.py b/bigquery_etl/shredder/cost.py index 3b43ecaa59..4bb38791a9 100755 --- a/bigquery_etl/shredder/cost.py +++ b/bigquery_etl/shredder/cost.py @@ -2,18 +2,17 @@ """Report estimated cost to run shredder.""" +import warnings from argparse import ArgumentParser from datetime import datetime, timedelta from math import ceil from textwrap import dedent -import warnings from google.cloud import bigquery -from .config import DELETE_TARGETS -from ..util.bigquery_id import sql_table_id from ..util import standard_args - +from ..util.bigquery_id import sql_table_id +from .config import DELETE_TARGETS JOBS_QUERY = """ SELECT diff --git a/bigquery_etl/shredder/delete.py b/bigquery_etl/shredder/delete.py index cf35a14c07..bb158769b6 100644 --- a/bigquery_etl/shredder/delete.py +++ b/bigquery_etl/shredder/delete.py @@ -1,5 +1,7 @@ """Delete user data from long term storage.""" +import logging +import warnings from argparse import ArgumentParser from collections import defaultdict from dataclasses import dataclass, replace @@ -10,8 +12,6 @@ from multiprocessing.pool import ThreadPool from operator import attrgetter from textwrap import dedent from typing import Callable, Iterable, Optional, Tuple -import logging -import warnings from google.api_core.exceptions import NotFound from google.cloud import bigquery @@ -22,14 +22,13 @@ from ..util.bigquery_id 
import FULL_JOB_ID_RE, full_job_id, sql_table_id from ..util.client_queue import ClientQueue from ..util.exceptions import BigQueryInsertError from .config import ( - DeleteSource, DELETE_TARGETS, - find_glean_targets, + DeleteSource, find_experiment_analysis_targets, + find_glean_targets, find_pioneer_targets, ) - NULL_PARTITION_ID = "__NULL__" OUTSIDE_RANGE_PARTITION_ID = "__UNPARTITIONED__" diff --git a/bigquery_etl/shredder/search.py b/bigquery_etl/shredder/search.py index d4429bd172..ba307f3590 100644 --- a/bigquery_etl/shredder/search.py +++ b/bigquery_etl/shredder/search.py @@ -2,15 +2,14 @@ """Search for tables and user ids that may be eligible for self serve deletion.""" -from argparse import ArgumentParser import re import warnings +from argparse import ArgumentParser from google.cloud import bigquery -from .config import SHARED_PROD, SEARCH_IGNORE_TABLES, SEARCH_IGNORE_FIELDS from ..util import standard_args - +from .config import SEARCH_IGNORE_FIELDS, SEARCH_IGNORE_TABLES, SHARED_PROD parser = ArgumentParser(description=__doc__) parser.add_argument( diff --git a/bigquery_etl/stripe/__init__.py b/bigquery_etl/stripe/__init__.py index c893b4e92b..ebe3b34366 100644 --- a/bigquery_etl/stripe/__init__.py +++ b/bigquery_etl/stripe/__init__.py @@ -1,19 +1,19 @@ """Import Stripe data into BigQuery.""" -from datetime import datetime, timedelta, timezone -from hashlib import sha256 -from tempfile import TemporaryFile -from typing import Any, Dict, IO, List, Optional, Type import os.path import re import sys -import ujson import warnings +from datetime import datetime, timedelta, timezone +from hashlib import sha256 +from tempfile import TemporaryFile +from typing import IO, Any, Dict, List, Optional, Type -from google.cloud import bigquery -from stripe.api_resources.abstract import ListableAPIResource import click import stripe +import ujson +from google.cloud import bigquery +from stripe.api_resources.abstract import ListableAPIResource # event data types with separate events and a defined schema EVENT_DATA_TYPES = ( diff --git a/bigquery_etl/util/client_queue.py b/bigquery_etl/util/client_queue.py index da95ab425d..e4a22fe3a8 100644 --- a/bigquery_etl/util/client_queue.py +++ b/bigquery_etl/util/client_queue.py @@ -1,8 +1,8 @@ """Queue for balancing jobs across billing projects.""" +import asyncio from contextlib import contextmanager from queue import Queue -import asyncio from google.cloud import bigquery diff --git a/bigquery_etl/util/common.py b/bigquery_etl/util/common.py index 81b13596e6..f84c633711 100644 --- a/bigquery_etl/util/common.py +++ b/bigquery_etl/util/common.py @@ -3,7 +3,6 @@ import os import re from typing import List - # Search for all camelCase situations in reverse with arbitrary lookaheads. 
 REV_WORD_BOUND_PAT = re.compile(
     r"""
diff --git a/bigquery_etl/util/standard_args.py b/bigquery_etl/util/standard_args.py
index e238dc0f41..e7109cf861 100644
--- a/bigquery_etl/util/standard_args.py
+++ b/bigquery_etl/util/standard_args.py
@@ -1,12 +1,12 @@
 """Standard definitions for reusable script arguments."""
+import fnmatch
+import logging
+import re
+import warnings
 from argparse import Action
 from functools import partial
 from uuid import uuid4
-import fnmatch
-import re
-import logging
-import warnings
 
 from google.cloud import bigquery
diff --git a/bigquery_etl/view/generate_stable_views.py b/bigquery_etl/view/generate_stable_views.py
index 49fe0f642b..56f695901f 100644
--- a/bigquery_etl/view/generate_stable_views.py
+++ b/bigquery_etl/view/generate_stable_views.py
@@ -9,12 +9,12 @@ present in the target directory, which allows manual overrides of views by
 checking them into the sql/ tree of the default branch of the repository.
 """
-import logging
-import tempfile
-from argparse import ArgumentParser
 import json
-import urllib.request
+import logging
 import tarfile
+import tempfile
+import urllib.request
+from argparse import ArgumentParser
 from dataclasses import dataclass
 from functools import partial
 from io import BytesIO
@@ -27,7 +27,6 @@ from bigquery_etl.dryrun import DryRun
 from bigquery_etl.format_sql.formatter import reformat
 from bigquery_etl.util import standard_args
 
-
 SCHEMAS_URI = (
     "https://github.com/mozilla-services/mozilla-pipeline-schemas"
     "/archive/generated-schemas.tar.gz"
diff --git a/bigquery_etl/view/validate_views.py b/bigquery_etl/view/validate_views.py
index 9652ed3baa..83508c233b 100644
--- a/bigquery_etl/view/validate_views.py
+++ b/bigquery_etl/view/validate_views.py
@@ -1,11 +1,12 @@
 """Validates view definitions."""
-from argparse import ArgumentParser
 import glob
+import sys
+from argparse import ArgumentParser
 from multiprocessing.pool import Pool
 from pathlib import Path
+
 import sqlparse
-import sys
 
 from bigquery_etl.dependency import extract_table_references
 from bigquery_etl.util import standard_args
diff --git a/conftest.py b/conftest.py
index 04a715cff5..9218f05352 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,14 +1,13 @@
 """PyTest configuration."""
-from google.cloud import bigquery
-from google.cloud import storage
-from pathlib import Path
 import os
-import pytest
 import random
 import string
 import subprocess
+from pathlib import Path
+
+import pytest
+from google.cloud import bigquery, storage
 
 TEST_BUCKET = "bigquery-etl-integration-test-bucket"
diff --git a/requirements.in b/requirements.in
index 1e6652f2f8..e035c3075e 100644
--- a/requirements.in
+++ b/requirements.in
@@ -25,3 +25,4 @@ yamllint==1.26.0
 pip-tools==5.5.0
 pyjnius==1.3.0
 pre-commit==2.10.1
+pytest-isort==1.3.0
diff --git a/requirements.txt b/requirements.txt
index 58192fe4d1..070a1a3f71 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -298,6 +298,10 @@ iniconfig==1.0.1 \
     --hash=sha256:80cf40c597eb564e86346103f609d74efce0f6b4d4f30ec8ce9e2c26411ba437 \
     --hash=sha256:e5f92f89355a67de0595932a6c6c02ab4afddc6fcdc0bfc5becd0d60884d3f69
     # via pytest
+isort==5.7.0 \
+    --hash=sha256:c729845434366216d320e936b8ad6f9d681aab72dc7cbc2d51bedc3582f3ad1e \
+    --hash=sha256:fff4f0c04e1825522ce6949973e83110a6e907750cd92d128b0d14aaaadbffdc
+    # via pytest-isort
 jinja2==2.11.3 \
     --hash=sha256:03e47ad063331dd6a3f04a43eddca8a966a26ba0c5b7207a9a9e4e08f1b29419 \
     --hash=sha256:a6d58433de0ae800347cab1fa3043cebbabe8baa9d29e668f1c768cb87a333c6
@@ -577,6 +581,10 @@ pytest-forked==1.3.0 \
     --hash=sha256:6aa9ac7e00ad1a539c41bec6d21011332de671e938c7637378ec9710204e37ca \
     --hash=sha256:dc4147784048e70ef5d437951728825a131b81714b398d5d52f17c7c144d8815
     # via pytest-xdist
+pytest-isort==1.3.0 \
+    --hash=sha256:074255ad393088a2daee6ca7f2305b7b86358ff632f62302896d8d4b2b339107 \
+    --hash=sha256:46a12331a701e2f21d48548b2828c8b0a7956dbf1cd5347163f537deb24332dd
+    # via -r requirements.in
 pytest-mypy==0.8.0 \
     --hash=sha256:63d418a4fea7d598ac40b659723c00804d16a251d90a5cfbca213eeba5aaf01c \
     --hash=sha256:8d2112972c1debf087943f48963a0daf04f3424840aea0cf437cc97053b1b0ef
diff --git a/script/experiments/export_experiment_monitoring_data.py b/script/experiments/export_experiment_monitoring_data.py
index 71e55c13eb..10ed966c3d 100644
--- a/script/experiments/export_experiment_monitoring_data.py
+++ b/script/experiments/export_experiment_monitoring_data.py
@@ -2,15 +2,15 @@
 
 """Exports experiment monitoring data to GCS as JSON."""
 
+import random
+import string
 from argparse import ArgumentParser
 from datetime import datetime, timedelta
 from functools import partial
-from google.cloud import storage
-from google.cloud import bigquery
 from multiprocessing import Pool
-import random
+
 import smart_open
-import string
+from google.cloud import bigquery, storage
 
 parser = ArgumentParser(description=__doc__)
 parser.add_argument(
diff --git a/script/pyspark/export_to_parquet.py b/script/pyspark/export_to_parquet.py
index d515156ab9..a85c530a8e 100644
--- a/script/pyspark/export_to_parquet.py
+++ b/script/pyspark/export_to_parquet.py
@@ -2,11 +2,11 @@
 
 """Read a table from BigQuery and write it as parquet."""
 
-from argparse import ArgumentParser
-from textwrap import dedent
 import json
 import re
 import sys
+from argparse import ArgumentParser
+from textwrap import dedent
 
 try:
     from google.cloud import bigquery
diff --git a/setup.py b/setup.py
index 3ce25dcd48..7994ab930d 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,5 @@
-from setuptools import setup, find_namespace_packages
+from setuptools import find_namespace_packages, setup
+
 
 def get_version():
     version = {}
diff --git a/sql/moz-fx-data-experiments/monitoring/experimenter_experiments_v1/query.py b/sql/moz-fx-data-experiments/monitoring/experimenter_experiments_v1/query.py
index a042859366..2037548675 100644
--- a/sql/moz-fx-data-experiments/monitoring/experimenter_experiments_v1/query.py
+++ b/sql/moz-fx-data-experiments/monitoring/experimenter_experiments_v1/query.py
@@ -2,18 +2,19 @@
 
 """Import experiments from Experimenter via the Experimenter API."""
 
-from argparse import ArgumentParser
-from google.cloud import bigquery
-import requests
-import attr
-import cattr
 import datetime
 import json
-import pytz
 import sys
 import time
+from argparse import ArgumentParser
 from typing import List, Optional
 
+import attr
+import cattr
+import pytz
+import requests
+from google.cloud import bigquery
+
 EXPERIMENTER_API_URL_V1 = (
     "https://experimenter.services.mozilla.com/api/v1/experiments/"
 )
diff --git a/sql/moz-fx-data-shared-prod/monitoring_derived/average_ping_sizes_v1/query.py b/sql/moz-fx-data-shared-prod/monitoring_derived/average_ping_sizes_v1/query.py
index 6c27989bda..8e64254e5c 100644
--- a/sql/moz-fx-data-shared-prod/monitoring_derived/average_ping_sizes_v1/query.py
+++ b/sql/moz-fx-data-shared-prod/monitoring_derived/average_ping_sizes_v1/query.py
@@ -4,10 +4,11 @@
 
 from argparse import ArgumentParser
 from fnmatch import fnmatchcase
-from google.cloud import bigquery
 from functools import partial
 from multiprocessing.pool import ThreadPool
 
+from google.cloud import bigquery
+
 parser = ArgumentParser(description=__doc__)
 parser.add_argument("--date", required=True)  # expect string with format yyyy-mm-dd
 parser.add_argument("--project", default="moz-fx-data-shared-prod")
diff --git a/sql/moz-fx-data-shared-prod/monitoring_derived/bigquery_etl_scheduled_queries_cost_v1/query.py b/sql/moz-fx-data-shared-prod/monitoring_derived/bigquery_etl_scheduled_queries_cost_v1/query.py
index efde28cfea..5aa52d07f1 100644
--- a/sql/moz-fx-data-shared-prod/monitoring_derived/bigquery_etl_scheduled_queries_cost_v1/query.py
+++ b/sql/moz-fx-data-shared-prod/monitoring_derived/bigquery_etl_scheduled_queries_cost_v1/query.py
@@ -4,9 +4,10 @@
 
 from argparse import ArgumentParser
 from fnmatch import fnmatchcase
-from google.cloud import bigquery
 from pathlib import Path
 
+from google.cloud import bigquery
+
 parser = ArgumentParser(description=__doc__)
 parser.add_argument("--date", required=True)  # expect string with format yyyy-mm-dd
 parser.add_argument("--project", default="moz-fx-data-shared-prod")
diff --git a/sql/moz-fx-data-shared-prod/monitoring_derived/bigquery_etl_scheduled_query_usage_v1/query.py b/sql/moz-fx-data-shared-prod/monitoring_derived/bigquery_etl_scheduled_query_usage_v1/query.py
index 8d15abadfb..f198f2c477 100644
--- a/sql/moz-fx-data-shared-prod/monitoring_derived/bigquery_etl_scheduled_query_usage_v1/query.py
+++ b/sql/moz-fx-data-shared-prod/monitoring_derived/bigquery_etl_scheduled_query_usage_v1/query.py
@@ -3,9 +3,10 @@
 
 """Determine cost of previously scheduled bigquery-etl queries."""
 
 from argparse import ArgumentParser
-from google.cloud import bigquery
 from pathlib import Path
 
+from google.cloud import bigquery
+
 DEFAULT_PROJECTS = [
     "moz-fx-data-shared-prod",
     "moz-fx-data-derived-datasets",
diff --git a/sql/moz-fx-data-shared-prod/monitoring_derived/column_size_v1/query.py b/sql/moz-fx-data-shared-prod/monitoring_derived/column_size_v1/query.py
index f23712ba04..d6b2728e09 100644
--- a/sql/moz-fx-data-shared-prod/monitoring_derived/column_size_v1/query.py
+++ b/sql/moz-fx-data-shared-prod/monitoring_derived/column_size_v1/query.py
@@ -3,10 +3,11 @@
 
 """Determine column sizes by performing dry runs."""
 
 from argparse import ArgumentParser
-from google.cloud import bigquery
 from functools import partial
 from multiprocessing.pool import ThreadPool
 
+from google.cloud import bigquery
+
 parser = ArgumentParser(description=__doc__)
 parser.add_argument("--date", required=True)  # expect string with format yyyy-mm-dd
 parser.add_argument("--project", default="moz-fx-data-shared-prod")
diff --git a/sql/moz-fx-data-shared-prod/monitoring_derived/stable_table_sizes_v1/query.py b/sql/moz-fx-data-shared-prod/monitoring_derived/stable_table_sizes_v1/query.py
index a5cadcbdef..50c4798227 100644
--- a/sql/moz-fx-data-shared-prod/monitoring_derived/stable_table_sizes_v1/query.py
+++ b/sql/moz-fx-data-shared-prod/monitoring_derived/stable_table_sizes_v1/query.py
@@ -4,10 +4,11 @@
 
 from argparse import ArgumentParser
 from fnmatch import fnmatchcase
-from google.cloud import bigquery
 from functools import partial
 from multiprocessing.pool import ThreadPool
 
+from google.cloud import bigquery
+
 parser = ArgumentParser(description=__doc__)
 parser.add_argument("--date", required=True)  # expect string with format yyyy-mm-dd
 parser.add_argument("--project", default="moz-fx-data-shared-prod")
diff --git a/sql/moz-fx-data-shared-prod/telemetry/unnest_parquet_view.sql.py b/sql/moz-fx-data-shared-prod/telemetry/unnest_parquet_view.sql.py
index 3692f3a64e..3960616144 100755
--- a/sql/moz-fx-data-shared-prod/telemetry/unnest_parquet_view.sql.py
+++ b/sql/moz-fx-data-shared-prod/telemetry/unnest_parquet_view.sql.py
@@ -2,11 +2,12 @@
 
 """Generate view to unnest parquet-format list and map fields."""
 
-from argparse import ArgumentParser
-from google.cloud import bigquery
-from textwrap import dedent
 import json
 import sys
+from argparse import ArgumentParser
+from textwrap import dedent
+
+from google.cloud import bigquery
 
 
 def qualify(table, dataset, project):
diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_histogram_aggregates_v1.sql.py b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_histogram_aggregates_v1.sql.py
index ea8ea7953f..8e7bebf277 100755
--- a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_histogram_aggregates_v1.sql.py
+++ b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_histogram_aggregates_v1.sql.py
@@ -1,9 +1,9 @@
 #!/usr/bin/env python3
 """clients_daily_histogram_aggregates query generator."""
-import sys
+import argparse
 import gzip
 import json
-import argparse
+import sys
 import textwrap
 import urllib.request
 from pathlib import Path
@@ -14,7 +14,6 @@ from google.cloud import bigquery
 
 sys.path.append(str(Path(__file__).parent.parent.parent.resolve()))
 from bigquery_etl.format_sql.formatter import reformat
 
-
 PROBE_INFO_SERVICE = (
     "https://probeinfo.telemetry.mozilla.org/firefox/all/main/all_probes"
 )
diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_scalar_aggregates_v1.sql.py b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_scalar_aggregates_v1.sql.py
index 4d2fee6828..60d6d3d533 100755
--- a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_scalar_aggregates_v1.sql.py
+++ b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_scalar_aggregates_v1.sql.py
@@ -1,11 +1,11 @@
 #!/usr/bin/env python3
 """clients_daily_scalar_aggregates query generator."""
-import sys
-import json
-import gzip
 import argparse
-import textwrap
+import gzip
+import json
 import subprocess
+import sys
+import textwrap
 import urllib.request
 from pathlib import Path
 from time import sleep
@@ -14,7 +14,6 @@ sys.path.append(str(Path(__file__).parent.parent.parent.resolve()))
 
 from bigquery_etl.format_sql.formatter import reformat
 from bigquery_etl.util.common import snake_case
 
-
 PROBE_INFO_SERVICE = (
     "https://probeinfo.telemetry.mozilla.org/firefox/all/main/all_probes"
 )
diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/experiments_v1/get_experiment_list.py b/sql/moz-fx-data-shared-prod/telemetry_derived/experiments_v1/get_experiment_list.py
index c0b168c8bf..b6489eee70 100755
--- a/sql/moz-fx-data-shared-prod/telemetry_derived/experiments_v1/get_experiment_list.py
+++ b/sql/moz-fx-data-shared-prod/telemetry_derived/experiments_v1/get_experiment_list.py
@@ -2,9 +2,9 @@
 
 """Get experiment list for a given date from the recipe server."""
 
-from argparse import ArgumentParser
 import json
 import time
+from argparse import ArgumentParser
 
 import requests
diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/surveygizmo_daily_attitudes/import_responses.py b/sql/moz-fx-data-shared-prod/telemetry_derived/surveygizmo_daily_attitudes/import_responses.py
index a7dcb8eec2..15cd85ab76 100755
--- a/sql/moz-fx-data-shared-prod/telemetry_derived/surveygizmo_daily_attitudes/import_responses.py
+++ b/sql/moz-fx-data-shared-prod/telemetry_derived/surveygizmo_daily_attitudes/import_responses.py
@@ -2,15 +2,15 @@
 
 """Import data from daily attitudes heartbeat survey into BigQuery."""
 
-from argparse import ArgumentParser
 import datetime as dt
-from google.cloud import bigquery
 import itertools
-import pytz
 import re
-import requests
+from argparse import ArgumentParser
 from time import sleep
 
+import pytz
+import requests
+from google.cloud import bigquery
 
 parser = ArgumentParser(description=__doc__)
 parser.add_argument("--date", required=True)
diff --git a/sql/moz-fx-data-shared-prod/udf/main_summary_scalars/main_summary_scalars.sql.py b/sql/moz-fx-data-shared-prod/udf/main_summary_scalars/main_summary_scalars.sql.py
index fb6be10eeb..4072618080 100755
--- a/sql/moz-fx-data-shared-prod/udf/main_summary_scalars/main_summary_scalars.sql.py
+++ b/sql/moz-fx-data-shared-prod/udf/main_summary_scalars/main_summary_scalars.sql.py
@@ -3,8 +3,8 @@
 
 import itertools
 import json
-import sys
 import os.path
+import sys
 from pathlib import Path
 
 sys.path.append(str(Path(__file__).parent.parent.resolve()))
diff --git a/tests/cli/test_cli_dag.py b/tests/cli/test_cli_dag.py
index ba0f1249f9..fbac38cf35 100644
--- a/tests/cli/test_cli_dag.py
+++ b/tests/cli/test_cli_dag.py
@@ -1,10 +1,11 @@
 import os
-import pytest
-from click.testing import CliRunner
 from pathlib import Path
-import yaml
 
-from bigquery_etl.cli.dag import info, create, remove
+import pytest
+import yaml
+from click.testing import CliRunner
+
+from bigquery_etl.cli.dag import create, info, remove
 
 TEST_DIR = Path(__file__).parent.parent
diff --git a/tests/cli/test_cli_dependency.py b/tests/cli/test_cli_dependency.py
index acb6522d43..8767410a1c 100644
--- a/tests/cli/test_cli_dependency.py
+++ b/tests/cli/test_cli_dependency.py
@@ -1,6 +1,7 @@
-from click.testing import CliRunner
 import os
+
 import pytest
+from click.testing import CliRunner
 
 from bigquery_etl.dependency import show as dependency_show
diff --git a/tests/cli/test_cli_format.py b/tests/cli/test_cli_format.py
index ff98c2535e..5f0b7322ee 100644
--- a/tests/cli/test_cli_format.py
+++ b/tests/cli/test_cli_format.py
@@ -1,6 +1,7 @@
-from click.testing import CliRunner
 import os
+
 import pytest
+from click.testing import CliRunner
 
 from bigquery_etl.cli.format import format as sql_format
diff --git a/tests/cli/test_cli_query.py b/tests/cli/test_cli_query.py
index efebe82d5c..26463c5351 100644
--- a/tests/cli/test_cli_query.py
+++ b/tests/cli/test_cli_query.py
@@ -1,13 +1,14 @@
 import os
+
 import pytest
-from click.testing import CliRunner
 import yaml
+from click.testing import CliRunner
 
 from bigquery_etl.cli.query import (
-    create,
-    schedule,
-    info,
     _queries_matching_name_pattern,
+    create,
+    info,
+    schedule,
 )
diff --git a/tests/cli/test_cli_routine.py b/tests/cli/test_cli_routine.py
index 87f642b83a..1876f70cf4 100644
--- a/tests/cli/test_cli_routine.py
+++ b/tests/cli/test_cli_routine.py
@@ -1,7 +1,8 @@
 import os
+
 import pytest
-from click.testing import CliRunner
 import yaml
+from click.testing import CliRunner
 
 from bigquery_etl.cli.routine import create, info, rename
diff --git a/tests/cli/test_cli_utils.py b/tests/cli/test_cli_utils.py
index 4275cb6307..c5e5bbf386 100644
--- a/tests/cli/test_cli_utils.py
+++ b/tests/cli/test_cli_utils.py
@@ -1,14 +1,15 @@
 from pathlib import Path
+
 import pytest
 from click.exceptions import BadParameter
+
 from bigquery_etl.cli.utils import (
+    is_authenticated,
     is_valid_dir,
     is_valid_file,
-    is_authenticated,
     is_valid_project,
 )
-
 
 TEST_DIR = Path(__file__).parent.parent
diff --git a/tests/docs/test_generate_docs.py b/tests/docs/test_generate_docs.py
index f5339ae1c2..5f50a8631d 100644
--- a/tests/docs/test_generate_docs.py
+++ b/tests/docs/test_generate_docs.py
@@ -1,5 +1,6 @@
 import os
 from pathlib import Path
+
 import pytest
 
 from bigquery_etl.docs.generate_docs import load_with_examples
diff --git a/tests/generate_queries/test_generate_queries.py b/tests/generate_queries/test_generate_queries.py
index fbc139ce91..1dcdaec448 100644
--- a/tests/generate_queries/test_generate_queries.py
+++ b/tests/generate_queries/test_generate_queries.py
@@ -1,8 +1,8 @@
 import os
-
-from bigquery_etl.events_daily.generate_queries import get_query_dirs, TemplatedDir
 from pathlib import Path
 
+from bigquery_etl.events_daily.generate_queries import TemplatedDir, get_query_dirs
+
 BASE_DIR = Path(os.path.dirname(__file__)).parent
diff --git a/tests/generate_queries/test_templated_dir.py b/tests/generate_queries/test_templated_dir.py
index e9a3dbad95..e67e8f65ed 100644
--- a/tests/generate_queries/test_templated_dir.py
+++ b/tests/generate_queries/test_templated_dir.py
@@ -1,9 +1,9 @@
 import os
-import pytest
-
-from bigquery_etl.events_daily.generate_queries import TemplatedDir, Template
 from pathlib import Path
 
+import pytest
+
+from bigquery_etl.events_daily.generate_queries import Template, TemplatedDir
 
 BASE_DIR = Path(os.path.dirname(__file__)).parent
diff --git a/tests/metadata/test_parse_metadata.py b/tests/metadata/test_parse_metadata.py
index b7761f92dd..e2905ac58b 100644
--- a/tests/metadata/test_parse_metadata.py
+++ b/tests/metadata/test_parse_metadata.py
@@ -1,6 +1,7 @@
-import pytest
 from pathlib import Path
 
+import pytest
+
 from bigquery_etl.metadata.parse_metadata import Metadata
 
 TEST_DIR = Path(__file__).parent.parent
diff --git a/tests/public_data/test_publish_gcs_metadata.py b/tests/public_data/test_publish_gcs_metadata.py
index 89f3d39956..f4ab3786dd 100644
--- a/tests/public_data/test_publish_gcs_metadata.py
+++ b/tests/public_data/test_publish_gcs_metadata.py
@@ -1,14 +1,13 @@
 import json
+from datetime import datetime
+from pathlib import Path
+from unittest.mock import MagicMock, Mock, call
+
 import pytest
 import smart_open
-from pathlib import Path
-
-from datetime import datetime
-from unittest.mock import call, Mock, MagicMock
 
 import bigquery_etl.public_data.publish_gcs_metadata as pgm
-
 
 TEST_DIR = Path(__file__).parent.parent
diff --git a/tests/public_data/test_publish_json.py b/tests/public_data/test_publish_json.py
index e04b282319..ce4e8d1685 100644
--- a/tests/public_data/test_publish_json.py
+++ b/tests/public_data/test_publish_json.py
@@ -1,12 +1,12 @@
 import json
+from pathlib import Path
+from unittest.mock import MagicMock, Mock, call
+
 import pytest
 import smart_open
-from pathlib import Path
-from unittest.mock import call, Mock, MagicMock
 
 from bigquery_etl.public_data.publish_json import JsonPublisher
-
 
 TEST_DIR = Path(__file__).parent.parent
diff --git a/tests/public_data/test_publish_public_data_json_script.py b/tests/public_data/test_publish_public_data_json_script.py
index 5f6de01aed..e173198a87 100644
--- a/tests/public_data/test_publish_public_data_json_script.py
+++ b/tests/public_data/test_publish_public_data_json_script.py
@@ -1,11 +1,10 @@
 import json
-import pytest
 import subprocess
-
-from pathlib import Path
 from datetime import datetime
-from google.cloud import bigquery
+from pathlib import Path
 
+import pytest
+from google.cloud import bigquery
 
 TEST_DIR = Path(__file__).parent.parent
diff --git a/tests/query_scheduling/test_dag.py b/tests/query_scheduling/test_dag.py
index 341c276202..c4a87baee3 100644
--- a/tests/query_scheduling/test_dag.py
+++ b/tests/query_scheduling/test_dag.py
@@ -1,15 +1,16 @@
 from pathlib import Path
+
 import pytest
 
+from bigquery_etl.metadata.parse_metadata import Metadata
 from bigquery_etl.query_scheduling.dag import (
     Dag,
-    DagParseException,
     DagDefaultArgs,
+    DagParseException,
     PublicDataJsonDag,
 )
-from bigquery_etl.metadata.parse_metadata import Metadata
-from bigquery_etl.query_scheduling.task import Task
 from bigquery_etl.query_scheduling.dag_collection import DagCollection
+from bigquery_etl.query_scheduling.task import Task
 
 TEST_DIR = Path(__file__).parent.parent
diff --git a/tests/query_scheduling/test_dag_collection.py b/tests/query_scheduling/test_dag_collection.py
index da4028ce5f..70631668c8 100644
--- a/tests/query_scheduling/test_dag_collection.py
+++ b/tests/query_scheduling/test_dag_collection.py
@@ -1,11 +1,12 @@
 import os
 from pathlib import Path
+
 import pytest
 
-from bigquery_etl.query_scheduling.dag_collection import DagCollection
-from bigquery_etl.query_scheduling.dag import InvalidDag, DagParseException
-from bigquery_etl.query_scheduling.task import Task
 from bigquery_etl.metadata.parse_metadata import Metadata
+from bigquery_etl.query_scheduling.dag import DagParseException, InvalidDag
+from bigquery_etl.query_scheduling.dag_collection import DagCollection
+from bigquery_etl.query_scheduling.task import Task
 
 TEST_DIR = Path(__file__).parent.parent
diff --git a/tests/query_scheduling/test_formatters.py b/tests/query_scheduling/test_formatters.py
index 6907aa323a..b94919b2ec 100644
--- a/tests/query_scheduling/test_formatters.py
+++ b/tests/query_scheduling/test_formatters.py
@@ -1,12 +1,14 @@
+import datetime
+
+import pytest
+
 from bigquery_etl.query_scheduling.formatters import (
-    format_schedule_interval,
     format_attr,
     format_date,
-    format_timedelta,
     format_optional_string,
+    format_schedule_interval,
+    format_timedelta,
 )
-import datetime
-import pytest
 
 
 class TestFormatters:
diff --git a/tests/query_scheduling/test_task.py b/tests/query_scheduling/test_task.py
index 585082804e..9576853330 100644
--- a/tests/query_scheduling/test_task.py
+++ b/tests/query_scheduling/test_task.py
@@ -1,16 +1,17 @@
-from pathlib import Path
 import os
-import pytest
+from pathlib import Path
 from typing import NewType
 
-from bigquery_etl.query_scheduling.task import (
-    Task,
-    UnscheduledTask,
-    TaskParseException,
-    TaskRef,
-)
+import pytest
+
 from bigquery_etl.metadata.parse_metadata import Metadata
 from bigquery_etl.query_scheduling.dag_collection import DagCollection
+from bigquery_etl.query_scheduling.task import (
+    Task,
+    TaskParseException,
+    TaskRef,
+    UnscheduledTask,
+)
 
 TEST_DIR = Path(__file__).parent.parent
diff --git a/tests/schema/test_schema.py b/tests/schema/test_schema.py
index 7bec4fa707..0bdd466218 100644
--- a/tests/schema/test_schema.py
+++ b/tests/schema/test_schema.py
@@ -1,9 +1,9 @@
 from pathlib import Path
-
-from bigquery_etl.schema import Schema
 from textwrap import dedent
+
 import yaml
 
+from bigquery_etl.schema import Schema
 
 TEST_DIR = Path(__file__).parent.parent
diff --git a/tests/sql/glam-fenix-dev/glam_etl/bootstrap.py b/tests/sql/glam-fenix-dev/glam_etl/bootstrap.py
index 036a293eb4..4315728658 100644
--- a/tests/sql/glam-fenix-dev/glam_etl/bootstrap.py
+++ b/tests/sql/glam-fenix-dev/glam_etl/bootstrap.py
@@ -13,9 +13,10 @@ from pathlib import Path
 
 import click
 import yaml
-from bigquery_etl.glam.utils import run
 from google.cloud import bigquery
+
+from bigquery_etl.glam.utils import run
+
 warnings.filterwarnings(
"ignore", "Your application has authenticated using end user credentials" ) diff --git a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_minimal/data.py b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_minimal/data.py index b859a2ea36..8e4567837c 100644 --- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_minimal/data.py +++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_minimal/data.py @@ -1,7 +1,7 @@ """Testing data for query.""" -from pathlib import Path from itertools import product +from pathlib import Path import yaml diff --git a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_multiple_clients/data.py b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_multiple_clients/data.py index f7e69eac14..1668bb2d14 100644 --- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_multiple_clients/data.py +++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_multiple_clients/data.py @@ -1,7 +1,7 @@ """Testing data for query.""" -from pathlib import Path from itertools import product +from pathlib import Path from uuid import uuid4 import yaml diff --git a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1/test_minimal/data.py b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1/test_minimal/data.py index f010b0b17a..380dc04dfd 100644 --- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1/test_minimal/data.py +++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1/test_minimal/data.py @@ -1,7 +1,7 @@ """Testing data for query.""" +from itertools import product from pathlib import Path from uuid import uuid4 -from itertools import product import yaml diff --git a/tests/sql/moz-fx-data-shared-prod/telemetry/test_longitudinal.py b/tests/sql/moz-fx-data-shared-prod/telemetry/test_longitudinal.py index bf964b56e1..7964f4cf40 100644 --- a/tests/sql/moz-fx-data-shared-prod/telemetry/test_longitudinal.py +++ b/tests/sql/moz-fx-data-shared-prod/telemetry/test_longitudinal.py @@ -1,6 +1,6 @@ -import textwrap import subprocess import sys +import textwrap import pytest diff --git a/tests/test_dryrun.py b/tests/test_dryrun.py index e157676694..36dd8b0479 100644 --- a/tests/test_dryrun.py +++ b/tests/test_dryrun.py @@ -1,5 +1,7 @@ import os + import pytest + from bigquery_etl.dryrun import DryRun, Errors diff --git a/tests/test_entrypoint.py b/tests/test_entrypoint.py index 31b714c9e1..8ce001b7f0 100644 --- a/tests/test_entrypoint.py +++ b/tests/test_entrypoint.py @@ -1,8 +1,9 @@ -from pathlib import Path -from google.cloud import bigquery import os -import pytest import subprocess +from pathlib import Path + +import pytest +from google.cloud import bigquery ENTRYPOINT_SCRIPT = Path(__file__).parent.parent / "script" / "entrypoint" diff --git a/tests/test_run_query.py b/tests/test_run_query.py index a150e95372..c5e5eb0126 100644 --- a/tests/test_run_query.py +++ b/tests/test_run_query.py @@ -1,5 +1,6 @@ -from unittest.mock import patch import os +from unittest.mock import patch + import pytest import yaml diff --git a/tests/util/test_snake_casing.py 
b/tests/util/test_snake_casing.py index ae96dc0e7a..236f39b578 100644 --- a/tests/util/test_snake_casing.py +++ b/tests/util/test_snake_casing.py @@ -1,7 +1,7 @@ -import os import csv - +import os from pathlib import Path + from bigquery_etl.util.common import snake_case diff --git a/tests/validation/hmac_sha256.py b/tests/validation/hmac_sha256.py index fc3d95051f..e76d111bc5 100644 --- a/tests/validation/hmac_sha256.py +++ b/tests/validation/hmac_sha256.py @@ -5,13 +5,15 @@ The vectors are located in tests/validation/data/hmac_sha256_validation.json. """ import json + import pytest +from google.cloud import bigquery + from bigquery_etl.routine.parse_routine import ( - read_routine_dir, RawRoutine, + read_routine_dir, routine_tests_sql, ) -from google.cloud import bigquery validation_data_file = "tests/validation/data/hmac_sha256_validation.json"