Automatically sort Python imports (#1840)

Daniel Thorn 2021-02-24 14:11:52 -08:00 committed by GitHub
Parent 744600069d
Commit a190e18264
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
98 changed files with 293 additions and 265 deletions

.isort.cfg Normal file

View File

@@ -0,0 +1,3 @@
[settings]
profile = black
skip = dags,script/legacy,target,venv
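The black profile keeps isort's output compatible with Black, which this repository already uses for formatting, and skip excludes generated or vendored directories (dags, script/legacy, target, venv) from sorting. A minimal sketch of the resulting order, using isort 5.x's Python API (the sample import lines are borrowed from the diffs below; in a real checkout, first-party detection comes from the project layout):

import isort

# Imports in an arbitrary order, as they might appear before sorting.
messy = (
    "from google.cloud import bigquery\n"
    "import sys\n"
    "import os\n"
    "from bigquery_etl.util import standard_args\n"
)

# profile="black" mirrors the [settings] above; output groups stdlib,
# then third-party, then first-party imports, alphabetized within each.
print(isort.code(messy, profile="black"))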

View File

@@ -18,3 +18,7 @@ repos:
hooks:
- id: yamllint
args: [-c, .yamllint.yaml, .]
- repo: https://github.com/PyCQA/isort
rev: 5.7.0
hooks:
- id: isort
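With this hook registered, pre-commit runs isort 5.7.0 against staged Python files on every commit and rejects the commit if any file's imports are out of order. The same check can be reproduced from Python; a hedged sketch (isort.check_file is part of the isort 5.x API, and the path below is illustrative, not taken from this commit):

import isort

# Returns True when the file's imports are already sorted; the
# surrounding .isort.cfg is discovered automatically.
ok = isort.check_file("bigquery_etl/dryrun.py", show_diff=True)
if not ok:
    print("imports out of order; run isort to rewrite the file")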

View File

@@ -5,7 +5,7 @@ from argparse import ArgumentParser
from functools import partial
from multiprocessing.pool import ThreadPool
from google.api_core.exceptions import NotFound, Forbidden
from google.api_core.exceptions import Forbidden, NotFound
from google.cloud import bigquery
from bigquery_etl.util import standard_args # noqa E402

View File

@@ -14,9 +14,9 @@ from ..cli.format import format
from ..cli.query import query
from ..cli.routine import mozfun, routine
from ..cli.view import view
from ..dependency import dependency
from ..glam.cli import glam
from ..stripe import stripe_
from ..dependency import dependency
def cli(prog_name=None):

View File

@@ -13,16 +13,16 @@ import click
from google.cloud import bigquery
from google.cloud.exceptions import NotFound
from ..cli.dryrun import dryrun, SKIP
from ..cli.dryrun import SKIP, dryrun
from ..cli.format import format
from ..cli.utils import is_authenticated, is_valid_dir, is_valid_project
from ..format_sql.formatter import reformat
from ..metadata import validate_metadata
from ..metadata.parse_metadata import METADATA_FILE, Metadata
from ..query_scheduling.generate_airflow_dags import get_dags
from ..query_scheduling.dag_collection import DagCollection
from ..query_scheduling.generate_airflow_dags import get_dags
from ..run_query import run
from ..schema import Schema, SCHEMA_FILE
from ..schema import SCHEMA_FILE, Schema
QUERY_NAME_RE = re.compile(r"(?P<dataset>[a-zA-z0-9_]+)\.(?P<name>[a-zA-z0-9_]+)")
SQL_FILE_RE = re.compile(

View File

@@ -7,20 +7,19 @@ datasets. The script can be configured to exclude a list of tables
or to process only a specific list of tables.
"""
import json
import logging
from argparse import ArgumentParser
from datetime import datetime, timedelta
from itertools import groupby
from multiprocessing.pool import ThreadPool
import json
import logging
from google.api_core.exceptions import BadRequest
from google.cloud import bigquery
from bigquery_etl.util import standard_args
from bigquery_etl.util.client_queue import ClientQueue
from bigquery_etl.util.bigquery_id import sql_table_id
from bigquery_etl.util.client_queue import ClientQueue
QUERY_TEMPLATE = """
WITH

View File

@@ -1,9 +1,11 @@
"""Generate documentation for derived datasets."""
import os
import yaml
from pathlib import Path
import yaml
from jinja2 import Environment, FileSystemLoader
from bigquery_etl.dryrun import DryRun
VIEW_FILE = "view.sql"

View File

@@ -1,14 +1,15 @@
"""Generates documentations for provided projects."""
from argparse import ArgumentParser
import os
from pathlib import Path
import re
import shutil
from argparse import ArgumentParser
from pathlib import Path
import yaml
from bigquery_etl.util import standard_args
from bigquery_etl.docs.derived_datasets import generate_derived_dataset_docs
from bigquery_etl.util import standard_args
DEFAULT_PROJECTS_DIRS = ["sql/mozfun/", "sql/moz-fx-data-shared-prod/"]
DOCS_FILE = "README.md"

View File

@@ -1,10 +1,10 @@
"""Validates SQL examples in documentations."""
from argparse import ArgumentParser
import os
from pathlib import Path
import tempfile
import sys
import tempfile
from argparse import ArgumentParser
from pathlib import Path
from bigquery_etl.dryrun import DryRun
from bigquery_etl.routine.parse_routine import read_routine_dir, sub_local_routines

View File

@@ -10,16 +10,16 @@ only dry runs can be performed. In order to reduce risk of CI or local users
accidentally running queries during tests and overwriting production data, we
proxy the queries through the dry run service endpoint.
"""
import glob
import json
import re
import sys
from argparse import ArgumentParser
from enum import Enum
from functools import cached_property
from multiprocessing.pool import Pool
from os.path import basename, dirname, exists
from urllib.request import urlopen, Request
from enum import Enum
import glob
import json
import sys
import re
from urllib.request import Request, urlopen
SKIP = {
# Access Denied

View File

@@ -1,14 +1,14 @@
"""Generate query directories."""
import os
import yaml
from argparse import ArgumentParser
from bigquery_etl.format_sql.formatter import reformat
from dataclasses import dataclass
from jinja2 import Environment, FileSystemLoader
from pathlib import Path
from typing import List, Optional
import yaml
from jinja2 import Environment, FileSystemLoader
from bigquery_etl.format_sql.formatter import reformat
TEMPLATED_FILES = {
"init.sql",

View File

@@ -1,14 +1,13 @@
"""Format SQL."""
from argparse import ArgumentParser
import glob
import os
import os.path
import sys
from argparse import ArgumentParser
from bigquery_etl.format_sql.formatter import reformat # noqa E402
SKIP = {
# files that existed before we started to enforce this script
*glob.glob("bigquery_etl/glam/templates/*.sql"),

View File

@@ -1,13 +1,13 @@
"""Format SQL."""
from dataclasses import replace
import re
from dataclasses import replace
from .tokenizer import (
AliasSeparator,
BlockEndKeyword,
BlockKeyword,
BlockStartKeyword,
BlockEndKeyword,
ClosingBracket,
Comment,
ExpressionSeparator,
@@ -15,8 +15,8 @@ from .tokenizer import (
Identifier,
Literal,
NewlineKeyword,
Operator,
OpeningBracket,
Operator,
ReservedKeyword,
SpaceBeforeBracketKeyword,
StatementSeparator,

View File

@@ -1,8 +1,8 @@
"""Tokenize SQL so that it can be formatted."""
from dataclasses import dataclass, field
import re
import sys
from dataclasses import dataclass, field
# These words get their own line followed by increased indent
TOP_LEVEL_KEYWORDS = [

View File

@@ -1,12 +1,12 @@
"""Run a query with a series of @submission_date values."""
import os.path
import subprocess
import sys
from argparse import ArgumentParser
from datetime import datetime, timedelta
from functools import partial
from multiprocessing import Pool
import os.path
import subprocess
import sys
def fromisoformat(string):

View File

@@ -6,14 +6,14 @@ from functools import partial
from multiprocessing.pool import ThreadPool
from google.cloud import bigquery
from google.cloud.bigquery import WriteDisposition, ScalarQueryParameter
from google.cloud.bigquery import ScalarQueryParameter, WriteDisposition
from bigquery_etl.glean_usage.common import (
list_baseline_tables,
referenced_table_exists,
render,
table_names_from_baseline,
write_sql,
referenced_table_exists,
)
from bigquery_etl.util import standard_args # noqa E402

View File

@@ -7,14 +7,14 @@ from functools import partial
from multiprocessing.pool import ThreadPool
from google.cloud import bigquery
from google.cloud.bigquery import WriteDisposition, ScalarQueryParameter
from google.cloud.bigquery import ScalarQueryParameter, WriteDisposition
from bigquery_etl.glean_usage.common import (
list_baseline_tables,
referenced_table_exists,
render,
table_names_from_baseline,
write_sql,
referenced_table_exists,
)
from bigquery_etl.util import standard_args # noqa E402

View File

@@ -17,7 +17,6 @@ import argparse
import json
import sys
TEMPLATE = """
{table_create_mode_string} `{project}:{dataset}.{table_name}` (
{columns_string}

View File

@@ -1,13 +1,14 @@
"""Parsing of metadata yaml files."""
from google.cloud import bigquery
import enum
import re
import yaml
import os
import re
from typing import Dict, List, Optional
import attr
import cattr
from typing import List, Optional, Dict
import yaml
from google.cloud import bigquery
from bigquery_etl.query_scheduling.utils import is_email

View File

@@ -1,17 +1,16 @@
"""Update metadata of BigQuery tables and views."""
from argparse import ArgumentParser
import logging
import os
import yaml
from argparse import ArgumentParser
import yaml
from google.cloud import bigquery
from .parse_metadata import Metadata
from ..util import standard_args
from ..util.bigquery_tables import get_tables_matching_patterns
from ..util.common import project_dirs
from .parse_metadata import Metadata
METADATA_FILE = "metadata.yaml"
DEFAULT_PATTERN = "moz-fx-data-shared-prod:*.*"

View File

@@ -1,13 +1,13 @@
"""Validate metadata files."""
from argparse import ArgumentParser
import logging
import os
import sys
from argparse import ArgumentParser
from .parse_metadata import Metadata
from ..util import standard_args
from ..util.common import project_dirs
from .parse_metadata import Metadata
parser = ArgumentParser(description=__doc__)

View File

@@ -1,20 +1,19 @@
"""Generate and upload JSON metadata files for public datasets on GCS."""
from argparse import ArgumentParser
import json
import logging
import os
import re
import smart_open
from google.cloud import storage
from argparse import ArgumentParser
from itertools import groupby
import smart_open
from google.cloud import storage
from bigquery_etl.metadata.parse_metadata import Metadata
from bigquery_etl.util import standard_args
from bigquery_etl.util.common import project_dirs
DEFAULT_BUCKET = "mozilla-public-data-http"
DEFAULT_API_VERSION = "v1"
DEFAULT_ENDPOINT = "https://public-data.telemetry.mozilla.org/"

View File

@@ -1,21 +1,20 @@
"""Machinery for exporting query results as JSON to Cloud storage."""
from argparse import ArgumentParser
from google.cloud import storage
from google.cloud import bigquery
import datetime
import json
import smart_open
import logging
import sys
import re
import random
import re
import string
import sys
from argparse import ArgumentParser
import smart_open
from google.cloud import bigquery, storage
from bigquery_etl.metadata.parse_metadata import Metadata
from bigquery_etl.metadata.validate_metadata import validate_public_data
SUBMISSION_DATE_RE = re.compile(r"^submission_date:DATE:(\d\d\d\d-\d\d-\d\d)$")
QUERY_FILE_RE = re.compile(r"^.*/([a-zA-Z0-9_]+)/([a-zA-Z0-9_]+)_(v[0-9]+)/query\.sql$")
MAX_JSON_SIZE = 1 * 1024 * 1024 * 1024 # 1 GB as max. size of exported JSON files

View File

@@ -11,8 +11,8 @@ from argparse import ArgumentParser
from google.cloud import bigquery
from ..util.bigquery_tables import get_tables_matching_patterns
from ..util import standard_args
from ..util.bigquery_tables import get_tables_matching_patterns
DEFAULT_PATTERN = "mozilla-public-data:*.*"

View File

@@ -1,8 +1,9 @@
"""Publish csv files as BigQuery tables."""
import os
import json
import os
from argparse import ArgumentParser
from google.cloud import bigquery
from bigquery_etl.util.common import project_dirs

View File

@@ -1,21 +1,21 @@
"""PyTest plugin for running udf tests."""
from google.api_core.exceptions import BadRequest
from google.cloud import bigquery
import os
import pytest
import re
from .sql_test import dataset
import pytest
from google.api_core.exceptions import BadRequest
from google.cloud import bigquery
from bigquery_etl.util.common import project_dirs
from ..routine.parse_routine import (
UDF_FILE,
PROCEDURE_FILE,
parse_routines,
GENERIC_DATASET,
PROCEDURE_FILE,
UDF_FILE,
parse_routines,
)
from .sql_test import dataset
_parsed_routines = None

View File

@@ -1,7 +1,7 @@
"""PyTest plugin for collecting docstyle tests on python scripts."""
from pytest_pydocstyle import _patch_sys_argv, File
import pydocstyle
from pytest_pydocstyle import File, _patch_sys_argv
from . import is_python_executable

View File

@@ -1,6 +1,6 @@
"""PyTest plugin for collecting mypy tests on python scripts."""
from pytest_mypy import mypy_argv, MypyFile
from pytest_mypy import MypyFile, mypy_argv
from . import is_python_executable

View File

@@ -1,25 +1,25 @@
"""PyTest plugin for running sql tests."""
from typing import Dict
import json
import os.path
from typing import Dict
import pytest
from google.api_core.exceptions import BadRequest
from google.cloud import bigquery
import pytest
from ..routine import parse_routine
from .sql_test import (
TABLE_EXTENSIONS,
Table,
coerce_result,
dataset,
get_query_params,
load,
load_tables,
load_views,
read,
Table,
TABLE_EXTENSIONS,
print_and_test,
read,
)
expect_names = {f"expect.{ext}" for ext in ("yaml", "json", "ndjson")}

View File

@@ -3,12 +3,15 @@
# file, you can obtain one at http://mozilla.org/MPL/2.0/.
"""Utilities."""
import codecs
import json
import os
import os.path
import pprint
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import date, datetime
from decimal import Decimal
from google.api_core.exceptions import BadRequest, NotFound
from google.cloud import bigquery
from io import BytesIO, TextIOWrapper
from typing import (
Any,
@@ -22,12 +25,9 @@ from typing import (
Union,
)
import codecs
import json
import os
import os.path
import pprint
import yaml
from google.api_core.exceptions import BadRequest, NotFound
from google.cloud import bigquery
QueryParameter = Union[
bigquery.ArrayQueryParameter,

View File

@@ -1,22 +1,22 @@
"""Represents an Airflow DAG."""
from typing import List, Optional
import attr
import cattr
from jinja2 import Environment, PackageLoader
from typing import List, Optional
from bigquery_etl.query_scheduling.task import Task, TaskRef
from bigquery_etl.query_scheduling import formatters
from bigquery_etl.query_scheduling.task import Task, TaskRef
from bigquery_etl.query_scheduling.utils import (
is_timedelta_string,
is_date_string,
is_email,
is_schedule_interval,
is_timedelta_string,
is_valid_dag_name,
schedule_interval_delta,
)
AIRFLOW_DAG_TEMPLATE = "airflow_dag.j2"
PUBLIC_DATA_JSON_DAG_TEMPLATE = "public_data_json_airflow_dag.j2"
PUBLIC_DATA_JSON_DAG = "bqetl_public_data_json"

View File

@@ -1,14 +1,15 @@
"""Represents a collection of configured Airflow DAGs."""
from black import format_file_contents, FileMode
from functools import partial
from itertools import groupby
from multiprocessing.pool import ThreadPool
from operator import attrgetter
from pathlib import Path
import yaml
from black import FileMode, format_file_contents
from bigquery_etl.query_scheduling.dag import Dag, InvalidDag, PublicDataJsonDag
from functools import partial
from multiprocessing.pool import ThreadPool
class DagCollection:

View File

@@ -1,7 +1,7 @@
"""This file contains custom filters for formatting data types in Jinja templates."""
from datetime import datetime, timedelta
import re
from datetime import datetime, timedelta
from bigquery_etl import query_scheduling

View File

@@ -3,13 +3,13 @@
import logging
import os
from argparse import ArgumentParser
from ..util import standard_args
from pathlib import Path
from bigquery_etl.query_scheduling.dag_collection import DagCollection
from bigquery_etl.query_scheduling.task import Task, UnscheduledTask
from bigquery_etl.util.common import project_dirs
from ..util import standard_args
DEFAULT_DAGS_FILE = "dags.yaml"
QUERY_FILE = "query.sql"

View File

@@ -1,27 +1,26 @@
"""Represents a scheduled Airflow task."""
import attr
import cattr
from fnmatch import fnmatchcase
import logging
import os
import re
import logging
from fnmatch import fnmatchcase
from pathlib import Path
from typing import List, Optional, Tuple
import attr
import cattr
from bigquery_etl.dependency import extract_table_references_without_views
from bigquery_etl.metadata.parse_metadata import Metadata
from bigquery_etl.query_scheduling.utils import (
is_date_string,
is_email,
is_valid_dag_name,
is_timedelta_string,
schedule_interval_delta,
is_schedule_interval,
is_timedelta_string,
is_valid_dag_name,
schedule_interval_delta,
)
AIRFLOW_TASK_TEMPLATE = "airflow_task.j2"
QUERY_FILE_RE = re.compile(
r"^(?:.*/)?([a-zA-Z0-9_-]+)/([a-zA-Z0-9_]+)/"

View File

@@ -1,7 +1,7 @@
"""Utility functions for scheduling queries."""
from datetime import datetime
import re
from datetime import datetime
def is_timedelta_string(s):

View File

@@ -5,17 +5,17 @@ This should eventually be refactored to a more general library for
parsing UDF dependencies in queries as well.
"""
import attr
import re
import os
import re
from pathlib import Path
from typing import List
import attr
import sqlparse
import yaml
from bigquery_etl.metadata.parse_metadata import METADATA_FILE
UDF_CHAR = "[a-zA-z0-9_]"
UDF_FILE = "udf.sql"
PROCEDURE_FILE = "stored_procedure.sql"

View File

@@ -1,19 +1,15 @@
"""Publish UDFs and resources to the public mozfun GCP project."""
from argparse import ArgumentParser
import json
import os
import re
from argparse import ArgumentParser
from google.cloud import bigquery
from google.cloud import storage
from google.cloud import bigquery, storage
from bigquery_etl.routine.parse_routine import accumulate_dependencies, read_routine_dir
from bigquery_etl.util import standard_args
from bigquery_etl.util.common import project_dirs
from bigquery_etl.routine.parse_routine import (
read_routine_dir,
accumulate_dependencies,
)
DEFAULT_UDF_DEPENDENCY_DIR = "udf_js_lib/"
DEFAULT_GCS_BUCKET = "moz-fx-data-prod-bigquery-etl"

View File

@@ -11,9 +11,9 @@ complexity limit, and then join those results to generate a final wide result.
The query files must be in the same directory and all be prefixed with `part`.
"""
import os.path
from argparse import ArgumentParser
from multiprocessing.pool import ThreadPool
import os.path
from google.cloud import bigquery

View File

@@ -5,12 +5,12 @@ When executing a query associated metadata is parsed to determine whether
results should be written to a corresponding public dataset.
"""
from argparse import ArgumentParser
import re
import subprocess
import sys
import yaml
from argparse import ArgumentParser
import yaml
from bigquery_etl.metadata.parse_metadata import Metadata
from bigquery_etl.metadata.validate_metadata import validate_public_data

View File

@@ -1,12 +1,12 @@
"""Query schema."""
import attr
import json
import yaml
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Dict, Any
from typing import Any, Dict
import attr
import yaml
from bigquery_etl.dryrun import DryRun

View File

@@ -1,22 +1,21 @@
"""Forward deletion requests from BigQuery to Amplitude."""
import json
import logging
import warnings
from argparse import ArgumentParser
from datetime import datetime
from multiprocessing.pool import ThreadPool
from time import time, sleep
from os import environ
import warnings
import logging
import json
from time import sleep, time
import requests
from google.cloud import bigquery
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import requests
from ..util import standard_args
AMPLITUDE_API_KEY = "AMPLITUDE_API_KEY"
AMPLITUDE_SECRET_KEY = "AMPLITUDE_SECRET_KEY"

View File

@@ -2,17 +2,16 @@
"""Meta data about tables and ids for self serve deletion."""
import logging
import re
from dataclasses import dataclass
from functools import partial
from typing import Tuple, Union
import logging
import re
from google.cloud import bigquery
from ..util.bigquery_id import qualified_table_id
SHARED_PROD = "moz-fx-data-shared-prod"
GLEAN_SCHEMA_ID = "glean_ping_1"

View File

@@ -2,18 +2,17 @@
"""Report estimated cost to run shredder."""
import warnings
from argparse import ArgumentParser
from datetime import datetime, timedelta
from math import ceil
from textwrap import dedent
import warnings
from google.cloud import bigquery
from .config import DELETE_TARGETS
from ..util.bigquery_id import sql_table_id
from ..util import standard_args
from ..util.bigquery_id import sql_table_id
from .config import DELETE_TARGETS
JOBS_QUERY = """
SELECT

View File

@@ -1,5 +1,7 @@
"""Delete user data from long term storage."""
import logging
import warnings
from argparse import ArgumentParser
from collections import defaultdict
from dataclasses import dataclass, replace
@@ -10,8 +12,6 @@ from multiprocessing.pool import ThreadPool
from operator import attrgetter
from textwrap import dedent
from typing import Callable, Iterable, Optional, Tuple
import logging
import warnings
from google.api_core.exceptions import NotFound
from google.cloud import bigquery
@@ -22,14 +22,13 @@ from ..util.bigquery_id import FULL_JOB_ID_RE, full_job_id, sql_table_id
from ..util.client_queue import ClientQueue
from ..util.exceptions import BigQueryInsertError
from .config import (
DeleteSource,
DELETE_TARGETS,
find_glean_targets,
DeleteSource,
find_experiment_analysis_targets,
find_glean_targets,
find_pioneer_targets,
)
NULL_PARTITION_ID = "__NULL__"
OUTSIDE_RANGE_PARTITION_ID = "__UNPARTITIONED__"

View File

@@ -2,15 +2,14 @@
"""Search for tables and user ids that may be eligible for self serve deletion."""
from argparse import ArgumentParser
import re
import warnings
from argparse import ArgumentParser
from google.cloud import bigquery
from .config import SHARED_PROD, SEARCH_IGNORE_TABLES, SEARCH_IGNORE_FIELDS
from ..util import standard_args
from .config import SEARCH_IGNORE_FIELDS, SEARCH_IGNORE_TABLES, SHARED_PROD
parser = ArgumentParser(description=__doc__)
parser.add_argument(

View File

@@ -1,19 +1,19 @@
"""Import Stripe data into BigQuery."""
from datetime import datetime, timedelta, timezone
from hashlib import sha256
from tempfile import TemporaryFile
from typing import Any, Dict, IO, List, Optional, Type
import os.path
import re
import sys
import ujson
import warnings
from datetime import datetime, timedelta, timezone
from hashlib import sha256
from tempfile import TemporaryFile
from typing import IO, Any, Dict, List, Optional, Type
from google.cloud import bigquery
from stripe.api_resources.abstract import ListableAPIResource
import click
import stripe
import ujson
from google.cloud import bigquery
from stripe.api_resources.abstract import ListableAPIResource
# event data types with separate events and a defined schema
EVENT_DATA_TYPES = (

View File

@@ -1,8 +1,8 @@
"""Queue for balancing jobs across billing projects."""
import asyncio
from contextlib import contextmanager
from queue import Queue
import asyncio
from google.cloud import bigquery

View File

@@ -3,7 +3,6 @@ import os
import re
from typing import List
# Search for all camelCase situations in reverse with arbitrary lookaheads.
REV_WORD_BOUND_PAT = re.compile(
r"""

View File

@@ -1,12 +1,12 @@
"""Standard definitions for reusable script arguments."""
import fnmatch
import logging
import re
import warnings
from argparse import Action
from functools import partial
from uuid import uuid4
import fnmatch
import re
import logging
import warnings
from google.cloud import bigquery

View File

@@ -9,12 +9,12 @@ present in the target directory, which allows manual overrides of views by
checking them into the sql/ tree of the default branch of the repository.
"""
import logging
import tempfile
from argparse import ArgumentParser
import json
import urllib.request
import logging
import tarfile
import tempfile
import urllib.request
from argparse import ArgumentParser
from dataclasses import dataclass
from functools import partial
from io import BytesIO
@@ -27,7 +27,6 @@ from bigquery_etl.dryrun import DryRun
from bigquery_etl.format_sql.formatter import reformat
from bigquery_etl.util import standard_args
SCHEMAS_URI = (
"https://github.com/mozilla-services/mozilla-pipeline-schemas"
"/archive/generated-schemas.tar.gz"

View File

@@ -1,11 +1,12 @@
"""Validates view definitions."""
from argparse import ArgumentParser
import glob
import sys
from argparse import ArgumentParser
from multiprocessing.pool import Pool
from pathlib import Path
import sqlparse
import sys
from bigquery_etl.dependency import extract_table_references
from bigquery_etl.util import standard_args

View File

@@ -1,14 +1,13 @@
"""PyTest configuration."""
from google.cloud import bigquery
from google.cloud import storage
from pathlib import Path
import os
import pytest
import random
import string
import subprocess
from pathlib import Path
import pytest
from google.cloud import bigquery, storage
TEST_BUCKET = "bigquery-etl-integration-test-bucket"

View File

@@ -25,3 +25,4 @@ yamllint==1.26.0
pip-tools==5.5.0
pyjnius==1.3.0
pre-commit==2.10.1
pytest-isort==1.3.0
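Adding pytest-isort means the test suite itself verifies import order, so CI catches unsorted files even when the pre-commit hook is skipped. A hedged sketch of driving the check (the --isort flag follows the plugin's documentation; exact 1.3.0 behavior is assumed here):

import subprocess

# Each collected Python file gains an isort check item; pytest exits
# non-zero if any file's imports are unsorted.
subprocess.run(["pytest", "--isort", "bigquery_etl/"], check=True)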

View File

@@ -298,6 +298,10 @@ iniconfig==1.0.1 \
--hash=sha256:80cf40c597eb564e86346103f609d74efce0f6b4d4f30ec8ce9e2c26411ba437 \
--hash=sha256:e5f92f89355a67de0595932a6c6c02ab4afddc6fcdc0bfc5becd0d60884d3f69
# via pytest
isort==5.7.0 \
--hash=sha256:c729845434366216d320e936b8ad6f9d681aab72dc7cbc2d51bedc3582f3ad1e \
--hash=sha256:fff4f0c04e1825522ce6949973e83110a6e907750cd92d128b0d14aaaadbffdc
# via pytest-isort
jinja2==2.11.3 \
--hash=sha256:03e47ad063331dd6a3f04a43eddca8a966a26ba0c5b7207a9a9e4e08f1b29419 \
--hash=sha256:a6d58433de0ae800347cab1fa3043cebbabe8baa9d29e668f1c768cb87a333c6
@@ -577,6 +581,10 @@ pytest-forked==1.3.0 \
--hash=sha256:6aa9ac7e00ad1a539c41bec6d21011332de671e938c7637378ec9710204e37ca \
--hash=sha256:dc4147784048e70ef5d437951728825a131b81714b398d5d52f17c7c144d8815
# via pytest-xdist
pytest-isort==1.3.0 \
--hash=sha256:074255ad393088a2daee6ca7f2305b7b86358ff632f62302896d8d4b2b339107 \
--hash=sha256:46a12331a701e2f21d48548b2828c8b0a7956dbf1cd5347163f537deb24332dd
# via -r requirements.in
pytest-mypy==0.8.0 \
--hash=sha256:63d418a4fea7d598ac40b659723c00804d16a251d90a5cfbca213eeba5aaf01c \
--hash=sha256:8d2112972c1debf087943f48963a0daf04f3424840aea0cf437cc97053b1b0ef

View File

@@ -2,15 +2,15 @@
"""Exports experiment monitoring data to GCS as JSON."""
import random
import string
from argparse import ArgumentParser
from datetime import datetime, timedelta
from functools import partial
from google.cloud import storage
from google.cloud import bigquery
from multiprocessing import Pool
import random
import smart_open
import string
from google.cloud import bigquery, storage
parser = ArgumentParser(description=__doc__)
parser.add_argument(

View File

@@ -2,11 +2,11 @@
"""Read a table from BigQuery and write it as parquet."""
from argparse import ArgumentParser
from textwrap import dedent
import json
import re
import sys
from argparse import ArgumentParser
from textwrap import dedent
try:
from google.cloud import bigquery

View File

@@ -1,4 +1,5 @@
from setuptools import setup, find_namespace_packages
from setuptools import find_namespace_packages, setup
def get_version():
version = {}

View File

@@ -2,18 +2,19 @@
"""Import experiments from Experimenter via the Experimenter API."""
from argparse import ArgumentParser
from google.cloud import bigquery
import requests
import attr
import cattr
import datetime
import json
import pytz
import sys
import time
from argparse import ArgumentParser
from typing import List, Optional
import attr
import cattr
import pytz
import requests
from google.cloud import bigquery
EXPERIMENTER_API_URL_V1 = (
"https://experimenter.services.mozilla.com/api/v1/experiments/"
)

View File

@@ -4,10 +4,11 @@
from argparse import ArgumentParser
from fnmatch import fnmatchcase
from google.cloud import bigquery
from functools import partial
from multiprocessing.pool import ThreadPool
from google.cloud import bigquery
parser = ArgumentParser(description=__doc__)
parser.add_argument("--date", required=True) # expect string with format yyyy-mm-dd
parser.add_argument("--project", default="moz-fx-data-shared-prod")

View File

@@ -4,9 +4,10 @@
from argparse import ArgumentParser
from fnmatch import fnmatchcase
from google.cloud import bigquery
from pathlib import Path
from google.cloud import bigquery
parser = ArgumentParser(description=__doc__)
parser.add_argument("--date", required=True) # expect string with format yyyy-mm-dd
parser.add_argument("--project", default="moz-fx-data-shared-prod")

View File

@@ -3,9 +3,10 @@
"""Determine cost of previously scheduled bigquery-etl queries."""
from argparse import ArgumentParser
from google.cloud import bigquery
from pathlib import Path
from google.cloud import bigquery
DEFAULT_PROJECTS = [
"moz-fx-data-shared-prod",
"moz-fx-data-derived-datasets",

View File

@@ -3,10 +3,11 @@
"""Determine column sizes by performing dry runs."""
from argparse import ArgumentParser
from google.cloud import bigquery
from functools import partial
from multiprocessing.pool import ThreadPool
from google.cloud import bigquery
parser = ArgumentParser(description=__doc__)
parser.add_argument("--date", required=True) # expect string with format yyyy-mm-dd
parser.add_argument("--project", default="moz-fx-data-shared-prod")

View File

@@ -4,10 +4,11 @@
from argparse import ArgumentParser
from fnmatch import fnmatchcase
from google.cloud import bigquery
from functools import partial
from multiprocessing.pool import ThreadPool
from google.cloud import bigquery
parser = ArgumentParser(description=__doc__)
parser.add_argument("--date", required=True) # expect string with format yyyy-mm-dd
parser.add_argument("--project", default="moz-fx-data-shared-prod")

View File

@@ -2,11 +2,12 @@
"""Generate view to unnest parquet-format list and map fields."""
from argparse import ArgumentParser
from google.cloud import bigquery
from textwrap import dedent
import json
import sys
from argparse import ArgumentParser
from textwrap import dedent
from google.cloud import bigquery
def qualify(table, dataset, project):

View File

@@ -1,9 +1,9 @@
#!/usr/bin/env python3
"""clients_daily_histogram_aggregates query generator."""
import sys
import argparse
import gzip
import json
import argparse
import sys
import textwrap
import urllib.request
from pathlib import Path
@@ -14,7 +14,6 @@ from google.cloud import bigquery
sys.path.append(str(Path(__file__).parent.parent.parent.resolve()))
from bigquery_etl.format_sql.formatter import reformat
PROBE_INFO_SERVICE = (
"https://probeinfo.telemetry.mozilla.org/firefox/all/main/all_probes"
)

View File

@@ -1,11 +1,11 @@
#!/usr/bin/env python3
"""clients_daily_scalar_aggregates query generator."""
import sys
import json
import gzip
import argparse
import textwrap
import gzip
import json
import subprocess
import sys
import textwrap
import urllib.request
from pathlib import Path
from time import sleep
@@ -14,7 +14,6 @@ sys.path.append(str(Path(__file__).parent.parent.parent.resolve()))
from bigquery_etl.format_sql.formatter import reformat
from bigquery_etl.util.common import snake_case
PROBE_INFO_SERVICE = (
"https://probeinfo.telemetry.mozilla.org/firefox/all/main/all_probes"
)

View File

@@ -2,9 +2,9 @@
"""Get experiment list for a given date from the recipe server."""
from argparse import ArgumentParser
import json
import time
from argparse import ArgumentParser
import requests

View File

@@ -2,15 +2,15 @@
"""Import data from daily attitudes heartbeat survey into BigQuery."""
from argparse import ArgumentParser
import datetime as dt
from google.cloud import bigquery
import itertools
import pytz
import re
import requests
from argparse import ArgumentParser
from time import sleep
import pytz
import requests
from google.cloud import bigquery
parser = ArgumentParser(description=__doc__)
parser.add_argument("--date", required=True)

View File

@@ -3,8 +3,8 @@
import itertools
import json
import sys
import os.path
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).parent.parent.resolve()))

View File

@@ -1,10 +1,11 @@
import os
import pytest
from click.testing import CliRunner
from pathlib import Path
import yaml
from bigquery_etl.cli.dag import info, create, remove
import pytest
import yaml
from click.testing import CliRunner
from bigquery_etl.cli.dag import create, info, remove
TEST_DIR = Path(__file__).parent.parent

View File

@@ -1,6 +1,7 @@
from click.testing import CliRunner
import os
import pytest
from click.testing import CliRunner
from bigquery_etl.dependency import show as dependency_show

View File

@@ -1,6 +1,7 @@
from click.testing import CliRunner
import os
import pytest
from click.testing import CliRunner
from bigquery_etl.cli.format import format as sql_format

View File

@@ -1,13 +1,14 @@
import os
import pytest
from click.testing import CliRunner
import yaml
from click.testing import CliRunner
from bigquery_etl.cli.query import (
create,
schedule,
info,
_queries_matching_name_pattern,
create,
info,
schedule,
)

View File

@@ -1,7 +1,8 @@
import os
import pytest
from click.testing import CliRunner
import yaml
from click.testing import CliRunner
from bigquery_etl.cli.routine import create, info, rename

View File

@@ -1,14 +1,15 @@
from pathlib import Path
import pytest
from click.exceptions import BadParameter
from bigquery_etl.cli.utils import (
is_authenticated,
is_valid_dir,
is_valid_file,
is_authenticated,
is_valid_project,
)
TEST_DIR = Path(__file__).parent.parent

View File

@@ -1,5 +1,6 @@
import os
from pathlib import Path
import pytest
from bigquery_etl.docs.generate_docs import load_with_examples

View File

@@ -1,8 +1,8 @@
import os
from bigquery_etl.events_daily.generate_queries import get_query_dirs, TemplatedDir
from pathlib import Path
from bigquery_etl.events_daily.generate_queries import TemplatedDir, get_query_dirs
BASE_DIR = Path(os.path.dirname(__file__)).parent

View File

@@ -1,9 +1,9 @@
import os
import pytest
from bigquery_etl.events_daily.generate_queries import TemplatedDir, Template
from pathlib import Path
import pytest
from bigquery_etl.events_daily.generate_queries import Template, TemplatedDir
BASE_DIR = Path(os.path.dirname(__file__)).parent

View File

@@ -1,6 +1,7 @@
import pytest
from pathlib import Path
import pytest
from bigquery_etl.metadata.parse_metadata import Metadata
TEST_DIR = Path(__file__).parent.parent

View File

@@ -1,14 +1,13 @@
import json
from datetime import datetime
from pathlib import Path
from unittest.mock import MagicMock, Mock, call
import pytest
import smart_open
from pathlib import Path
from datetime import datetime
from unittest.mock import call, Mock, MagicMock
import bigquery_etl.public_data.publish_gcs_metadata as pgm
TEST_DIR = Path(__file__).parent.parent

View File

@@ -1,12 +1,12 @@
import json
from pathlib import Path
from unittest.mock import MagicMock, Mock, call
import pytest
import smart_open
from pathlib import Path
from unittest.mock import call, Mock, MagicMock
from bigquery_etl.public_data.publish_json import JsonPublisher
TEST_DIR = Path(__file__).parent.parent

View File

@@ -1,11 +1,10 @@
import json
import pytest
import subprocess
from pathlib import Path
from datetime import datetime
from google.cloud import bigquery
from pathlib import Path
import pytest
from google.cloud import bigquery
TEST_DIR = Path(__file__).parent.parent

View File

@@ -1,15 +1,16 @@
from pathlib import Path
import pytest
from bigquery_etl.metadata.parse_metadata import Metadata
from bigquery_etl.query_scheduling.dag import (
Dag,
DagParseException,
DagDefaultArgs,
DagParseException,
PublicDataJsonDag,
)
from bigquery_etl.metadata.parse_metadata import Metadata
from bigquery_etl.query_scheduling.task import Task
from bigquery_etl.query_scheduling.dag_collection import DagCollection
from bigquery_etl.query_scheduling.task import Task
TEST_DIR = Path(__file__).parent.parent

View File

@@ -1,11 +1,12 @@
import os
from pathlib import Path
import pytest
from bigquery_etl.query_scheduling.dag_collection import DagCollection
from bigquery_etl.query_scheduling.dag import InvalidDag, DagParseException
from bigquery_etl.query_scheduling.task import Task
from bigquery_etl.metadata.parse_metadata import Metadata
from bigquery_etl.query_scheduling.dag import DagParseException, InvalidDag
from bigquery_etl.query_scheduling.dag_collection import DagCollection
from bigquery_etl.query_scheduling.task import Task
TEST_DIR = Path(__file__).parent.parent

View File

@@ -1,12 +1,14 @@
import datetime
import pytest
from bigquery_etl.query_scheduling.formatters import (
format_schedule_interval,
format_attr,
format_date,
format_timedelta,
format_optional_string,
format_schedule_interval,
format_timedelta,
)
import datetime
import pytest
class TestFormatters:

View File

@@ -1,16 +1,17 @@
from pathlib import Path
import os
import pytest
from pathlib import Path
from typing import NewType
from bigquery_etl.query_scheduling.task import (
Task,
UnscheduledTask,
TaskParseException,
TaskRef,
)
import pytest
from bigquery_etl.metadata.parse_metadata import Metadata
from bigquery_etl.query_scheduling.dag_collection import DagCollection
from bigquery_etl.query_scheduling.task import (
Task,
TaskParseException,
TaskRef,
UnscheduledTask,
)
TEST_DIR = Path(__file__).parent.parent

View File

@@ -1,9 +1,9 @@
from pathlib import Path
from bigquery_etl.schema import Schema
from textwrap import dedent
import yaml
from bigquery_etl.schema import Schema
TEST_DIR = Path(__file__).parent.parent

View File

@@ -13,9 +13,10 @@ from pathlib import Path
import click
import yaml
from bigquery_etl.glam.utils import run
from google.cloud import bigquery
from bigquery_etl.glam.utils import run
warnings.filterwarnings(
"ignore", "Your application has authenticated using end user credentials"
)

View File

@@ -1,7 +1,7 @@
"""Testing data for query."""
from pathlib import Path
from itertools import product
from pathlib import Path
import yaml

View File

@@ -1,7 +1,7 @@
"""Testing data for query."""
from pathlib import Path
from itertools import product
from pathlib import Path
from uuid import uuid4
import yaml

View File

@@ -1,7 +1,7 @@
"""Testing data for query."""
from itertools import product
from pathlib import Path
from uuid import uuid4
from itertools import product
import yaml

View File

@@ -1,6 +1,6 @@
import textwrap
import subprocess
import sys
import textwrap
import pytest

View File

@@ -1,5 +1,7 @@
import os
import pytest
from bigquery_etl.dryrun import DryRun, Errors

View File

@@ -1,8 +1,9 @@
from pathlib import Path
from google.cloud import bigquery
import os
import pytest
import subprocess
from pathlib import Path
import pytest
from google.cloud import bigquery
ENTRYPOINT_SCRIPT = Path(__file__).parent.parent / "script" / "entrypoint"

View File

@@ -1,5 +1,6 @@
from unittest.mock import patch
import os
from unittest.mock import patch
import pytest
import yaml

View File

@@ -1,7 +1,7 @@
import os
import csv
import os
from pathlib import Path
from bigquery_etl.util.common import snake_case

View File

@@ -5,13 +5,15 @@ The vectors are located in tests/validation/data/hmac_sha256_validation.json.
"""
import json
import pytest
from google.cloud import bigquery
from bigquery_etl.routine.parse_routine import (
read_routine_dir,
RawRoutine,
read_routine_dir,
routine_tests_sql,
)
from google.cloud import bigquery
validation_data_file = "tests/validation/data/hmac_sha256_validation.json"