Automatically sort python imports (#1840)
Parent: 744600069d
Commit: a190e18264

@@ -0,0 +1,3 @@
+[settings]
+profile = black
+skip = dags,script/legacy,target,venv

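The `[settings]` block above is the isort configuration added by this commit (the file name is not preserved in this view). `profile = black` makes isort's output compatible with black's formatting, and `skip` excludes generated and vendored directories. As a rough sketch of the effect on a typical module (hypothetical imports, not a file from this commit) — straight `import x` statements come before `from x import y`, and stdlib, third-party, and first-party groups are separated by blank lines:

    # before isort
    from argparse import ArgumentParser
    import yaml
    import os
    from bigquery_etl.util import standard_args

    # after isort (profile = black)
    import os
    from argparse import ArgumentParser

    import yaml

    from bigquery_etl.util import standard_args
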
@@ -18,3 +18,7 @@ repos:
     hooks:
       - id: yamllint
         args: [-c, .yamllint.yaml, .]
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.7.0
+    hooks:
+      - id: isort

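With this hook registered, `pre-commit install` arranges for isort to run against staged Python files on every commit, and `pre-commit run isort --all-files` applies the same ordering across the whole tree — which is how a sweeping mechanical change like the one below is typically produced.
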
@@ -5,7 +5,7 @@ from argparse import ArgumentParser
 from functools import partial
 from multiprocessing.pool import ThreadPool
 
-from google.api_core.exceptions import NotFound, Forbidden
+from google.api_core.exceptions import Forbidden, NotFound
 from google.cloud import bigquery
 
 from bigquery_etl.util import standard_args  # noqa E402

@@ -14,9 +14,9 @@ from ..cli.format import format
 from ..cli.query import query
 from ..cli.routine import mozfun, routine
 from ..cli.view import view
+from ..dependency import dependency
 from ..glam.cli import glam
 from ..stripe import stripe_
-from ..dependency import dependency
 
 
 def cli(prog_name=None):

@@ -13,16 +13,16 @@ import click
 from google.cloud import bigquery
 from google.cloud.exceptions import NotFound
 
-from ..cli.dryrun import dryrun, SKIP
+from ..cli.dryrun import SKIP, dryrun
 from ..cli.format import format
 from ..cli.utils import is_authenticated, is_valid_dir, is_valid_project
 from ..format_sql.formatter import reformat
 from ..metadata import validate_metadata
 from ..metadata.parse_metadata import METADATA_FILE, Metadata
-from ..query_scheduling.generate_airflow_dags import get_dags
 from ..query_scheduling.dag_collection import DagCollection
+from ..query_scheduling.generate_airflow_dags import get_dags
 from ..run_query import run
-from ..schema import Schema, SCHEMA_FILE
+from ..schema import SCHEMA_FILE, Schema
 
 QUERY_NAME_RE = re.compile(r"(?P<dataset>[a-zA-z0-9_]+)\.(?P<name>[a-zA-z0-9_]+)")
 SQL_FILE_RE = re.compile(

@@ -7,20 +7,19 @@ datasets. The script can be configured to exclude a list of tables
 or to process only a specific list of tables.
 """
 
+import json
+import logging
 from argparse import ArgumentParser
 from datetime import datetime, timedelta
 from itertools import groupby
 from multiprocessing.pool import ThreadPool
-import json
-import logging
 
 from google.api_core.exceptions import BadRequest
 from google.cloud import bigquery
 
 from bigquery_etl.util import standard_args
-from bigquery_etl.util.client_queue import ClientQueue
 from bigquery_etl.util.bigquery_id import sql_table_id
-
 from bigquery_etl.util.client_queue import ClientQueue
 
 QUERY_TEMPLATE = """
 WITH

@@ -1,9 +1,11 @@
 """Generate documentation for derived datasets."""
 
 import os
-import yaml
 from pathlib import Path
+
+import yaml
 from jinja2 import Environment, FileSystemLoader
 
 from bigquery_etl.dryrun import DryRun
 
 VIEW_FILE = "view.sql"

@@ -1,14 +1,15 @@
 """Generates documentations for provided projects."""
 
-from argparse import ArgumentParser
 import os
-from pathlib import Path
 import re
 import shutil
+from argparse import ArgumentParser
+from pathlib import Path
 
 import yaml
 
-from bigquery_etl.util import standard_args
 from bigquery_etl.docs.derived_datasets import generate_derived_dataset_docs
+from bigquery_etl.util import standard_args
 
 DEFAULT_PROJECTS_DIRS = ["sql/mozfun/", "sql/moz-fx-data-shared-prod/"]
 DOCS_FILE = "README.md"

@@ -1,10 +1,10 @@
 """Validates SQL examples in documentations."""
 
-from argparse import ArgumentParser
 import os
-from pathlib import Path
-import tempfile
 import sys
+import tempfile
+from argparse import ArgumentParser
+from pathlib import Path
 
 from bigquery_etl.dryrun import DryRun
 from bigquery_etl.routine.parse_routine import read_routine_dir, sub_local_routines

@@ -10,16 +10,16 @@ only dry runs can be performed. In order to reduce risk of CI or local users
 accidentally running queries during tests and overwriting production data, we
 proxy the queries through the dry run service endpoint.
 """
+import glob
+import json
+import re
+import sys
 from argparse import ArgumentParser
+from enum import Enum
 from functools import cached_property
 from multiprocessing.pool import Pool
 from os.path import basename, dirname, exists
-from urllib.request import urlopen, Request
-from enum import Enum
-import glob
-import json
-import sys
-import re
+from urllib.request import Request, urlopen
 
 SKIP = {
     # Access Denied

@@ -1,14 +1,14 @@
 """Generate query directories."""
 import os
-import yaml
 
 from argparse import ArgumentParser
-from bigquery_etl.format_sql.formatter import reformat
 from dataclasses import dataclass
-from jinja2 import Environment, FileSystemLoader
 from pathlib import Path
 from typing import List, Optional
 
+import yaml
+from jinja2 import Environment, FileSystemLoader
+
+from bigquery_etl.format_sql.formatter import reformat
 
 TEMPLATED_FILES = {
     "init.sql",

@@ -1,14 +1,13 @@
 """Format SQL."""
 
-from argparse import ArgumentParser
 import glob
 import os
 import os.path
 import sys
+from argparse import ArgumentParser
 
 from bigquery_etl.format_sql.formatter import reformat  # noqa E402
 
 
 SKIP = {
     # files that existed before we started to enforce this script
     *glob.glob("bigquery_etl/glam/templates/*.sql"),

@@ -1,13 +1,13 @@
 """Format SQL."""
 
-from dataclasses import replace
 import re
+from dataclasses import replace
 
 from .tokenizer import (
     AliasSeparator,
+    BlockEndKeyword,
     BlockKeyword,
     BlockStartKeyword,
-    BlockEndKeyword,
     ClosingBracket,
     Comment,
     ExpressionSeparator,

@@ -15,8 +15,8 @@ from .tokenizer import (
     Identifier,
     Literal,
     NewlineKeyword,
-    Operator,
     OpeningBracket,
+    Operator,
     ReservedKeyword,
     SpaceBeforeBracketKeyword,
     StatementSeparator,

@@ -1,8 +1,8 @@
 """Tokenize SQL so that it can be formatted."""
 
-from dataclasses import dataclass, field
 import re
 import sys
+from dataclasses import dataclass, field
 
 # These words get their own line followed by increased indent
 TOP_LEVEL_KEYWORDS = [

@@ -1,12 +1,12 @@
 """Run a query with a series of @submission_date values."""
 
+import os.path
+import subprocess
+import sys
 from argparse import ArgumentParser
 from datetime import datetime, timedelta
 from functools import partial
 from multiprocessing import Pool
-import os.path
-import subprocess
-import sys
 
 
 def fromisoformat(string):

@@ -6,14 +6,14 @@ from functools import partial
 from multiprocessing.pool import ThreadPool
 
 from google.cloud import bigquery
-from google.cloud.bigquery import WriteDisposition, ScalarQueryParameter
+from google.cloud.bigquery import ScalarQueryParameter, WriteDisposition
 
 from bigquery_etl.glean_usage.common import (
     list_baseline_tables,
+    referenced_table_exists,
     render,
     table_names_from_baseline,
     write_sql,
-    referenced_table_exists,
 )
 from bigquery_etl.util import standard_args  # noqa E402

@@ -7,14 +7,14 @@ from functools import partial
 from multiprocessing.pool import ThreadPool
 
 from google.cloud import bigquery
-from google.cloud.bigquery import WriteDisposition, ScalarQueryParameter
+from google.cloud.bigquery import ScalarQueryParameter, WriteDisposition
 
 from bigquery_etl.glean_usage.common import (
     list_baseline_tables,
+    referenced_table_exists,
     render,
     table_names_from_baseline,
     write_sql,
-    referenced_table_exists,
 )
 from bigquery_etl.util import standard_args  # noqa E402

@@ -17,7 +17,6 @@ import argparse
 import json
 import sys
 
-
 TEMPLATE = """
 {table_create_mode_string} `{project}:{dataset}.{table_name}` (
 {columns_string}

@@ -1,13 +1,14 @@
 """Parsing of metadata yaml files."""
 
-from google.cloud import bigquery
 import enum
-import re
-import yaml
 import os
+import re
+from typing import Dict, List, Optional
 
 import attr
 import cattr
-from typing import List, Optional, Dict
+import yaml
+from google.cloud import bigquery
 
 from bigquery_etl.query_scheduling.utils import is_email
 

@@ -1,17 +1,16 @@
 """Update metadata of BigQuery tables and views."""
 
-from argparse import ArgumentParser
 import logging
 import os
-import yaml
+from argparse import ArgumentParser
 
+import yaml
 from google.cloud import bigquery
 
-from .parse_metadata import Metadata
 from ..util import standard_args
 from ..util.bigquery_tables import get_tables_matching_patterns
 from ..util.common import project_dirs
+from .parse_metadata import Metadata
 
 METADATA_FILE = "metadata.yaml"
 DEFAULT_PATTERN = "moz-fx-data-shared-prod:*.*"

@@ -1,13 +1,13 @@
 """Validate metadata files."""
 
-from argparse import ArgumentParser
 import logging
 import os
 import sys
+from argparse import ArgumentParser
 
-from .parse_metadata import Metadata
 from ..util import standard_args
 from ..util.common import project_dirs
+from .parse_metadata import Metadata
 
 parser = ArgumentParser(description=__doc__)

@@ -1,20 +1,19 @@
 """Generate and upload JSON metadata files for public datasets on GCS."""
 
-from argparse import ArgumentParser
 import json
 import logging
 import os
 import re
-import smart_open
-
-from google.cloud import storage
+from argparse import ArgumentParser
 from itertools import groupby
 
+import smart_open
+from google.cloud import storage
 
 from bigquery_etl.metadata.parse_metadata import Metadata
 from bigquery_etl.util import standard_args
 from bigquery_etl.util.common import project_dirs
 
 
 DEFAULT_BUCKET = "mozilla-public-data-http"
 DEFAULT_API_VERSION = "v1"
 DEFAULT_ENDPOINT = "https://public-data.telemetry.mozilla.org/"

@@ -1,21 +1,20 @@
 """Machinery for exporting query results as JSON to Cloud storage."""
 
-from argparse import ArgumentParser
-from google.cloud import storage
-from google.cloud import bigquery
 import datetime
 import json
-import smart_open
 import logging
-import sys
-import re
 import random
+import re
 import string
+import sys
+from argparse import ArgumentParser
 
+import smart_open
+from google.cloud import bigquery, storage
 
 from bigquery_etl.metadata.parse_metadata import Metadata
 from bigquery_etl.metadata.validate_metadata import validate_public_data
 
 
 SUBMISSION_DATE_RE = re.compile(r"^submission_date:DATE:(\d\d\d\d-\d\d-\d\d)$")
 QUERY_FILE_RE = re.compile(r"^.*/([a-zA-Z0-9_]+)/([a-zA-Z0-9_]+)_(v[0-9]+)/query\.sql$")
 MAX_JSON_SIZE = 1 * 1024 * 1024 * 1024  # 1 GB as max. size of exported JSON files

@@ -11,8 +11,8 @@ from argparse import ArgumentParser
 
 from google.cloud import bigquery
 
-from ..util.bigquery_tables import get_tables_matching_patterns
 from ..util import standard_args
+from ..util.bigquery_tables import get_tables_matching_patterns
 
 DEFAULT_PATTERN = "mozilla-public-data:*.*"
 

@@ -1,8 +1,9 @@
 """Publish csv files as BigQuery tables."""
 
-import os
 import json
+import os
 from argparse import ArgumentParser
 
 from google.cloud import bigquery
 
 from bigquery_etl.util.common import project_dirs

@@ -1,21 +1,21 @@
 """PyTest plugin for running udf tests."""
 
-from google.api_core.exceptions import BadRequest
-from google.cloud import bigquery
 import os
-import pytest
 import re
 
-from .sql_test import dataset
+import pytest
+from google.api_core.exceptions import BadRequest
+from google.cloud import bigquery
 
 from bigquery_etl.util.common import project_dirs
 
 
 from ..routine.parse_routine import (
-    UDF_FILE,
-    PROCEDURE_FILE,
-    parse_routines,
     GENERIC_DATASET,
+    PROCEDURE_FILE,
+    UDF_FILE,
+    parse_routines,
 )
+from .sql_test import dataset
 
 _parsed_routines = None
 

@@ -1,7 +1,7 @@
 """PyTest plugin for collecting docstyle tests on python scripts."""
 
-from pytest_pydocstyle import _patch_sys_argv, File
 import pydocstyle
+from pytest_pydocstyle import File, _patch_sys_argv
 
 from . import is_python_executable
 

@@ -1,6 +1,6 @@
 """PyTest plugin for collecting mypy tests on python scripts."""
 
-from pytest_mypy import mypy_argv, MypyFile
+from pytest_mypy import MypyFile, mypy_argv
 
 from . import is_python_executable
 

@@ -1,25 +1,25 @@
 """PyTest plugin for running sql tests."""
 
-from typing import Dict
 import json
 import os.path
+from typing import Dict
 
+import pytest
 from google.api_core.exceptions import BadRequest
 from google.cloud import bigquery
-import pytest
 
 from ..routine import parse_routine
 from .sql_test import (
+    TABLE_EXTENSIONS,
+    Table,
     coerce_result,
     dataset,
     get_query_params,
     load,
     load_tables,
     load_views,
-    read,
-    Table,
-    TABLE_EXTENSIONS,
     print_and_test,
+    read,
 )
 
 expect_names = {f"expect.{ext}" for ext in ("yaml", "json", "ndjson")}

@@ -3,12 +3,15 @@
 # file, you can obtain one at http://mozilla.org/MPL/2.0/.
 """Utilities."""
 
+import codecs
+import json
+import os
+import os.path
+import pprint
 from contextlib import contextmanager
 from dataclasses import dataclass
 from datetime import date, datetime
 from decimal import Decimal
-from google.api_core.exceptions import BadRequest, NotFound
-from google.cloud import bigquery
 from io import BytesIO, TextIOWrapper
 from typing import (
     Any,

@@ -22,12 +25,9 @@ from typing import (
     Union,
 )
 
-import codecs
-import json
-import os
-import os.path
-import pprint
 import yaml
+from google.api_core.exceptions import BadRequest, NotFound
+from google.cloud import bigquery
 
 QueryParameter = Union[
     bigquery.ArrayQueryParameter,

@@ -1,22 +1,22 @@
 """Represents an Airflow DAG."""
 
+from typing import List, Optional
+
 import attr
 import cattr
 from jinja2 import Environment, PackageLoader
-from typing import List, Optional
 
-from bigquery_etl.query_scheduling.task import Task, TaskRef
 from bigquery_etl.query_scheduling import formatters
+from bigquery_etl.query_scheduling.task import Task, TaskRef
 from bigquery_etl.query_scheduling.utils import (
-    is_timedelta_string,
     is_date_string,
     is_email,
     is_schedule_interval,
+    is_timedelta_string,
     is_valid_dag_name,
     schedule_interval_delta,
 )
 
 
 AIRFLOW_DAG_TEMPLATE = "airflow_dag.j2"
 PUBLIC_DATA_JSON_DAG_TEMPLATE = "public_data_json_airflow_dag.j2"
 PUBLIC_DATA_JSON_DAG = "bqetl_public_data_json"

@@ -1,14 +1,15 @@
 """Represents a collection of configured Airflow DAGs."""
 
-from black import format_file_contents, FileMode
+from functools import partial
 from itertools import groupby
+from multiprocessing.pool import ThreadPool
 from operator import attrgetter
 from pathlib import Path
 
+import yaml
+from black import FileMode, format_file_contents
 
 from bigquery_etl.query_scheduling.dag import Dag, InvalidDag, PublicDataJsonDag
-from functools import partial
-from multiprocessing.pool import ThreadPool
 
 
 class DagCollection:

@@ -1,7 +1,7 @@
 """This file contains custom filters for formatting data types in Jinja templates."""
 
-from datetime import datetime, timedelta
 import re
+from datetime import datetime, timedelta
 
 from bigquery_etl import query_scheduling
 

@@ -3,13 +3,13 @@
 import logging
 import os
 from argparse import ArgumentParser
-from ..util import standard_args
 from pathlib import Path
 
 from bigquery_etl.query_scheduling.dag_collection import DagCollection
 from bigquery_etl.query_scheduling.task import Task, UnscheduledTask
 from bigquery_etl.util.common import project_dirs
 
+from ..util import standard_args
 
 DEFAULT_DAGS_FILE = "dags.yaml"
 QUERY_FILE = "query.sql"

@@ -1,27 +1,26 @@
 """Represents a scheduled Airflow task."""
 
-import attr
-import cattr
-from fnmatch import fnmatchcase
+import logging
 import os
 import re
-import logging
+from fnmatch import fnmatchcase
 from pathlib import Path
 from typing import List, Optional, Tuple
 
+import attr
+import cattr
 
 from bigquery_etl.dependency import extract_table_references_without_views
 from bigquery_etl.metadata.parse_metadata import Metadata
 from bigquery_etl.query_scheduling.utils import (
     is_date_string,
     is_email,
-    is_valid_dag_name,
-    is_timedelta_string,
-    schedule_interval_delta,
     is_schedule_interval,
+    is_timedelta_string,
+    is_valid_dag_name,
+    schedule_interval_delta,
 )
 
 
 AIRFLOW_TASK_TEMPLATE = "airflow_task.j2"
 QUERY_FILE_RE = re.compile(
     r"^(?:.*/)?([a-zA-Z0-9_-]+)/([a-zA-Z0-9_]+)/"

@@ -1,7 +1,7 @@
 """Utility functions for scheduling queries."""
 
-from datetime import datetime
 import re
+from datetime import datetime
 
 
 def is_timedelta_string(s):

@@ -5,17 +5,16 @@ This should eventually be refactored to a more general library for
 parsing UDF dependencies in queries as well.
 """
 
-import attr
-import re
 import os
+import re
 from pathlib import Path
 from typing import List
 
+import attr
 import sqlparse
 import yaml
 
 from bigquery_etl.metadata.parse_metadata import METADATA_FILE
 
 
 UDF_CHAR = "[a-zA-z0-9_]"
 UDF_FILE = "udf.sql"
 PROCEDURE_FILE = "stored_procedure.sql"

@@ -1,19 +1,15 @@
 """Publish UDFs and resources to the public mozfun GCP project."""
 
-from argparse import ArgumentParser
 import json
 import os
 import re
+from argparse import ArgumentParser
 
-from google.cloud import bigquery
-from google.cloud import storage
+from google.cloud import bigquery, storage
 
+from bigquery_etl.routine.parse_routine import accumulate_dependencies, read_routine_dir
 from bigquery_etl.util import standard_args
 from bigquery_etl.util.common import project_dirs
-from bigquery_etl.routine.parse_routine import (
-    read_routine_dir,
-    accumulate_dependencies,
-)
 
 DEFAULT_UDF_DEPENDENCY_DIR = "udf_js_lib/"
 DEFAULT_GCS_BUCKET = "moz-fx-data-prod-bigquery-etl"

@@ -11,9 +11,9 @@ complexity limit, and then join those results to generate a final wide result.
 The query files must be in the same directory and all be prefixed with `part`.
 """
 
+import os.path
 from argparse import ArgumentParser
 from multiprocessing.pool import ThreadPool
-import os.path
 
 from google.cloud import bigquery
 

@@ -5,12 +5,12 @@ When executing a query associated metadata is parsed to determine whether
 results should be written to a corresponding public dataset.
 """
 
-from argparse import ArgumentParser
 import re
 import subprocess
 import sys
-import yaml
+from argparse import ArgumentParser
 
+import yaml
 
 from bigquery_etl.metadata.parse_metadata import Metadata
 from bigquery_etl.metadata.validate_metadata import validate_public_data

@@ -1,12 +1,12 @@
 """Query schema."""
 
-import attr
 import json
-import yaml
 
 from pathlib import Path
 from tempfile import NamedTemporaryFile
-from typing import Dict, Any
+from typing import Any, Dict
 
+import attr
+import yaml
 
 from bigquery_etl.dryrun import DryRun
 

@@ -1,22 +1,21 @@
 """Forward deletion requests from BigQuery to Amplitude."""
 
+import json
+import logging
+import warnings
 from argparse import ArgumentParser
 from datetime import datetime
 from multiprocessing.pool import ThreadPool
-from time import time, sleep
 from os import environ
-import warnings
-import logging
-import json
+from time import sleep, time
 
+import requests
 from google.cloud import bigquery
 from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
-import requests
 
 from ..util import standard_args
 
 
 AMPLITUDE_API_KEY = "AMPLITUDE_API_KEY"
 AMPLITUDE_SECRET_KEY = "AMPLITUDE_SECRET_KEY"
 

@@ -2,17 +2,16 @@
 
 """Meta data about tables and ids for self serve deletion."""
 
+import logging
+import re
 from dataclasses import dataclass
 from functools import partial
 from typing import Tuple, Union
-import logging
-import re
 
 from google.cloud import bigquery
 
 from ..util.bigquery_id import qualified_table_id
 
 
 SHARED_PROD = "moz-fx-data-shared-prod"
 GLEAN_SCHEMA_ID = "glean_ping_1"
 

@@ -2,18 +2,17 @@
 
 """Report estimated cost to run shredder."""
 
+import warnings
 from argparse import ArgumentParser
 from datetime import datetime, timedelta
 from math import ceil
 from textwrap import dedent
-import warnings
 
 from google.cloud import bigquery
 
-from .config import DELETE_TARGETS
-from ..util.bigquery_id import sql_table_id
 from ..util import standard_args
+from ..util.bigquery_id import sql_table_id
+from .config import DELETE_TARGETS
 
 JOBS_QUERY = """
 SELECT

@@ -1,5 +1,7 @@
 """Delete user data from long term storage."""
 
+import logging
+import warnings
 from argparse import ArgumentParser
 from collections import defaultdict
 from dataclasses import dataclass, replace

@@ -10,8 +12,6 @@ from multiprocessing.pool import ThreadPool
 from operator import attrgetter
 from textwrap import dedent
 from typing import Callable, Iterable, Optional, Tuple
-import logging
-import warnings
 
 from google.api_core.exceptions import NotFound
 from google.cloud import bigquery

@@ -22,14 +22,13 @@ from ..util.bigquery_id import FULL_JOB_ID_RE, full_job_id, sql_table_id
 from ..util.client_queue import ClientQueue
 from ..util.exceptions import BigQueryInsertError
 from .config import (
-    DeleteSource,
     DELETE_TARGETS,
-    find_glean_targets,
+    DeleteSource,
     find_experiment_analysis_targets,
+    find_glean_targets,
     find_pioneer_targets,
 )
-
 
 NULL_PARTITION_ID = "__NULL__"
 OUTSIDE_RANGE_PARTITION_ID = "__UNPARTITIONED__"
 

@@ -2,15 +2,14 @@
 
 """Search for tables and user ids that may be eligible for self serve deletion."""
 
-from argparse import ArgumentParser
 import re
 import warnings
+from argparse import ArgumentParser
 
 from google.cloud import bigquery
 
-from .config import SHARED_PROD, SEARCH_IGNORE_TABLES, SEARCH_IGNORE_FIELDS
 from ..util import standard_args
-
+from .config import SEARCH_IGNORE_FIELDS, SEARCH_IGNORE_TABLES, SHARED_PROD
 
 parser = ArgumentParser(description=__doc__)
 parser.add_argument(

@@ -1,19 +1,19 @@
 """Import Stripe data into BigQuery."""
 
-from datetime import datetime, timedelta, timezone
-from hashlib import sha256
-from tempfile import TemporaryFile
-from typing import Any, Dict, IO, List, Optional, Type
 import os.path
 import re
 import sys
-import ujson
 import warnings
+from datetime import datetime, timedelta, timezone
+from hashlib import sha256
+from tempfile import TemporaryFile
+from typing import IO, Any, Dict, List, Optional, Type
 
-from google.cloud import bigquery
-from stripe.api_resources.abstract import ListableAPIResource
 import click
 import stripe
+import ujson
+from google.cloud import bigquery
+from stripe.api_resources.abstract import ListableAPIResource
 
 # event data types with separate events and a defined schema
 EVENT_DATA_TYPES = (

@@ -1,8 +1,8 @@
 """Queue for balancing jobs across billing projects."""
 
+import asyncio
 from contextlib import contextmanager
 from queue import Queue
-import asyncio
 
 from google.cloud import bigquery
 

@@ -3,7 +3,6 @@ import os
 import re
 from typing import List
 
-
 # Search for all camelCase situations in reverse with arbitrary lookaheads.
 REV_WORD_BOUND_PAT = re.compile(
     r"""

@@ -1,12 +1,12 @@
 """Standard definitions for reusable script arguments."""
 
+import fnmatch
+import logging
+import re
+import warnings
 from argparse import Action
 from functools import partial
 from uuid import uuid4
-import fnmatch
-import re
-import logging
-import warnings
 
 from google.cloud import bigquery
 

@@ -9,12 +9,12 @@ present in the target directory, which allows manual overrides of views by
 checking them into the sql/ tree of the default branch of the repository.
 """
 
-import logging
-import tempfile
-from argparse import ArgumentParser
 import json
-import urllib.request
+import logging
 import tarfile
+import tempfile
+import urllib.request
+from argparse import ArgumentParser
 from dataclasses import dataclass
 from functools import partial
 from io import BytesIO

@@ -27,7 +27,6 @@ from bigquery_etl.dryrun import DryRun
 from bigquery_etl.format_sql.formatter import reformat
 from bigquery_etl.util import standard_args
 
-
 SCHEMAS_URI = (
     "https://github.com/mozilla-services/mozilla-pipeline-schemas"
     "/archive/generated-schemas.tar.gz"

@@ -1,11 +1,12 @@
 """Validates view definitions."""
 
-from argparse import ArgumentParser
 import glob
+import sys
+from argparse import ArgumentParser
 from multiprocessing.pool import Pool
 from pathlib import Path
 
 import sqlparse
-import sys
 
 from bigquery_etl.dependency import extract_table_references
 from bigquery_etl.util import standard_args

@@ -1,14 +1,13 @@
 """PyTest configuration."""
 
-from google.cloud import bigquery
-from google.cloud import storage
-from pathlib import Path
 import os
-import pytest
 import random
 import string
 import subprocess
+from pathlib import Path
 
+import pytest
+from google.cloud import bigquery, storage
 
 TEST_BUCKET = "bigquery-etl-integration-test-bucket"
 

@@ -25,3 +25,4 @@ yamllint==1.26.0
 pip-tools==5.5.0
 pyjnius==1.3.0
 pre-commit==2.10.1
+pytest-isort==1.3.0

@@ -298,6 +298,10 @@ iniconfig==1.0.1 \
     --hash=sha256:80cf40c597eb564e86346103f609d74efce0f6b4d4f30ec8ce9e2c26411ba437 \
     --hash=sha256:e5f92f89355a67de0595932a6c6c02ab4afddc6fcdc0bfc5becd0d60884d3f69
     # via pytest
+isort==5.7.0 \
+    --hash=sha256:c729845434366216d320e936b8ad6f9d681aab72dc7cbc2d51bedc3582f3ad1e \
+    --hash=sha256:fff4f0c04e1825522ce6949973e83110a6e907750cd92d128b0d14aaaadbffdc
+    # via pytest-isort
 jinja2==2.11.3 \
     --hash=sha256:03e47ad063331dd6a3f04a43eddca8a966a26ba0c5b7207a9a9e4e08f1b29419 \
     --hash=sha256:a6d58433de0ae800347cab1fa3043cebbabe8baa9d29e668f1c768cb87a333c6

@@ -577,6 +581,10 @@ pytest-forked==1.3.0 \
    --hash=sha256:6aa9ac7e00ad1a539c41bec6d21011332de671e938c7637378ec9710204e37ca \
    --hash=sha256:dc4147784048e70ef5d437951728825a131b81714b398d5d52f17c7c144d8815
    # via pytest-xdist
+pytest-isort==1.3.0 \
+    --hash=sha256:074255ad393088a2daee6ca7f2305b7b86358ff632f62302896d8d4b2b339107 \
+    --hash=sha256:46a12331a701e2f21d48548b2828c8b0a7956dbf1cd5347163f537deb24332dd
+    # via -r requirements.in
 pytest-mypy==0.8.0 \
     --hash=sha256:63d418a4fea7d598ac40b659723c00804d16a251d90a5cfbca213eeba5aaf01c \
     --hash=sha256:8d2112972c1debf087943f48963a0daf04f3424840aea0cf437cc97053b1b0ef

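The pinned isort==5.7.0 matches the `rev` used by the pre-commit hook above, and pytest-isort wires the same import-order check into the test suite, so CI fails when a file's imports drift from the configuration; the `# via` comments record why each pin is present.
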
@@ -2,15 +2,15 @@
 
 """Exports experiment monitoring data to GCS as JSON."""
 
+import random
+import string
 from argparse import ArgumentParser
 from datetime import datetime, timedelta
 from functools import partial
-from google.cloud import storage
-from google.cloud import bigquery
 from multiprocessing import Pool
-import random
 
 import smart_open
-import string
+from google.cloud import bigquery, storage
 
 parser = ArgumentParser(description=__doc__)
 parser.add_argument(

@@ -2,11 +2,11 @@
 
 """Read a table from BigQuery and write it as parquet."""
 
-from argparse import ArgumentParser
-from textwrap import dedent
 import json
 import re
 import sys
+from argparse import ArgumentParser
+from textwrap import dedent
 
 try:
     from google.cloud import bigquery

setup.py
@@ -1,4 +1,5 @@
-from setuptools import setup, find_namespace_packages
+from setuptools import find_namespace_packages, setup
 
 
 def get_version():
     version = {}

@@ -2,18 +2,19 @@
 
 """Import experiments from Experimenter via the Experimenter API."""
 
-from argparse import ArgumentParser
-from google.cloud import bigquery
-import requests
-import attr
-import cattr
 import datetime
 import json
-import pytz
 import sys
 import time
+from argparse import ArgumentParser
 from typing import List, Optional
 
+import attr
+import cattr
+import pytz
+import requests
+from google.cloud import bigquery
 
 EXPERIMENTER_API_URL_V1 = (
     "https://experimenter.services.mozilla.com/api/v1/experiments/"
 )

@@ -4,10 +4,11 @@
 
 from argparse import ArgumentParser
 from fnmatch import fnmatchcase
-from google.cloud import bigquery
 from functools import partial
 from multiprocessing.pool import ThreadPool
 
+from google.cloud import bigquery
+
 parser = ArgumentParser(description=__doc__)
 parser.add_argument("--date", required=True) # expect string with format yyyy-mm-dd
 parser.add_argument("--project", default="moz-fx-data-shared-prod")

@@ -4,9 +4,10 @@
 
 from argparse import ArgumentParser
 from fnmatch import fnmatchcase
-from google.cloud import bigquery
 from pathlib import Path
 
+from google.cloud import bigquery
+
 parser = ArgumentParser(description=__doc__)
 parser.add_argument("--date", required=True) # expect string with format yyyy-mm-dd
 parser.add_argument("--project", default="moz-fx-data-shared-prod")

@@ -3,9 +3,10 @@
 """Determine cost of previously scheduled bigquery-etl queries."""
 
 from argparse import ArgumentParser
-from google.cloud import bigquery
 from pathlib import Path
 
+from google.cloud import bigquery
+
 DEFAULT_PROJECTS = [
     "moz-fx-data-shared-prod",
     "moz-fx-data-derived-datasets",

@@ -3,10 +3,11 @@
 """Determine column sizes by performing dry runs."""
 
 from argparse import ArgumentParser
-from google.cloud import bigquery
 from functools import partial
 from multiprocessing.pool import ThreadPool
 
+from google.cloud import bigquery
+
 parser = ArgumentParser(description=__doc__)
 parser.add_argument("--date", required=True) # expect string with format yyyy-mm-dd
 parser.add_argument("--project", default="moz-fx-data-shared-prod")

@@ -4,10 +4,11 @@
 
 from argparse import ArgumentParser
 from fnmatch import fnmatchcase
-from google.cloud import bigquery
 from functools import partial
 from multiprocessing.pool import ThreadPool
 
+from google.cloud import bigquery
+
 parser = ArgumentParser(description=__doc__)
 parser.add_argument("--date", required=True) # expect string with format yyyy-mm-dd
 parser.add_argument("--project", default="moz-fx-data-shared-prod")

@@ -2,11 +2,12 @@
 
 """Generate view to unnest parquet-format list and map fields."""
 
-from argparse import ArgumentParser
-from google.cloud import bigquery
-from textwrap import dedent
 import json
 import sys
+from argparse import ArgumentParser
+from textwrap import dedent
 
+from google.cloud import bigquery
 
 
 def qualify(table, dataset, project):

@@ -1,9 +1,9 @@
 #!/usr/bin/env python3
 """clients_daily_histogram_aggregates query generator."""
-import sys
+import argparse
 import gzip
 import json
-import argparse
+import sys
 import textwrap
 import urllib.request
 from pathlib import Path

@@ -14,7 +14,6 @@ from google.cloud import bigquery
 sys.path.append(str(Path(__file__).parent.parent.parent.resolve()))
 from bigquery_etl.format_sql.formatter import reformat
 
-
 PROBE_INFO_SERVICE = (
     "https://probeinfo.telemetry.mozilla.org/firefox/all/main/all_probes"
 )

@@ -1,11 +1,11 @@
 #!/usr/bin/env python3
 """clients_daily_scalar_aggregates query generator."""
-import sys
-import json
-import gzip
 import argparse
-import textwrap
+import gzip
+import json
 import subprocess
+import sys
+import textwrap
 import urllib.request
 from pathlib import Path
 from time import sleep

@@ -14,7 +14,6 @@ sys.path.append(str(Path(__file__).parent.parent.parent.resolve()))
 from bigquery_etl.format_sql.formatter import reformat
 from bigquery_etl.util.common import snake_case
 
-
 PROBE_INFO_SERVICE = (
     "https://probeinfo.telemetry.mozilla.org/firefox/all/main/all_probes"
 )

@@ -2,9 +2,9 @@
 
 """Get experiment list for a given date from the recipe server."""
 
-from argparse import ArgumentParser
 import json
 import time
+from argparse import ArgumentParser
 
 import requests
 

@@ -2,15 +2,15 @@
 
 """Import data from daily attitudes heartbeat survey into BigQuery."""
 
-from argparse import ArgumentParser
 import datetime as dt
-from google.cloud import bigquery
 import itertools
-import pytz
 import re
-import requests
+from argparse import ArgumentParser
 from time import sleep
 
+import pytz
+import requests
+from google.cloud import bigquery
 
 parser = ArgumentParser(description=__doc__)
 parser.add_argument("--date", required=True)

@@ -3,8 +3,8 @@
 
 import itertools
 import json
-import sys
 import os.path
+import sys
 from pathlib import Path
 
 sys.path.append(str(Path(__file__).parent.parent.resolve()))

@@ -1,10 +1,11 @@
 import os
-import pytest
-from click.testing import CliRunner
 from pathlib import Path
-import yaml
 
-from bigquery_etl.cli.dag import info, create, remove
+import pytest
+import yaml
+from click.testing import CliRunner
+
+from bigquery_etl.cli.dag import create, info, remove
 
 TEST_DIR = Path(__file__).parent.parent
 

@@ -1,6 +1,7 @@
-from click.testing import CliRunner
 import os
 
 import pytest
+from click.testing import CliRunner
 
 from bigquery_etl.dependency import show as dependency_show
 

@@ -1,6 +1,7 @@
-from click.testing import CliRunner
 import os
 
 import pytest
+from click.testing import CliRunner
 
 from bigquery_etl.cli.format import format as sql_format
 

@@ -1,13 +1,14 @@
 import os
 
 import pytest
-from click.testing import CliRunner
 import yaml
+from click.testing import CliRunner
 
 from bigquery_etl.cli.query import (
-    create,
-    schedule,
-    info,
     _queries_matching_name_pattern,
+    create,
+    info,
+    schedule,
 )
 
 

@@ -1,7 +1,8 @@
 import os
 
 import pytest
-from click.testing import CliRunner
 import yaml
+from click.testing import CliRunner
 
 from bigquery_etl.cli.routine import create, info, rename
 

@@ -1,14 +1,15 @@
 from pathlib import Path
 
 import pytest
 from click.exceptions import BadParameter
 
 from bigquery_etl.cli.utils import (
+    is_authenticated,
     is_valid_dir,
     is_valid_file,
-    is_authenticated,
     is_valid_project,
 )
 
 
 TEST_DIR = Path(__file__).parent.parent
 
 

@@ -1,5 +1,6 @@
 import os
 from pathlib import Path
+
 import pytest
 
 from bigquery_etl.docs.generate_docs import load_with_examples

@@ -1,8 +1,8 @@
 import os
-
-from bigquery_etl.events_daily.generate_queries import get_query_dirs, TemplatedDir
 from pathlib import Path
 
+from bigquery_etl.events_daily.generate_queries import TemplatedDir, get_query_dirs
+
 BASE_DIR = Path(os.path.dirname(__file__)).parent
 
 

@@ -1,9 +1,9 @@
 import os
-import pytest
-
-from bigquery_etl.events_daily.generate_queries import TemplatedDir, Template
 from pathlib import Path
 
+import pytest
+
+from bigquery_etl.events_daily.generate_queries import Template, TemplatedDir
 
 BASE_DIR = Path(os.path.dirname(__file__)).parent
 

@@ -1,6 +1,7 @@
-import pytest
 from pathlib import Path
 
+import pytest
+
 from bigquery_etl.metadata.parse_metadata import Metadata
 
 TEST_DIR = Path(__file__).parent.parent

@@ -1,14 +1,13 @@
 import json
+from datetime import datetime
+from pathlib import Path
+from unittest.mock import MagicMock, Mock, call
 
 import pytest
 import smart_open
-from pathlib import Path
 
-from datetime import datetime
-from unittest.mock import call, Mock, MagicMock
 
 import bigquery_etl.public_data.publish_gcs_metadata as pgm
 
 
 TEST_DIR = Path(__file__).parent.parent
 
 

@@ -1,12 +1,12 @@
 import json
+from pathlib import Path
+from unittest.mock import MagicMock, Mock, call
 
 import pytest
 import smart_open
-from pathlib import Path
-from unittest.mock import call, Mock, MagicMock
 
 from bigquery_etl.public_data.publish_json import JsonPublisher
 
 
 TEST_DIR = Path(__file__).parent.parent
 
 

@@ -1,11 +1,10 @@
 import json
-import pytest
 import subprocess
-
-from pathlib import Path
 from datetime import datetime
-from google.cloud import bigquery
+from pathlib import Path
 
+import pytest
+from google.cloud import bigquery
 
 TEST_DIR = Path(__file__).parent.parent
 

@@ -1,15 +1,16 @@
 from pathlib import Path
 
 import pytest
 
+from bigquery_etl.metadata.parse_metadata import Metadata
 from bigquery_etl.query_scheduling.dag import (
     Dag,
-    DagParseException,
     DagDefaultArgs,
+    DagParseException,
     PublicDataJsonDag,
 )
-from bigquery_etl.metadata.parse_metadata import Metadata
-from bigquery_etl.query_scheduling.task import Task
 from bigquery_etl.query_scheduling.dag_collection import DagCollection
+from bigquery_etl.query_scheduling.task import Task
 
 TEST_DIR = Path(__file__).parent.parent
 

@@ -1,11 +1,12 @@
 import os
 from pathlib import Path
 
 import pytest
 
-from bigquery_etl.query_scheduling.dag_collection import DagCollection
-from bigquery_etl.query_scheduling.dag import InvalidDag, DagParseException
-from bigquery_etl.query_scheduling.task import Task
 from bigquery_etl.metadata.parse_metadata import Metadata
+from bigquery_etl.query_scheduling.dag import DagParseException, InvalidDag
+from bigquery_etl.query_scheduling.dag_collection import DagCollection
+from bigquery_etl.query_scheduling.task import Task
 
 TEST_DIR = Path(__file__).parent.parent
 

@@ -1,12 +1,14 @@
+import datetime
+
+import pytest
+
 from bigquery_etl.query_scheduling.formatters import (
-    format_schedule_interval,
     format_attr,
     format_date,
-    format_timedelta,
     format_optional_string,
+    format_schedule_interval,
+    format_timedelta,
 )
-import datetime
-import pytest
 
 
 class TestFormatters:

@@ -1,16 +1,17 @@
-from pathlib import Path
 import os
-import pytest
+from pathlib import Path
 from typing import NewType
 
-from bigquery_etl.query_scheduling.task import (
-    Task,
-    UnscheduledTask,
-    TaskParseException,
-    TaskRef,
-)
+import pytest
 
 from bigquery_etl.metadata.parse_metadata import Metadata
 from bigquery_etl.query_scheduling.dag_collection import DagCollection
+from bigquery_etl.query_scheduling.task import (
+    Task,
+    TaskParseException,
+    TaskRef,
+    UnscheduledTask,
+)
 
 TEST_DIR = Path(__file__).parent.parent
 

@@ -1,9 +1,9 @@
 from pathlib import Path
-
-from bigquery_etl.schema import Schema
 from textwrap import dedent
 
 import yaml
 
+from bigquery_etl.schema import Schema
+
 TEST_DIR = Path(__file__).parent.parent
 

@@ -13,9 +13,10 @@ from pathlib import Path
 
 import click
 import yaml
-from bigquery_etl.glam.utils import run
 from google.cloud import bigquery
 
+from bigquery_etl.glam.utils import run
+
 warnings.filterwarnings(
     "ignore", "Your application has authenticated using end user credentials"
 )

@@ -1,7 +1,7 @@
 """Testing data for query."""
 
-from pathlib import Path
 from itertools import product
+from pathlib import Path
 
 import yaml
 

@@ -1,7 +1,7 @@
 """Testing data for query."""
 
-from pathlib import Path
 from itertools import product
+from pathlib import Path
 from uuid import uuid4
 
 import yaml

@@ -1,7 +1,7 @@
 """Testing data for query."""
+from itertools import product
 from pathlib import Path
 from uuid import uuid4
-from itertools import product
 
 import yaml
 

@@ -1,6 +1,6 @@
-import textwrap
 import subprocess
 import sys
+import textwrap
 
 import pytest
 

@@ -1,5 +1,7 @@
 import os
+
 import pytest
+
 from bigquery_etl.dryrun import DryRun, Errors
 
 

@@ -1,8 +1,9 @@
-from pathlib import Path
-from google.cloud import bigquery
 import os
-import pytest
 import subprocess
+from pathlib import Path
 
+import pytest
+from google.cloud import bigquery
 
 ENTRYPOINT_SCRIPT = Path(__file__).parent.parent / "script" / "entrypoint"
 

@@ -1,5 +1,6 @@
-from unittest.mock import patch
 import os
+from unittest.mock import patch
 
 import pytest
 import yaml
 

@@ -1,7 +1,7 @@
-import os
 import csv
-
+import os
 from pathlib import Path
 
 from bigquery_etl.util.common import snake_case
 
 

@@ -5,13 +5,15 @@ The vectors are located in tests/validation/data/hmac_sha256_validation.json.
 """
 
 import json
 
 import pytest
+from google.cloud import bigquery
 
 from bigquery_etl.routine.parse_routine import (
-    read_routine_dir,
     RawRoutine,
+    read_routine_dir,
     routine_tests_sql,
 )
-from google.cloud import bigquery
 
 validation_data_file = "tests/validation/data/hmac_sha256_validation.json"