Run black and refactor --only and --except args
This commit is contained in:
Родитель
ccb65d6d18
Коммит
00cef9d7e9
|
@ -12,6 +12,7 @@ or to process only a specific list of tables.
|
|||
|
||||
from argparse import ArgumentParser
|
||||
from datetime import datetime
|
||||
from fnmatch import fnmatch
|
||||
|
||||
from google.cloud import bigquery
|
||||
|
||||
|
@ -49,25 +50,29 @@ parser.add_argument(
|
|||
parser.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help=("Do not run queries, but instead print the query job config "
|
||||
"and bytes that would be processed"),
|
||||
help=(
|
||||
"Do not run queries, but instead print the query job config "
|
||||
"and bytes that would be processed"
|
||||
),
|
||||
)
|
||||
group = parser.add_mutually_exclusive_group()
|
||||
group.add_argument(
|
||||
"--only",
|
||||
nargs="+",
|
||||
dest="only_tables",
|
||||
default=[],
|
||||
help=("Process only the given tables; "
|
||||
"pass names like 'telemetry_live.main_v4'"),
|
||||
help=(
|
||||
"Process only the given tables; "
|
||||
"pass names or globs like 'telemetry_live.main_v*' "
|
||||
),
|
||||
)
|
||||
group.add_argument(
|
||||
"--except",
|
||||
nargs="+",
|
||||
dest="except_tables",
|
||||
default=[],
|
||||
help=("Process all tables in *_live datasets except for the given tables; "
|
||||
"pass names like 'telemetry_live.main_v4'"),
|
||||
help=(
|
||||
"Process all tables in *_live datasets except for the given tables; "
|
||||
"pass names or globs like 'telemetry_live.main_v*'"
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
|
@ -75,9 +80,7 @@ def sql_full_table_id(table):
|
|||
return table.full_table_id.replace(":", ".")
|
||||
|
||||
|
||||
def run_deduplication_query(
|
||||
client, live_table, stable_table, date, dry_run
|
||||
):
|
||||
def run_deduplication_query(client, live_table, stable_table, date, dry_run):
|
||||
|
||||
sql = QUERY_TEMPLATE.format(source_table_spec=sql_full_table_id(live_table))
|
||||
destination = f"{sql_full_table_id(stable_table)}${date:%Y%m%d}"
|
||||
|
@ -111,24 +114,30 @@ def main():
|
|||
|
||||
client = bigquery.Client()
|
||||
live_datasets = [
|
||||
d for d in client.list_datasets(args.project_id) if d.dataset_id.endswith("_live")
|
||||
d
|
||||
for d in client.list_datasets(args.project_id)
|
||||
if d.dataset_id.endswith("_live")
|
||||
]
|
||||
|
||||
for live_dataset in live_datasets:
|
||||
stable_dataset_id = live_dataset.dataset_id[:-5] + "_stable"
|
||||
for live_table in client.list_tables(live_dataset.reference):
|
||||
live_table_spec = f"{live_table.dataset_id}.{live_table.table_id}"
|
||||
stable_table = client.get_table('.'.join([args.project_id, stable_dataset_id, live_table.table_id]))
|
||||
if live_table_spec not in args.only_tables:
|
||||
stable_table = client.get_table(
|
||||
".".join([args.project_id, stable_dataset_id, live_table.table_id])
|
||||
)
|
||||
if args.except_tables is not None and any(
|
||||
fnmatch(live_table_spec, pattern) for pattern in args.except_tables
|
||||
):
|
||||
print(f"Skipping {live_table_spec} due to --except argument")
|
||||
continue
|
||||
if live_table_spec in args.except_tables:
|
||||
if args.only_tables is not None and not any(
|
||||
fnmatch(live_table_spec, pattern) for pattern in args.only_tables
|
||||
):
|
||||
print(f"Skipping {live_table_spec} due to --only argument")
|
||||
continue
|
||||
run_deduplication_query(
|
||||
client,
|
||||
live_table,
|
||||
stable_table,
|
||||
args.date,
|
||||
args.dry_run,
|
||||
client, live_table, stable_table, args.date, args.dry_run
|
||||
)
|
||||
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче