From 84ee88e2b9a9e840d6b4969926cfdd83b46f7ede Mon Sep 17 00:00:00 2001 From: Lucia <30448600+lucia-vargas-a@users.noreply.github.com> Date: Mon, 19 Feb 2024 15:27:34 +0100 Subject: [PATCH] Dependabot/pip/black 24.1.1 fix (#5027) * Bump black from 23.10.1 to 24.1.1 Bumps [black](https://github.com/psf/black) from 23.10.1 to 24.1.1. - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/23.10.1...24.1.1) --- updated-dependencies: - dependency-name: black dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] * Reformat files with black to fix dependabot update. * Reformat with black 24.1.1. Update test dag with required space. * Update test dags. --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- bigquery_etl/__init__.py | 1 + bigquery_etl/alchemer/survey.py | 1 + bigquery_etl/backfill/validate.py | 1 + bigquery_etl/cli/alchemer.py | 1 + bigquery_etl/cli/backfill.py | 1 + bigquery_etl/cli/check.py | 1 + bigquery_etl/cli/generate.py | 1 + bigquery_etl/cli/metadata.py | 1 + bigquery_etl/cli/query.py | 14 ++++--- bigquery_etl/cli/view.py | 1 + .../docs/mozfun/generate_mozfun_docs.py | 1 + bigquery_etl/glam/cli.py | 1 + .../clients_daily_histogram_aggregates.py | 1 + bigquery_etl/glam/generate.py | 1 + bigquery_etl/glam/models.py | 1 + bigquery_etl/glam/utils.py | 1 + bigquery_etl/schema/stable_table_schema.py | 1 + bigquery_etl/util/common.py | 1 + requirements.in | 2 +- requirements.txt | 42 ++++++++++--------- script/glam/run_scalar_agg_clustered_query.py | 1 + script/marketing/copy_ga_sessions.py | 8 ++-- .../marketing/generate_app_store_queries.py | 8 ++-- .../experimenter_experiments_v1/query.py | 18 ++++---- .../adjust_derived/adjust_cohort_v1/query.py | 1 + .../adjust_deliverables_v1/query.py | 1 + .../mdn_popularities_v1/query.py | 9 ++-- .../query.py | 6 +-- .../shredder_rows_deleted_v1/query.py | 1 + .../query.py | 34 +++++++++------ .../structured_missing_columns_v1/query.py | 2 +- .../unified_metrics_v1/query.py | 21 +++++++--- .../telemetry/longitudinal.sql.py | 6 +-- .../crash_signatures_v1/query.py | 1 + .../crash_symbols_v1/query.py | 1 + .../experiments_stats_v1/query.py | 8 ++-- sql_generators/active_users/__init__.py | 1 + .../__init__.py | 1 + .../country_code_lookup/__init__.py | 1 + sql_generators/events_daily/__init__.py | 1 + .../experiment_monitoring/__init__.py | 1 + sql_generators/feature_usage/__init__.py | 1 + sql_generators/funnels/__init__.py | 1 + sql_generators/glean_usage/__init__.py | 1 + .../glean_usage/baseline_clients_daily.py | 1 + .../glean_usage/event_monitoring_live.py | 8 ++-- .../glean_usage/glean_app_ping_views.py | 1 + sql_generators/search/__init__.py | 1 + sql_generators/serp_events/__init__.py | 1 + sql_generators/urlbar_events/__init__.py | 1 + sql_generators/use_counters/__init__.py | 1 + tests/data/dags/python_script_test_dag | 1 + tests/data/dags/simple_test_dag | 1 + .../data/dags/test_dag_duplicate_dependencies | 1 + .../dags/test_dag_external_check_dependency | 3 +- tests/data/dags/test_dag_external_dependency | 1 + .../dags/test_dag_with_check_dependencies | 3 +- .../test_dag_with_check_table_dependencies | 1 + tests/data/dags/test_dag_with_dependencies | 1 + .../test_minimal/data.py | 1 + .../test_minimal/data.py | 1 + .../test_minimal/data.py | 1 + .../geckoview_version_v1/bootstrap.py | 1 + 63 files changed, 160 insertions(+), 79 deletions(-) diff --git a/bigquery_etl/__init__.py b/bigquery_etl/__init__.py index c422ebcf34..d824837521 100644 --- a/bigquery_etl/__init__.py +++ b/bigquery_etl/__init__.py @@ -1,4 +1,5 @@ """BigQuery ETL.""" + from pathlib import Path from .config import ConfigLoader diff --git a/bigquery_etl/alchemer/survey.py b/bigquery_etl/alchemer/survey.py index 2e0112c6fa..b0c7c1b7b7 100644 --- a/bigquery_etl/alchemer/survey.py +++ b/bigquery_etl/alchemer/survey.py @@ -1,4 +1,5 @@ """Import data from alchemer (surveygizmo) surveys into BigQuery.""" + import datetime as dt import json import re diff --git a/bigquery_etl/backfill/validate.py b/bigquery_etl/backfill/validate.py index ed91d3aeb4..0c2238c32c 100644 --- a/bigquery_etl/backfill/validate.py +++ b/bigquery_etl/backfill/validate.py @@ -1,4 +1,5 @@ """Validate backfill entries.""" + from pathlib import Path from typing import List diff --git a/bigquery_etl/cli/alchemer.py b/bigquery_etl/cli/alchemer.py index 5e881bbe72..01954bdb6c 100644 --- a/bigquery_etl/cli/alchemer.py +++ b/bigquery_etl/cli/alchemer.py @@ -1,4 +1,5 @@ """bigquery-etl CLI alchemer command.""" + from datetime import date, datetime, timedelta import rich_click as click diff --git a/bigquery_etl/cli/backfill.py b/bigquery_etl/cli/backfill.py index d17a1f32e8..024319160b 100644 --- a/bigquery_etl/cli/backfill.py +++ b/bigquery_etl/cli/backfill.py @@ -1,4 +1,5 @@ """bigquery-etl CLI backfill command.""" + import json import sys import tempfile diff --git a/bigquery_etl/cli/check.py b/bigquery_etl/cli/check.py index 4daa73d6e5..8a1f91468d 100644 --- a/bigquery_etl/cli/check.py +++ b/bigquery_etl/cli/check.py @@ -1,4 +1,5 @@ """bigquery-etl CLI check command.""" + import re import subprocess import sys diff --git a/bigquery_etl/cli/generate.py b/bigquery_etl/cli/generate.py index 902af02f7a..9c66dbe250 100644 --- a/bigquery_etl/cli/generate.py +++ b/bigquery_etl/cli/generate.py @@ -1,4 +1,5 @@ """bigquery-etl CLI generate command.""" + import importlib.util import sys from inspect import getmembers diff --git a/bigquery_etl/cli/metadata.py b/bigquery_etl/cli/metadata.py index 320ea668e0..b6f110acf6 100644 --- a/bigquery_etl/cli/metadata.py +++ b/bigquery_etl/cli/metadata.py @@ -1,4 +1,5 @@ """bigquery-etl CLI metadata command.""" + from pathlib import Path from typing import Optional diff --git a/bigquery_etl/cli/query.py b/bigquery_etl/cli/query.py index 2da8f37c99..5b2345f8c2 100644 --- a/bigquery_etl/cli/query.py +++ b/bigquery_etl/cli/query.py @@ -1858,9 +1858,9 @@ def _update_query_schema( f"{project_name}.{tmp_dataset}.{table_name}_{random_str(12)}" ) existing_schema.deploy(tmp_identifier) - tmp_tables[ - f"{project_name}.{dataset_name}.{table_name}" - ] = tmp_identifier + tmp_tables[f"{project_name}.{dataset_name}.{table_name}"] = ( + tmp_identifier + ) existing_schema.to_yaml_file(existing_schema_path) # replace temporary table references @@ -1916,9 +1916,11 @@ def _update_query_schema( field=table.time_partitioning.field, partition_type=table.time_partitioning.type_.lower(), required=table.time_partitioning.require_partition_filter, - expiration_days=table.time_partitioning.expiration_ms / 86400000.0 - if table.time_partitioning.expiration_ms - else None, + expiration_days=( + table.time_partitioning.expiration_ms / 86400000.0 + if table.time_partitioning.expiration_ms + else None + ), ) click.echo(f"Partitioning metadata added to {metadata_file_path}") diff --git a/bigquery_etl/cli/view.py b/bigquery_etl/cli/view.py index b6ce97b96a..6957f90809 100644 --- a/bigquery_etl/cli/view.py +++ b/bigquery_etl/cli/view.py @@ -1,4 +1,5 @@ """bigquery-etl CLI view command.""" + import logging import re import string diff --git a/bigquery_etl/docs/mozfun/generate_mozfun_docs.py b/bigquery_etl/docs/mozfun/generate_mozfun_docs.py index 6e7203db1e..866402bb10 100644 --- a/bigquery_etl/docs/mozfun/generate_mozfun_docs.py +++ b/bigquery_etl/docs/mozfun/generate_mozfun_docs.py @@ -1,4 +1,5 @@ """Generate documentation for mozfun.""" + import itertools import os import re diff --git a/bigquery_etl/glam/cli.py b/bigquery_etl/glam/cli.py index be8ece0474..992c24586d 100644 --- a/bigquery_etl/glam/cli.py +++ b/bigquery_etl/glam/cli.py @@ -1,4 +1,5 @@ """Tools for GLAM ETL.""" + import os from pathlib import Path diff --git a/bigquery_etl/glam/clients_daily_histogram_aggregates.py b/bigquery_etl/glam/clients_daily_histogram_aggregates.py index 9702ce6e26..05e3d1da23 100644 --- a/bigquery_etl/glam/clients_daily_histogram_aggregates.py +++ b/bigquery_etl/glam/clients_daily_histogram_aggregates.py @@ -1,4 +1,5 @@ """clients_daily_histogram_aggregates query generator.""" + import argparse import sys from typing import Dict, List diff --git a/bigquery_etl/glam/generate.py b/bigquery_etl/glam/generate.py index b0e0699ffc..dd86a979ba 100644 --- a/bigquery_etl/glam/generate.py +++ b/bigquery_etl/glam/generate.py @@ -1,4 +1,5 @@ """Generate templated views.""" + from argparse import ArgumentParser, Namespace from dataclasses import dataclass from functools import partial diff --git a/bigquery_etl/glam/models.py b/bigquery_etl/glam/models.py index 46afc339c5..9317a209eb 100644 --- a/bigquery_etl/glam/models.py +++ b/bigquery_etl/glam/models.py @@ -1,4 +1,5 @@ """Variables for templated SQL.""" + from .utils import compute_datacube_groupings, get_custom_distribution_metadata diff --git a/bigquery_etl/glam/utils.py b/bigquery_etl/glam/utils.py index 3891bff3f6..bd77d77de4 100644 --- a/bigquery_etl/glam/utils.py +++ b/bigquery_etl/glam/utils.py @@ -1,4 +1,5 @@ """Utilities for the GLAM module.""" + import json import subprocess from collections import namedtuple diff --git a/bigquery_etl/schema/stable_table_schema.py b/bigquery_etl/schema/stable_table_schema.py index ee94766cdf..77027bfe4f 100644 --- a/bigquery_etl/schema/stable_table_schema.py +++ b/bigquery_etl/schema/stable_table_schema.py @@ -1,4 +1,5 @@ """Methods for working with stable table schemas.""" + import json import tarfile import urllib.request diff --git a/bigquery_etl/util/common.py b/bigquery_etl/util/common.py index d72d4dcfe5..19285f017b 100644 --- a/bigquery_etl/util/common.py +++ b/bigquery_etl/util/common.py @@ -1,4 +1,5 @@ """Generic utility functions.""" + import glob import logging import os diff --git a/requirements.in b/requirements.in index 5e4093d1da..906b5e96eb 100644 --- a/requirements.in +++ b/requirements.in @@ -1,6 +1,6 @@ attrs==23.2.0 authlib==1.3.0 -black==23.10.1 +black==24.1.1 cattrs==23.2.3 click==8.1.7 exceptiongroup==1.2.0 # for backwards compatibility with python < 3.11 diff --git a/requirements.txt b/requirements.txt index 573c0ca52f..57d556f958 100644 --- a/requirements.txt +++ b/requirements.txt @@ -110,25 +110,29 @@ babel==2.12.1 \ --hash=sha256:b4246fb7677d3b98f501a39d43396d3cafdc8eadb045f4a31be01863f655c610 \ --hash=sha256:cc2d99999cd01d44420ae725a21c9e3711b3aadc7976d6147f622d8581963455 # via mkdocs-material -black==23.10.1 \ - --hash=sha256:037e9b4664cafda5f025a1728c50a9e9aedb99a759c89f760bd83730e76ba884 \ - --hash=sha256:1b917a2aa020ca600483a7b340c165970b26e9029067f019e3755b56e8dd5916 \ - --hash=sha256:1f8ce316753428ff68749c65a5f7844631aa18c8679dfd3ca9dc1a289979c258 \ - --hash=sha256:33d40f5b06be80c1bbce17b173cda17994fbad096ce60eb22054da021bf933d1 \ - --hash=sha256:3f157a8945a7b2d424da3335f7ace89c14a3b0625e6593d21139c2d8214d55ce \ - --hash=sha256:5ed45ac9a613fb52dad3b61c8dea2ec9510bf3108d4db88422bacc7d1ba1243d \ - --hash=sha256:6d23d7822140e3fef190734216cefb262521789367fbdc0b3f22af6744058982 \ - --hash=sha256:7670242e90dc129c539e9ca17665e39a146a761e681805c54fbd86015c7c84f7 \ - --hash=sha256:7b4d10b0f016616a0d93d24a448100adf1699712fb7a4efd0e2c32bbb219b173 \ - --hash=sha256:7cb5936e686e782fddb1c73f8aa6f459e1ad38a6a7b0e54b403f1f05a1507ee9 \ - --hash=sha256:7d56124b7a61d092cb52cce34182a5280e160e6aff3137172a68c2c2c4b76bcb \ - --hash=sha256:840015166dbdfbc47992871325799fd2dc0dcf9395e401ada6d88fe11498abad \ - --hash=sha256:9c74de4c77b849e6359c6f01987e94873c707098322b91490d24296f66d067dc \ - --hash=sha256:b15b75fc53a2fbcac8a87d3e20f69874d161beef13954747e053bca7a1ce53a0 \ - --hash=sha256:cfcce6f0a384d0da692119f2d72d79ed07c7159879d0bb1bb32d2e443382bf3a \ - --hash=sha256:d431e6739f727bb2e0495df64a6c7a5310758e87505f5f8cde9ff6c0f2d7e4fe \ - --hash=sha256:e293e4c2f4a992b980032bbd62df07c1bcff82d6964d6c9496f2cd726e246ace \ - --hash=sha256:ec3f8e6234c4e46ff9e16d9ae96f4ef69fa328bb4ad08198c8cee45bb1f08c69 +black==24.1.1 \ + --hash=sha256:0269dfdea12442022e88043d2910429bed717b2d04523867a85dacce535916b8 \ + --hash=sha256:07204d078e25327aad9ed2c64790d681238686bce254c910de640c7cc4fc3aa6 \ + --hash=sha256:08b34e85170d368c37ca7bf81cf67ac863c9d1963b2c1780c39102187ec8dd62 \ + --hash=sha256:1a95915c98d6e32ca43809d46d932e2abc5f1f7d582ffbe65a5b4d1588af7445 \ + --hash=sha256:2588021038bd5ada078de606f2a804cadd0a3cc6a79cb3e9bb3a8bf581325a4c \ + --hash=sha256:2fa6a0e965779c8f2afb286f9ef798df770ba2b6cee063c650b96adec22c056a \ + --hash=sha256:34afe9da5056aa123b8bfda1664bfe6fb4e9c6f311d8e4a6eb089da9a9173bf9 \ + --hash=sha256:3897ae5a21ca132efa219c029cce5e6bfc9c3d34ed7e892113d199c0b1b444a2 \ + --hash=sha256:40657e1b78212d582a0edecafef133cf1dd02e6677f539b669db4746150d38f6 \ + --hash=sha256:48b5760dcbfe5cf97fd4fba23946681f3a81514c6ab8a45b50da67ac8fbc6c7b \ + --hash=sha256:5242ecd9e990aeb995b6d03dc3b2d112d4a78f2083e5a8e86d566340ae80fec4 \ + --hash=sha256:5cdc2e2195212208fbcae579b931407c1fa9997584f0a415421748aeafff1168 \ + --hash=sha256:5d7b06ea8816cbd4becfe5f70accae953c53c0e53aa98730ceccb0395520ee5d \ + --hash=sha256:7258c27115c1e3b5de9ac6c4f9957e3ee2c02c0b39222a24dc7aa03ba0e986f5 \ + --hash=sha256:854c06fb86fd854140f37fb24dbf10621f5dab9e3b0c29a690ba595e3d543024 \ + --hash=sha256:a21725862d0e855ae05da1dd25e3825ed712eaaccef6b03017fe0853a01aa45e \ + --hash=sha256:a83fe522d9698d8f9a101b860b1ee154c1d25f8a82ceb807d319f085b2627c5b \ + --hash=sha256:b3d64db762eae4a5ce04b6e3dd745dcca0fb9560eb931a5be97472e38652a161 \ + --hash=sha256:e298d588744efda02379521a19639ebcd314fba7a49be22136204d7ed1782717 \ + --hash=sha256:e2c8dfa14677f90d976f68e0c923947ae68fa3961d61ee30976c388adc0b02c8 \ + --hash=sha256:ecba2a15dfb2d97105be74bbfe5128bc5e9fa8477d8c46766505c1dda5883aac \ + --hash=sha256:fc1ec9aa6f4d98d022101e015261c056ddebe3da6a8ccfc2c792cbe0349d48b7 # via # -r requirements.in # pytest-black diff --git a/script/glam/run_scalar_agg_clustered_query.py b/script/glam/run_scalar_agg_clustered_query.py index ab05d22ceb..440a618d86 100644 --- a/script/glam/run_scalar_agg_clustered_query.py +++ b/script/glam/run_scalar_agg_clustered_query.py @@ -1,4 +1,5 @@ """Run a query on clients_scalar_aggregates on one app_version at a time.""" + import datetime import sys import time diff --git a/script/marketing/copy_ga_sessions.py b/script/marketing/copy_ga_sessions.py index 24bf8ede53..0ad7e7fbc5 100755 --- a/script/marketing/copy_ga_sessions.py +++ b/script/marketing/copy_ga_sessions.py @@ -17,9 +17,11 @@ def get_qualified_table_name(project, ga_id, table_date): def copy_single_table(bq_client, src_table, dst_table, overwrite): """Copy a single day of ga_sessions.""" job_config = bigquery.CopyJobConfig( - write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE - if overwrite - else bigquery.WriteDisposition.WRITE_EMPTY + write_disposition=( + bigquery.WriteDisposition.WRITE_TRUNCATE + if overwrite + else bigquery.WriteDisposition.WRITE_EMPTY + ) ) try: copy_job = bq_client.copy_table( diff --git a/script/marketing/generate_app_store_queries.py b/script/marketing/generate_app_store_queries.py index cfb8a96361..acaf8d64c8 100755 --- a/script/marketing/generate_app_store_queries.py +++ b/script/marketing/generate_app_store_queries.py @@ -164,9 +164,11 @@ def main(project, source_dataset, destination_dataset, create_table, backfill, d schema_update_options=schema_update_options, time_partitioning=bigquery.TimePartitioning(field="date"), create_disposition=bigquery.CreateDisposition.CREATE_IF_NEEDED, - write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE - if backfill - else bigquery.WriteDisposition.WRITE_APPEND, + write_disposition=( + bigquery.WriteDisposition.WRITE_TRUNCATE + if backfill + else bigquery.WriteDisposition.WRITE_APPEND + ), ) print(f"Creating table {table_name}") query_job = client.query(query_text, job_config) diff --git a/sql/moz-fx-data-experiments/monitoring/experimenter_experiments_v1/query.py b/sql/moz-fx-data-experiments/monitoring/experimenter_experiments_v1/query.py index 9f1d9f799d..629a70cda7 100644 --- a/sql/moz-fx-data-experiments/monitoring/experimenter_experiments_v1/query.py +++ b/sql/moz-fx-data-experiments/monitoring/experimenter_experiments_v1/query.py @@ -190,15 +190,17 @@ class ExperimentV6: normandy_slug=self.slug, experimenter_slug=None, type="v6", - status="Live" - if ( - self.endDate is None - or ( - self.endDate - and self.endDate > pytz.utc.localize(datetime.datetime.now()) + status=( + "Live" + if ( + self.endDate is None + or ( + self.endDate + and self.endDate > pytz.utc.localize(datetime.datetime.now()) + ) ) - ) - else "Complete", + else "Complete" + ), start_date=self.startDate, end_date=self.endDate, enrollment_end_date=self.enrollmentEndDate, diff --git a/sql/moz-fx-data-shared-prod/adjust_derived/adjust_cohort_v1/query.py b/sql/moz-fx-data-shared-prod/adjust_derived/adjust_cohort_v1/query.py index 915017e4b1..467472e7e2 100644 --- a/sql/moz-fx-data-shared-prod/adjust_derived/adjust_cohort_v1/query.py +++ b/sql/moz-fx-data-shared-prod/adjust_derived/adjust_cohort_v1/query.py @@ -1,4 +1,5 @@ """Adjust data - download deliverables, clean and upload to BigQuery.""" + import csv import json import tempfile diff --git a/sql/moz-fx-data-shared-prod/adjust_derived/adjust_deliverables_v1/query.py b/sql/moz-fx-data-shared-prod/adjust_derived/adjust_deliverables_v1/query.py index 3e87973779..1156496bfd 100644 --- a/sql/moz-fx-data-shared-prod/adjust_derived/adjust_deliverables_v1/query.py +++ b/sql/moz-fx-data-shared-prod/adjust_derived/adjust_deliverables_v1/query.py @@ -1,4 +1,5 @@ """Adjust data - download deliverables, clean and upload to BigQuery.""" + import csv import json import tempfile diff --git a/sql/moz-fx-data-shared-prod/mdn_yari_derived/mdn_popularities_v1/query.py b/sql/moz-fx-data-shared-prod/mdn_yari_derived/mdn_popularities_v1/query.py index d2ebec5a1f..d747702b1d 100644 --- a/sql/moz-fx-data-shared-prod/mdn_yari_derived/mdn_popularities_v1/query.py +++ b/sql/moz-fx-data-shared-prod/mdn_yari_derived/mdn_popularities_v1/query.py @@ -65,10 +65,10 @@ def main(): table_ref = dataset_ref.table(args.temp_table) target_file_name = f"{uuid4()}.csv" - target_file_path = join(args.destination_path, args.date.strftime('%Y/%m'), target_file_name) - mdn_uri = ( - f"gs://{args.destination_bucket}/{target_file_path}" + target_file_path = join( + args.destination_path, args.date.strftime("%Y/%m"), target_file_name ) + mdn_uri = f"gs://{args.destination_bucket}/{target_file_path}" logging.info( "Exporting %s to GCS: %s:%s" % (temp_table, args.destination_project, mdn_uri) @@ -85,11 +85,12 @@ def main(): # Make it available as current. current_file_path = join(args.destination_path, CURRENT_FILE_NAME) - + storage_client = storage.Client(args.project) bucket = storage_client.get_bucket(args.destination_bucket) blob = bucket.get_blob(target_file_path) bucket.copy_blob(blob, bucket, current_file_path) + if __name__ == "__main__": main() diff --git a/sql/moz-fx-data-shared-prod/monitoring_derived/bigquery_etl_scheduled_query_usage_v1/query.py b/sql/moz-fx-data-shared-prod/monitoring_derived/bigquery_etl_scheduled_query_usage_v1/query.py index 50dfd784e5..cd55dc49f7 100755 --- a/sql/moz-fx-data-shared-prod/monitoring_derived/bigquery_etl_scheduled_query_usage_v1/query.py +++ b/sql/moz-fx-data-shared-prod/monitoring_derived/bigquery_etl_scheduled_query_usage_v1/query.py @@ -8,11 +8,7 @@ from pathlib import Path from google.cloud import bigquery -DEFAULT_PROJECTS = [ - "moz-fx-data-shared-prod", - "moz-fx-data-experiments", - "mozdata" -] +DEFAULT_PROJECTS = ["moz-fx-data-shared-prod", "moz-fx-data-experiments", "mozdata"] parser = ArgumentParser(description=__doc__) parser.add_argument("--date", required=True) # expect string with format yyyy-mm-dd diff --git a/sql/moz-fx-data-shared-prod/monitoring_derived/shredder_rows_deleted_v1/query.py b/sql/moz-fx-data-shared-prod/monitoring_derived/shredder_rows_deleted_v1/query.py index d1d0424a7e..343547b193 100644 --- a/sql/moz-fx-data-shared-prod/monitoring_derived/shredder_rows_deleted_v1/query.py +++ b/sql/moz-fx-data-shared-prod/monitoring_derived/shredder_rows_deleted_v1/query.py @@ -1,4 +1,5 @@ """Monitor the number of rows deleted by shredder.""" + import datetime from argparse import ArgumentParser from multiprocessing.pool import ThreadPool diff --git a/sql/moz-fx-data-shared-prod/monitoring_derived/stable_and_derived_table_sizes_v1/query.py b/sql/moz-fx-data-shared-prod/monitoring_derived/stable_and_derived_table_sizes_v1/query.py index 7ec76571ef..f1563278b8 100644 --- a/sql/moz-fx-data-shared-prod/monitoring_derived/stable_and_derived_table_sizes_v1/query.py +++ b/sql/moz-fx-data-shared-prod/monitoring_derived/stable_and_derived_table_sizes_v1/query.py @@ -33,7 +33,9 @@ def get_tables(client, project, dataset): def get_partition_size_json(client, date, table): """Returns the size of a specific date parition of the specified table.""" job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False) - job_config_columns_info = bigquery.QueryJobConfig(dry_run=False, use_query_cache=False) + job_config_columns_info = bigquery.QueryJobConfig( + dry_run=False, use_query_cache=False + ) dataset_id = table[0] table_id = table[1] @@ -42,11 +44,16 @@ def get_partition_size_json(client, date, table): FROM {dataset_id}.INFORMATION_SCHEMA.COLUMNS WHERE table_name = '{table_id}' and is_partitioning_column = 'YES' """ - partition_column_name_result = client.query(partition_column_sql, job_config=job_config_columns_info) + partition_column_name_result = client.query( + partition_column_sql, job_config=job_config_columns_info + ) partition_column_name = [row[0] for row in partition_column_name_result.result()] - if len(partition_column_name) > 0 and partition_column_name[0] in ('submission_date', 'submission_timestamp'): + if len(partition_column_name) > 0 and partition_column_name[0] in ( + "submission_date", + "submission_timestamp", + ): sql = f""" SELECT * FROM `{dataset_id}.{table_id}` WHERE DATE({partition_column_name[0]}) = '{date}' @@ -57,7 +64,7 @@ def get_partition_size_json(client, date, table): "submission_date": date, "dataset_id": dataset_id, "table_id": table_id, - "byte_size": size + "byte_size": size, } @@ -90,11 +97,14 @@ def main(): stable_datasets = [ dataset.dataset_id for dataset in list(client.list_datasets()) - if fnmatchcase(dataset.dataset_id, arg_dataset) and not fnmatchcase(dataset.dataset_id, 'monitoring_derived') + if fnmatchcase(dataset.dataset_id, arg_dataset) + and not fnmatchcase(dataset.dataset_id, "monitoring_derived") ] with ThreadPool(20) as p: stable_tables = p.map( - partial(get_tables, client, args.project), stable_datasets, chunksize=1, + partial(get_tables, client, args.project), + stable_datasets, + chunksize=1, ) stable_tables = [table for tables in stable_tables for table in tables] @@ -107,12 +117,12 @@ def main(): stable_derived_partition_sizes.extend(partition_sizes) save_table_sizes( - client, - stable_derived_partition_sizes, - args.date, - args.destination_dataset, - args.destination_table, - ) + client, + stable_derived_partition_sizes, + args.date, + args.destination_dataset, + args.destination_table, + ) if __name__ == "__main__": diff --git a/sql/moz-fx-data-shared-prod/monitoring_derived/structured_missing_columns_v1/query.py b/sql/moz-fx-data-shared-prod/monitoring_derived/structured_missing_columns_v1/query.py index 12a868e02b..2e385b468f 100755 --- a/sql/moz-fx-data-shared-prod/monitoring_derived/structured_missing_columns_v1/query.py +++ b/sql/moz-fx-data-shared-prod/monitoring_derived/structured_missing_columns_v1/query.py @@ -107,7 +107,7 @@ def structured_missing_columns( ), ) job.result() - mb_processed[namespace] = round(job.total_bytes_processed / 1024 ** 2, 1) + mb_processed[namespace] = round(job.total_bytes_processed / 1024**2, 1) except Exception as e: if not skip_exceptions: raise diff --git a/sql/moz-fx-data-shared-prod/org_mozilla_ios_firefox/unified_metrics_v1/query.py b/sql/moz-fx-data-shared-prod/org_mozilla_ios_firefox/unified_metrics_v1/query.py index b1d5d28c2a..bcf50509b4 100644 --- a/sql/moz-fx-data-shared-prod/org_mozilla_ios_firefox/unified_metrics_v1/query.py +++ b/sql/moz-fx-data-shared-prod/org_mozilla_ios_firefox/unified_metrics_v1/query.py @@ -151,17 +151,24 @@ def main(): legacy_core = ( "moz-fx-data-shared-prod.org_mozilla_ios_firefox_derived.legacy_mobile_core_v2" ) - legacy_event = ( - "moz-fx-data-shared-prod.org_mozilla_ios_firefox_derived.legacy_mobile_event_counts_v2" - ) + legacy_event = "moz-fx-data-shared-prod.org_mozilla_ios_firefox_derived.legacy_mobile_event_counts_v2" update_schema(bq, legacy_core, schema) update_schema(bq, legacy_event, schema) # these columns needs to be excluded due to a change in view generation (metrics) # for more details, see: https://github.com/mozilla/bigquery-etl/pull/4029 # and https://bugzilla.mozilla.org/show_bug.cgi?id=1741487 - columns_to_exclude = ("root.metrics.text RECORD", "root.metrics.url RECORD", "root.metrics.jwe RECORD", "root.metrics.labeled_rate RECORD",) - stripped = [c.split()[0].lstrip("root.") for c in column_summary if c not in columns_to_exclude] + columns_to_exclude = ( + "root.metrics.text RECORD", + "root.metrics.url RECORD", + "root.metrics.jwe RECORD", + "root.metrics.labeled_rate RECORD", + ) + stripped = [ + c.split()[0].lstrip("root.") + for c in column_summary + if c not in columns_to_exclude + ] query_glean = generate_query( ['"glean" as telemetry_system', *stripped], @@ -185,7 +192,9 @@ def main(): replacements=query_legacy_replacements, ) - view_body = reformat(" UNION ALL ".join([query_glean, query_legacy_core, query_legacy_events])) + view_body = reformat( + " UNION ALL ".join([query_glean, query_legacy_core, query_legacy_events]) + ) print(view_body) view_id = "moz-fx-data-shared-prod.org_mozilla_ios_firefox.unified_metrics" try: diff --git a/sql/moz-fx-data-shared-prod/telemetry/longitudinal.sql.py b/sql/moz-fx-data-shared-prod/telemetry/longitudinal.sql.py index c86c3175a1..33bb71636c 100755 --- a/sql/moz-fx-data-shared-prod/telemetry/longitudinal.sql.py +++ b/sql/moz-fx-data-shared-prod/telemetry/longitudinal.sql.py @@ -82,9 +82,9 @@ def generate_sql(opts): opts["from"] or six_months_before(opts["to"]), "'%Y-%m-%d'" ), "to": datetime.datetime.strftime(opts["to"], "'%Y-%m-%d'"), - "where": "\n{}AND {}".format(" " * 10, opts["where"]) - if opts["where"] - else "", + "where": ( + "\n{}AND {}".format(" " * 10, opts["where"]) if opts["where"] else "" + ), "ordering_columns": ", ".join( opts["ordering_columns"] or [opts["submission_date_col"]] ), diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/crash_signatures_v1/query.py b/sql/moz-fx-data-shared-prod/telemetry_derived/crash_signatures_v1/query.py index 973dc87d40..b936315b65 100644 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/crash_signatures_v1/query.py +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/crash_signatures_v1/query.py @@ -1,4 +1,5 @@ """Generate signatures for symbolicated crash pings via siggen.""" + from pathlib import Path import click diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/crash_symbols_v1/query.py b/sql/moz-fx-data-shared-prod/telemetry_derived/crash_symbols_v1/query.py index 3e4b941bbd..3a18221799 100644 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/crash_symbols_v1/query.py +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/crash_symbols_v1/query.py @@ -1,4 +1,5 @@ """Upload symbols used in crash pings.""" + from datetime import datetime from functools import partial from multiprocessing.pool import ThreadPool as Pool diff --git a/sql/moz-fx-data-shared-prod/telemetry_dev_cycle_external/experiments_stats_v1/query.py b/sql/moz-fx-data-shared-prod/telemetry_dev_cycle_external/experiments_stats_v1/query.py index f64bc5c8ae..d25e935580 100644 --- a/sql/moz-fx-data-shared-prod/telemetry_dev_cycle_external/experiments_stats_v1/query.py +++ b/sql/moz-fx-data-shared-prod/telemetry_dev_cycle_external/experiments_stats_v1/query.py @@ -117,9 +117,11 @@ def compare_experiments_with_metric_hub_configs(): metric_files = download_metric_hub_files(API_BASE_URL_METRIC_HUB) experiments = [ - {**experiment, "has_config": True} - if experiment["slug"] in metric_files - else {**experiment, "has_config": False} + ( + {**experiment, "has_config": True} + if experiment["slug"] in metric_files + else {**experiment, "has_config": False} + ) for experiment in (experiments_v1 + experiments_v6) ] return experiments diff --git a/sql_generators/active_users/__init__.py b/sql_generators/active_users/__init__.py index 5bc07e1742..39b288fc43 100644 --- a/sql_generators/active_users/__init__.py +++ b/sql_generators/active_users/__init__.py @@ -1,4 +1,5 @@ """Generate active users aggregates per app.""" + import os from enum import Enum from pathlib import Path diff --git a/sql_generators/active_users_deletion_requests/__init__.py b/sql_generators/active_users_deletion_requests/__init__.py index 67450faf5d..638175e045 100644 --- a/sql_generators/active_users_deletion_requests/__init__.py +++ b/sql_generators/active_users_deletion_requests/__init__.py @@ -1,4 +1,5 @@ """Generate active users aggregates per app.""" + import os from enum import Enum from pathlib import Path diff --git a/sql_generators/country_code_lookup/__init__.py b/sql_generators/country_code_lookup/__init__.py index 43246974b4..f86112463a 100644 --- a/sql_generators/country_code_lookup/__init__.py +++ b/sql_generators/country_code_lookup/__init__.py @@ -1,4 +1,5 @@ """Country code lookup generation.""" + import os from collections import Counter from pathlib import Path diff --git a/sql_generators/events_daily/__init__.py b/sql_generators/events_daily/__init__.py index b1d21adfef..dbf5e9f5f4 100755 --- a/sql_generators/events_daily/__init__.py +++ b/sql_generators/events_daily/__init__.py @@ -1,4 +1,5 @@ """Generate query directories.""" + import os from dataclasses import dataclass from pathlib import Path diff --git a/sql_generators/experiment_monitoring/__init__.py b/sql_generators/experiment_monitoring/__init__.py index 7732e45cb5..7a95e44c0f 100644 --- a/sql_generators/experiment_monitoring/__init__.py +++ b/sql_generators/experiment_monitoring/__init__.py @@ -1,4 +1,5 @@ """Experiment monitoring materialized view generation.""" + import os from pathlib import Path diff --git a/sql_generators/feature_usage/__init__.py b/sql_generators/feature_usage/__init__.py index 294c9a1e81..b9cbdc67d6 100644 --- a/sql_generators/feature_usage/__init__.py +++ b/sql_generators/feature_usage/__init__.py @@ -1,4 +1,5 @@ """Feature usage table generation.""" + import os import shutil from pathlib import Path diff --git a/sql_generators/funnels/__init__.py b/sql_generators/funnels/__init__.py index 49d5b564f8..6a84e65227 100644 --- a/sql_generators/funnels/__init__.py +++ b/sql_generators/funnels/__init__.py @@ -1,4 +1,5 @@ """Funnel generation.""" + import os import re from pathlib import Path diff --git a/sql_generators/glean_usage/__init__.py b/sql_generators/glean_usage/__init__.py index 086d9f37a9..b56f701c3b 100644 --- a/sql_generators/glean_usage/__init__.py +++ b/sql_generators/glean_usage/__init__.py @@ -1,4 +1,5 @@ """GLEAN Usage.""" + from functools import cache, partial from pathlib import Path diff --git a/sql_generators/glean_usage/baseline_clients_daily.py b/sql_generators/glean_usage/baseline_clients_daily.py index 00c142beba..024133ae4c 100644 --- a/sql_generators/glean_usage/baseline_clients_daily.py +++ b/sql_generators/glean_usage/baseline_clients_daily.py @@ -1,4 +1,5 @@ """Generating and run baseline_clients_daily queries for Glean apps.""" + from sql_generators.glean_usage.common import GleanTable BASELINE_DAILY_TABLE_ID = "baseline_clients_daily_v1" diff --git a/sql_generators/glean_usage/event_monitoring_live.py b/sql_generators/glean_usage/event_monitoring_live.py index 60e5906dd5..2a44dc1d45 100644 --- a/sql_generators/glean_usage/event_monitoring_live.py +++ b/sql_generators/glean_usage/event_monitoring_live.py @@ -77,9 +77,11 @@ class EventMonitoringLive(GleanTable): for app_dataset in app if dataset == app_dataset["bq_dataset_family"] ][0], - events_table=default_events_table - if dataset not in events_table_overwrites - else events_table_overwrites[dataset], + events_table=( + default_events_table + if dataset not in events_table_overwrites + else events_table_overwrites[dataset] + ), ) render_kwargs.update(self.custom_render_kwargs) diff --git a/sql_generators/glean_usage/glean_app_ping_views.py b/sql_generators/glean_usage/glean_app_ping_views.py index 5b1bba18b2..24cdf05a42 100644 --- a/sql_generators/glean_usage/glean_app_ping_views.py +++ b/sql_generators/glean_usage/glean_app_ping_views.py @@ -7,6 +7,7 @@ the stable tables are possible. For views that have incomaptible schemas (e.g due to fields having mismatching types), the view is only generated for the release channel. """ + import os from copy import deepcopy from pathlib import Path diff --git a/sql_generators/search/__init__.py b/sql_generators/search/__init__.py index 6223e71363..976ba9b854 100755 --- a/sql_generators/search/__init__.py +++ b/sql_generators/search/__init__.py @@ -4,6 +4,7 @@ Generate mobile search clients_daily query. Create a combined CTE for metrics and baseline for Android and iOS Glean apps, then print the query to a file in the output directory. """ + from pathlib import Path from typing import List diff --git a/sql_generators/serp_events/__init__.py b/sql_generators/serp_events/__init__.py index 2fe8990cd4..40a8970a3b 100644 --- a/sql_generators/serp_events/__init__.py +++ b/sql_generators/serp_events/__init__.py @@ -1,4 +1,5 @@ """Generate serp events aggregates per app.""" + import os from enum import Enum from pathlib import Path diff --git a/sql_generators/urlbar_events/__init__.py b/sql_generators/urlbar_events/__init__.py index b22e738e0d..3a6a331d33 100644 --- a/sql_generators/urlbar_events/__init__.py +++ b/sql_generators/urlbar_events/__init__.py @@ -1,4 +1,5 @@ """Generate active users aggregates per app.""" + import os import shutil from enum import Enum diff --git a/sql_generators/use_counters/__init__.py b/sql_generators/use_counters/__init__.py index 29b6f4be4e..b8fd927197 100644 --- a/sql_generators/use_counters/__init__.py +++ b/sql_generators/use_counters/__init__.py @@ -1,4 +1,5 @@ """Use counter table generation.""" + import os import shutil from pathlib import Path diff --git a/tests/data/dags/python_script_test_dag b/tests/data/dags/python_script_test_dag index 5619c0c4d5..7a12d21408 100644 --- a/tests/data/dags/python_script_test_dag +++ b/tests/data/dags/python_script_test_dag @@ -45,6 +45,7 @@ with DAG( doc_md=docs, tags=tags, ) as dag: + task_group_test_group = TaskGroup("test_group") test__python_script_query__v1 = GKEPodOperator( diff --git a/tests/data/dags/simple_test_dag b/tests/data/dags/simple_test_dag index d47f18e44c..7f6c7c1e53 100644 --- a/tests/data/dags/simple_test_dag +++ b/tests/data/dags/simple_test_dag @@ -49,6 +49,7 @@ with DAG( doc_md=docs, tags=tags, ) as dag: + test__non_incremental_query__v1 = bigquery_etl_query( task_id="test__non_incremental_query__v1", destination_table="non_incremental_query_v1", diff --git a/tests/data/dags/test_dag_duplicate_dependencies b/tests/data/dags/test_dag_duplicate_dependencies index 797ddaafd6..f0aa567f34 100644 --- a/tests/data/dags/test_dag_duplicate_dependencies +++ b/tests/data/dags/test_dag_duplicate_dependencies @@ -45,6 +45,7 @@ with DAG( doc_md=docs, tags=tags, ) as dag: + test__no_metadata_query__v1 = bigquery_etl_query( task_id="test__no_metadata_query__v1", destination_table='no_metadata_query_v1${{ macros.ds_format(macros.ds_add(ds, -2), "%Y-%m-%d", "%Y%m%d") }}', diff --git a/tests/data/dags/test_dag_external_check_dependency b/tests/data/dags/test_dag_external_check_dependency index af64dca9f1..b1de636485 100644 --- a/tests/data/dags/test_dag_external_check_dependency +++ b/tests/data/dags/test_dag_external_check_dependency @@ -45,6 +45,7 @@ with DAG( doc_md=docs, tags=tags, ) as dag: + checks__fail_test__external_table__v1 = bigquery_dq_check( task_id="checks__fail_test__external_table__v1", source_table="external_table_v1", @@ -82,4 +83,4 @@ with DAG( depends_on_past=False, ) - checks__fail_test__external_table__v1.set_upstream(test__external_table__v1) \ No newline at end of file + checks__fail_test__external_table__v1.set_upstream(test__external_table__v1) diff --git a/tests/data/dags/test_dag_external_dependency b/tests/data/dags/test_dag_external_dependency index a5dfb7ed01..7a4f7f7cd8 100644 --- a/tests/data/dags/test_dag_external_dependency +++ b/tests/data/dags/test_dag_external_dependency @@ -45,6 +45,7 @@ with DAG( doc_md=docs, tags=tags, ) as dag: + test__external_table__v1 = bigquery_etl_query( task_id="test__external_table__v1", destination_table="external_table_v1", diff --git a/tests/data/dags/test_dag_with_check_dependencies b/tests/data/dags/test_dag_with_check_dependencies index f98f710c5b..4402ec251d 100644 --- a/tests/data/dags/test_dag_with_check_dependencies +++ b/tests/data/dags/test_dag_with_check_dependencies @@ -45,6 +45,7 @@ with DAG( doc_md=docs, tags=tags, ) as dag: + checks__fail_test__table1__v1 = bigquery_dq_check( task_id="checks__fail_test__table1__v1", source_table="table1_v1", @@ -108,4 +109,4 @@ with DAG( test__query__v1.set_upstream(checks__fail_test__table1__v1) - test__query__v1.set_upstream(test__table2__v1) \ No newline at end of file + test__query__v1.set_upstream(test__table2__v1) diff --git a/tests/data/dags/test_dag_with_check_table_dependencies b/tests/data/dags/test_dag_with_check_table_dependencies index 6fdb6fa97a..37ba0b6a72 100644 --- a/tests/data/dags/test_dag_with_check_table_dependencies +++ b/tests/data/dags/test_dag_with_check_table_dependencies @@ -45,6 +45,7 @@ with DAG( doc_md=docs, tags=tags, ) as dag: + checks__fail_test__table1__v1 = bigquery_dq_check( task_id="checks__fail_test__table1__v1", source_table="table1_v1", diff --git a/tests/data/dags/test_dag_with_dependencies b/tests/data/dags/test_dag_with_dependencies index 6d4dfd6280..a6d5a8633e 100644 --- a/tests/data/dags/test_dag_with_dependencies +++ b/tests/data/dags/test_dag_with_dependencies @@ -45,6 +45,7 @@ with DAG( doc_md=docs, tags=tags, ) as dag: + test__query__v1 = bigquery_etl_query( task_id="test__query__v1", destination_table="query_v1", diff --git a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1/test_minimal/data.py b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1/test_minimal/data.py index 80ccf38b00..d48e921e0e 100644 --- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1/test_minimal/data.py +++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1/test_minimal/data.py @@ -1,4 +1,5 @@ """Testing data for query.""" + from itertools import product from pathlib import Path from uuid import uuid4 diff --git a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_percentiles_v1/test_minimal/data.py b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_percentiles_v1/test_minimal/data.py index 3f2c8b4eee..4b2d8b103a 100644 --- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_percentiles_v1/test_minimal/data.py +++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_percentiles_v1/test_minimal/data.py @@ -1,4 +1,5 @@ """Testing data for query.""" + from pathlib import Path from uuid import uuid4 diff --git a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_probe_counts_v1/test_minimal/data.py b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_probe_counts_v1/test_minimal/data.py index f1b0ba14fd..ec8471e02c 100644 --- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_probe_counts_v1/test_minimal/data.py +++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_probe_counts_v1/test_minimal/data.py @@ -1,4 +1,5 @@ """Testing data for query.""" + from pathlib import Path import yaml diff --git a/tests/sql/moz-fx-data-shared-prod/org_mozilla_fenix_derived/geckoview_version_v1/bootstrap.py b/tests/sql/moz-fx-data-shared-prod/org_mozilla_fenix_derived/geckoview_version_v1/bootstrap.py index 5b94340c77..6e22e1910e 100644 --- a/tests/sql/moz-fx-data-shared-prod/org_mozilla_fenix_derived/geckoview_version_v1/bootstrap.py +++ b/tests/sql/moz-fx-data-shared-prod/org_mozilla_fenix_derived/geckoview_version_v1/bootstrap.py @@ -1,4 +1,5 @@ """Code for setting up the the tests.""" + import math import shutil from datetime import datetime, timedelta