bigquery-etl/script/format_sql

255 строки
11 KiB
Python
Executable File

#!/usr/bin/env python3
"""Format SQL."""
from argparse import ArgumentParser
import glob
import os
import os.path
import sys
# sys.path needs to be modified to enable package imports from parent
# and sibling directories. Also see:
# https://stackoverflow.com/questions/6323860/sibling-package-imports/23542795#23542795
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from bigquery_etl.format_sql.formatter import reformat # noqa E402
# Paths that the directory search in this script leaves unformatted: these
# files existed before we started to enforce this script.
# Start from every SQL template currently found in the glam templates directory.
SKIP = set(glob.glob("bigquery_etl/glam/templates/*.sql"))
SKIP.update(
    [
        # bigquery_etl/glean_usage templates
        "bigquery_etl/glean_usage/templates/baseline_clients_daily.view.sql",
        "bigquery_etl/glean_usage/templates/baseline_clients_daily_v1.sql",
        "bigquery_etl/glean_usage/templates/baseline_clients_last_seen.view.sql",
        "bigquery_etl/glean_usage/templates/baseline_clients_last_seen_v1.sql",
        # sql/telemetry
        "sql/telemetry/experiments_aggregates_v1/view.sql",
        "sql/telemetry/fenix_events_v1/view.sql",
        "sql/telemetry/fennec_ios_events_v1/view.sql",
        "sql/telemetry/fire_tv_events_v1/view.sql",
        "sql/telemetry/first_shutdown_summary/view.sql",
        "sql/telemetry/first_shutdown_summary_v4/view.sql",
        "sql/telemetry/focus_android_events_v1/view.sql",
        "sql/telemetry/lockwise_mobile_events_v1/view.sql",
        "sql/telemetry/origin_content_blocking/view.sql",
        "sql/telemetry/rocket_android_events_v1/view.sql",
        "sql/telemetry/simpleprophet_forecasts/view.sql",
        "sql/telemetry/smoot_usage_day_0/view.sql",
        "sql/telemetry/smoot_usage_day_13/view.sql",
        "sql/telemetry/sync/view.sql",
        "sql/telemetry/telemetry_anonymous_parquet/view.sql",
        "sql/telemetry/telemetry_anonymous_parquet_v1/view.sql",
        "sql/telemetry/telemetry_core_parquet/view.sql",
        "sql/telemetry/telemetry_core_parquet_v3/view.sql",
        "sql/telemetry/telemetry_downgrade_parquet/view.sql",
        "sql/telemetry/telemetry_downgrade_parquet_v1/view.sql",
        "sql/telemetry/telemetry_focus_event_parquet/view.sql",
        "sql/telemetry/telemetry_focus_event_parquet_v1/view.sql",
        "sql/telemetry/telemetry_heartbeat_parquet/view.sql",
        "sql/telemetry/telemetry_heartbeat_parquet_v1/view.sql",
        "sql/telemetry/telemetry_ip_privacy_parquet/view.sql",
        "sql/telemetry/telemetry_ip_privacy_parquet_v1/view.sql",
        "sql/telemetry/telemetry_mobile_event_parquet/view.sql",
        "sql/telemetry/telemetry_mobile_event_parquet_v2/view.sql",
        "sql/telemetry/telemetry_new_profile_parquet/view.sql",
        "sql/telemetry/telemetry_new_profile_parquet_v2/view.sql",
        "sql/telemetry/telemetry_shield_study_parquet/view.sql",
        "sql/telemetry/telemetry_shield_study_parquet_v1/view.sql",
        "sql/telemetry/windows_10_aggregate/view.sql",
        "sql/telemetry/windows_10_build_distribution/view.sql",
        "sql/telemetry/windows_10_patch_adoption/view.sql",
        # sql/telemetry_derived
        "sql/telemetry_derived/attitudes_daily_v1/init.sql",
        "sql/telemetry_derived/attitudes_daily_v1/query.sql",
        "sql/telemetry_derived/clients_daily_histogram_aggregates_v1/init.sql",
        "sql/telemetry_derived/clients_daily_histogram_aggregates_v1/query.sql",
        "sql/telemetry_derived/clients_daily_keyed_boolean_aggregates_v1/query.sql",
        "sql/telemetry_derived/clients_daily_keyed_histogram_aggregates_v1/query.sql",
        "sql/telemetry_derived/clients_daily_keyed_scalar_aggregates_v1/query.sql",
        "sql/telemetry_derived/clients_daily_scalar_aggregates_v1/init.sql",
        "sql/telemetry_derived/clients_daily_scalar_aggregates_v1/query.sql",
        "sql/telemetry_derived/clients_histogram_aggregates_v1/init.sql",
        "sql/telemetry_derived/clients_histogram_aggregates_v1/query.sql",
        "sql/telemetry_derived/clients_histogram_probe_counts_v1/query.sql",
        "sql/telemetry_derived/clients_scalar_probe_counts_v1/query.sql",
        "sql/telemetry_derived/core_clients_daily_v1/query.sql",
        "sql/telemetry_derived/core_clients_last_seen_v1/init.sql",
        "sql/telemetry_derived/core_clients_last_seen_v1/query.sql",
        "sql/telemetry_derived/core_live/view.sql",
        "sql/telemetry_derived/devtools_events_amplitude_v1/view.sql",
        "sql/telemetry_derived/error_aggregates/query.sql",
        "sql/telemetry_derived/event_events_v1/init.sql",
        "sql/telemetry_derived/event_events_v1/query.sql",
        "sql/telemetry_derived/events_live/view.sql",
        "sql/telemetry_derived/experiment_enrollment_aggregates_v1/init.sql",
        "sql/telemetry_derived/experiment_enrollment_aggregates_v1/query.sql",
        "sql/telemetry_derived/experiments_daily_active_clients_v1/init.sql",
        "sql/telemetry_derived/firefox_desktop_exact_mau28_by_client_count_dimensions_v1/query.sql",  # noqa E501
        "sql/telemetry_derived/fxa_users_services_daily_v1/query.sql",
        "sql/telemetry_derived/fxa_users_services_last_seen_v1/query.sql",
        "sql/telemetry_derived/glam_user_counts_v1/query.sql",
        "sql/telemetry_derived/latest_versions/query.sql",
        "sql/telemetry_derived/main_events_v1/init.sql",
        "sql/telemetry_derived/main_events_v1/query.sql",
        "sql/telemetry_derived/scalar_percentiles_v1/query.sql",
        "sql/telemetry_derived/smoot_usage_desktop_v2/query.sql",
        "sql/telemetry_derived/smoot_usage_fxa_v2/query.sql",
        "sql/telemetry_derived/smoot_usage_new_profiles_v2/query.sql",
        "sql/telemetry_derived/smoot_usage_nondesktop_v2/query.sql",
        "sql/telemetry_derived/surveygizmo_daily_attitudes/init.sql",
        # sql/search_derived
        "sql/search_derived/mobile_search_clients_daily_v1/fenix_metrics.template.sql",
        "sql/search_derived/mobile_search_clients_daily_v1/mobile_search_clients_daily.template.sql",  # noqa E501
        # udf
        "udf/active_n_weeks_ago.sql",
        "udf/add_monthly_engine_searches.sql",
        "udf/add_monthly_searches.sql",
        "udf/aggregate_search_map.sql",
        "udf/array_11_zeroes_then.sql",
        "udf/array_drop_first_and_append.sql",
        "udf/array_of_12_zeroes.sql",
        "udf/bitcount_lowest_7.sql",
        "udf/bitmask_365.sql",
        "udf/bitmask_lowest_28.sql",
        "udf/bitmask_lowest_7.sql",
        "udf/bitmask_range.sql",
        "udf/bits_to_days_seen.sql",
        "udf/bits_to_days_since_first_seen.sql",
        "udf/bits_to_days_since_seen.sql",
        "udf/bool_to_365_bits.sql",
        "udf/coalesce_adjacent_days_28_bits.sql",
        "udf/coalesce_adjacent_days_365_bits.sql",
        "udf/combine_adjacent_days_365_bits.sql",
        "udf/combine_experiment_days.sql",
        "udf/country_code_to_flag.sql",
        "udf/days_seen_bytes_to_rfm.sql",
        "udf/days_since_created_profile_as_28_bits.sql",
        "udf/deanonymize_event.sql",
        "udf/decode_int64.sql",
        "udf/dedupe_array.sql",
        "udf/extract_count_histogram_value.sql",
        "udf/extract_document_type.sql",
        "udf/extract_document_version.sql",
        "udf/get_key_with_null.sql",
        "udf/glean_timespan_nanos.sql",
        "udf/glean_timespan_seconds.sql",
        "udf/int_to_365_bits.sql",
        "udf/int_to_hex_string.sql",
        "udf/keyed_histogram_get_sum.sql",
        "udf/kv_array_to_json_string.sql",
        "udf/mode_last.sql",
        "udf/new_monthly_engine_searches_struct.sql",
        "udf/normalize_glean_ping_info.sql",
        "udf/normalize_metadata.sql",
        "udf/normalize_search_engine.sql",
        "udf/one_as_365_bits.sql",
        "udf/parse_desktop_telemetry_uri.sql",
        "udf/parse_iso8601_date.sql",
        "udf/pos_of_leading_set_bit.sql",
        "udf/pos_of_trailing_set_bit.sql",
        "udf/round_timestamp_to_minute.sql",
        "udf/shift_28_bits_one_day.sql",
        "udf/shift_365_bits_one_day.sql",
        "udf/shift_one_day.sql",
        "udf/smoot_usage_from_28_bits.sql",
        "udf/vector_add.sql",
        "udf/zero_as_365_bits.sql",
        "udf/zeroed_array.sql",
        # udf_js
        "udf_js/crc32.sql",
        "udf_js/gunzip.sql",
        "udf_js/jackknife_mean_ci.sql",
        "udf_js/jackknife_ratio_ci.sql",
        "udf_js/jackknife_sum_ci.sql",
        "udf_js/json_extract_events.sql",
        "udf_js/json_extract_histogram.sql",
        "udf_js/json_extract_keyed_histogram.sql",
        "udf_js/json_extract_missing_cols.sql",
        "udf_js/sample_id.sql",
        # udf_legacy
        "udf_legacy/contains.sql",
        "udf_legacy/date_format.sql",
        "udf_legacy/date_trunc.sql",
        "udf_legacy/to_iso8601.sql",
        # stored_procedures
        "stored_procedures/safe_crc32_uuid.sql",
    ]
)
# Command-line interface; the module docstring is reused as the description.
parser = ArgumentParser(description=__doc__)

# Zero or more positional PATH arguments, collected into a list.
parser.add_argument(
    "paths",
    nargs="*",
    metavar="PATH",
    help="; ".join(
        [
            "file or directory to format",
            "if not specified read from stdin and write to stdout",
            "recursively search directories for .sql files",
        ]
    ),
)

# Boolean flag that is False unless --check is given.
parser.add_argument(
    "--check",
    action="store_true",
    help="; ".join(
        [
            "do not write changes, just return status",
            "return code 0 indicates nothing would change",
            "return code 1 indicates some files would be reformatted",
        ]
    ),
)
def _find_sql_files(path):
    """Yield the ``.sql`` files under directory *path* that should be formatted.

    The tree is traversed with ``os.walk``. Two kinds of files are left out:
    ``input.sql`` files located under a ``tests`` tree, and files listed in
    ``SKIP``.
    """
    for dirpath, _, filenames in os.walk(path):
        # SKIP entries and the "tests" prefix are plain relative paths such as
        # "sql/...", while os.walk echoes back whatever spelling was given on
        # the command line ("./sql", "."). Comparing the normalized form too
        # makes those spellings behave like the plain one.
        in_tests = path.startswith("tests") or os.path.normpath(dirpath).startswith(
            "tests"
        )
        for filename in filenames:
            if not filename.endswith(".sql"):
                continue
            # skip tests/**/input.sql
            if in_tests and filename == "input.sql":
                continue
            filepath = os.path.join(dirpath, filename)
            if filepath in SKIP or os.path.normpath(filepath) in SKIP:
                continue
            # Yield the un-normalized path so reported names match the input.
            yield filepath


def _format_stdin(check):
    """Format the query read from stdin.

    Without *check* the formatted query is written to stdout. With *check*
    nothing is written and the process exits with status 1 if formatting
    would change the input.
    """
    query = sys.stdin.read()
    formatted = reformat(query) + "\n"
    if not check:
        print(formatted, end="")
    elif query != formatted:
        sys.exit(1)


def main():
    """Format SQL from stdin or from the files and directories given as PATH.

    Exit status: 255 when there is nothing to format (no PATH and stdin is a
    terminal, or no files collected from the given paths); 1 when ``--check``
    is set and at least one input would be reformatted.
    """
    args = parser.parse_args()

    if not args.paths:
        if sys.stdin.isatty():
            parser.print_help()
            print("Error: must specify PATH or provide input via stdin")
            sys.exit(255)
        _format_stdin(args.check)
        return

    sql_files = []
    for path in args.paths:
        if os.path.isdir(path):
            sql_files.extend(_find_sql_files(path))
        elif path:
            # Non-directory paths are taken as given; SKIP is not consulted.
            sql_files.append(path)
    if not sql_files:
        print("Error: no files were found to format")
        sys.exit(255)
    sql_files.sort()

    reformatted = unchanged = 0
    for path in sql_files:
        with open(path) as fp:
            query = fp.read()
        formatted = reformat(query) + "\n"
        if query == formatted:
            unchanged += 1
            continue
        if args.check:
            print(f"would reformat {path}")
        else:
            with open(path, "w") as fp:
                fp.write(formatted)
            print(f"reformatted {path}")
        reformatted += 1

    # Summary line; counts that are zero are omitted.
    print(
        ", ".join(
            f"{number} file{'s' if number > 1 else ''}"
            f"{' would be' if args.check else ''} {msg}"
            for number, msg in [
                (reformatted, "reformatted"),
                (unchanged, "left unchanged"),
            ]
            if number > 0
        )
        + "."
    )
    if args.check and reformatted:
        sys.exit(1)


if __name__ == "__main__":
    main()