Make validate-metadata run on all files (#6891)
This commit is contained in:
Родитель
fa5b93dc8b
Коммит
2ee41e836f
|
@ -297,7 +297,7 @@ jobs:
|
|||
command: |
|
||||
# TODO: Add check here to make sure all queries have metadata.yaml
|
||||
PATH="venv/bin:$PATH" script/bqetl query validate \
|
||||
--respect-dryrun-skip
|
||||
--no-dryrun --skip-format-sql
|
||||
- *copy_debug_sql
|
||||
- *store_debug_artifacts
|
||||
- unless:
|
||||
|
|
|
@ -99,7 +99,7 @@ def dryrun(
|
|||
|
||||
if not sql_files:
|
||||
click.echo("Skipping dry run because no queries matched")
|
||||
sys.exit(0)
|
||||
return
|
||||
|
||||
if not use_cloud_function and not is_authenticated():
|
||||
click.echo(
|
||||
|
|
|
@ -1359,6 +1359,7 @@ def _run_part(
|
|||
)
|
||||
@respect_dryrun_skip_option(default=False)
|
||||
@no_dryrun_option(default=False)
|
||||
@click.option("--skip_format_sql", "--skip-format-sql", is_flag=True, default=False)
|
||||
@click.pass_context
|
||||
def validate(
|
||||
ctx,
|
||||
|
@ -1369,6 +1370,7 @@ def validate(
|
|||
validate_schemas,
|
||||
respect_dryrun_skip,
|
||||
no_dryrun,
|
||||
skip_format_sql,
|
||||
):
|
||||
"""Validate queries by dry running, formatting and checking scheduling configs."""
|
||||
if name is None:
|
||||
|
@ -1376,8 +1378,11 @@ def validate(
|
|||
|
||||
query_files = paths_matching_name_pattern(name, sql_dir, project_id)
|
||||
dataset_dirs = set()
|
||||
errors = []
|
||||
for query in query_files:
|
||||
ctx.invoke(format, paths=[str(query)])
|
||||
click.echo(f"Validating metadata for {query}")
|
||||
if not skip_format_sql:
|
||||
ctx.invoke(format, paths=[str(query)])
|
||||
|
||||
if not no_dryrun:
|
||||
ctx.invoke(
|
||||
|
@ -1389,14 +1394,27 @@ def validate(
|
|||
respect_skip=respect_dryrun_skip,
|
||||
)
|
||||
|
||||
validate_metadata.validate(query.parent)
|
||||
try:
|
||||
validate_metadata.validate(query.parent)
|
||||
except validate_metadata.MetadataValidationError as e:
|
||||
errors.append(str(e))
|
||||
dataset_dirs.add(query.parent.parent)
|
||||
|
||||
if no_dryrun:
|
||||
click.echo("Dry run skipped for query files.")
|
||||
|
||||
for dataset_dir in dataset_dirs:
|
||||
validate_metadata.validate_datasets(dataset_dir)
|
||||
try:
|
||||
validate_metadata.validate_datasets(dataset_dir)
|
||||
except validate_metadata.MetadataValidationError as e:
|
||||
errors.append(str(e))
|
||||
|
||||
if len(errors) > 0:
|
||||
click.echo(
|
||||
f"Failed to validate {len(errors)} metadata files (see above for error messages):"
|
||||
)
|
||||
click.echo("\n".join(errors))
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _initialize_in_parallel(
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
|
||||
|
@ -111,6 +110,14 @@ def validate_shredder_mitigation(query_dir, metadata):
|
|||
|
||||
if has_shredder_mitigation:
|
||||
schema_file = Path(query_dir) / SCHEMA_FILE
|
||||
if not schema_file.exists():
|
||||
click.echo(
|
||||
click.style(
|
||||
f"Table {query_dir} does not have schema.yaml required for shredder mitigation.",
|
||||
fg="yellow",
|
||||
)
|
||||
)
|
||||
return False
|
||||
schema = Schema.from_schema_file(schema_file).to_bigquery_schema()
|
||||
|
||||
# This label requires that the query doesn't have id-level columns,
|
||||
|
@ -281,6 +288,10 @@ def validate_retention_policy_based_on_table_type(metadata, path):
|
|||
return is_valid
|
||||
|
||||
|
||||
class MetadataValidationError(Exception):
|
||||
"""Metadata validation failed."""
|
||||
|
||||
|
||||
def validate(target):
|
||||
"""Validate metadata files."""
|
||||
failed = False
|
||||
|
@ -322,11 +333,11 @@ def validate(target):
|
|||
# TODO: add more validation
|
||||
# e.g. https://github.com/mozilla/bigquery-etl/issues/924
|
||||
else:
|
||||
logging.error(f"Invalid target: {target}, target must be a directory.")
|
||||
sys.exit(1)
|
||||
raise ValueError(f"Invalid target: {target}, target must be a directory.")
|
||||
|
||||
if failed:
|
||||
sys.exit(1)
|
||||
# TODO: include the names of the failed checks in the error message
|
||||
raise MetadataValidationError(f"Metadata validation failed for {target}")
|
||||
|
||||
|
||||
def validate_datasets(target):
|
||||
|
@ -340,11 +351,12 @@ def validate_datasets(target):
|
|||
path = os.path.join(root, file)
|
||||
_ = DatasetMetadata.from_file(path)
|
||||
else:
|
||||
logging.error(f"Invalid target: {target}, target must be a directory.")
|
||||
sys.exit(1)
|
||||
raise ValueError(f"Invalid target: {target}, target must be a directory.")
|
||||
|
||||
if failed:
|
||||
sys.exit(1)
|
||||
raise MetadataValidationError(
|
||||
f"Dataset metadata validation failed for {target}"
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
|
|
Загрузка…
Ссылка в новой задаче