From 33f9017c758a625f9e95a0d32b14b5426d7f5b50 Mon Sep 17 00:00:00 2001 From: Winnie Chan <10429026+wwyc@users.noreply.github.com> Date: Tue, 19 Mar 2024 11:17:32 -0700 Subject: [PATCH] DENG-2823: Added deprecate cli command (#5219) * Added deprecate cli command * Fixed typo * Fixed failed tests * Fixed deletion date label * Update bigquery_etl/metadata/parse_metadata.py Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com> * Fixed deletion date * Fixed arguments optional * Added return back * Added invalid deletion date test --------- Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com> --- bigquery_etl/cli/metadata.py | 47 ++++++++ bigquery_etl/metadata/parse_metadata.py | 9 ++ bigquery_etl/metadata/publish_metadata.py | 3 + bigquery_etl/metadata/validate_metadata.py | 14 +++ tests/cli/test_cli_metadata.py | 103 +++++++++++++++++- tests/metadata/test_validate_metadata.py | 41 ++++++- .../metadata.yaml | 1 + 7 files changed, 216 insertions(+), 2 deletions(-) diff --git a/bigquery_etl/cli/metadata.py b/bigquery_etl/cli/metadata.py index b6f110acf6..72ac22a26f 100644 --- a/bigquery_etl/cli/metadata.py +++ b/bigquery_etl/cli/metadata.py @@ -1,9 +1,11 @@ """bigquery-etl CLI metadata command.""" +from datetime import datetime from pathlib import Path from typing import Optional import click +from dateutil.relativedelta import relativedelta from google.cloud import bigquery from bigquery_etl.metadata.parse_metadata import DatasetMetadata, Metadata @@ -126,3 +128,48 @@ def publish(name: str, sql_dir: Optional[str], project_id: Optional[str]) -> Non print("No metadata file for: {}.{}.{}".format(project, dataset, table)) return None + + +@metadata.command( + help=""" + Deprecate BigQuery table by updating metadata.yaml file. + Deletion date is by default 3 months from current date if not provided. + + Example: + ./bqetl metadata deprecate ga_derived.downloads_with_attribution_v2 --deletion_date=2024-03-02 + """ +) +@click.argument("name") +@project_id_option( + ConfigLoader.get("default", "project", fallback="moz-fx-data-shared-prod") +) +@sql_dir_option +@click.option( + "--deletion_date", + "--deletion-date", + help="Date when table is scheduled for deletion. Date format: yyyy-mm-dd", + type=click.DateTime(formats=["%Y-%m-%d"]), + default=datetime.today() + relativedelta(months=+3), +) +def deprecate( + name: str, + sql_dir: str, + project_id: str, + deletion_date: datetime, +) -> None: + """Deprecate Bigquery table by updating metadata yaml file(s).""" + table_metadata_files = paths_matching_name_pattern( + name, sql_dir, project_id=project_id, files=["metadata.yaml"] + ) + + for metadata_file in table_metadata_files: + metadata = Metadata.from_file(metadata_file) + + metadata.deprecated = True + metadata.deletion_date = deletion_date.date() + + metadata.write(metadata_file) + click.echo(f"Updated {metadata_file} with deprecation.") + + if not table_metadata_files: + raise FileNotFoundError(f"No metadata file(s) were found for: {name}") diff --git a/bigquery_etl/metadata/parse_metadata.py b/bigquery_etl/metadata/parse_metadata.py index 80de672a3b..28263e229d 100644 --- a/bigquery_etl/metadata/parse_metadata.py +++ b/bigquery_etl/metadata/parse_metadata.py @@ -4,6 +4,7 @@ import enum import os import re import string +from datetime import date from pathlib import Path from typing import Any, Dict, List, Optional @@ -153,6 +154,7 @@ class Metadata: references: Dict = attr.ib({}) external_data: Optional[ExternalDataMetadata] = attr.ib(None) deprecated: bool = attr.ib(False) + deletion_date: Optional[date] = attr.ib(None) @owners.validator def validate_owners(self, attribute, value): @@ -228,6 +230,7 @@ class Metadata: references = {} external_data = None deprecated = False + deletion_date = None with open(metadata_file, "r") as yaml_stream: try: @@ -295,6 +298,8 @@ class Metadata: ) if "deprecated" in metadata: deprecated = metadata["deprecated"] + if "deletion_date" in metadata: + deletion_date = metadata["deletion_date"] return cls( friendly_name, @@ -308,6 +313,7 @@ class Metadata: references, external_data, deprecated, + deletion_date, ) except yaml.YAMLError as e: raise e @@ -349,6 +355,9 @@ class Metadata: if not metadata_dict["deprecated"]: del metadata_dict["deprecated"] + if not metadata_dict["deletion_date"]: + del metadata_dict["deletion_date"] + file.write_text( yaml.dump( converter.unstructure(metadata_dict), diff --git a/bigquery_etl/metadata/publish_metadata.py b/bigquery_etl/metadata/publish_metadata.py index 07546afb9c..408910987f 100755 --- a/bigquery_etl/metadata/publish_metadata.py +++ b/bigquery_etl/metadata/publish_metadata.py @@ -44,6 +44,9 @@ def publish_metadata(client, project, dataset, table, metadata): if metadata.deprecated is True: table.labels["deprecated"] = "true" + if metadata.deletion_date: + table.labels["deletion_date"] = metadata.deletion_date.strftime("%Y-%m-%d") + # TODO: in the future we can consider updating the table expiration date based on deletion_date client.update_table(table, ["friendly_name", "description", "labels"]) print("Published metadata for: {}.{}.{}".format(project, dataset, table)) diff --git a/bigquery_etl/metadata/validate_metadata.py b/bigquery_etl/metadata/validate_metadata.py index 21469664ee..0b870e92c4 100644 --- a/bigquery_etl/metadata/validate_metadata.py +++ b/bigquery_etl/metadata/validate_metadata.py @@ -98,6 +98,17 @@ def validate_change_control( return True +def validate_deprecation(metadata, path): + """Check that deprecated is True when deletion date exists.""" + if metadata.deletion_date and not metadata.deprecated: + click.echo( + f"Deletion date should only be added when table is deprecated in {path}" + ) + return False + + return True + + def validate(target): """Validate metadata files.""" failed = False @@ -119,6 +130,9 @@ def validate(target): ): failed = True + if not validate_deprecation(metadata, path): + failed = True + # todo more validation # e.g. https://github.com/mozilla/bigquery-etl/issues/924 else: diff --git a/tests/cli/test_cli_metadata.py b/tests/cli/test_cli_metadata.py index eb0cbb429d..bbe47ede14 100644 --- a/tests/cli/test_cli_metadata.py +++ b/tests/cli/test_cli_metadata.py @@ -1,14 +1,16 @@ import distutils import os import tempfile +from datetime import datetime from pathlib import Path from unittest.mock import patch import pytest import yaml from click.testing import CliRunner +from dateutil.relativedelta import relativedelta -from bigquery_etl.cli.metadata import publish, update +from bigquery_etl.cli.metadata import deprecate, publish, update from bigquery_etl.metadata.parse_metadata import Metadata from bigquery_etl.metadata.validate_metadata import validate_change_control @@ -277,6 +279,7 @@ class TestMetadata: == "Clustering fields: `column1`" ) assert mock_bigquery_client().update_table.call_args[0][0].labels == { + "deletion_date": "2024-03-02", "deprecated": "true", "owner1": "test", } @@ -291,6 +294,7 @@ class TestMetadata: assert mock_bigquery_table().friendly_name == "Test metadata.yaml" assert mock_bigquery_table().description == "Clustering fields: `column1`" assert mock_bigquery_table().labels == { + "deletion_date": "2024-03-02", "deprecated": "true", "owner1": "test", } @@ -311,3 +315,100 @@ class TestMetadata: runner.invoke(publish, name, "--sql_dir=" + str(tmpdirname) + "/sql") assert mock_bigquery_client().update_table.call_count == 0 + + def test_metadata_deprecate_default_deletion_date(self, runner): + with tempfile.TemporaryDirectory() as tmpdirname: + distutils.dir_util.copy_tree(str(TEST_DIR), str(tmpdirname)) + + qualified_table_name = ( + "moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6" + ) + result = runner.invoke( + deprecate, + [qualified_table_name, "--sql_dir=" + str(tmpdirname) + "/sql"], + ) + with open( + tmpdirname + + "/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/metadata.yaml", + "r", + ) as stream: + metadata = yaml.safe_load(stream) + + default_deletion_date = (datetime.today() + relativedelta(months=+3)).date() + + assert result.exit_code == 0 + assert metadata["deprecated"] + assert metadata["deletion_date"] == default_deletion_date + + def test_metadata_deprecate_set_deletion_date(self, runner): + with tempfile.TemporaryDirectory() as tmpdirname: + distutils.dir_util.copy_tree(str(TEST_DIR), str(tmpdirname)) + + qualified_table_name = ( + "moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6" + ) + result = runner.invoke( + deprecate, + [ + qualified_table_name, + "--deletion_date=2024-03-02", + "--sql_dir=" + str(tmpdirname) + "/sql", + ], + ) + with open( + tmpdirname + + "/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/metadata.yaml", + "r", + ) as stream: + metadata = yaml.safe_load(stream) + + assert result.exit_code == 0 + assert metadata["deprecated"] + assert metadata["deletion_date"] == datetime(2024, 3, 2).date() + + def test_metadata_deprecate_set_invalid_deletion_date_should_fail(self, runner): + with tempfile.TemporaryDirectory() as tmpdirname: + distutils.dir_util.copy_tree(str(TEST_DIR), str(tmpdirname)) + + qualified_table_name = ( + "moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6" + ) + result = runner.invoke( + deprecate, + [ + qualified_table_name, + "--deletion_date=2024-02", + "--sql_dir=" + str(tmpdirname) + "/sql", + ], + ) + with open( + tmpdirname + + "/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/metadata.yaml", + "r", + ) as stream: + metadata = yaml.safe_load(stream) + + assert result.exit_code == 2 + assert "deprecated" not in metadata + assert "deletion_date" not in metadata + assert "Invalid value for '--deletion_date'" in result.output + + def test_metadata_deprecate_no_metadata(self, runner): + with tempfile.TemporaryDirectory() as tmpdirname: + distutils.dir_util.copy_tree(str(TEST_DIR), str(tmpdirname)) + + qualified_table_name = "moz-fx-data-shared-prod.telemetry_derived.clients_daily_scalar_aggregates_v2" + result = runner.invoke( + deprecate, + [ + qualified_table_name, + "--deletion_date=2024-03-02", + "--sql_dir=" + str(tmpdirname) + "/sql", + ], + ) + + assert result.exit_code == 1 + assert ( + str(result.exception) + == f"No metadata file(s) were found for: {qualified_table_name}" + ) diff --git a/tests/metadata/test_validate_metadata.py b/tests/metadata/test_validate_metadata.py index 2390d765c9..1f99d69b0d 100644 --- a/tests/metadata/test_validate_metadata.py +++ b/tests/metadata/test_validate_metadata.py @@ -1,5 +1,10 @@ +from datetime import date + from bigquery_etl.metadata.parse_metadata import Metadata -from bigquery_etl.metadata.validate_metadata import validate_public_data +from bigquery_etl.metadata.validate_metadata import ( + validate_deprecation, + validate_public_data, +) class TestValidateMetadata(object): @@ -36,3 +41,37 @@ class TestValidateMetadata(object): validate_public_data(metadata_invalid_public, "test/path/metadata.yaml") is False ) + + def test_validate_deprecation(self): + metadata_valid = Metadata( + friendly_name="test", + description="test", + owners=["test@example.org"], + labels={"test": "true", "foo": "abc"}, + deprecated=True, + deletion_date=date(2024, 5, 4), + ) + + assert validate_deprecation(metadata_valid, "test/path/metadata.yaml") + + metadata_valid = Metadata( + friendly_name="test", + description="test", + owners=["test@example.org"], + labels={"test": "true", "foo": "abc"}, + deprecated=True, + deletion_date=None, + ) + + assert validate_deprecation(metadata_valid, "test/path/metadata.yaml") + + metadata_valid = Metadata( + friendly_name="test", + description="test", + owners=["test@example.org"], + labels={"test": "true", "foo": "abc"}, + deprecated=False, + deletion_date=date(2024, 5, 4), + ) + + assert not validate_deprecation(metadata_valid, "test/path/metadata.yaml") diff --git a/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_scalar_aggregates_v1/metadata.yaml b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_scalar_aggregates_v1/metadata.yaml index 40a5f48d52..dea0dd368f 100644 --- a/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_scalar_aggregates_v1/metadata.yaml +++ b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_scalar_aggregates_v1/metadata.yaml @@ -4,3 +4,4 @@ description: |- owners: - test@mozilla.com deprecated: true +deletion_date: 2024-03-02