DENG-2823: Added deprecate cli command (#5219)
* Added deprecate cli command * Fixed typo * Fixed failed tests * Fixed deletion date label * Update bigquery_etl/metadata/parse_metadata.py Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com> * Fixed deletion date * Fixed arguments optional * Added return back * Added invalid deletion date test --------- Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com>
This commit is contained in:
Родитель
2b6d37e288
Коммит
33f9017c75
|
@ -1,9 +1,11 @@
|
|||
"""bigquery-etl CLI metadata command."""
|
||||
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import click
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from google.cloud import bigquery
|
||||
|
||||
from bigquery_etl.metadata.parse_metadata import DatasetMetadata, Metadata
|
||||
|
@ -126,3 +128,48 @@ def publish(name: str, sql_dir: Optional[str], project_id: Optional[str]) -> Non
|
|||
print("No metadata file for: {}.{}.{}".format(project, dataset, table))
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _default_deletion_date() -> datetime:
    """Return the default deletion date: three months after the current date."""
    return datetime.today() + relativedelta(months=+3)


@metadata.command(
    help="""
    Deprecate BigQuery table by updating metadata.yaml file.
    Deletion date is by default 3 months from current date if not provided.

    Example:
    ./bqetl metadata deprecate ga_derived.downloads_with_attribution_v2 --deletion_date=2024-03-02
    """
)
@click.argument("name")
@project_id_option(
    ConfigLoader.get("default", "project", fallback="moz-fx-data-shared-prod")
)
@sql_dir_option
@click.option(
    "--deletion_date",
    "--deletion-date",
    help="Date when table is scheduled for deletion. Date format: yyyy-mm-dd",
    type=click.DateTime(formats=["%Y-%m-%d"]),
    # A callable default is evaluated by click on each invocation; a plain
    # expression here would be frozen at module import time.
    default=_default_deletion_date,
)
def deprecate(
    name: str,
    sql_dir: str,
    project_id: str,
    deletion_date: datetime,
) -> None:
    """Deprecate Bigquery table by updating metadata yaml file(s).

    Each metadata.yaml found by ``paths_matching_name_pattern`` for ``name`` is
    loaded, flagged as deprecated, given the date part of ``deletion_date`` and
    written back to the same path.

    Raises:
        FileNotFoundError: if no metadata file matches ``name``.
    """
    table_metadata_files = paths_matching_name_pattern(
        name, sql_dir, project_id=project_id, files=["metadata.yaml"]
    )

    # Fail before touching anything when there is nothing to update.
    if not table_metadata_files:
        raise FileNotFoundError(f"No metadata file(s) were found for: {name}")

    for metadata_file in table_metadata_files:
        metadata = Metadata.from_file(metadata_file)

        metadata.deprecated = True
        # The option parses to a datetime; only the calendar date is stored.
        metadata.deletion_date = deletion_date.date()

        metadata.write(metadata_file)
        click.echo(f"Updated {metadata_file} with deprecation.")
|
||||
|
|
|
@ -4,6 +4,7 @@ import enum
|
|||
import os
|
||||
import re
|
||||
import string
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
@ -153,6 +154,7 @@ class Metadata:
|
|||
references: Dict = attr.ib({})
|
||||
external_data: Optional[ExternalDataMetadata] = attr.ib(None)
|
||||
deprecated: bool = attr.ib(False)
|
||||
deletion_date: Optional[date] = attr.ib(None)
|
||||
|
||||
@owners.validator
|
||||
def validate_owners(self, attribute, value):
|
||||
|
@ -228,6 +230,7 @@ class Metadata:
|
|||
references = {}
|
||||
external_data = None
|
||||
deprecated = False
|
||||
deletion_date = None
|
||||
|
||||
with open(metadata_file, "r") as yaml_stream:
|
||||
try:
|
||||
|
@ -295,6 +298,8 @@ class Metadata:
|
|||
)
|
||||
if "deprecated" in metadata:
|
||||
deprecated = metadata["deprecated"]
|
||||
if "deletion_date" in metadata:
|
||||
deletion_date = metadata["deletion_date"]
|
||||
|
||||
return cls(
|
||||
friendly_name,
|
||||
|
@ -308,6 +313,7 @@ class Metadata:
|
|||
references,
|
||||
external_data,
|
||||
deprecated,
|
||||
deletion_date,
|
||||
)
|
||||
except yaml.YAMLError as e:
|
||||
raise e
|
||||
|
@ -349,6 +355,9 @@ class Metadata:
|
|||
if not metadata_dict["deprecated"]:
|
||||
del metadata_dict["deprecated"]
|
||||
|
||||
if not metadata_dict["deletion_date"]:
|
||||
del metadata_dict["deletion_date"]
|
||||
|
||||
file.write_text(
|
||||
yaml.dump(
|
||||
converter.unstructure(metadata_dict),
|
||||
|
|
|
@ -44,6 +44,9 @@ def publish_metadata(client, project, dataset, table, metadata):
|
|||
|
||||
if metadata.deprecated is True:
|
||||
table.labels["deprecated"] = "true"
|
||||
if metadata.deletion_date:
|
||||
table.labels["deletion_date"] = metadata.deletion_date.strftime("%Y-%m-%d")
|
||||
# TODO: in the future we can consider updating the table expiration date based on deletion_date
|
||||
|
||||
client.update_table(table, ["friendly_name", "description", "labels"])
|
||||
print("Published metadata for: {}.{}.{}".format(project, dataset, table))
|
||||
|
|
|
@ -98,6 +98,17 @@ def validate_change_control(
|
|||
return True
|
||||
|
||||
|
||||
def validate_deprecation(metadata, path):
    """Check that a deletion date is only present on deprecated tables.

    Returns True when the metadata is consistent; otherwise reports the
    offending ``path`` and returns False.
    """
    # Consistent when there is no deletion date at all, or the table is deprecated.
    if not metadata.deletion_date or metadata.deprecated:
        return True

    click.echo(
        f"Deletion date should only be added when table is deprecated in {path}"
    )
    return False
|
||||
|
||||
|
||||
def validate(target):
|
||||
"""Validate metadata files."""
|
||||
failed = False
|
||||
|
@ -119,6 +130,9 @@ def validate(target):
|
|||
):
|
||||
failed = True
|
||||
|
||||
if not validate_deprecation(metadata, path):
|
||||
failed = True
|
||||
|
||||
# todo more validation
|
||||
# e.g. https://github.com/mozilla/bigquery-etl/issues/924
|
||||
else:
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
import distutils
|
||||
import os
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
from click.testing import CliRunner
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
from bigquery_etl.cli.metadata import publish, update
|
||||
from bigquery_etl.cli.metadata import deprecate, publish, update
|
||||
from bigquery_etl.metadata.parse_metadata import Metadata
|
||||
from bigquery_etl.metadata.validate_metadata import validate_change_control
|
||||
|
||||
|
@ -277,6 +279,7 @@ class TestMetadata:
|
|||
== "Clustering fields: `column1`"
|
||||
)
|
||||
assert mock_bigquery_client().update_table.call_args[0][0].labels == {
|
||||
"deletion_date": "2024-03-02",
|
||||
"deprecated": "true",
|
||||
"owner1": "test",
|
||||
}
|
||||
|
@ -291,6 +294,7 @@ class TestMetadata:
|
|||
assert mock_bigquery_table().friendly_name == "Test metadata.yaml"
|
||||
assert mock_bigquery_table().description == "Clustering fields: `column1`"
|
||||
assert mock_bigquery_table().labels == {
|
||||
"deletion_date": "2024-03-02",
|
||||
"deprecated": "true",
|
||||
"owner1": "test",
|
||||
}
|
||||
|
@ -311,3 +315,100 @@ class TestMetadata:
|
|||
runner.invoke(publish, name, "--sql_dir=" + str(tmpdirname) + "/sql")
|
||||
|
||||
assert mock_bigquery_client().update_table.call_count == 0
|
||||
|
||||
def test_metadata_deprecate_default_deletion_date(self, runner):
    """Without --deletion_date the written date is three months from today."""
    table_name = "moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6"

    with tempfile.TemporaryDirectory() as tmpdirname:
        distutils.dir_util.copy_tree(str(TEST_DIR), str(tmpdirname))

        cli_args = [table_name, f"--sql_dir={tmpdirname}/sql"]
        result = runner.invoke(deprecate, cli_args)

        # Read back the metadata file the command is expected to have rewritten.
        metadata_path = Path(
            tmpdirname,
            "sql",
            "moz-fx-data-shared-prod",
            "telemetry_derived",
            "clients_daily_v6",
            "metadata.yaml",
        )
        with metadata_path.open("r") as stream:
            metadata = yaml.safe_load(stream)

    expected_date = (datetime.today() + relativedelta(months=+3)).date()

    assert result.exit_code == 0
    assert metadata["deprecated"]
    assert metadata["deletion_date"] == expected_date
|
||||
|
||||
def test_metadata_deprecate_set_deletion_date(self, runner):
    """An explicit --deletion_date is written to the metadata file as given."""
    table_name = "moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6"

    with tempfile.TemporaryDirectory() as tmpdirname:
        distutils.dir_util.copy_tree(str(TEST_DIR), str(tmpdirname))

        cli_args = [
            table_name,
            "--deletion_date=2024-03-02",
            f"--sql_dir={tmpdirname}/sql",
        ]
        result = runner.invoke(deprecate, cli_args)

        # Read back the metadata file the command is expected to have rewritten.
        metadata_path = Path(
            tmpdirname,
            "sql",
            "moz-fx-data-shared-prod",
            "telemetry_derived",
            "clients_daily_v6",
            "metadata.yaml",
        )
        with metadata_path.open("r") as stream:
            metadata = yaml.safe_load(stream)

    assert result.exit_code == 0
    assert metadata["deprecated"]
    assert metadata["deletion_date"] == datetime(2024, 3, 2).date()
|
||||
|
||||
def test_metadata_deprecate_set_invalid_deletion_date_should_fail(self, runner):
    """A malformed --deletion_date is rejected and the metadata file is untouched."""
    table_name = "moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6"

    with tempfile.TemporaryDirectory() as tmpdirname:
        distutils.dir_util.copy_tree(str(TEST_DIR), str(tmpdirname))

        cli_args = [
            table_name,
            "--deletion_date=2024-02",  # missing the day component
            f"--sql_dir={tmpdirname}/sql",
        ]
        result = runner.invoke(deprecate, cli_args)

        # Load the metadata file to confirm no deprecation fields were added.
        metadata_path = Path(
            tmpdirname,
            "sql",
            "moz-fx-data-shared-prod",
            "telemetry_derived",
            "clients_daily_v6",
            "metadata.yaml",
        )
        with metadata_path.open("r") as stream:
            metadata = yaml.safe_load(stream)

    assert result.exit_code == 2
    for key in ("deprecated", "deletion_date"):
        assert key not in metadata
    assert "Invalid value for '--deletion_date'" in result.output
|
||||
|
||||
def test_metadata_deprecate_no_metadata(self, runner):
    """Deprecating a table with no matching metadata file fails with a clear error."""
    table_name = (
        "moz-fx-data-shared-prod.telemetry_derived.clients_daily_scalar_aggregates_v2"
    )
    expected_message = f"No metadata file(s) were found for: {table_name}"

    with tempfile.TemporaryDirectory() as tmpdirname:
        distutils.dir_util.copy_tree(str(TEST_DIR), str(tmpdirname))

        cli_args = [
            table_name,
            "--deletion_date=2024-03-02",
            f"--sql_dir={tmpdirname}/sql",
        ]
        result = runner.invoke(deprecate, cli_args)

    assert result.exit_code == 1
    assert str(result.exception) == expected_message
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
from datetime import date
|
||||
|
||||
from bigquery_etl.metadata.parse_metadata import Metadata
|
||||
from bigquery_etl.metadata.validate_metadata import validate_public_data
|
||||
from bigquery_etl.metadata.validate_metadata import (
|
||||
validate_deprecation,
|
||||
validate_public_data,
|
||||
)
|
||||
|
||||
|
||||
class TestValidateMetadata(object):
|
||||
|
@ -36,3 +41,37 @@ class TestValidateMetadata(object):
|
|||
validate_public_data(metadata_invalid_public, "test/path/metadata.yaml")
|
||||
is False
|
||||
)
|
||||
|
||||
def test_validate_deprecation(self):
    """A deletion date is accepted only when the table is marked deprecated."""
    metadata_path = "test/path/metadata.yaml"

    def build_metadata(deprecated, deletion_date):
        # Fresh owners/labels objects per instance so the cases share no state.
        return Metadata(
            friendly_name="test",
            description="test",
            owners=["test@example.org"],
            labels={"test": "true", "foo": "abc"},
            deprecated=deprecated,
            deletion_date=deletion_date,
        )

    deprecated_with_date = build_metadata(True, date(2024, 5, 4))
    assert validate_deprecation(deprecated_with_date, metadata_path)

    deprecated_without_date = build_metadata(True, None)
    assert validate_deprecation(deprecated_without_date, metadata_path)

    not_deprecated_with_date = build_metadata(False, date(2024, 5, 4))
    assert not validate_deprecation(not_deprecated_with_date, metadata_path)
|
||||
|
|
|
@ -4,3 +4,4 @@ description: |-
|
|||
owners:
|
||||
- test@mozilla.com
|
||||
deprecated: true
|
||||
deletion_date: 2024-03-02
|
||||
|
|
Загрузка…
Ссылка в новой задаче