DENG-2823: Added deprecate cli command (#5219)

* Added deprecate cli command

* Fixed typo

* Fixed failed tests

* Fixed deletion date label

* Update bigquery_etl/metadata/parse_metadata.py

Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com>

* Fixed deletion date

* Fixed arguments optional

* Added return back

* Added invalid deletion date test

---------

Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com>
This commit is contained in:
Winnie Chan 2024-03-19 11:17:32 -07:00 коммит произвёл GitHub
Родитель 2b6d37e288
Коммит 33f9017c75
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
7 изменённых файлов: 216 добавлений и 2 удалений

Просмотреть файл

@ -1,9 +1,11 @@
"""bigquery-etl CLI metadata command."""
from datetime import datetime
from pathlib import Path
from typing import Optional
import click
from dateutil.relativedelta import relativedelta
from google.cloud import bigquery
from bigquery_etl.metadata.parse_metadata import DatasetMetadata, Metadata
@ -126,3 +128,48 @@ def publish(name: str, sql_dir: Optional[str], project_id: Optional[str]) -> Non
print("No metadata file for: {}.{}.{}".format(project, dataset, table))
return None
def _default_deletion_date() -> datetime:
    """Return the default deletion date: three months after the current date."""
    return datetime.today() + relativedelta(months=+3)


@metadata.command(
    help="""
    Deprecate BigQuery table by updating metadata.yaml file.
    Deletion date is by default 3 months from current date if not provided.
    Example:
    ./bqetl metadata deprecate ga_derived.downloads_with_attribution_v2 --deletion_date=2024-03-02
    """
)
@click.argument("name")
@project_id_option(
    ConfigLoader.get("default", "project", fallback="moz-fx-data-shared-prod")
)
@sql_dir_option
@click.option(
    "--deletion_date",
    "--deletion-date",
    help="Date when table is scheduled for deletion. Date format: yyyy-mm-dd",
    type=click.DateTime(formats=["%Y-%m-%d"]),
    # Pass a callable so the default is computed when the command is invoked,
    # not frozen at module import time.
    default=_default_deletion_date,
)
def deprecate(
    name: str,
    sql_dir: str,
    project_id: str,
    deletion_date: datetime,
) -> None:
    """Deprecate Bigquery table by updating metadata yaml file(s).

    Every ``metadata.yaml`` returned by ``paths_matching_name_pattern`` for
    ``name`` is loaded, marked as deprecated, given the deletion date and
    written back in place.

    Args:
        name: Table name or pattern used to look up metadata files.
        sql_dir: Directory that is searched for metadata files.
        project_id: Project passed to the metadata file lookup.
        deletion_date: Date the table is scheduled for deletion; only the
            date part is stored in the metadata.

    Raises:
        FileNotFoundError: If no metadata file matches ``name``.
    """
    table_metadata_files = paths_matching_name_pattern(
        name, sql_dir, project_id=project_id, files=["metadata.yaml"]
    )

    # Fail early: with no matching files there is nothing to update.
    if not table_metadata_files:
        raise FileNotFoundError(f"No metadata file(s) were found for: {name}")

    for metadata_file in table_metadata_files:
        metadata = Metadata.from_file(metadata_file)

        metadata.deprecated = True
        # click parses the option into a datetime; metadata stores a date.
        metadata.deletion_date = deletion_date.date()

        metadata.write(metadata_file)
        click.echo(f"Updated {metadata_file} with deprecation.")

Просмотреть файл

@ -4,6 +4,7 @@ import enum
import os
import re
import string
from datetime import date
from pathlib import Path
from typing import Any, Dict, List, Optional
@ -153,6 +154,7 @@ class Metadata:
references: Dict = attr.ib({})
external_data: Optional[ExternalDataMetadata] = attr.ib(None)
deprecated: bool = attr.ib(False)
deletion_date: Optional[date] = attr.ib(None)
@owners.validator
def validate_owners(self, attribute, value):
@ -228,6 +230,7 @@ class Metadata:
references = {}
external_data = None
deprecated = False
deletion_date = None
with open(metadata_file, "r") as yaml_stream:
try:
@ -295,6 +298,8 @@ class Metadata:
)
if "deprecated" in metadata:
deprecated = metadata["deprecated"]
if "deletion_date" in metadata:
deletion_date = metadata["deletion_date"]
return cls(
friendly_name,
@ -308,6 +313,7 @@ class Metadata:
references,
external_data,
deprecated,
deletion_date,
)
except yaml.YAMLError as e:
raise e
@ -349,6 +355,9 @@ class Metadata:
if not metadata_dict["deprecated"]:
del metadata_dict["deprecated"]
if not metadata_dict["deletion_date"]:
del metadata_dict["deletion_date"]
file.write_text(
yaml.dump(
converter.unstructure(metadata_dict),

Просмотреть файл

@ -44,6 +44,9 @@ def publish_metadata(client, project, dataset, table, metadata):
if metadata.deprecated is True:
table.labels["deprecated"] = "true"
if metadata.deletion_date:
table.labels["deletion_date"] = metadata.deletion_date.strftime("%Y-%m-%d")
# TODO: in the future we can consider updating the table expiration date based on deletion_date
client.update_table(table, ["friendly_name", "description", "labels"])
print("Published metadata for: {}.{}.{}".format(project, dataset, table))

Просмотреть файл

@ -98,6 +98,17 @@ def validate_change_control(
return True
def validate_deprecation(metadata, path):
    """Check that a deletion date is only present on deprecated metadata.

    Returns True when the metadata has no deletion date or is marked as
    deprecated. Otherwise a message naming ``path`` is echoed and False is
    returned.
    """
    # Valid unless a deletion date is set without the deprecated flag.
    if not metadata.deletion_date or metadata.deprecated:
        return True

    click.echo(
        f"Deletion date should only be added when table is deprecated in {path}"
    )
    return False
def validate(target):
"""Validate metadata files."""
failed = False
@ -119,6 +130,9 @@ def validate(target):
):
failed = True
if not validate_deprecation(metadata, path):
failed = True
# todo more validation
# e.g. https://github.com/mozilla/bigquery-etl/issues/924
else:

Просмотреть файл

@ -1,14 +1,16 @@
import distutils
import os
import tempfile
from datetime import datetime
from pathlib import Path
from unittest.mock import patch
import pytest
import yaml
from click.testing import CliRunner
from dateutil.relativedelta import relativedelta
from bigquery_etl.cli.metadata import publish, update
from bigquery_etl.cli.metadata import deprecate, publish, update
from bigquery_etl.metadata.parse_metadata import Metadata
from bigquery_etl.metadata.validate_metadata import validate_change_control
@ -277,6 +279,7 @@ class TestMetadata:
== "Clustering fields: `column1`"
)
assert mock_bigquery_client().update_table.call_args[0][0].labels == {
"deletion_date": "2024-03-02",
"deprecated": "true",
"owner1": "test",
}
@ -291,6 +294,7 @@ class TestMetadata:
assert mock_bigquery_table().friendly_name == "Test metadata.yaml"
assert mock_bigquery_table().description == "Clustering fields: `column1`"
assert mock_bigquery_table().labels == {
"deletion_date": "2024-03-02",
"deprecated": "true",
"owner1": "test",
}
@ -311,3 +315,100 @@ class TestMetadata:
runner.invoke(publish, name, "--sql_dir=" + str(tmpdirname) + "/sql")
assert mock_bigquery_client().update_table.call_count == 0
def test_metadata_deprecate_default_deletion_date(self, runner):
    """Deprecating without --deletion_date stores a date three months ahead."""
    table = "moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6"

    with tempfile.TemporaryDirectory() as tmpdirname:
        distutils.dir_util.copy_tree(str(TEST_DIR), str(tmpdirname))
        sql_dir = str(tmpdirname) + "/sql"

        result = runner.invoke(deprecate, [table, "--sql_dir=" + sql_dir])

        # Read the metadata back while the temporary directory still exists.
        metadata_path = (
            tmpdirname
            + "/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/metadata.yaml"
        )
        with open(metadata_path, "r") as stream:
            written = yaml.safe_load(stream)

    expected_date = (datetime.today() + relativedelta(months=+3)).date()

    assert result.exit_code == 0
    assert written["deprecated"]
    assert written["deletion_date"] == expected_date
def test_metadata_deprecate_set_deletion_date(self, runner):
    """Deprecating with an explicit --deletion_date stores that date."""
    table = "moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6"

    with tempfile.TemporaryDirectory() as tmpdirname:
        distutils.dir_util.copy_tree(str(TEST_DIR), str(tmpdirname))

        cli_args = [
            table,
            "--deletion_date=2024-03-02",
            "--sql_dir=" + str(tmpdirname) + "/sql",
        ]
        result = runner.invoke(deprecate, cli_args)

        # Read the metadata back while the temporary directory still exists.
        metadata_path = (
            tmpdirname
            + "/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/metadata.yaml"
        )
        with open(metadata_path, "r") as stream:
            written = yaml.safe_load(stream)

    assert result.exit_code == 0
    assert written["deprecated"]
    assert written["deletion_date"] == datetime(2024, 3, 2).date()
def test_metadata_deprecate_set_invalid_deletion_date_should_fail(self, runner):
    """A malformed --deletion_date is rejected and the metadata is untouched."""
    table = "moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6"

    with tempfile.TemporaryDirectory() as tmpdirname:
        distutils.dir_util.copy_tree(str(TEST_DIR), str(tmpdirname))

        cli_args = [
            table,
            "--deletion_date=2024-02",
            "--sql_dir=" + str(tmpdirname) + "/sql",
        ]
        result = runner.invoke(deprecate, cli_args)

        # Read the metadata back while the temporary directory still exists.
        metadata_path = (
            tmpdirname
            + "/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/metadata.yaml"
        )
        with open(metadata_path, "r") as stream:
            written = yaml.safe_load(stream)

    # Exit code 2 together with the usage message checked below.
    assert result.exit_code == 2
    assert "deprecated" not in written
    assert "deletion_date" not in written
    assert "Invalid value for '--deletion_date'" in result.output
def test_metadata_deprecate_no_metadata(self, runner):
    """Deprecating a table with no metadata file fails with FileNotFoundError text."""
    table = "moz-fx-data-shared-prod.telemetry_derived.clients_daily_scalar_aggregates_v2"
    expected_message = f"No metadata file(s) were found for: {table}"

    with tempfile.TemporaryDirectory() as tmpdirname:
        distutils.dir_util.copy_tree(str(TEST_DIR), str(tmpdirname))

        cli_args = [
            table,
            "--deletion_date=2024-03-02",
            "--sql_dir=" + str(tmpdirname) + "/sql",
        ]
        result = runner.invoke(deprecate, cli_args)

    assert result.exit_code == 1
    assert str(result.exception) == expected_message

Просмотреть файл

@ -1,5 +1,10 @@
from datetime import date
from bigquery_etl.metadata.parse_metadata import Metadata
from bigquery_etl.metadata.validate_metadata import validate_public_data
from bigquery_etl.metadata.validate_metadata import (
validate_deprecation,
validate_public_data,
)
class TestValidateMetadata(object):
@ -36,3 +41,37 @@ class TestValidateMetadata(object):
validate_public_data(metadata_invalid_public, "test/path/metadata.yaml")
is False
)
def test_validate_deprecation(self):
    """validate_deprecation only rejects a deletion date without deprecation."""
    path = "test/path/metadata.yaml"

    def build(deprecated, deletion_date):
        # Fresh Metadata per case; only the two fields under test vary.
        return Metadata(
            friendly_name="test",
            description="test",
            owners=["test@example.org"],
            labels={"test": "true", "foo": "abc"},
            deprecated=deprecated,
            deletion_date=deletion_date,
        )

    deprecated_with_date = build(True, date(2024, 5, 4))
    assert validate_deprecation(deprecated_with_date, path)

    deprecated_without_date = build(True, None)
    assert validate_deprecation(deprecated_without_date, path)

    date_without_deprecation = build(False, date(2024, 5, 4))
    assert not validate_deprecation(date_without_deprecation, path)

Просмотреть файл

@ -4,3 +4,4 @@ description: |-
owners:
- test@mozilla.com
deprecated: true
deletion_date: 2024-03-02