DENG-990 Refactor backfill cli commands (#3924)

* Refactored backfill cli commands

* Adjusted validate command
This commit is contained in:
Winnie Chan 2023-06-12 10:24:35 -07:00 коммит произвёл GitHub
Родитель 02afdfb443
Коммит b9d01ca959
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 110 добавлений и 139 удалений

Просмотреть файл

@ -50,7 +50,7 @@ class BackfillStatus(enum.Enum):
"""Represents backfill status types."""
DRAFTING = "Drafting"
VALIDATING = "Validating"
VALIDATED = "Validated"
COMPLETE = "Complete"

Просмотреть файл

@ -22,7 +22,7 @@ from ..backfill.validate import (
validate_file,
validate_overlap_dates,
)
from ..cli.utils import paths_matching_name_pattern, project_id_option, sql_dir_option
from ..cli.utils import project_id_option, qualified_table_name_matching, sql_dir_option
from ..util import extract_from_query_path
QUALIFIED_TABLE_NAME_RE = re.compile(
@ -56,6 +56,7 @@ def backfill(ctx):
)
@click.argument("qualified_table_name")
@sql_dir_option
@project_id_option("moz-fx-data-shared-prod")
@click.option(
"--start_date",
"--start-date",
@ -91,6 +92,7 @@ def create(
ctx,
qualified_table_name,
sql_dir,
project_id,
start_date,
end_date,
exclude,
@ -100,24 +102,9 @@ def create(
A backfill.yaml file will be created if it does not already exist.
"""
try:
match = QUALIFIED_TABLE_NAME_RE.match(qualified_table_name)
project_id = match.group("project_id")
dataset_id = match.group("dataset_id")
table_id = match.group("table_id")
except AttributeError:
click.echo(
"Qualified table name must be named like:" + " <project>.<dataset>.<table>"
)
sys.exit(1)
path = Path(sql_dir)
query_path = path / project_id / dataset_id / table_id
if not query_path.exists():
click.echo(f"{project_id}.{dataset_id}.{table_id}" + " does not exist")
sys.exit(1)
backfills_dict = get_backfill_file_to_entries_map(
sql_dir, project_id, qualified_table_name
)
backfill = Backfill(
entry_date=date.today(),
@ -131,22 +118,34 @@ def create(
backfills = []
backfill_file = query_path / BACKFILL_FILE
if backfills_dict:
# There should only be one backfill file with entries
backfill_file = list(backfills_dict.keys())[0]
entries = backfills_dict[backfill_file]
if backfill_file.exists():
backfills = Backfill.entries_from_file(backfill_file)
for entry in backfills:
for entry in entries:
validate_duplicate_entry_dates(backfill, entry)
if entry.status == BackfillStatus.DRAFTING:
validate_overlap_dates(backfill, entry)
backfills = entries
else:
(project_id, dataset_id, table_id) = qualified_table_name_matching(
qualified_table_name
)
path = Path(sql_dir)
query_path = path / project_id / dataset_id / table_id
backfill_file = query_path / BACKFILL_FILE
backfills.insert(0, backfill)
backfill_file.write_text(
"\n".join(backfill.to_yaml() for backfill in sorted(backfills, reverse=True))
)
click.echo(f"Created backfill entry in {backfill_file}")
click.echo(f"Created backfill entry in {backfill_file}.")
@backfill.command(
@ -177,50 +176,22 @@ def validate(
project_id,
):
"""Validate backfill.yaml files."""
backfill_files = []
backfills_dict = get_backfill_file_to_entries_map(
sql_dir, project_id, qualified_table_name
)
if qualified_table_name:
for backfill_file in backfills_dict:
try:
match = QUALIFIED_TABLE_NAME_RE.match(qualified_table_name)
project_id = match.group("project_id")
dataset_id = match.group("dataset_id")
table_id = match.group("table_id")
except AttributeError:
click.echo(
"Qualified table name must be named like:"
+ " <project>.<dataset>.<table>"
)
sys.exit(1)
path = Path(sql_dir)
query_path = path / project_id / dataset_id / table_id
if not query_path.exists():
click.echo(f"{project_id}.{dataset_id}.{table_id}" + " does not exist")
sys.exit(1)
backfill_file = path / project_id / dataset_id / table_id / BACKFILL_FILE
backfill_files.append(backfill_file)
else:
backfill_files = paths_matching_name_pattern(
None, sql_dir, project_id, [BACKFILL_FILE]
)
for file in backfill_files:
try:
validate_file(file)
validate_file(backfill_file)
except (yaml.YAMLError, ValueError) as e:
click.echo(f"{file} contains the following error:\n {e}")
click.echo(f"{backfill_file} contains the following error:\n {e}")
sys.exit(1)
if qualified_table_name:
click.echo(f"{BACKFILL_FILE} has been validated for {qualified_table_name}.")
elif backfills_dict:
click.echo(
f"{BACKFILL_FILE} has been validated for {project_id}.{dataset_id}.{table_id} "
)
elif backfill_files:
click.echo(
f"All {BACKFILL_FILE} files have been validated for project {project_id}"
f"All {BACKFILL_FILE} files have been validated for project {project_id}."
)
@ -258,7 +229,7 @@ def info(ctx, qualified_table_name, sql_dir, project_id, status):
total_backfills_count = 0
for file, entries in backfills.items():
for backfill_file, entries in backfills.items():
if status is not None:
entries = [e for e in entries if e.status.value.lower() == status.lower()]
@ -267,7 +238,7 @@ def info(ctx, qualified_table_name, sql_dir, project_id, status):
if entries_count:
total_backfills_count += entries_count
project, dataset, table = extract_from_query_path(file)
project, dataset, table = extract_from_query_path(backfill_file)
status_str = f" with {status} status" if status is not None else ""
click.echo(
@ -277,4 +248,4 @@ def info(ctx, qualified_table_name, sql_dir, project_id, status):
for entry in entries:
click.echo(str(entry))
click.echo(f"\nThere are a total of {total_backfills_count} backfill(s)")
click.echo(f"\nThere are a total of {total_backfills_count} backfill(s).")

Просмотреть файл

@ -235,29 +235,29 @@ class TestBackfill:
backfill_file.write_text(BACKFILL_YAML_TEMPLATE)
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 0
assert result.exit_code == 0
def test_validate_backfill_invalid_table_name(self, runner):
with runner.isolated_filesystem():
SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1"
os.makedirs(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.",
],
)
assert validate_backfill_result.exit_code == 1
assert result.exit_code == 1
assert (
"Qualified table name must be named like: <project>.<dataset>.<table>"
in validate_backfill_result.output
in result.output
)
def test_validate_backfill_non_existing_table_name(self, runner):
@ -265,14 +265,14 @@ class TestBackfill:
SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1"
os.makedirs(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v2",
],
)
assert validate_backfill_result.exit_code == 1
assert "does not exist" in validate_backfill_result.output
assert result.exit_code == 1
assert "does not exist" in result.output
def test_validate_backfill_invalid_default_reason(self, runner):
with runner.isolated_filesystem():
@ -286,14 +286,14 @@ class TestBackfill:
backfill_file.write_text(invalid_backfill)
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 1
assert "Invalid Reason" in validate_backfill_result.output
assert result.exit_code == 1
assert "Invalid Reason" in result.output
def test_validate_backfill_empty_reason(self, runner):
with runner.isolated_filesystem():
@ -308,14 +308,14 @@ class TestBackfill:
backfill_file.write_text(invalid_backfill)
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 1
assert "Invalid Reason" in validate_backfill_result.output
assert result.exit_code == 1
assert "Invalid Reason" in result.output
def test_validate_backfill_invalid_watcher(self, runner):
with runner.isolated_filesystem():
@ -330,15 +330,15 @@ class TestBackfill:
backfill_file.write_text(invalid_backfill)
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 1
assert "Invalid" in validate_backfill_result.output
assert "watchers" in validate_backfill_result.output
assert result.exit_code == 1
assert "Invalid" in str(result.exception)
assert "watchers" in str(result.exception)
def test_validate_backfill_empty_watcher(self, runner):
with runner.isolated_filesystem():
@ -353,15 +353,15 @@ class TestBackfill:
backfill_file.write_text(invalid_backfill)
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 1
assert "Invalid" in validate_backfill_result.output
assert "watchers" in validate_backfill_result.output
assert result.exit_code == 1
assert "Invalid" in str(result.exception)
assert "watchers" in str(result.exception)
def test_validate_backfill_watchers_duplicated(self, runner):
with runner.isolated_filesystem():
@ -376,14 +376,14 @@ class TestBackfill:
backfill_file.write_text(invalid_backfill)
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 1
assert "Duplicate or default watcher" in validate_backfill_result.output
assert result.exit_code == 1
assert "Duplicate or default watcher" in result.output
def test_validate_backfill_invalid_status(self, runner):
with runner.isolated_filesystem():
@ -398,14 +398,14 @@ class TestBackfill:
backfill_file.write_text(invalid_backfill)
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 1
assert invalid_status in str(validate_backfill_result.exception)
assert result.exit_code == 1
assert invalid_status in str(result.exception)
def test_validate_backfill_duplicate_entry_dates(self, runner):
with runner.isolated_filesystem():
@ -422,14 +422,14 @@ class TestBackfill:
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 1
assert "Backfill entry already exists" in validate_backfill_result.output
assert result.exit_code == 1
assert "Backfill entry already exists" in str(result.exception)
def test_validate_backfill_invalid_entry_date(self, runner):
with runner.isolated_filesystem():
@ -444,14 +444,14 @@ class TestBackfill:
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 1
assert "can't be in the future" in validate_backfill_result.output
assert result.exit_code == 1
assert "can't be in the future" in str(result.exception)
def test_validate_backfill_invalid_start_date_greater_than_end_date(self, runner):
with runner.isolated_filesystem():
@ -466,14 +466,14 @@ class TestBackfill:
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 1
assert "Invalid start date" in validate_backfill_result.output
assert result.exit_code == 1
assert "Invalid start date" in str(result.exception)
#
def test_validate_backfill_invalid_start_date_greater_than_entry_date(self, runner):
@ -489,14 +489,14 @@ class TestBackfill:
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 1
assert "Invalid start date" in validate_backfill_result.output
assert result.exit_code == 1
assert "Invalid start date" in str(result.exception)
def test_validate_backfill_invalid_end_date_greater_than_entry_date(self, runner):
with runner.isolated_filesystem():
@ -511,14 +511,14 @@ class TestBackfill:
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 1
assert "Invalid end date" in validate_backfill_result.output
assert result.exit_code == 1
assert "Invalid end date" in str(result.exception)
def test_validate_backfill_invalid_excluded_dates_less_than_start_date(
self, runner
@ -535,14 +535,14 @@ class TestBackfill:
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 1
assert "Invalid excluded dates" in validate_backfill_result.output
assert result.exit_code == 1
assert "Invalid excluded dates" in str(result.exception)
def test_validate_backfill_invalid_excluded_dates_greater_than_end_date(
self, runner
@ -559,14 +559,14 @@ class TestBackfill:
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 1
assert "Invalid excluded dates" in validate_backfill_result.output
assert result.exit_code == 1
assert "Invalid excluded dates" in str(result.exception)
def test_validate_backfill_invalid_excluded_dates_duplicated(self, runner):
with runner.isolated_filesystem():
@ -591,14 +591,14 @@ class TestBackfill:
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 1
assert "duplicate excluded dates" in validate_backfill_result.output
assert result.exit_code == 1
assert "duplicate excluded dates" in result.output
def test_validate_backfill_invalid_excluded_dates_not_sorted(self, runner):
with runner.isolated_filesystem():
@ -621,14 +621,14 @@ class TestBackfill:
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 1
assert "excluded dates not sorted" in validate_backfill_result.output
assert result.exit_code == 1
assert "excluded dates not sorted" in result.output
def test_validate_backfill_entries_not_sorted(self, runner):
with runner.isolated_filesystem():
@ -649,14 +649,14 @@ class TestBackfill:
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 1
assert "entries are not sorted" in validate_backfill_result.output
assert result.exit_code == 1
assert "entries are not sorted" in result.output
def test_validate_backfill_overlap_dates(self, runner):
with runner.isolated_filesystem():
@ -677,14 +677,14 @@ class TestBackfill:
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 1
assert "overlap dates" in validate_backfill_result.output
assert result.exit_code == 1
assert "overlap dates" in result.output
def test_validate_backfill_overlap_dates_not_drafting_status(self, runner):
with runner.isolated_filesystem():
@ -705,13 +705,13 @@ class TestBackfill:
assert BACKFILL_FILE in os.listdir(SQL_DIR)
validate_backfill_result = runner.invoke(
result = runner.invoke(
validate,
[
"moz-fx-data-shared-prod.test.test_query_v1",
],
)
assert validate_backfill_result.exit_code == 0
assert result.exit_code == 0
def test_backfill_info_one_table_all_status(self, runner):
with runner.isolated_filesystem():
@ -728,7 +728,7 @@ class TestBackfill:
" reason: test_reason\n"
" watchers:\n"
" - test@example.org\n"
" status: Validating\n"
" status: Validated\n"
)
assert BACKFILL_FILE in os.listdir(SQL_DIR)
@ -756,7 +756,7 @@ class TestBackfill:
assert result.exit_code == 0
assert qualified_table_name_1 in result.output
assert BackfillStatus.DRAFTING.value in result.output
assert BackfillStatus.VALIDATING.value in result.output
assert BackfillStatus.VALIDATED.value in result.output
assert "total of 2 backfill(s)" in result.output
assert qualified_table_name_2 not in result.output
assert BackfillStatus.COMPLETE.value not in result.output
@ -776,7 +776,7 @@ class TestBackfill:
" reason: test_reason\n"
" watchers:\n"
" - test@example.org\n"
" status: Validating\n"
" status: Validated\n"
)
assert BACKFILL_FILE in os.listdir(SQL_DIR)
@ -790,7 +790,7 @@ class TestBackfill:
assert qualified_table_name in result.output
assert BackfillStatus.DRAFTING.value in result.output
assert "total of 1 backfill(s)" in result.output
assert BackfillStatus.VALIDATING.value not in result.output
assert BackfillStatus.VALIDATED.value not in result.output
assert BackfillStatus.COMPLETE.value not in result.output
def test_backfill_info_all_tables_all_status(self, runner):
@ -808,7 +808,7 @@ class TestBackfill:
" reason: test_reason\n"
" watchers:\n"
" - test@example.org\n"
" status: Validating\n"
" status: Validated\n"
)
assert BACKFILL_FILE in os.listdir(SQL_DIR)
@ -837,7 +837,7 @@ class TestBackfill:
assert qualified_table_name_1 in result.output
assert qualified_table_name_2 in result.output
assert BackfillStatus.DRAFTING.value in result.output
assert BackfillStatus.VALIDATING.value in result.output
assert BackfillStatus.VALIDATED.value in result.output
assert "total of 3 backfill(s)" in result.output
assert BackfillStatus.COMPLETE.value not in result.output
@ -856,7 +856,7 @@ class TestBackfill:
" reason: test_reason\n"
" watchers:\n"
" - test@example.org\n"
" status: Validating\n"
" status: Validated\n"
)
assert BACKFILL_FILE in os.listdir(SQL_DIR)
@ -874,7 +874,7 @@ class TestBackfill:
" reason: test_reason\n"
" watchers:\n"
" - test@example.org\n"
" status: Validating\n"
" status: Validated\n"
)
assert BACKFILL_FILE in os.listdir(SQL_DIR)
@ -882,14 +882,14 @@ class TestBackfill:
result = runner.invoke(
info,
[
"--status=validating",
"--status=validated",
],
)
assert result.exit_code == 0
assert qualified_table_name_1 in result.output
assert qualified_table_name_2 in result.output
assert BackfillStatus.VALIDATING.value in result.output
assert BackfillStatus.VALIDATED.value in result.output
assert "total of 2 backfill(s)" in result.output
assert BackfillStatus.DRAFTING.value not in result.output
assert BackfillStatus.COMPLETE.value not in result.output
@ -928,7 +928,7 @@ class TestBackfill:
" reason: test_reason\n"
" watchers:\n"
" - test@example.org\n"
" status: Validating\n"
" status: Validated\n"
)
assert BACKFILL_FILE in os.listdir(SQL_DIR)