From 071c53e4cbda1d0c48bf8f3aab313986ad43da50 Mon Sep 17 00:00:00 2001 From: Winnie Chan <10429026+wwyc@users.noreply.github.com> Date: Thu, 1 Jun 2023 10:06:09 -0700 Subject: [PATCH] DENG-803/805: Create & Validate backfill cli commands (#3760) * Added backfill create and validate cli command --------- Co-authored-by: Alexander Co-authored-by: kik-kik <42538694+kik-kik@users.noreply.github.com> --- .circleci/config.yml | 15 + bigquery_etl/backfill/__init__.py | 1 + bigquery_etl/backfill/parse.py | 174 +++++ bigquery_etl/backfill/validate.py | 77 ++ bigquery_etl/cli/__init__.py | 2 + bigquery_etl/cli/backfill.py | 222 ++++++ bigquery_etl/cli/utils.py | 2 +- docs/bqetl.md | 1 + tests/backfill/backfill.yaml | 9 + .../backfill/test_dir_multiple/backfill.yaml | 19 + tests/backfill/test_dir_valid/backfill.yaml | 9 + tests/backfill/test_parse_backfill.py | 358 +++++++++ tests/backfill/test_validate_backfill.py | 144 ++++ tests/cli/test_cli_backfill.py | 714 ++++++++++++++++++ 14 files changed, 1746 insertions(+), 1 deletion(-) create mode 100644 bigquery_etl/backfill/__init__.py create mode 100644 bigquery_etl/backfill/parse.py create mode 100644 bigquery_etl/backfill/validate.py create mode 100644 bigquery_etl/cli/backfill.py create mode 100644 tests/backfill/backfill.yaml create mode 100644 tests/backfill/test_dir_multiple/backfill.yaml create mode 100644 tests/backfill/test_dir_valid/backfill.yaml create mode 100644 tests/backfill/test_parse_backfill.py create mode 100644 tests/backfill/test_validate_backfill.py create mode 100644 tests/cli/test_cli_backfill.py diff --git a/.circleci/config.yml b/.circleci/config.yml index bf3dc5236b..ecbfa93ae4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -151,6 +151,18 @@ jobs: echo $PATHS PATH="venv/bin:$PATH" script/bqetl dryrun --validate-schemas $PATHS # yamllint enable rule:line-length + validate-backfills: + docker: *docker + steps: + - checkout + - *restore_venv_cache + - *build + - *attach_generated_sql + -
*copy_staged_sql + - run: + name: Verify that backfill.yaml files are valid + command: | + PATH="venv/bin:$PATH" script/bqetl backfill validate validate-metadata: docker: *docker steps: @@ -614,6 +626,9 @@ workflows: requires: - deploy-changes-to-stage - integration + - validate-backfills: + requires: + - deploy-changes-to-stage - validate-dags: requires: - generate-dags diff --git a/bigquery_etl/backfill/__init__.py b/bigquery_etl/backfill/__init__.py new file mode 100644 index 0000000000..46c129db93 --- /dev/null +++ b/bigquery_etl/backfill/__init__.py @@ -0,0 +1 @@ +"""Backfill.""" diff --git a/bigquery_etl/backfill/parse.py b/bigquery_etl/backfill/parse.py new file mode 100644 index 0000000000..69ed4186da --- /dev/null +++ b/bigquery_etl/backfill/parse.py @@ -0,0 +1,174 @@ +"""Parse backfill entries.""" + +import enum +import os +from datetime import date +from pathlib import Path +from typing import List + +import attr +import yaml + +from bigquery_etl.query_scheduling.utils import is_email_or_github_identity + +BACKFILL_FILE = "backfill.yaml" +DEFAULT_WATCHER = "nobody@mozilla.com" +DEFAULT_REASON = "Please provide a reason for the backfill and links to any related bugzilla or jira tickets" + + +class UniqueKeyLoader(yaml.SafeLoader): + """YAML loader to check duplicate keys.""" + + def construct_mapping(self, node, deep=False): + """Create mapping while checking for duplicate keys.""" + mapping = set() + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + if key in mapping: + raise ValueError( + f"Backfill entry already exists with entry date: {key}." 
+ ) + mapping.add(key) + return super().construct_mapping(node, deep) + + +class Literal(str): + """Represents a YAML literal.""" + + pass + + +def literal_presenter(dumper, data): + """Literal representer for YAML output.""" + return dumper.represent_scalar("tag:yaml.org,2002:str", data) + + +yaml.add_representer(Literal, literal_presenter) + + +class BackfillStatus(enum.Enum): + """Represents backfill status types.""" + + DRAFTING = "Drafting" + VALIDATING = "Validating" + COMPLETE = "Complete" + + +@attr.s(auto_attribs=True) +class Backfill: + """ + Representation of a backfill entry configuration. + + Uses attrs to simplify the class definition and provide validation. + Docs: https://www.attrs.org + """ + + entry_date: date = attr.ib() + start_date: date = attr.ib() + end_date: date = attr.ib() + excluded_dates: List[date] = attr.ib() + reason: str = attr.ib() + watchers: List[str] = attr.ib() + status: BackfillStatus = attr.ib() + + @entry_date.validator + def validate_entry_date(self, attribute, value): + """Check that provided entry date is valid.""" + if date.today() < value: + raise ValueError(f"Backfill entry {value} can't be in the future.") + + @start_date.validator + def validate_start_date(self, attribute, value): + """Check that provided start date is valid.""" + if self.end_date < value or self.entry_date < value: + raise ValueError(f"Invalid start date: {value}.") + + @end_date.validator + def validate_end_date(self, attribute, value): + """Check that provided end date is valid.""" + if value < self.start_date or self.entry_date < self.end_date: + raise ValueError(f"Invalid end date: {value}.") + + @excluded_dates.validator + def validate_excluded_dates(self, attribute, value): + """Check that provided excluded dates are valid.""" + if not all(map(lambda e: self.start_date < e < self.end_date, value)): + raise ValueError(f"Invalid excluded dates: {value}.") + + @watchers.validator + def validate_watchers(self, attribute, value): + """Check that 
provided watchers are valid.""" + if not value or not all( + map(lambda e: e and is_email_or_github_identity(e), value) + ): + raise ValueError(f"Invalid email or Github identity for watchers: {value}.") + + @status.validator + def validate_status(self, attribute, value): + """Check that provided status is valid.""" + if not hasattr(BackfillStatus, value.name): + raise ValueError(f"Invalid status: {value.name}.") + + @staticmethod + def is_backfill_file(file_path: Path) -> bool: + """Check if the provided file is a backfill file.""" + return os.path.basename(file_path) == BACKFILL_FILE + + @classmethod + def entries_from_file(cls, file: Path) -> List["Backfill"]: + """ + Parse all backfill entries from the provided yaml file. + + Return a list with all backfill entries. + """ + if not cls.is_backfill_file(file): + raise ValueError(f"Invalid file: {file}.") + + backfill_entries: List[Backfill] = [] + + with open(file, "r") as yaml_stream: + try: + backfills = yaml.load(yaml_stream, Loader=UniqueKeyLoader) or {} + + for entry_date, entry in backfills.items(): + excluded_dates = [] + if "excluded_dates" in entry: + excluded_dates = entry["excluded_dates"] + + backfill = cls( + entry_date=entry_date, + start_date=entry["start_date"], + end_date=entry["end_date"], + excluded_dates=excluded_dates, + reason=entry["reason"], + watchers=entry["watchers"], + status=BackfillStatus[entry["status"].upper()], + ) + + backfill_entries.append(backfill) + + except yaml.YAMLError as e: + raise e + + return backfill_entries + + def to_yaml(self) -> str: + """Create dictionary version of yaml for writing to file.""" + yaml_dict = { + self.entry_date: { + "start_date": self.start_date, + "end_date": self.end_date, + "excluded_dates": sorted(self.excluded_dates), + "reason": self.reason, + "watchers": self.watchers, + "status": self.status.value, + } + } + + if yaml_dict[self.entry_date]["excluded_dates"] == []: + del yaml_dict[self.entry_date]["excluded_dates"] + + return yaml.dump( + 
yaml_dict, + sort_keys=False, + ) diff --git a/bigquery_etl/backfill/validate.py b/bigquery_etl/backfill/validate.py new file mode 100644 index 0000000000..ed91d3aeb4 --- /dev/null +++ b/bigquery_etl/backfill/validate.py @@ -0,0 +1,77 @@ +"""Validate backfill entries.""" +from pathlib import Path +from typing import List + +from ..backfill.parse import DEFAULT_REASON, DEFAULT_WATCHER, Backfill, BackfillStatus + + +def validate_duplicate_entry_dates(entry_1: Backfill, entry_2: Backfill) -> None: + """Check if backfill entries have the same entry dates.""" + if entry_1.entry_date == entry_2.entry_date: + raise ValueError(f"Duplicate backfill with entry date: {entry_1.entry_date}.") + + +def validate_overlap_dates(entry_1: Backfill, entry_2: Backfill) -> None: + """Check overlap dates between two backfill entries.""" + if max(entry_1.start_date, entry_2.start_date) <= min( + entry_1.end_date, entry_2.end_date + ): + raise ValueError( + f"Existing backfill entry with overlap dates from: {entry_2.entry_date}." + ) + + +def validate_excluded_dates(entry: Backfill) -> None: + """Check if backfill excluded dates are sorted and have no duplicates.""" + if not entry.excluded_dates == sorted(entry.excluded_dates): + raise ValueError( + f"Existing backfill entry with excluded dates not sorted: {entry.entry_date}." + ) + if not len(entry.excluded_dates) == len(set(entry.excluded_dates)): + raise ValueError( + f"Existing backfill entry with duplicate excluded dates: {entry.entry_date}." 
+ ) + + +def validate_reason(entry: Backfill) -> None: + """Check if backfill reason is the same as default or empty.""" + if not entry.reason or entry.reason == DEFAULT_REASON: + raise ValueError(f"Invalid Reason: {entry.reason}.") + + +def validate_watchers(entry: Backfill) -> None: + """Check if backfill watcher is the same as default or duplicated.""" + if DEFAULT_WATCHER in entry.watchers or len(entry.watchers) != len( + set(entry.watchers) + ): + raise ValueError(f"Duplicate or default watcher in ({entry.watchers}).") + + +def validate_entries_are_sorted(backfills: List[Backfill]) -> None: + """Check if list of backfill entries are sorted.""" + entry_dates = [backfill.entry_date for backfill in backfills] + if not entry_dates == sorted(entry_dates, reverse=True): + raise ValueError("Backfill entries are not sorted") + + +def validate_file(file: Path) -> None: + """Validate all entries from a given backfill.yaml file.""" + backfills = Backfill.entries_from_file(file) + validate_entries(backfills) + + +def validate_entries(backfills: list) -> None: + """Validate a list of backfill entries.""" + for i, backfill_entry_1 in enumerate(backfills): + validate_watchers(backfill_entry_1) + validate_reason(backfill_entry_1) + validate_excluded_dates(backfill_entry_1) + + # validate against other entries with drafting status + if backfill_entry_1.status == BackfillStatus.DRAFTING: + for backfill_entry_2 in backfills[i + 1 :]: + if backfill_entry_2.status == BackfillStatus.DRAFTING: + validate_duplicate_entry_dates(backfill_entry_1, backfill_entry_2) + validate_overlap_dates(backfill_entry_1, backfill_entry_2) + + validate_entries_are_sorted(backfills) diff --git a/bigquery_etl/cli/__init__.py b/bigquery_etl/cli/__init__.py index 531775839f..94f6dd8436 100644 --- a/bigquery_etl/cli/__init__.py +++ b/bigquery_etl/cli/__init__.py @@ -8,6 +8,7 @@ from .._version import __version__ # We rename the import, otherwise it affects monkeypatching in tests from ..cli.alchemer import 
alchemer as alchemer_ +from ..cli.backfill import backfill from ..cli.dag import dag from ..cli.dryrun import dryrun from ..cli.format import format @@ -45,6 +46,7 @@ def cli(prog_name=None): "docs": docs_, "copy_deduplicate": copy_deduplicate, "stage": stage, + "backfill": backfill, } @click.group(commands=commands) diff --git a/bigquery_etl/cli/backfill.py b/bigquery_etl/cli/backfill.py new file mode 100644 index 0000000000..793e52ab8c --- /dev/null +++ b/bigquery_etl/cli/backfill.py @@ -0,0 +1,222 @@ +"""bigquery-etl CLI backfill command.""" + +import re +import sys +import tempfile +from datetime import date, datetime +from pathlib import Path + +import click +import yaml + +from ..backfill.parse import ( + BACKFILL_FILE, + DEFAULT_REASON, + DEFAULT_WATCHER, + Backfill, + BackfillStatus, +) +from ..backfill.validate import ( + validate_duplicate_entry_dates, + validate_file, + validate_overlap_dates, +) +from ..cli.utils import paths_matching_name_pattern, project_id_option, sql_dir_option + +QUALIFIED_TABLE_NAME_RE = re.compile( + r"(?P<project_id>[a-zA-Z0-9_-]+)\.(?P<dataset_id>[a-zA-Z0-9_-]+)\.(?P<table_id>[a-zA-Z0-9_-]+)" +) + + +@click.group(help="Commands for managing backfills.") +@click.pass_context +def backfill(ctx): + """Create the CLI group for the backfill command.""" + # create temporary directory generated content is written to + # the directory will be deleted automatically after the command exits + ctx.ensure_object(dict) + ctx.obj["TMP_DIR"] = ctx.with_resource(tempfile.TemporaryDirectory()) + + +@backfill.command( + help="""Create a new backfill entry in the backfill.yaml file. Create + a backfill.yaml file if it does not already exist.
+ + Examples: + + \b + ./bqetl backfill create moz-fx-data-shared-prod.telemetry_derived.deviations_v1 \\ + --start_date=2021-03-01 \\ + --end_date=2021-03-31 \\ + --exclude=2021-03-03 + """, +) +@click.argument("qualified_table_name") +@sql_dir_option +@click.option( + "--start_date", + "--start-date", + "-s", + help="First date to be backfilled. Date format: yyyy-mm-dd", + type=click.DateTime(formats=["%Y-%m-%d"]), + required=True, +) +@click.option( + "--end_date", + "--end-date", + "-e", + help="Last date to be backfilled. Date format: yyyy-mm-dd", + type=click.DateTime(formats=["%Y-%m-%d"]), + default=datetime.today(), +) +@click.option( + "--exclude", + "-x", + multiple=True, + help="Dates excluded from backfill. Date format: yyyy-mm-dd", + type=click.DateTime(formats=["%Y-%m-%d"]), + default=None, +) +@click.option( + "--watcher", + "-w", + help="Watcher of the backfill (email address)", + default=DEFAULT_WATCHER, +) +@click.pass_context +def create( + ctx, + qualified_table_name, + sql_dir, + start_date, + end_date, + exclude, + watcher, +): + """CLI command for creating a new backfill entry in backfill.yaml file. + + A backfill.yaml file will be created if it does not already exist. + """ + try: + match = QUALIFIED_TABLE_NAME_RE.match(qualified_table_name) + project_id = match.group("project_id") + dataset_id = match.group("dataset_id") + table_id = match.group("table_id") + except AttributeError: + click.echo( + "Qualified table name must be named like:" + " <project>.<dataset>.<table>"
+ ) + sys.exit(1) + + path = Path(sql_dir) + + query_path = path / project_id / dataset_id / table_id + + if not query_path.exists(): + click.echo(f"{project_id}.{dataset_id}.{table_id}" + " does not exist") + sys.exit(1) + + backfill = Backfill( + entry_date=date.today(), + start_date=start_date.date(), + end_date=end_date.date(), + excluded_dates=[e.date() for e in list(exclude)], + reason=DEFAULT_REASON, + watchers=[watcher], + status=BackfillStatus.DRAFTING, + ) + + backfills = [] + + backfill_file = query_path / BACKFILL_FILE + + if backfill_file.exists(): + backfills = Backfill.entries_from_file(backfill_file) + for entry in backfills: + validate_duplicate_entry_dates(backfill, entry) + if entry.status == BackfillStatus.DRAFTING: + validate_overlap_dates(backfill, entry) + + backfills.insert(0, backfill) + + backfill_file.write_text( + "\n".join(backfill.to_yaml() for backfill in sorted(backfills, reverse=True)) + ) + + click.echo(f"Created backfill entry in {backfill_file}") + + +@backfill.command( + help="""Validate backfill.yaml file format and content. + + Examples: + + ./bqetl backfill validate moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6 + + \b + # validate all backfill.yaml files if table is not specified + Use the `--project_id` option to change the project to be validated; + default is `moz-fx-data-shared-prod`. 
+ + Examples: + + ./bqetl backfill validate + """ +) +@click.argument("qualified_table_name", required=False) +@sql_dir_option +@project_id_option("moz-fx-data-shared-prod") +@click.pass_context +def validate( + ctx, + qualified_table_name, + sql_dir, + project_id, +): + """Validate backfill.yaml files.""" + backfill_files = [] + + # TODO: this code can potentially be a util + if qualified_table_name: + try: + match = QUALIFIED_TABLE_NAME_RE.match(qualified_table_name) + project_id = match.group("project_id") + dataset_id = match.group("dataset_id") + table_id = match.group("table_id") + except AttributeError: + click.echo( + "Qualified table name must be named like:" + + " <project>.<dataset>.<table>"
+ ) + sys.exit(1) + + path = Path(sql_dir) + query_path = path / project_id / dataset_id / table_id + + if not query_path.exists(): + click.echo(f"{project_id}.{dataset_id}.{table_id}" + " does not exist") + sys.exit(1) + + backfill_file = path / project_id / dataset_id / table_id / BACKFILL_FILE + backfill_files.append(backfill_file) + + else: + backfill_files = paths_matching_name_pattern( + None, sql_dir, project_id, [BACKFILL_FILE] + ) + + for file in backfill_files: + try: + validate_file(file) + except (yaml.YAMLError, ValueError) as e: + click.echo(f"{file} contains the following error:\n {e}") + sys.exit(1) + + if qualified_table_name: + click.echo( + f"{BACKFILL_FILE} has been validated for {project_id}.{dataset_id}.{table_id} " + ) + elif backfill_files: + click.echo( + f"All {BACKFILL_FILE} files have been validated for project {project_id}" + ) diff --git a/bigquery_etl/cli/utils.py b/bigquery_etl/cli/utils.py index b660da2b25..e705734ced 100644 --- a/bigquery_etl/cli/utils.py +++ b/bigquery_etl/cli/utils.py @@ -14,7 +14,7 @@ from bigquery_etl.util.common import TempDatasetReference, project_dirs QUERY_FILE_RE = re.compile( r"^.*/([a-zA-Z0-9-]+)/([a-zA-Z0-9_]+)/([a-zA-Z0-9_]+(_v[0-9]+)?)/" - r"(?:query\.sql|part1\.sql|script\.sql|query\.py|view\.sql|metadata\.yaml)$" + r"(?:query\.sql|part1\.sql|script\.sql|query\.py|view\.sql|metadata\.yaml|backfill\.yaml)$" ) TEST_PROJECT = "bigquery-etl-integration-test" MOZDATA = "mozdata" diff --git a/docs/bqetl.md b/docs/bqetl.md index d5662dd670..36c0e1ebdf 100644 --- a/docs/bqetl.md +++ b/docs/bqetl.md @@ -35,6 +35,7 @@ Commands: routine Commands for managing routines. stripe Commands for Stripe ETL. view Commands for managing views. + backfill Commands for managing backfills.
``` diff --git a/tests/backfill/backfill.yaml b/tests/backfill/backfill.yaml new file mode 100644 index 0000000000..3c6ef28ce5 --- /dev/null +++ b/tests/backfill/backfill.yaml @@ -0,0 +1,9 @@ +2021-05-03: + start_date: 2021-01-03 + end_date: 2021-05-03 + excluded_dates: + - 2021-02-03 + reason: Please provide a reason for the backfill and links to any related bugzilla or jira tickets + watchers: + - nobody@mozilla.com + status: Drafting diff --git a/tests/backfill/test_dir_multiple/backfill.yaml b/tests/backfill/test_dir_multiple/backfill.yaml new file mode 100644 index 0000000000..9ce6d6b3fc --- /dev/null +++ b/tests/backfill/test_dir_multiple/backfill.yaml @@ -0,0 +1,19 @@ +2023-05-03: + start_date: 2023-01-03 + end_date: 2023-05-03 + excluded_dates: + - 2023-02-03 + reason: Please provide a reason for the backfill and links to any related bugzilla or jira tickets + watchers: + - nobody@mozilla.com + status: Drafting + +2021-05-03: + start_date: 2021-01-03 + end_date: 2021-05-03 + excluded_dates: + - 2021-02-03 + reason: Please provide a reason for the backfill and links to any related bugzilla or jira tickets + watchers: + - nobody@mozilla.com + status: Drafting diff --git a/tests/backfill/test_dir_valid/backfill.yaml b/tests/backfill/test_dir_valid/backfill.yaml new file mode 100644 index 0000000000..4902b9f3e7 --- /dev/null +++ b/tests/backfill/test_dir_valid/backfill.yaml @@ -0,0 +1,9 @@ +2023-05-03: + start_date: 2021-01-03 + end_date: 2021-05-03 + excluded_dates: + - 2021-02-03 + reason: no_reason + watchers: + - test@example.org + status: Drafting diff --git a/tests/backfill/test_parse_backfill.py b/tests/backfill/test_parse_backfill.py new file mode 100644 index 0000000000..7d57e1989b --- /dev/null +++ b/tests/backfill/test_parse_backfill.py @@ -0,0 +1,358 @@ +from datetime import date, timedelta +from pathlib import Path + +import pytest + +from bigquery_etl.backfill.parse import ( + BACKFILL_FILE, + DEFAULT_REASON, + DEFAULT_WATCHER, + Backfill, + 
BackfillStatus, +) + +DEFAULT_STATUS = BackfillStatus.DRAFTING + +TEST_DIR = Path(__file__).parent.parent + +TEST_BACKFILL_1 = Backfill( + date(2021, 5, 3), + date(2021, 1, 3), + date(2021, 5, 3), + [date(2021, 2, 3)], + DEFAULT_REASON, + [DEFAULT_WATCHER], + DEFAULT_STATUS, +) + +TEST_BACKFILL_2 = Backfill( + date(2023, 5, 3), + date(2023, 1, 3), + date(2023, 5, 3), + [date(2023, 2, 3)], + DEFAULT_REASON, + [DEFAULT_WATCHER], + DEFAULT_STATUS, +) + + +class TestParseBackfill(object): + def test_backfill_instantiation(self): + backfill = TEST_BACKFILL_1 + + assert backfill.entry_date == date(2021, 5, 3) + assert backfill.start_date == date(2021, 1, 3) + assert backfill.end_date == date(2021, 5, 3) + assert backfill.excluded_dates == [date(2021, 2, 3)] + assert backfill.reason == DEFAULT_REASON + assert backfill.watchers == [DEFAULT_WATCHER] + assert backfill.status == DEFAULT_STATUS + + def test_invalid_watcher(self): + with pytest.raises(ValueError) as e: + invalid_watcher = ["test.org"] + Backfill( + TEST_BACKFILL_1.entry_date, + TEST_BACKFILL_1.start_date, + TEST_BACKFILL_1.end_date, + TEST_BACKFILL_1.excluded_dates, + TEST_BACKFILL_1.reason, + invalid_watcher, + TEST_BACKFILL_1.status, + ) + + assert "Invalid" in str(e.value) + assert "watchers" in str(e.value) + + def test_invalid_watchers(self): + with pytest.raises(ValueError) as e: + invalid_watchers = [DEFAULT_WATCHER, "test.org"] + Backfill( + TEST_BACKFILL_1.entry_date, + TEST_BACKFILL_1.start_date, + TEST_BACKFILL_1.end_date, + TEST_BACKFILL_1.excluded_dates, + TEST_BACKFILL_1.reason, + invalid_watchers, + TEST_BACKFILL_1.status, + ) + + assert "Invalid" in str(e.value) + assert "watchers" in str(e.value) + + def test_no_watchers(self): + with pytest.raises(ValueError) as e: + invalid_watchers = [""] + Backfill( + TEST_BACKFILL_1.entry_date, + TEST_BACKFILL_1.start_date, + TEST_BACKFILL_1.end_date, + TEST_BACKFILL_1.excluded_dates, + TEST_BACKFILL_1.reason, + invalid_watchers, + TEST_BACKFILL_1.status, 
+ ) + + assert "Invalid" in str(e.value) + assert "watchers" in str(e.value) + + def test_multiple_watchers(self): + valid_watchers = TEST_BACKFILL_1.watchers + [ + "test2@example.org", + "test3@example.org", + ] + backfill = Backfill( + TEST_BACKFILL_1.entry_date, + TEST_BACKFILL_1.start_date, + TEST_BACKFILL_1.end_date, + TEST_BACKFILL_1.excluded_dates, + TEST_BACKFILL_1.reason, + valid_watchers, + TEST_BACKFILL_1.status, + ) + + assert backfill.watchers == TEST_BACKFILL_1.watchers + [ + "test2@example.org", + "test3@example.org", + ] + + def test_all_status(self): + valid_status = [status.value for status in BackfillStatus] + for i, status in enumerate(BackfillStatus): + backfill = Backfill( + TEST_BACKFILL_1.entry_date, + TEST_BACKFILL_1.start_date, + TEST_BACKFILL_1.end_date, + TEST_BACKFILL_1.excluded_dates, + TEST_BACKFILL_1.reason, + TEST_BACKFILL_1.watchers, + status, + ) + + assert backfill.status.value == valid_status[i] + + def test_invalid_entry_date_greater_than_today(self): + with pytest.raises(ValueError) as e: + invalid_entry_date = date.today() + timedelta(days=1) + Backfill( + invalid_entry_date, + TEST_BACKFILL_1.start_date, + TEST_BACKFILL_1.end_date, + TEST_BACKFILL_1.excluded_dates, + TEST_BACKFILL_1.reason, + TEST_BACKFILL_1.watchers, + TEST_BACKFILL_1.status, + ) + + assert "can't be in the future" in str(e.value) + + def test_invalid_start_date_greater_than_entry_date(self): + with pytest.raises(ValueError) as e: + invalid_start_date = TEST_BACKFILL_1.entry_date + timedelta(days=1) + Backfill( + TEST_BACKFILL_1.entry_date, + invalid_start_date, + TEST_BACKFILL_1.end_date, + TEST_BACKFILL_1.excluded_dates, + TEST_BACKFILL_1.reason, + TEST_BACKFILL_1.watchers, + TEST_BACKFILL_1.status, + ) + + assert "Invalid start date" in str(e.value) + + def test_invalid_start_date_greater_than_end_date(self): + with pytest.raises(ValueError) as e: + invalid_start_date = TEST_BACKFILL_1.end_date + timedelta(days=1) + Backfill( + 
TEST_BACKFILL_1.entry_date, + invalid_start_date, + TEST_BACKFILL_1.end_date, + TEST_BACKFILL_1.excluded_dates, + TEST_BACKFILL_1.reason, + TEST_BACKFILL_1.watchers, + TEST_BACKFILL_1.status, + ) + + assert "Invalid start date" in str(e.value) + + def test_invalid_end_date_greater_than_entry_date(self): + with pytest.raises(ValueError) as e: + invalid_end_date = TEST_BACKFILL_1.entry_date + timedelta(days=1) + Backfill( + TEST_BACKFILL_1.entry_date, + TEST_BACKFILL_1.start_date, + invalid_end_date, + TEST_BACKFILL_1.excluded_dates, + TEST_BACKFILL_1.reason, + TEST_BACKFILL_1.watchers, + TEST_BACKFILL_1.status, + ) + + assert "Invalid end date" in str(e.value) + + def test_invalid_excluded_dates_greater_than_end_date(self): + with pytest.raises(ValueError) as e: + invalid_excluded_dates = [TEST_BACKFILL_1.end_date + timedelta(days=1)] + Backfill( + TEST_BACKFILL_1.entry_date, + TEST_BACKFILL_1.start_date, + TEST_BACKFILL_1.end_date, + invalid_excluded_dates, + TEST_BACKFILL_1.reason, + TEST_BACKFILL_1.watchers, + TEST_BACKFILL_1.status, + ) + + assert "Invalid excluded dates" in str(e.value) + + def test_invalid_excluded_dates_greater_than_end_date_multiple(self): + with pytest.raises(ValueError) as e: + invalid_excluded_dates = [ + TEST_BACKFILL_1.end_date, + TEST_BACKFILL_1.end_date + timedelta(days=1), + ] + Backfill( + TEST_BACKFILL_1.entry_date, + TEST_BACKFILL_1.start_date, + TEST_BACKFILL_1.end_date, + invalid_excluded_dates, + TEST_BACKFILL_1.reason, + TEST_BACKFILL_1.watchers, + TEST_BACKFILL_1.status, + ) + + assert "Invalid excluded dates" in str(e.value) + + def test_invalid_excluded_dates_less_than_start_date(self): + with pytest.raises(ValueError) as e: + invalid_excluded_dates = [TEST_BACKFILL_1.start_date - timedelta(days=1)] + Backfill( + TEST_BACKFILL_1.entry_date, + TEST_BACKFILL_1.start_date, + TEST_BACKFILL_1.end_date, + invalid_excluded_dates, + TEST_BACKFILL_1.reason, + TEST_BACKFILL_1.watchers, + TEST_BACKFILL_1.status, + ) + + assert 
"Invalid excluded dates" in str(e.value) + + def test_invalid_excluded_dates_less_than_start_date_multiple(self): + with pytest.raises(ValueError) as e: + invalid_excluded_dates = [ + TEST_BACKFILL_1.start_date, + TEST_BACKFILL_1.start_date - timedelta(days=1), + ] + Backfill( + TEST_BACKFILL_1.entry_date, + TEST_BACKFILL_1.start_date, + TEST_BACKFILL_1.end_date, + invalid_excluded_dates, + TEST_BACKFILL_1.reason, + TEST_BACKFILL_1.watchers, + TEST_BACKFILL_1.status, + ) + + assert "Invalid excluded dates" in str(e.value) + + def test_invalid_status(self): + with pytest.raises(AttributeError): + invalid_status = "invalid_status" + Backfill( + TEST_BACKFILL_1.entry_date, + TEST_BACKFILL_1.start_date, + TEST_BACKFILL_1.end_date, + TEST_BACKFILL_1.excluded_dates, + TEST_BACKFILL_1.reason, + TEST_BACKFILL_1.watchers, + invalid_status, + ) + + def test_non_existing_file(self): + backfill_file = TEST_DIR / "nonexisting_dir" / BACKFILL_FILE + with pytest.raises(FileNotFoundError): + Backfill.entries_from_file(backfill_file) + + def test_of_backfill_file_no_backfill(self): + backfill_file = TEST_DIR / "test" / BACKFILL_FILE + with pytest.raises(FileNotFoundError): + Backfill.entries_from_file(backfill_file) + + def test_of_backfill_file_one(self): + backfill_file = TEST_DIR / "backfill" / BACKFILL_FILE + backfills = Backfill.entries_from_file(backfill_file) + backfill = backfills[0] + + assert backfill.entry_date == date(2021, 5, 3) + assert backfill.start_date == date(2021, 1, 3) + assert backfill.end_date == date(2021, 5, 3) + assert backfill.excluded_dates == [date(2021, 2, 3)] + assert backfill.reason == DEFAULT_REASON + assert backfill.watchers == [DEFAULT_WATCHER] + assert backfill.status == DEFAULT_STATUS + + def test_entries_from_file_multiple(self): + backfill_file = TEST_DIR / "backfill" / "test_dir_multiple" / BACKFILL_FILE + backfills = Backfill.entries_from_file(backfill_file) + + backfill_1 = TEST_BACKFILL_1 + backfill_2 = TEST_BACKFILL_2 + + assert 
backfills[0] == backfill_2 + assert backfills[1] == backfill_1 + + def test_invalid_file(self): + backfill_file = TEST_DIR / "test" / "invalid_file_name.yaml" + with pytest.raises(ValueError) as e: + Backfill.entries_from_file(backfill_file) + + assert "Invalid file" in str(e.value) + + def test_of_non_existing_table(self): + backfill_file = TEST_DIR / "non_exist_folder" / BACKFILL_FILE + with pytest.raises(FileNotFoundError): + Backfill.entries_from_file(backfill_file) + + def test_is_backfill_file(self): + assert Backfill.is_backfill_file("foo/bar/invalid.json") is False + assert Backfill.is_backfill_file("foo/bar/invalid.yaml") is False + assert Backfill.is_backfill_file(BACKFILL_FILE) + assert Backfill.is_backfill_file("some/path/to/" + BACKFILL_FILE) + + def test_to_yaml(self): + expected = ( + "2021-05-03:\n" + " start_date: 2021-01-03\n" + " end_date: 2021-05-03\n" + " excluded_dates:\n" + " - 2021-02-03\n" + " reason: Please provide a reason for the backfill and links to any related " + "bugzilla\n" + " or jira tickets\n" + " watchers:\n" + " - nobody@mozilla.com\n" + " status: Drafting\n" + ) + + results = TEST_BACKFILL_1.to_yaml() + assert results == expected + + def test_to_yaml_no_excluded_dates(self): + expected = ( + "2021-05-03:\n" + " start_date: 2021-01-03\n" + " end_date: 2021-05-03\n" + " reason: Please provide a reason for the backfill and links to any related " + "bugzilla\n" + " or jira tickets\n" + " watchers:\n" + " - nobody@mozilla.com\n" + " status: Drafting\n" + ) + + TEST_BACKFILL_1.excluded_dates = [] + results = TEST_BACKFILL_1.to_yaml() + + assert results == expected diff --git a/tests/backfill/test_validate_backfill.py b/tests/backfill/test_validate_backfill.py new file mode 100644 index 0000000000..0b8cb5462b --- /dev/null +++ b/tests/backfill/test_validate_backfill.py @@ -0,0 +1,144 @@ +from datetime import date +from pathlib import Path + +import pytest + +from bigquery_etl.backfill.parse import BACKFILL_FILE, DEFAULT_REASON, 
Backfill +from bigquery_etl.backfill.validate import ( + validate_duplicate_entry_dates, + validate_entries, + validate_entries_are_sorted, + validate_excluded_dates, + validate_file, + validate_overlap_dates, + validate_reason, +) +from tests.backfill.test_parse_backfill import TEST_BACKFILL_1, TEST_BACKFILL_2 + +TEST_DIR = Path(__file__).parent.parent + +VALID_REASON = "test_reason" +VALID_WATCHER = "test@example.org" + + +class TestValidateBackfill(object): + def test_duplicate_entry_dates_pass(self): + validate_duplicate_entry_dates(TEST_BACKFILL_1, TEST_BACKFILL_2) + + def test_duplicate_entry_dates_fail(self): + with pytest.raises(ValueError) as e: + validate_duplicate_entry_dates(TEST_BACKFILL_1, TEST_BACKFILL_1) + + assert "Duplicate backfill" in str(e.value) + + def test_overlap_dates_pass(self): + validate_overlap_dates(TEST_BACKFILL_1, TEST_BACKFILL_2) + + def test_overlap_dates_fail(self): + with pytest.raises(ValueError) as e: + validate_overlap_dates(TEST_BACKFILL_1, TEST_BACKFILL_1) + + assert "overlap dates" in str(e.value) + + def test_excluded_dates_duplicates(self): + invalid_excluded_dates = [date(2021, 2, 3), date(2021, 2, 3)] + invalid_backfill = Backfill( + TEST_BACKFILL_1.entry_date, + TEST_BACKFILL_1.start_date, + TEST_BACKFILL_1.end_date, + invalid_excluded_dates, + TEST_BACKFILL_1.reason, + TEST_BACKFILL_1.watchers, + TEST_BACKFILL_1.status, + ) + + with pytest.raises(ValueError) as e: + validate_excluded_dates(invalid_backfill) + + assert "duplicate excluded dates" in str(e.value) + + def test_excluded_dates_not_sorted(self): + invalid_excluded_dates = [date(2021, 2, 4), date(2021, 2, 3)] + invalid_backfill = Backfill( + TEST_BACKFILL_1.entry_date, + TEST_BACKFILL_1.start_date, + TEST_BACKFILL_1.end_date, + invalid_excluded_dates, + TEST_BACKFILL_1.reason, + TEST_BACKFILL_1.watchers, + TEST_BACKFILL_1.status, + ) + + with pytest.raises(ValueError) as e: + validate_excluded_dates(invalid_backfill) + + assert "excluded dates not sorted" in 
str(e.value) + + def test_valid_reason_pass(self): + valid_backfill = Backfill( + TEST_BACKFILL_1.entry_date, + TEST_BACKFILL_1.start_date, + TEST_BACKFILL_1.end_date, + TEST_BACKFILL_1.excluded_dates, + VALID_REASON, + TEST_BACKFILL_1.watchers, + TEST_BACKFILL_1.status, + ) + + validate_reason(valid_backfill) + + def test_reason_default_fail(self): + invalid_reason = DEFAULT_REASON + invalid_backfill = Backfill( + TEST_BACKFILL_1.entry_date, + TEST_BACKFILL_1.start_date, + TEST_BACKFILL_1.end_date, + TEST_BACKFILL_1.excluded_dates, + invalid_reason, + TEST_BACKFILL_1.watchers, + TEST_BACKFILL_1.status, + ) + + with pytest.raises(ValueError) as e: + validate_reason(invalid_backfill) + + assert "Invalid Reason" in str(e.value) + + def test_reason_empty_fail(self): + invalid_reason = "" + invalid_backfill = Backfill( + TEST_BACKFILL_1.entry_date, + TEST_BACKFILL_1.start_date, + TEST_BACKFILL_1.end_date, + TEST_BACKFILL_1.excluded_dates, + invalid_reason, + TEST_BACKFILL_1.watchers, + TEST_BACKFILL_1.status, + ) + with pytest.raises(ValueError) as e: + validate_reason(invalid_backfill) + + assert "Invalid Reason" in str(e.value) + + def test_entries_sorted(self): + backfills = [TEST_BACKFILL_2, TEST_BACKFILL_1] + validate_entries_are_sorted(backfills) + + def test_entries_not_sorted(self): + backfills = [TEST_BACKFILL_1, TEST_BACKFILL_2] + with pytest.raises(ValueError) as e: + validate_entries_are_sorted(backfills) + + assert "Backfill entries are not sorted" in str(e.value) + + def test_validate_entries_pass(self): + TEST_BACKFILL_1.watchers = [VALID_WATCHER] + TEST_BACKFILL_1.reason = VALID_REASON + TEST_BACKFILL_2.watchers = [VALID_WATCHER] + TEST_BACKFILL_2.reason = VALID_REASON + backfills = [TEST_BACKFILL_2, TEST_BACKFILL_1] + validate_entries(backfills) + + def test_validate_file(self): + backfill_file = TEST_DIR / "backfill" / "test_dir_valid" / BACKFILL_FILE + validate_file(backfill_file) diff --git a/tests/cli/test_cli_backfill.py 
b/tests/cli/test_cli_backfill.py new file mode 100644 index 0000000000..ab8b148778 --- /dev/null +++ b/tests/cli/test_cli_backfill.py @@ -0,0 +1,714 @@ +import os +from datetime import date, timedelta +from pathlib import Path + +import pytest +from click.testing import CliRunner + +from bigquery_etl.backfill.parse import ( + BACKFILL_FILE, + DEFAULT_REASON, + DEFAULT_WATCHER, + Backfill, + BackfillStatus, +) +from bigquery_etl.cli.backfill import create, validate + +DEFAULT_STATUS = BackfillStatus.DRAFTING +VALID_REASON = "test_reason" +VALID_WATCHER = "test@example.org" +VALID_BACKFILL = Backfill( + date(2021, 5, 3), + date(2021, 1, 3), + date(2021, 5, 3), + [date(2021, 2, 3)], + VALID_REASON, + [VALID_WATCHER], + DEFAULT_STATUS, +) +BACKFILL_YAML_TEMPLATE = ( + "2021-05-04:\n" + " start_date: 2021-01-03\n" + " end_date: 2021-05-03\n" + " excluded_dates:\n" + " - 2021-02-03\n" + " reason: test_reason\n" + " watchers:\n" + " - test@example.org\n" + " status: Drafting\n" +) + + +class TestBackfill: + @pytest.fixture + def runner(self): + return CliRunner() + + def test_create_backfill(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + result = runner.invoke( + create, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + "--start_date=2021-03-01", + ], + ) + + assert result.exit_code == 0 + assert BACKFILL_FILE in os.listdir( + "sql/moz-fx-data-shared-prod/test/test_query_v1" + ) + + backfill_file = SQL_DIR + "/" + BACKFILL_FILE + backfill = Backfill.entries_from_file(backfill_file)[0] + + assert backfill.entry_date == date.today() + assert backfill.start_date == date(2021, 3, 1) + assert backfill.end_date == date.today() + assert backfill.watchers == [DEFAULT_WATCHER] + assert backfill.reason == DEFAULT_REASON + assert backfill.status == DEFAULT_STATUS + + def test_create_backfill_with_invalid_watcher(self, runner): + with runner.isolated_filesystem(): + 
os.makedirs("sql/moz-fx-data-shared-prod/test/test_query_v1") + invalid_watcher = "test.org" + result = runner.invoke( + create, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + "--start_date=2021-03-01", + "--watcher=" + invalid_watcher, + ], + ) + assert result.exit_code == 1 + assert "Invalid" in str(result.exception) + assert "watchers" in str(result.exception) + + def test_create_backfill_with_invalid_path(self, runner): + with runner.isolated_filesystem(): + invalid_path = "test.test_query_v1" + result = runner.invoke(create, [invalid_path, "--start_date=2021-03-01"]) + assert result.exit_code == 2 + assert "Invalid" in result.output + assert "path" in result.output + + def test_create_backfill_with_invalid_start_date_greater_than_end_date( + self, runner + ): + with runner.isolated_filesystem(): + os.makedirs("sql/moz-fx-data-shared-prod/test/test_query_v1") + invalid_start_date = "2021-05-01" + result = runner.invoke( + create, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + "--start_date=" + invalid_start_date, + "--end_date=2021-03-01", + ], + ) + assert result.exit_code == 1 + assert "Invalid start date" in str(result.exception) + + def test_create_backfill_with_invalid_excluded_dates_before_start_date( + self, runner + ): + with runner.isolated_filesystem(): + os.makedirs("sql/moz-fx-data-shared-prod/test/test_query_v1") + invalid_exclude_date = "2021-03-01" + result = runner.invoke( + create, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + "--start_date=2021-05-01", + "--exclude=" + invalid_exclude_date, + ], + ) + assert result.exit_code == 1 + assert "Invalid excluded dates" in str(result.exception) + + def test_create_backfill_with_excluded_dates_after_end_date(self, runner): + with runner.isolated_filesystem(): + os.makedirs("sql/moz-fx-data-shared-prod/test/test_query_v1") + invalid_exclude_date = "2021-07-01" + result = runner.invoke( + create, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + "--start_date=2021-05-01", + 
"--end_date=2021-06-01", + "--exclude=" + invalid_exclude_date, + ], + ) + assert result.exit_code == 1 + assert "Invalid excluded dates" in str(result.exception) + + def test_create_backfill_entry_with_all_params(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + result = runner.invoke( + create, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + "--start_date=2021-03-01", + "--end_date=2021-03-10", + "--exclude=2021-03-05", + "--watcher=test@example.org", + ], + ) + + assert result.exit_code == 0 + assert BACKFILL_FILE in os.listdir( + "sql/moz-fx-data-shared-prod/test/test_query_v1" + ) + + backfill_file = SQL_DIR + "/" + BACKFILL_FILE + backfill = Backfill.entries_from_file(backfill_file)[0] + + assert backfill.start_date == date(2021, 3, 1) + assert backfill.end_date == date(2021, 3, 10) + assert backfill.watchers == [VALID_WATCHER] + assert backfill.reason == DEFAULT_REASON + assert backfill.status == DEFAULT_STATUS + + def test_create_backfill_with_exsting_entry(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_entry_1 = Backfill( + date(2021, 5, 3), + date(2021, 1, 3), + date(2021, 5, 3), + [date(2021, 2, 3)], + VALID_REASON, + [VALID_WATCHER], + DEFAULT_STATUS, + ) + + backfill_entry_2 = Backfill( + date.today(), + date(2023, 3, 1), + date(2023, 3, 10), + [], + DEFAULT_REASON, + [DEFAULT_WATCHER], + DEFAULT_STATUS, + ) + + backfill_file = ( + Path("sql/moz-fx-data-shared-prod/test/test_query_v1") / BACKFILL_FILE + ) + backfill_file.write_text(backfill_entry_1.to_yaml()) + assert BACKFILL_FILE in os.listdir( + "sql/moz-fx-data-shared-prod/test/test_query_v1" + ) + + backfills = Backfill.entries_from_file(backfill_file) + assert backfills[0] == backfill_entry_1 + + result = runner.invoke( + create, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + 
"--start_date=2023-03-01", + "--end_date=2023-03-10", + ], + ) + + assert result.exit_code == 0 + + backfills = Backfill.entries_from_file(backfill_file) + + assert backfills[1] == backfill_entry_1 + assert backfills[0] == backfill_entry_2 + + def test_validate_backfill(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + backfill_file.write_text(BACKFILL_YAML_TEMPLATE) + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 0 + + def test_validate_backfill_invalid_table_name(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert ( + "Qualified table name must be named like: ..
" + in validate_backfill_result.output + ) + + def test_validate_backfill_non_existing_table_name(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v2", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert "does not exist" in validate_backfill_result.output + + def test_validate_backfill_invalid_default_reason(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + invalid_backfill = BACKFILL_YAML_TEMPLATE.replace( + VALID_REASON, DEFAULT_REASON + ) + backfill_file.write_text(invalid_backfill) + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert "Invalid Reason" in validate_backfill_result.output + + def test_validate_backfill_empty_reason(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + invalid_reason = "" + invalid_backfill = BACKFILL_YAML_TEMPLATE.replace( + VALID_REASON, invalid_reason + ) + backfill_file.write_text(invalid_backfill) + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert "Invalid Reason" in validate_backfill_result.output + + def test_validate_backfill_invalid_watcher(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / 
BACKFILL_FILE + invalid_watcher = "test@example" + invalid_backfill = BACKFILL_YAML_TEMPLATE.replace( + VALID_WATCHER, invalid_watcher + ) + backfill_file.write_text(invalid_backfill) + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert "Invalid" in validate_backfill_result.output + assert "watchers" in validate_backfill_result.output + + def test_validate_backfill_empty_watcher(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + invalid_watcher = "" + invalid_backfill = BACKFILL_YAML_TEMPLATE.replace( + VALID_WATCHER, invalid_watcher + ) + backfill_file.write_text(invalid_backfill) + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert "Invalid" in validate_backfill_result.output + assert "watchers" in validate_backfill_result.output + + def test_validate_backfill_watchers_duplicated(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + invalid_watchers = " - test@example.org\n" " - test@example.org\n" + invalid_backfill = BACKFILL_YAML_TEMPLATE.replace( + " - " + VALID_WATCHER, invalid_watchers + ) + backfill_file.write_text(invalid_backfill) + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert "Duplicate or default watcher" in validate_backfill_result.output + + def 
test_validate_backfill_invalid_status(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + invalid_status = "INVALIDSTATUS" + invalid_backfill = BACKFILL_YAML_TEMPLATE.replace( + DEFAULT_STATUS.value, invalid_status + ) + backfill_file.write_text(invalid_backfill) + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert invalid_status in str(validate_backfill_result.exception) + + def test_validate_backfill_duplicate_entry_dates(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + + duplicate_entry_date = "2021-05-05" + invalid_backfill = BACKFILL_YAML_TEMPLATE.replace( + "2021-05-04", duplicate_entry_date + ) + backfill_file.write_text(invalid_backfill + "\n" + invalid_backfill) + + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert "Backfill entry already exists" in validate_backfill_result.output + + def test_validate_backfill_invalid_entry_date(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + invalid_entry_date = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d") + backfill_file.write_text( + BACKFILL_YAML_TEMPLATE.replace("2021-05-04", invalid_entry_date) + ) + + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + 
assert validate_backfill_result.exit_code == 1 + assert "can't be in the future" in validate_backfill_result.output + + def test_validate_backfill_invalid_start_date_greater_than_end_date(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + invalid_start_date = "2021-05-04" + backfill_file.write_text( + BACKFILL_YAML_TEMPLATE.replace("2021-01-03", invalid_start_date) + ) + + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert "Invalid start date" in validate_backfill_result.output + + # + def test_validate_backfill_invalid_start_date_greater_than_entry_date(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + invalid_start_date = "2021-05-05" + backfill_file.write_text( + BACKFILL_YAML_TEMPLATE.replace("2021-01-03", invalid_start_date) + ) + + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert "Invalid start date" in validate_backfill_result.output + + def test_validate_backfill_invalid_end_date_greater_than_entry_date(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + invalid_end_date = "2021-05-05" + backfill_file.write_text( + BACKFILL_YAML_TEMPLATE.replace("2021-05-03", invalid_end_date) + ) + + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + 
"moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert "Invalid end date" in validate_backfill_result.output + + def test_validate_backfill_invalid_excluded_dates_less_than_start_date( + self, runner + ): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + invalid_excluded_date = "2021-01-02" + backfill_file.write_text( + BACKFILL_YAML_TEMPLATE.replace("2021-02-03", invalid_excluded_date) + ) + + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert "Invalid excluded dates" in validate_backfill_result.output + + def test_validate_backfill_invalid_excluded_dates_greater_than_end_date( + self, runner + ): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + invalid_excluded_date = "2021-05-04" + backfill_file.write_text( + BACKFILL_YAML_TEMPLATE.replace("2021-02-03", invalid_excluded_date) + ) + + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert "Invalid excluded dates" in validate_backfill_result.output + + def test_validate_backfill_invalid_excluded_dates_duplicated(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + duplicate_excluded_dates = " - 2021-02-03\n" " - 2021-02-03\n" + backfill_file.write_text( + ( + "2021-05-04:\n" + " start_date: 2021-01-03\n" + " end_date: 2021-05-03\n" + " 
excluded_dates:\n" + + duplicate_excluded_dates + + " reason: test_reason\n" + " watchers:\n" + " - test@example.org\n" + " status: Drafting\n" + ) + ) + + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert "duplicate excluded dates" in validate_backfill_result.output + + def test_validate_backfill_invalid_excluded_dates_not_sorted(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + duplicate_excluded_dates = " - 2021-02-04\n" " - 2021-02-03\n" + backfill_file.write_text( + "2021-05-04:\n" + " start_date: 2021-01-03\n" + " end_date: 2021-05-03\n" + " excluded_dates:\n" + + duplicate_excluded_dates + + " reason: test_reason\n" + " watchers:\n" + " - test@example.org\n" + " status: Drafting\n" + ) + + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert "excluded dates not sorted" in validate_backfill_result.output + + def test_validate_backfill_entries_not_sorted(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + backfill_file.write_text( + BACKFILL_YAML_TEMPLATE + "\n" + "2023-05-04:\n" + " start_date: 2020-01-03\n" + " end_date: 2020-05-03\n" + " reason: test_reason\n" + " watchers:\n" + " - test@example.org\n" + " status: Drafting\n" + ) + + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert 
"entries are not sorted" in validate_backfill_result.output + + def test_validate_backfill_overlap_dates(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + backfill_file.write_text( + BACKFILL_YAML_TEMPLATE + "\n" + "2021-05-03:\n" + " start_date: 2021-01-03\n" + " end_date: 2021-05-03\n" + " reason: test_reason\n" + " watchers:\n" + " - test@example.org\n" + " status: Drafting\n" + ) + + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 1 + assert "overlap dates" in validate_backfill_result.output + + def test_validate_backfill_overlap_dates_not_drafting_status(self, runner): + with runner.isolated_filesystem(): + SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1" + os.makedirs(SQL_DIR) + + backfill_file = Path(SQL_DIR) / BACKFILL_FILE + backfill_file.write_text( + BACKFILL_YAML_TEMPLATE + "\n" + "2021-05-03:\n" + " start_date: 2021-01-03\n" + " end_date: 2021-05-03\n" + " reason: test_reason\n" + " watchers:\n" + " - test@example.org\n" + " status: Complete\n" + ) + + assert BACKFILL_FILE in os.listdir(SQL_DIR) + + validate_backfill_result = runner.invoke( + validate, + [ + "moz-fx-data-shared-prod.test.test_query_v1", + ], + ) + assert validate_backfill_result.exit_code == 0