DENG-803/805: Create & Validate backfill cli commands (#3760)

* Added backfill create and validate cli commands

---------

Co-authored-by: Alexander <anicholson@mozilla.com>
Co-authored-by: kik-kik <42538694+kik-kik@users.noreply.github.com>
This commit is contained in:
Winnie Chan 2023-06-01 10:06:09 -07:00 коммит произвёл GitHub
Родитель c08f21c2d5
Коммит 071c53e4cb
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
14 изменённых файлов: 1746 добавлений и 1 удалений

Просмотреть файл

@ -151,6 +151,18 @@ jobs:
echo $PATHS
PATH="venv/bin:$PATH" script/bqetl dryrun --validate-schemas $PATHS
# yamllint enable rule:line-length
validate-backfills:
docker: *docker
steps:
- checkout
- *restore_venv_cache
- *build
- *attach_generated_sql
- *copy_staged_sql
- run:
name: Verify that backfill.yaml files are valid
command: |
PATH="venv/bin:$PATH" script/bqetl backfill validate
validate-metadata:
docker: *docker
steps:
@ -614,6 +626,9 @@ workflows:
requires:
- deploy-changes-to-stage
- integration
- validate-backfills:
requires:
- deploy-changes-to-stage
- validate-dags:
requires:
- generate-dags

Просмотреть файл

@ -0,0 +1 @@
"""Backfill."""

Просмотреть файл

@ -0,0 +1,174 @@
"""Parse backfill entries."""
import enum
import os
from datetime import date
from pathlib import Path
from typing import List
import attr
import yaml
from bigquery_etl.query_scheduling.utils import is_email_or_github_identity
# File name that holds the backfill entries of a table directory.
BACKFILL_FILE = "backfill.yaml"
# Placeholder watcher written by `bqetl backfill create`; validation rejects it.
DEFAULT_WATCHER = "nobody@mozilla.com"
# Placeholder reason written by `bqetl backfill create`; validation rejects it.
DEFAULT_REASON = (
    "Please provide a reason for the backfill "
    "and links to any related bugzilla or jira tickets"
)
class UniqueKeyLoader(yaml.SafeLoader):
    """Safe YAML loader that refuses mappings containing a repeated key."""

    def construct_mapping(self, node, deep=False):
        """Build the mapping for ``node`` after verifying its keys are unique.

        Raises:
            ValueError: if the same key occurs more than once in ``node``.
        """
        seen_keys = set()
        for key_node, _value_node in node.value:
            current_key = self.construct_object(key_node, deep=deep)
            if current_key in seen_keys:
                raise ValueError(
                    f"Backfill entry already exists with entry date: {current_key}."
                )
            seen_keys.add(current_key)

        # Keys are unique; let SafeLoader build the actual mapping.
        return super().construct_mapping(node, deep)
class Literal(str):
    """String subclass used to mark values that are dumped as YAML literals."""
def literal_presenter(dumper, data):
    """Represent ``data`` as a YAML string scalar using ``dumper``."""
    string_tag = "tag:yaml.org,2002:str"
    return dumper.represent_scalar(string_tag, data)
# Register literal_presenter as the representer yaml uses for Literal values.
yaml.add_representer(Literal, literal_presenter)
class BackfillStatus(enum.Enum):
    """Status values a backfill entry can have.

    Member values are the strings written to ``backfill.yaml``; entries read
    from a file are looked up by upper-cased member name.
    """

    DRAFTING = "Drafting"
    VALIDATING = "Validating"
    COMPLETE = "Complete"
@attr.s(auto_attribs=True)
class Backfill:
    """
    One backfill entry of a backfill.yaml file.

    Built with attrs (docs: https://www.attrs.org). Every ``validate_*``
    method is attached to its field as an attrs validator and raises
    ``ValueError`` when the supplied value is rejected.
    """

    # Date the entry is keyed under in the YAML file.
    entry_date: date = attr.ib()
    # First and last date to be backfilled.
    start_date: date = attr.ib()
    end_date: date = attr.ib()
    # Dates strictly between start_date and end_date that are left out.
    excluded_dates: List[date] = attr.ib()
    reason: str = attr.ib()
    # Email addresses or GitHub identities.
    watchers: List[str] = attr.ib()
    status: BackfillStatus = attr.ib()

    @entry_date.validator
    def validate_entry_date(self, attribute, value):
        """Reject an entry date that lies after today."""
        if value > date.today():
            raise ValueError(f"Backfill entry {value} can't be in the future.")

    @start_date.validator
    def validate_start_date(self, attribute, value):
        """Reject a start date later than the end date or the entry date."""
        if value > self.end_date or value > self.entry_date:
            raise ValueError(f"Invalid start date: {value}.")

    @end_date.validator
    def validate_end_date(self, attribute, value):
        """Reject an end date before the start date or after the entry date."""
        if self.start_date > value or self.end_date > self.entry_date:
            raise ValueError(f"Invalid end date: {value}.")

    @excluded_dates.validator
    def validate_excluded_dates(self, attribute, value):
        """Reject excluded dates not strictly inside (start_date, end_date)."""
        out_of_range = any(
            not (self.start_date < excluded < self.end_date) for excluded in value
        )
        if out_of_range:
            raise ValueError(f"Invalid excluded dates: {value}.")

    @watchers.validator
    def validate_watchers(self, attribute, value):
        """Reject an empty watcher list or any watcher that is not a valid identity."""
        if not value or any(
            not (watcher and is_email_or_github_identity(watcher)) for watcher in value
        ):
            raise ValueError(f"Invalid email or Github identity for watchers: {value}.")

    @status.validator
    def validate_status(self, attribute, value):
        """Reject a status whose name is not a ``BackfillStatus`` member.

        ``value.name`` is read first, so a value without a ``name`` attribute
        (for example a plain string) fails with ``AttributeError``.
        """
        status_name = value.name
        if not hasattr(BackfillStatus, status_name):
            raise ValueError(f"Invalid status: {value.name}.")

    @staticmethod
    def is_backfill_file(file_path: Path) -> bool:
        """Return True when the final path component equals ``BACKFILL_FILE``."""
        file_name = os.path.basename(file_path)
        return file_name == BACKFILL_FILE

    @classmethod
    def entries_from_file(cls, file: Path) -> List["Backfill"]:
        """
        Read every backfill entry from the given backfill.yaml file.

        Returns the entries in file order; an empty file yields an empty list.
        Raises ``ValueError`` if ``file`` is not named like a backfill file,
        if a key is duplicated, or if an entry fails validation.
        """
        if not cls.is_backfill_file(file):
            raise ValueError(f"Invalid file: {file}.")

        with open(file, "r") as yaml_stream:
            # An empty document loads as None; treat it as "no entries".
            raw_entries = yaml.load(yaml_stream, Loader=UniqueKeyLoader) or {}

        parsed: List[Backfill] = []
        for entry_date, fields in raw_entries.items():
            # excluded_dates is optional in the file.
            excluded = fields["excluded_dates"] if "excluded_dates" in fields else []
            parsed.append(
                cls(
                    entry_date=entry_date,
                    start_date=fields["start_date"],
                    end_date=fields["end_date"],
                    excluded_dates=excluded,
                    reason=fields["reason"],
                    watchers=fields["watchers"],
                    status=BackfillStatus[fields["status"].upper()],
                )
            )
        return parsed

    def to_yaml(self) -> str:
        """Serialize this entry to a YAML string keyed by its entry date.

        ``excluded_dates`` is written sorted and is omitted entirely when empty.
        """
        fields = {
            "start_date": self.start_date,
            "end_date": self.end_date,
        }
        excluded = sorted(self.excluded_dates)
        if excluded:
            fields["excluded_dates"] = excluded
        fields["reason"] = self.reason
        fields["watchers"] = self.watchers
        fields["status"] = self.status.value

        # sort_keys=False keeps the field order defined above.
        return yaml.dump({self.entry_date: fields}, sort_keys=False)

Просмотреть файл

@ -0,0 +1,77 @@
"""Validate backfill entries."""
from pathlib import Path
from typing import List
from ..backfill.parse import DEFAULT_REASON, DEFAULT_WATCHER, Backfill, BackfillStatus
def validate_duplicate_entry_dates(entry_1: Backfill, entry_2: Backfill) -> None:
    """Raise ``ValueError`` when both backfill entries share one entry date."""
    if entry_1.entry_date != entry_2.entry_date:
        return
    raise ValueError(f"Duplicate backfill with entry date: {entry_1.entry_date}.")
def validate_overlap_dates(entry_1: Backfill, entry_2: Backfill) -> None:
    """Raise ``ValueError`` when the date ranges of two entries intersect.

    Ranges are inclusive: sharing a single day counts as an overlap.
    """
    latest_start = max(entry_1.start_date, entry_2.start_date)
    earliest_end = min(entry_1.end_date, entry_2.end_date)
    if latest_start > earliest_end:
        return
    raise ValueError(
        f"Existing backfill entry with overlap dates from: {entry_2.entry_date}."
    )
def validate_excluded_dates(entry: Backfill) -> None:
    """Raise ``ValueError`` unless the excluded dates are ascending and unique."""
    excluded = entry.excluded_dates

    # Order is checked first, so an unsorted list is reported as unsorted.
    if excluded != sorted(excluded):
        raise ValueError(
            f"Existing backfill entry with excluded dates not sorted: {entry.entry_date}."
        )

    if len(set(excluded)) != len(excluded):
        raise ValueError(
            f"Existing backfill entry with duplicate excluded dates: {entry.entry_date}."
        )
def validate_reason(entry: Backfill) -> None:
    """Raise ``ValueError`` when the reason is empty or still the default text."""
    reason = entry.reason
    if reason and reason != DEFAULT_REASON:
        return
    raise ValueError(f"Invalid Reason: {entry.reason}.")
def validate_watchers(entry: Backfill) -> None:
    """Raise ``ValueError`` when the default watcher is listed or a watcher repeats."""
    watchers = entry.watchers
    if DEFAULT_WATCHER in watchers or len(set(watchers)) != len(watchers):
        raise ValueError(f"Duplicate or default watcher in ({entry.watchers}).")
def validate_entries_are_sorted(backfills: List[Backfill]) -> None:
    """Raise ``ValueError`` unless entries are ordered newest entry date first."""
    dates_in_file_order = [entry.entry_date for entry in backfills]
    newest_first = sorted(dates_in_file_order, reverse=True)
    if dates_in_file_order != newest_first:
        raise ValueError("Backfill entries are not sorted")
def validate_file(file: Path) -> None:
    """Parse the given backfill.yaml file and validate every entry in it."""
    validate_entries(Backfill.entries_from_file(file))
def validate_entries(backfills: list) -> None:
    """Run all per-entry and cross-entry checks on a list of backfill entries.

    Every entry has its watchers, reason and excluded dates checked. Entries
    in Drafting status are additionally compared with each later Drafting
    entry for duplicate entry dates and overlapping ranges. Finally the list
    order is checked. The first failing check raises ``ValueError``.
    """
    per_entry_checks = (validate_watchers, validate_reason, validate_excluded_dates)

    for position, current in enumerate(backfills):
        for check in per_entry_checks:
            check(current)

        # Only entries with drafting status are compared against each other.
        if current.status != BackfillStatus.DRAFTING:
            continue

        for other in backfills[position + 1 :]:
            if other.status != BackfillStatus.DRAFTING:
                continue
            validate_duplicate_entry_dates(current, other)
            validate_overlap_dates(current, other)

    validate_entries_are_sorted(backfills)

Просмотреть файл

@ -8,6 +8,7 @@ from .._version import __version__
# We rename the import, otherwise it affects monkeypatching in tests
from ..cli.alchemer import alchemer as alchemer_
from ..cli.backfill import backfill
from ..cli.dag import dag
from ..cli.dryrun import dryrun
from ..cli.format import format
@ -45,6 +46,7 @@ def cli(prog_name=None):
"docs": docs_,
"copy_deduplicate": copy_deduplicate,
"stage": stage,
"backfill": backfill,
}
@click.group(commands=commands)

Просмотреть файл

@ -0,0 +1,222 @@
"""bigquery-etl CLI backfill command."""
import re
import sys
import tempfile
from datetime import date, datetime
from pathlib import Path
import click
import yaml
from ..backfill.parse import (
BACKFILL_FILE,
DEFAULT_REASON,
DEFAULT_WATCHER,
Backfill,
BackfillStatus,
)
from ..backfill.validate import (
validate_duplicate_entry_dates,
validate_file,
validate_overlap_dates,
)
from ..cli.utils import paths_matching_name_pattern, project_id_option, sql_dir_option
# Splits "<project>.<dataset>.<table>" into named groups. Each part may hold
# ASCII letters, digits, "_" and "-". The range is spelled A-Z on purpose:
# "A-z" would also accept the punctuation between "Z" and "a" ([ \ ] ^ `).
QUALIFIED_TABLE_NAME_RE = re.compile(
    r"(?P<project_id>[a-zA-Z0-9_-]+)\.(?P<dataset_id>[a-zA-Z0-9_-]+)\.(?P<table_id>[a-zA-Z0-9_-]+)"
)
@click.group(help="Commands for managing backfills.")
@click.pass_context
def backfill(ctx):
    """Create the CLI group for the backfill command."""
    ctx.ensure_object(dict)
    # Generated content goes to a temporary directory that is registered as a
    # context resource, so it is removed automatically once the command exits.
    temporary_directory = tempfile.TemporaryDirectory()
    ctx.obj["TMP_DIR"] = ctx.with_resource(temporary_directory)
@backfill.command(
    help="""Create a new backfill entry in the backfill.yaml file. Create
a backfill.yaml file if it does not already exist.
Examples:
\b
./bqetl backfill create moz-fx-data-shared-prod.telemetry_derived.deviations_v1 \\
--start_date=2021-03-01 \\
--end_date=2021-03-31 \\
--exclude=2021-03-03 \\
""",
)
@click.argument("qualified_table_name")
@sql_dir_option
@click.option(
    "--start_date",
    "--start-date",
    "-s",
    help="First date to be backfilled. Date format: yyyy-mm-dd",
    type=click.DateTime(formats=["%Y-%m-%d"]),
    required=True,
)
@click.option(
    "--end_date",
    "--end-date",
    "-e",
    help="Last date to be backfilled. Date format: yyyy-mm-dd",
    type=click.DateTime(formats=["%Y-%m-%d"]),
    default=datetime.today(),
)
@click.option(
    "--exclude",
    "-x",
    multiple=True,
    help="Dates excluded from backfill. Date format: yyyy-mm-dd",
    type=click.DateTime(formats=["%Y-%m-%d"]),
    default=None,
)
@click.option(
    "--watcher",
    "-w",
    help="Watcher of the backfill (email address)",
    default=DEFAULT_WATCHER,
)
@click.pass_context
def create(
    ctx,
    qualified_table_name,
    sql_dir,
    start_date,
    end_date,
    exclude,
    watcher,
):
    """Add a new Drafting backfill entry to the table's backfill.yaml file.

    The file is created when missing. The command prints a message and exits
    with status 1 when the table name is not ``<project>.<dataset>.<table>``
    or the table directory does not exist below ``sql_dir``. A ``ValueError``
    raised while building or cross-checking the entry is not caught here.
    """
    name_match = QUALIFIED_TABLE_NAME_RE.match(qualified_table_name)
    if name_match is None:
        click.echo("Qualified table name must be named like: <project>.<dataset>.<table>")
        sys.exit(1)
    project_id, dataset_id, table_id = name_match.group(
        "project_id", "dataset_id", "table_id"
    )

    query_path = Path(sql_dir) / project_id / dataset_id / table_id
    if not query_path.exists():
        click.echo(f"{project_id}.{dataset_id}.{table_id} does not exist")
        sys.exit(1)

    # The options are parsed as datetimes; the entry stores plain dates.
    new_entry = Backfill(
        entry_date=date.today(),
        start_date=start_date.date(),
        end_date=end_date.date(),
        excluded_dates=[excluded.date() for excluded in exclude],
        reason=DEFAULT_REASON,
        watchers=[watcher],
        status=BackfillStatus.DRAFTING,
    )

    backfill_file = query_path / BACKFILL_FILE
    existing_entries = []
    if backfill_file.exists():
        existing_entries = Backfill.entries_from_file(backfill_file)
        for existing in existing_entries:
            validate_duplicate_entry_dates(new_entry, existing)
            # Date ranges may only not overlap with other drafting entries.
            if existing.status == BackfillStatus.DRAFTING:
                validate_overlap_dates(new_entry, existing)

    # Rewrite the whole file with the entries in descending order.
    ordered_entries = sorted([new_entry, *existing_entries], reverse=True)
    backfill_file.write_text("\n".join(entry.to_yaml() for entry in ordered_entries))

    click.echo(f"Created backfill entry in {backfill_file}")
@backfill.command(
    help="""Validate backfill.yaml file format and content.
Examples:
./bqetl backfill validate moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6
\b
# validate all backfill.yaml files if table is not specified
Use the `--project_id` option to change the project to be validated;
default is `moz-fx-data-shared-prod`.
Examples:
./bqetl backfill validate
"""
)
@click.argument("qualified_table_name", required=False)
@sql_dir_option
@project_id_option("moz-fx-data-shared-prod")
@click.pass_context
def validate(
    ctx,
    qualified_table_name,
    sql_dir,
    project_id,
):
    """Validate one table's backfill.yaml file, or every one in a project.

    With a table name, only that table's file is checked; otherwise all files
    named ``BACKFILL_FILE`` found for ``project_id`` are checked. The command
    prints the first error and exits with status 1 when a file fails with a
    YAML error or a ``ValueError``.
    """
    # TODO: this code can potentially be a util
    if qualified_table_name:
        name_match = QUALIFIED_TABLE_NAME_RE.match(qualified_table_name)
        if name_match is None:
            click.echo(
                "Qualified table name must be named like: <project>.<dataset>.<table>"
            )
            sys.exit(1)
        # The project named in the table name replaces the --project_id value.
        project_id, dataset_id, table_id = name_match.group(
            "project_id", "dataset_id", "table_id"
        )

        table_dir = Path(sql_dir) / project_id / dataset_id / table_id
        if not table_dir.exists():
            click.echo(f"{project_id}.{dataset_id}.{table_id} does not exist")
            sys.exit(1)

        backfill_files = [table_dir / BACKFILL_FILE]
    else:
        backfill_files = paths_matching_name_pattern(
            None, sql_dir, project_id, [BACKFILL_FILE]
        )

    for file in backfill_files:
        try:
            validate_file(file)
        except (yaml.YAMLError, ValueError) as e:
            click.echo(f"{file} contains the following error:\n {e}")
            sys.exit(1)

    if qualified_table_name:
        click.echo(
            f"{BACKFILL_FILE} has been validated for {project_id}.{dataset_id}.{table_id} "
        )
    elif backfill_files:
        click.echo(
            f"All {BACKFILL_FILE} files have been validated for project {project_id}"
        )

Просмотреть файл

@ -14,7 +14,7 @@ from bigquery_etl.util.common import TempDatasetReference, project_dirs
QUERY_FILE_RE = re.compile(
r"^.*/([a-zA-Z0-9-]+)/([a-zA-Z0-9_]+)/([a-zA-Z0-9_]+(_v[0-9]+)?)/"
r"(?:query\.sql|part1\.sql|script\.sql|query\.py|view\.sql|metadata\.yaml)$"
r"(?:query\.sql|part1\.sql|script\.sql|query\.py|view\.sql|metadata\.yaml|backfill\.yaml)$"
)
TEST_PROJECT = "bigquery-etl-integration-test"
MOZDATA = "mozdata"

Просмотреть файл

@ -35,6 +35,7 @@ Commands:
routine Commands for managing routines.
stripe Commands for Stripe ETL.
view Commands for managing views.
backfill Commands for managing backfills.
```

Просмотреть файл

@ -0,0 +1,9 @@
2021-05-03:
  start_date: 2021-01-03
  end_date: 2021-05-03
  excluded_dates:
  - 2021-02-03
  reason: Please provide a reason for the backfill and links to any related bugzilla or jira tickets
  watchers:
  - nobody@mozilla.com
  status: Drafting

Просмотреть файл

@ -0,0 +1,19 @@
2023-05-03:
  start_date: 2023-01-03
  end_date: 2023-05-03
  excluded_dates:
  - 2023-02-03
  reason: Please provide a reason for the backfill and links to any related bugzilla or jira tickets
  watchers:
  - nobody@mozilla.com
  status: Drafting
2021-05-03:
  start_date: 2021-01-03
  end_date: 2021-05-03
  excluded_dates:
  - 2021-02-03
  reason: Please provide a reason for the backfill and links to any related bugzilla or jira tickets
  watchers:
  - nobody@mozilla.com
  status: Drafting

Просмотреть файл

@ -0,0 +1,9 @@
2023-05-03:
  start_date: 2021-01-03
  end_date: 2021-05-03
  excluded_dates:
  - 2021-02-03
  reason: no_reason
  watchers:
  - test@example.org
  status: Drafting

Просмотреть файл

@ -0,0 +1,358 @@
from datetime import date, timedelta
from pathlib import Path
import pytest
from bigquery_etl.backfill.parse import (
BACKFILL_FILE,
DEFAULT_REASON,
DEFAULT_WATCHER,
Backfill,
BackfillStatus,
)
# Status given to every fixture entry in this module.
DEFAULT_STATUS = BackfillStatus.DRAFTING
# Grandparent directory of this file; fixture paths are resolved against it.
TEST_DIR = Path(__file__).parent.parent

# Entry matching the single-entry backfill.yaml fixture.
TEST_BACKFILL_1 = Backfill(
    entry_date=date(2021, 5, 3),
    start_date=date(2021, 1, 3),
    end_date=date(2021, 5, 3),
    excluded_dates=[date(2021, 2, 3)],
    reason=DEFAULT_REASON,
    watchers=[DEFAULT_WATCHER],
    status=DEFAULT_STATUS,
)

# Newer entry; comes first in the multi-entry backfill.yaml fixture.
TEST_BACKFILL_2 = Backfill(
    entry_date=date(2023, 5, 3),
    start_date=date(2023, 1, 3),
    end_date=date(2023, 5, 3),
    excluded_dates=[date(2023, 2, 3)],
    reason=DEFAULT_REASON,
    watchers=[DEFAULT_WATCHER],
    status=DEFAULT_STATUS,
)
def _new_backfill(**overrides):
    """Return a fresh Backfill with the 2021 fixture values, plus ``overrides``.

    Tests build their own instance instead of touching the module-level
    fixtures, so no test can change what another test sees.
    """
    fields = {
        "entry_date": date(2021, 5, 3),
        "start_date": date(2021, 1, 3),
        "end_date": date(2021, 5, 3),
        "excluded_dates": [date(2021, 2, 3)],
        "reason": DEFAULT_REASON,
        "watchers": [DEFAULT_WATCHER],
        "status": DEFAULT_STATUS,
    }
    fields.update(overrides)
    return Backfill(**fields)


def _assert_rejected(*fragments, **overrides):
    """Assert that building a Backfill with ``overrides`` raises ValueError.

    Every string in ``fragments`` must occur in the error message.
    """
    with pytest.raises(ValueError) as e:
        _new_backfill(**overrides)

    message = str(e.value)
    for fragment in fragments:
        assert fragment in message


class TestParseBackfill(object):
    """Tests for Backfill construction, file parsing and YAML output."""

    def test_backfill_instantiation(self):
        backfill = _new_backfill()

        assert backfill.entry_date == date(2021, 5, 3)
        assert backfill.start_date == date(2021, 1, 3)
        assert backfill.end_date == date(2021, 5, 3)
        assert backfill.excluded_dates == [date(2021, 2, 3)]
        assert backfill.reason == DEFAULT_REASON
        assert backfill.watchers == [DEFAULT_WATCHER]
        assert backfill.status == DEFAULT_STATUS

    def test_invalid_watcher(self):
        _assert_rejected("Invalid", "watchers", watchers=["test.org"])

    def test_invalid_watchers(self):
        _assert_rejected("Invalid", "watchers", watchers=[DEFAULT_WATCHER, "test.org"])

    def test_no_watchers(self):
        _assert_rejected("Invalid", "watchers", watchers=[""])

    def test_multiple_watchers(self):
        valid_watchers = [DEFAULT_WATCHER, "test2@example.org", "test3@example.org"]

        backfill = _new_backfill(watchers=list(valid_watchers))

        assert backfill.watchers == valid_watchers

    def test_all_status(self):
        for status in BackfillStatus:
            backfill = _new_backfill(status=status)
            assert backfill.status == status
            assert backfill.status.value == status.value

    def test_invalid_entry_date_greater_than_today(self):
        _assert_rejected(
            "can't be in the future", entry_date=date.today() + timedelta(days=1)
        )

    def test_invalid_start_date_greater_than_entry_date(self):
        base = _new_backfill()
        _assert_rejected(
            "Invalid start date", start_date=base.entry_date + timedelta(days=1)
        )

    def test_invalid_start_date_greater_than_end_date(self):
        base = _new_backfill()
        _assert_rejected(
            "Invalid start date", start_date=base.end_date + timedelta(days=1)
        )

    def test_invalid_end_date_greater_than_entry_date(self):
        base = _new_backfill()
        _assert_rejected(
            "Invalid end date", end_date=base.entry_date + timedelta(days=1)
        )

    def test_invalid_excluded_dates_greater_than_end_date(self):
        base = _new_backfill()
        _assert_rejected(
            "Invalid excluded dates",
            excluded_dates=[base.end_date + timedelta(days=1)],
        )

    def test_invalid_excluded_dates_greater_than_end_date_multiple(self):
        base = _new_backfill()
        _assert_rejected(
            "Invalid excluded dates",
            excluded_dates=[base.end_date, base.end_date + timedelta(days=1)],
        )

    def test_invalid_excluded_dates_less_than_start_date(self):
        base = _new_backfill()
        _assert_rejected(
            "Invalid excluded dates",
            excluded_dates=[base.start_date - timedelta(days=1)],
        )

    def test_invalid_excluded_dates_less_than_start_date_multiple(self):
        base = _new_backfill()
        _assert_rejected(
            "Invalid excluded dates",
            excluded_dates=[base.start_date, base.start_date - timedelta(days=1)],
        )

    def test_invalid_status(self):
        # A plain string has no ``name`` attribute, so the status validator
        # fails with AttributeError rather than ValueError.
        with pytest.raises(AttributeError):
            _new_backfill(status="invalid_status")

    def test_non_existing_file(self):
        backfill_file = TEST_DIR / "nonexisting_dir" / BACKFILL_FILE
        with pytest.raises(FileNotFoundError):
            Backfill.entries_from_file(backfill_file)

    def test_of_backfill_file_no_backfill(self):
        backfill_file = TEST_DIR / "test" / BACKFILL_FILE
        with pytest.raises(FileNotFoundError):
            Backfill.entries_from_file(backfill_file)

    def test_of_backfill_file_one(self):
        backfill_file = TEST_DIR / "backfill" / BACKFILL_FILE

        backfills = Backfill.entries_from_file(backfill_file)
        backfill = backfills[0]

        assert backfill.entry_date == date(2021, 5, 3)
        assert backfill.start_date == date(2021, 1, 3)
        assert backfill.end_date == date(2021, 5, 3)
        assert backfill.excluded_dates == [date(2021, 2, 3)]
        assert backfill.reason == DEFAULT_REASON
        assert backfill.watchers == [DEFAULT_WATCHER]
        assert backfill.status == DEFAULT_STATUS

    def test_entries_from_file_multiple(self):
        backfill_file = TEST_DIR / "backfill" / "test_dir_multiple" / BACKFILL_FILE

        backfills = Backfill.entries_from_file(backfill_file)

        expected_newest = _new_backfill(
            entry_date=date(2023, 5, 3),
            start_date=date(2023, 1, 3),
            end_date=date(2023, 5, 3),
            excluded_dates=[date(2023, 2, 3)],
        )
        assert backfills[0] == expected_newest
        assert backfills[1] == _new_backfill()

    def test_invalid_file(self):
        backfill_file = TEST_DIR / "test" / "invalid_file_name.yaml"
        with pytest.raises(ValueError) as e:
            Backfill.entries_from_file(backfill_file)

        assert "Invalid file" in str(e.value)

    def test_of_non_existing_table(self):
        backfill_file = TEST_DIR / "non_exist_folder" / BACKFILL_FILE
        with pytest.raises(FileNotFoundError):
            Backfill.entries_from_file(backfill_file)

    def test_is_backfill_file(self):
        assert Backfill.is_backfill_file("foo/bar/invalid.json") is False
        assert Backfill.is_backfill_file("foo/bar/invalid.yaml") is False
        assert Backfill.is_backfill_file(BACKFILL_FILE)
        assert Backfill.is_backfill_file("some/path/to/" + BACKFILL_FILE)

    def test_to_yaml(self):
        # yaml.dump indents nested mappings by two spaces, keeps list items at
        # the indentation of their key, and wraps the long reason line.
        expected = (
            "2021-05-03:\n"
            "  start_date: 2021-01-03\n"
            "  end_date: 2021-05-03\n"
            "  excluded_dates:\n"
            "  - 2021-02-03\n"
            "  reason: Please provide a reason for the backfill and links to any related "
            "bugzilla\n"
            "    or jira tickets\n"
            "  watchers:\n"
            "  - nobody@mozilla.com\n"
            "  status: Drafting\n"
        )

        results = _new_backfill().to_yaml()

        assert results == expected

    def test_to_yaml_no_excluded_dates(self):
        expected = (
            "2021-05-03:\n"
            "  start_date: 2021-01-03\n"
            "  end_date: 2021-05-03\n"
            "  reason: Please provide a reason for the backfill and links to any related "
            "bugzilla\n"
            "    or jira tickets\n"
            "  watchers:\n"
            "  - nobody@mozilla.com\n"
            "  status: Drafting\n"
        )

        # Use a separate instance: assigning to TEST_BACKFILL_1 would leak an
        # empty excluded_dates list into every test that runs afterwards.
        results = _new_backfill(excluded_dates=[]).to_yaml()

        assert results == expected

Просмотреть файл

@ -0,0 +1,144 @@
from datetime import date
from pathlib import Path
import pytest
from bigquery_etl.backfill.parse import BACKFILL_FILE, DEFAULT_REASON, Backfill
from bigquery_etl.backfill.validate import (
validate_duplicate_entry_dates,
validate_entries,
validate_entries_are_sorted,
validate_excluded_dates,
validate_file,
validate_overlap_dates,
validate_reason,
)
from tests.backfill.test_parse_backfill import TEST_BACKFILL_1, TEST_BACKFILL_2
# Grandparent directory of this file; fixture paths are resolved against it.
TEST_DIR = Path(__file__).parent.parent
# Reason and watcher that differ from the defaults, which validation rejects.
VALID_REASON = "test_reason"
VALID_WATCHER = "test@example.org"
def _copy_backfill(source, **overrides):
    """Return a new Backfill with ``source``'s field values, plus ``overrides``.

    The list fields are copied so the result shares no mutable state with
    ``source``.
    """
    fields = {
        "entry_date": source.entry_date,
        "start_date": source.start_date,
        "end_date": source.end_date,
        "excluded_dates": list(source.excluded_dates),
        "reason": source.reason,
        "watchers": list(source.watchers),
        "status": source.status,
    }
    fields.update(overrides)
    return Backfill(**fields)


class TestValidateBackfill(object):
    """Tests for the backfill validation helpers."""

    def test_duplicate_entry_dates_pass(self):
        validate_duplicate_entry_dates(TEST_BACKFILL_1, TEST_BACKFILL_2)

    def test_duplicate_entry_dates_fail(self):
        with pytest.raises(ValueError) as e:
            validate_duplicate_entry_dates(TEST_BACKFILL_1, TEST_BACKFILL_1)

        assert "Duplicate backfill" in str(e.value)

    def test_overlap_dates_pass(self):
        validate_overlap_dates(TEST_BACKFILL_1, TEST_BACKFILL_2)

    def test_overlap_dates_fail(self):
        with pytest.raises(ValueError) as e:
            validate_overlap_dates(TEST_BACKFILL_1, TEST_BACKFILL_1)

        assert "overlap dates" in str(e.value)

    def test_excluded_dates_duplicates(self):
        invalid_backfill = _copy_backfill(
            TEST_BACKFILL_1,
            excluded_dates=[date(2021, 2, 3), date(2021, 2, 3)],
        )

        with pytest.raises(ValueError) as e:
            validate_excluded_dates(invalid_backfill)

        assert "duplicate excluded dates" in str(e.value)

    def test_excluded_dates_not_sorted(self):
        invalid_backfill = _copy_backfill(
            TEST_BACKFILL_1,
            excluded_dates=[date(2021, 2, 4), date(2021, 2, 3)],
        )

        with pytest.raises(ValueError) as e:
            validate_excluded_dates(invalid_backfill)

        assert "excluded dates not sorted" in str(e.value)

    def test_valid_reason_pass(self):
        valid_backfill = _copy_backfill(TEST_BACKFILL_1, reason=VALID_REASON)

        validate_reason(valid_backfill)

    def test_reason_default_fail(self):
        invalid_backfill = _copy_backfill(TEST_BACKFILL_1, reason=DEFAULT_REASON)

        with pytest.raises(ValueError) as e:
            validate_reason(invalid_backfill)

        assert "Invalid Reason" in str(e.value)

    def test_reason_empty_fail(self):
        invalid_backfill = _copy_backfill(TEST_BACKFILL_1, reason="")

        with pytest.raises(ValueError) as e:
            validate_reason(invalid_backfill)

        assert "Invalid Reason" in str(e.value)

    def test_entries_sorted(self):
        backfills = [TEST_BACKFILL_2, TEST_BACKFILL_1]
        validate_entries_are_sorted(backfills)

    def test_entries_not_sorted(self):
        backfills = [TEST_BACKFILL_1, TEST_BACKFILL_2]

        with pytest.raises(ValueError) as e:
            validate_entries_are_sorted(backfills)

        assert "Backfill entries are not sorted" in str(e.value)

    def test_validate_entries_pass(self):
        # Work on copies: TEST_BACKFILL_1/2 are imported from another test
        # module, and changing them here would alter that module's fixtures.
        backfills = [
            _copy_backfill(
                TEST_BACKFILL_2, watchers=[VALID_WATCHER], reason=VALID_REASON
            ),
            _copy_backfill(
                TEST_BACKFILL_1, watchers=[VALID_WATCHER], reason=VALID_REASON
            ),
        ]

        validate_entries(backfills)

    def test_validate_file(self):
        backfill_file = TEST_DIR / "backfill" / "test_dir_valid" / BACKFILL_FILE
        validate_file(backfill_file)

Просмотреть файл

@ -0,0 +1,714 @@
import os
from datetime import date, timedelta
from pathlib import Path
import pytest
from click.testing import CliRunner
from bigquery_etl.backfill.parse import (
BACKFILL_FILE,
DEFAULT_REASON,
DEFAULT_WATCHER,
Backfill,
BackfillStatus,
)
from bigquery_etl.cli.backfill import create, validate
# Status that `bqetl backfill create` gives to new entries.
DEFAULT_STATUS = BackfillStatus.DRAFTING
# Reason and watcher that differ from the placeholder defaults.
VALID_REASON = "test_reason"
VALID_WATCHER = "test@example.org"

# Entry that uses the non-default reason and watcher above.
VALID_BACKFILL = Backfill(
    entry_date=date(2021, 5, 3),
    start_date=date(2021, 1, 3),
    end_date=date(2021, 5, 3),
    excluded_dates=[date(2021, 2, 3)],
    reason=VALID_REASON,
    watchers=[VALID_WATCHER],
    status=DEFAULT_STATUS,
)
# YAML text of one Drafting entry, keyed by entry date 2021-05-04.
BACKFILL_YAML_TEMPLATE = "\n".join(
    [
        "2021-05-04:",
        " start_date: 2021-01-03",
        " end_date: 2021-05-03",
        " excluded_dates:",
        " - 2021-02-03",
        " reason: test_reason",
        " watchers:",
        " - test@example.org",
        " status: Drafting",
        "",  # keeps the trailing newline
    ]
)
class TestBackfill:
@pytest.fixture
def runner(self):
    """Provide a click ``CliRunner`` to the test that requests it."""
    cli_runner = CliRunner()
    return cli_runner
def test_create_backfill(self, runner):
    """Create an entry from a start date only and check the written defaults."""
    with runner.isolated_filesystem():
        table_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(table_dir)

        cli_args = [
            "moz-fx-data-shared-prod.test.test_query_v1",
            "--start_date=2021-03-01",
        ]
        result = runner.invoke(create, cli_args)

        assert result.exit_code == 0
        assert BACKFILL_FILE in os.listdir(
            "sql/moz-fx-data-shared-prod/test/test_query_v1"
        )

        backfill_file = f"{table_dir}/{BACKFILL_FILE}"
        entries = Backfill.entries_from_file(backfill_file)
        backfill = entries[0]

        # Without --end_date the entry ends today; watcher, reason and status
        # fall back to the defaults.
        assert backfill.entry_date == date.today()
        assert backfill.start_date == date(2021, 3, 1)
        assert backfill.end_date == date.today()
        assert backfill.watchers == [DEFAULT_WATCHER]
        assert backfill.reason == DEFAULT_REASON
        assert backfill.status == DEFAULT_STATUS
def test_create_backfill_with_invalid_watcher(self, runner):
    """A watcher that is not a valid identity makes the command fail."""
    with runner.isolated_filesystem():
        os.makedirs("sql/moz-fx-data-shared-prod/test/test_query_v1")
        invalid_watcher = "test.org"

        cli_args = [
            "moz-fx-data-shared-prod.test.test_query_v1",
            "--start_date=2021-03-01",
            "--watcher=" + invalid_watcher,
        ]
        result = runner.invoke(create, cli_args)

        assert result.exit_code == 1
        error_text = str(result.exception)
        assert "Invalid" in error_text
        assert "watchers" in error_text
def test_create_backfill_with_invalid_path(self, runner):
    """A two-part table name in an empty working directory is rejected."""
    # NOTE(review): create() exits with status 1 for a malformed table name,
    # but this test expects 2 and "path" in the output. Presumably the failure
    # comes from the sql dir option, since no sql directory is created here;
    # confirm which check this test is meant to cover.
    with runner.isolated_filesystem():
        invalid_path = "test.test_query_v1"
        cli_args = [invalid_path, "--start_date=2021-03-01"]

        result = runner.invoke(create, cli_args)

        assert result.exit_code == 2
        assert "Invalid" in result.output
        assert "path" in result.output
def test_create_backfill_with_invalid_start_date_greater_than_end_date(
    self, runner
):
    """A start date after the end date makes the command fail."""
    with runner.isolated_filesystem():
        os.makedirs("sql/moz-fx-data-shared-prod/test/test_query_v1")
        invalid_start_date = "2021-05-01"

        cli_args = [
            "moz-fx-data-shared-prod.test.test_query_v1",
            "--start_date=" + invalid_start_date,
            "--end_date=2021-03-01",
        ]
        result = runner.invoke(create, cli_args)

        assert result.exit_code == 1
        assert "Invalid start date" in str(result.exception)
def test_create_backfill_with_invalid_excluded_dates_before_start_date(
    self, runner
):
    """An excluded date before the start date makes the command fail."""
    with runner.isolated_filesystem():
        os.makedirs("sql/moz-fx-data-shared-prod/test/test_query_v1")
        invalid_exclude_date = "2021-03-01"

        cli_args = [
            "moz-fx-data-shared-prod.test.test_query_v1",
            "--start_date=2021-05-01",
            "--exclude=" + invalid_exclude_date,
        ]
        result = runner.invoke(create, cli_args)

        assert result.exit_code == 1
        assert "Invalid excluded dates" in str(result.exception)
def test_create_backfill_with_excluded_dates_after_end_date(self, runner):
    """Check that create fails for an excluded date later than the end date."""
    late_exclude = "2021-07-01"
    cli_args = [
        "moz-fx-data-shared-prod.test.test_query_v1",
        "--start_date=2021-05-01",
        "--end_date=2021-06-01",
        f"--exclude={late_exclude}",
    ]
    with runner.isolated_filesystem():
        os.makedirs("sql/moz-fx-data-shared-prod/test/test_query_v1")
        outcome = runner.invoke(create, cli_args)
        assert outcome.exit_code == 1
        assert "Invalid excluded dates" in str(outcome.exception)
def test_create_backfill_entry_with_all_params(self, runner):
    """Create an entry with every CLI option set and verify the stored fields.

    The command is invoked with explicit start date, end date, excluded date
    and watcher. The generated backfill file is then parsed again and every
    supplied value is checked, together with the entry date and the default
    reason and status that ``create`` is expected to fill in.
    """
    with runner.isolated_filesystem():
        SQL_DIR = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(SQL_DIR)
        result = runner.invoke(
            create,
            [
                "moz-fx-data-shared-prod.test.test_query_v1",
                "--start_date=2021-03-01",
                "--end_date=2021-03-10",
                "--exclude=2021-03-05",
                # Feed in the same constant the assertion below compares with.
                "--watcher=" + VALID_WATCHER,
            ],
        )
        assert result.exit_code == 0
        assert BACKFILL_FILE in os.listdir(SQL_DIR)

        backfill_file = Path(SQL_DIR) / BACKFILL_FILE
        backfill = Backfill.entries_from_file(backfill_file)[0]
        assert backfill.entry_date == date.today()
        assert backfill.start_date == date(2021, 3, 1)
        assert backfill.end_date == date(2021, 3, 10)
        # Previously unchecked: the --exclude option must be persisted too.
        # NOTE(review): assumes the parsed attribute is named `excluded_dates`,
        # like the YAML key — confirm against the Backfill class.
        assert backfill.excluded_dates == [date(2021, 3, 5)]
        assert backfill.watchers == [VALID_WATCHER]
        assert backfill.reason == DEFAULT_REASON
        assert backfill.status == DEFAULT_STATUS
def test_create_backfill_with_exsting_entry(self, runner):
    """Check that create puts a new entry ahead of one already in the file.

    A backfill file with a single entry is written first. After running
    ``create``, the file must hold the new entry at index 0 and the
    original entry at index 1.
    """
    query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
    with runner.isolated_filesystem():
        os.makedirs(query_dir)
        existing_entry = Backfill(
            date(2021, 5, 3),
            date(2021, 1, 3),
            date(2021, 5, 3),
            [date(2021, 2, 3)],
            VALID_REASON,
            [VALID_WATCHER],
            DEFAULT_STATUS,
        )
        # What `create` is expected to produce for the arguments used below.
        expected_new_entry = Backfill(
            date.today(),
            date(2023, 3, 1),
            date(2023, 3, 10),
            [],
            DEFAULT_REASON,
            [DEFAULT_WATCHER],
            DEFAULT_STATUS,
        )

        backfill_file = Path(query_dir) / BACKFILL_FILE
        backfill_file.write_text(existing_entry.to_yaml())
        assert BACKFILL_FILE in os.listdir(query_dir)
        entries_before = Backfill.entries_from_file(backfill_file)
        assert entries_before[0] == existing_entry

        cli_args = [
            "moz-fx-data-shared-prod.test.test_query_v1",
            "--start_date=2023-03-01",
            "--end_date=2023-03-10",
        ]
        outcome = runner.invoke(create, cli_args)
        assert outcome.exit_code == 0

        entries_after = Backfill.entries_from_file(backfill_file)
        assert entries_after[1] == existing_entry
        assert entries_after[0] == expected_new_entry
def test_validate_backfill(self, runner):
    """Check that validate exits with code 0 for the unmodified YAML template."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        (Path(query_dir) / BACKFILL_FILE).write_text(BACKFILL_YAML_TEMPLATE)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 0
def test_validate_backfill_invalid_table_name(self, runner):
    """Check that validate fails for a name whose table component is empty."""
    malformed_name = "moz-fx-data-shared-prod.test."
    expected_message = (
        "Qualified table name must be named like: <project>.<dataset>.<table>"
    )
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        outcome = runner.invoke(validate, [malformed_name])
        assert outcome.exit_code == 1
        assert expected_message in outcome.output
def test_validate_backfill_non_existing_table_name(self, runner):
    """Check that validate fails for a table with no directory on disk."""
    with runner.isolated_filesystem():
        # Only test_query_v1 is created; the command targets test_query_v2.
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        missing_table = "moz-fx-data-shared-prod.test.test_query_v2"
        outcome = runner.invoke(validate, [missing_table])
        assert outcome.exit_code == 1
        assert "does not exist" in outcome.output
def test_validate_backfill_invalid_default_reason(self, runner):
    """Check that validate rejects an entry still carrying the default reason."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        yaml_text = BACKFILL_YAML_TEMPLATE.replace(VALID_REASON, DEFAULT_REASON)
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 1
        assert "Invalid Reason" in outcome.output
def test_validate_backfill_empty_reason(self, runner):
    """Check that validate rejects an entry whose reason text is empty."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    blank_reason = ""
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        yaml_text = BACKFILL_YAML_TEMPLATE.replace(VALID_REASON, blank_reason)
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 1
        assert "Invalid Reason" in outcome.output
def test_validate_backfill_invalid_watcher(self, runner):
    """Check that validate rejects the watcher address ``test@example``."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    bad_watcher = "test@example"
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        yaml_text = BACKFILL_YAML_TEMPLATE.replace(VALID_WATCHER, bad_watcher)
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 1
        for fragment in ("Invalid", "watchers"):
            assert fragment in outcome.output
def test_validate_backfill_empty_watcher(self, runner):
    """Check that validate rejects an entry whose watcher value is blank."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    blank_watcher = ""
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        yaml_text = BACKFILL_YAML_TEMPLATE.replace(VALID_WATCHER, blank_watcher)
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 1
        for fragment in ("Invalid", "watchers"):
            assert fragment in outcome.output
def test_validate_backfill_watchers_duplicated(self, runner):
    """Check that validate rejects an entry listing the same watcher twice."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        # Swap the template's single watcher item for two identical items.
        repeated_watchers = " - test@example.org\n" " - test@example.org\n"
        single_watcher_item = " - " + VALID_WATCHER
        yaml_text = BACKFILL_YAML_TEMPLATE.replace(
            single_watcher_item, repeated_watchers
        )
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 1
        assert "Duplicate or default watcher" in outcome.output
def test_validate_backfill_invalid_status(self, runner):
    """Check that validate fails on an unknown status and names it in the error."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    unknown_status = "INVALIDSTATUS"
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        yaml_text = BACKFILL_YAML_TEMPLATE.replace(
            DEFAULT_STATUS.value, unknown_status
        )
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 1
        # The bad status is reported through the raised exception.
        assert unknown_status in str(outcome.exception)
def test_validate_backfill_duplicate_entry_dates(self, runner):
    """Check that validate rejects a file containing the same entry twice."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    repeated_entry_date = "2021-05-05"
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        single_entry = BACKFILL_YAML_TEMPLATE.replace(
            "2021-05-04", repeated_entry_date
        )
        # Two copies of one entry yield two identical top-level keys.
        yaml_text = "\n".join([single_entry, single_entry])
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 1
        assert "Backfill entry already exists" in outcome.output
def test_validate_backfill_invalid_entry_date(self, runner):
    """Check that validate rejects a date one day ahead of today."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        tomorrow = date.today() + timedelta(days=1)
        future_entry_date = tomorrow.strftime("%Y-%m-%d")
        # presumably 2021-05-04 is the template's entry date — confirm
        # against BACKFILL_YAML_TEMPLATE
        yaml_text = BACKFILL_YAML_TEMPLATE.replace("2021-05-04", future_entry_date)
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 1
        assert "can't be in the future" in outcome.output
def test_validate_backfill_invalid_start_date_greater_than_end_date(self, runner):
    """Check that validate reports an invalid start date past the end date."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    late_start = "2021-05-04"
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        # presumably 2021-01-03 is the template's start_date — confirm
        # against BACKFILL_YAML_TEMPLATE
        yaml_text = BACKFILL_YAML_TEMPLATE.replace("2021-01-03", late_start)
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 1
        assert "Invalid start date" in outcome.output
#
def test_validate_backfill_invalid_start_date_greater_than_entry_date(self, runner):
    """Check that validate reports an invalid start date past the entry date."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    late_start = "2021-05-05"
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        # presumably 2021-01-03 is the template's start_date — confirm
        # against BACKFILL_YAML_TEMPLATE
        yaml_text = BACKFILL_YAML_TEMPLATE.replace("2021-01-03", late_start)
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 1
        assert "Invalid start date" in outcome.output
def test_validate_backfill_invalid_end_date_greater_than_entry_date(self, runner):
    """Check that validate reports an invalid end date past the entry date."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    late_end = "2021-05-05"
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        # presumably 2021-05-03 is the template's end_date — confirm
        # against BACKFILL_YAML_TEMPLATE
        yaml_text = BACKFILL_YAML_TEMPLATE.replace("2021-05-03", late_end)
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 1
        assert "Invalid end date" in outcome.output
def test_validate_backfill_invalid_excluded_dates_less_than_start_date(
    self, runner
):
    """Check that validate rejects an excluded date before the start date."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    early_exclude = "2021-01-02"
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        # presumably 2021-02-03 is the template's excluded date — confirm
        # against BACKFILL_YAML_TEMPLATE
        yaml_text = BACKFILL_YAML_TEMPLATE.replace("2021-02-03", early_exclude)
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 1
        assert "Invalid excluded dates" in outcome.output
def test_validate_backfill_invalid_excluded_dates_greater_than_end_date(
    self, runner
):
    """Check that validate rejects an excluded date after the end date."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    late_exclude = "2021-05-04"
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        # presumably 2021-02-03 is the template's excluded date — confirm
        # against BACKFILL_YAML_TEMPLATE
        yaml_text = BACKFILL_YAML_TEMPLATE.replace("2021-02-03", late_exclude)
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 1
        assert "Invalid excluded dates" in outcome.output
def test_validate_backfill_invalid_excluded_dates_duplicated(self, runner):
    """Check that validate rejects an entry listing one excluded date twice."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    # The entry is assembled by hand so the excluded dates can be injected.
    entry_head = (
        "2021-05-04:\n"
        " start_date: 2021-01-03\n"
        " end_date: 2021-05-03\n"
        " excluded_dates:\n"
    )
    repeated_dates = " - 2021-02-03\n" " - 2021-02-03\n"
    entry_tail = (
        " reason: test_reason\n"
        " watchers:\n"
        " - test@example.org\n"
        " status: Drafting\n"
    )
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        yaml_text = entry_head + repeated_dates + entry_tail
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 1
        assert "duplicate excluded dates" in outcome.output
def test_validate_backfill_invalid_excluded_dates_not_sorted(self, runner):
    """Check that validate rejects excluded dates listed out of order."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    # The entry is assembled by hand so the excluded dates can be injected.
    entry_head = (
        "2021-05-04:\n"
        " start_date: 2021-01-03\n"
        " end_date: 2021-05-03\n"
        " excluded_dates:\n"
    )
    # The later date comes first, so the list is in descending order.
    unsorted_dates = " - 2021-02-04\n" " - 2021-02-03\n"
    entry_tail = (
        " reason: test_reason\n"
        " watchers:\n"
        " - test@example.org\n"
        " status: Drafting\n"
    )
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        yaml_text = entry_head + unsorted_dates + entry_tail
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 1
        assert "excluded dates not sorted" in outcome.output
def test_validate_backfill_entries_not_sorted(self, runner):
    """Check that validate rejects a file whose entries are out of order."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    # A 2023 entry appended after the template's content.
    appended_entry = (
        "\n"
        "2023-05-04:\n"
        " start_date: 2020-01-03\n"
        " end_date: 2020-05-03\n"
        " reason: test_reason\n"
        " watchers:\n"
        " - test@example.org\n"
        " status: Drafting\n"
    )
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        yaml_text = BACKFILL_YAML_TEMPLATE + appended_entry
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 1
        assert "entries are not sorted" in outcome.output
def test_validate_backfill_overlap_dates(self, runner):
    """Check that validate rejects a second Drafting entry with overlap dates."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    # Extra entry in Drafting status, appended after the template's content.
    appended_entry = (
        "\n"
        "2021-05-03:\n"
        " start_date: 2021-01-03\n"
        " end_date: 2021-05-03\n"
        " reason: test_reason\n"
        " watchers:\n"
        " - test@example.org\n"
        " status: Drafting\n"
    )
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        yaml_text = BACKFILL_YAML_TEMPLATE + appended_entry
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 1
        assert "overlap dates" in outcome.output
def test_validate_backfill_overlap_dates_not_drafting_status(self, runner):
    """Check that validate accepts the same extra entry when it is Complete."""
    table = "moz-fx-data-shared-prod.test.test_query_v1"
    # Same dates as the Drafting variant of this test, but status Complete.
    appended_entry = (
        "\n"
        "2021-05-03:\n"
        " start_date: 2021-01-03\n"
        " end_date: 2021-05-03\n"
        " reason: test_reason\n"
        " watchers:\n"
        " - test@example.org\n"
        " status: Complete\n"
    )
    with runner.isolated_filesystem():
        query_dir = "sql/moz-fx-data-shared-prod/test/test_query_v1"
        os.makedirs(query_dir)
        yaml_text = BACKFILL_YAML_TEMPLATE + appended_entry
        (Path(query_dir) / BACKFILL_FILE).write_text(yaml_text)
        assert BACKFILL_FILE in os.listdir(query_dir)

        outcome = runner.invoke(validate, [table])
        assert outcome.exit_code == 0