Define `event_monitoring_live_v1` views in `view.sql` files (#4576)

* Define `event_monitoring_live_v1` views in `view.sql` files so that they get automatically deployed by the `bqetl_artifact_deployment.publish_views` Airflow task.

* Support materialized views in view naming validation.

* Handle `IF NOT EXISTS` in view naming validation.

* Use a regular expression to extract the view ID in view naming validation.

This simplifies the logic and avoids a sqlparse bug where sqlparse doesn't recognize the `MATERIALIZED` keyword.

* Update other view regular expressions to allow for materialized views.
This commit is contained in:
Sean Rose 2023-12-08 11:54:02 -08:00 коммит произвёл GitHub
Родитель ff6f08a3e1
Коммит 2c4cc5eefe
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
5 изменённых файлов: 28 добавлений и 33 удалений

Просмотреть файл

@ -26,7 +26,8 @@ from bigquery_etl.util.common import render
# Regex matching CREATE VIEW statement so it can be removed to get the view query
CREATE_VIEW_PATTERN = re.compile(
r"CREATE\s+OR\s+REPLACE\s+VIEW\s+[^\s]+\s+AS", re.IGNORECASE
r"CREATE(?:\s+OR\s+REPLACE)?(?:\s+MATERIALIZED)?\s+VIEW(?:\s+IF\s+NOT\s+EXISTS)?\s+[^\s]+\s+AS",
re.IGNORECASE,
)
@ -184,22 +185,16 @@ class View:
def _valid_view_naming(self):
"""Validate that the created view naming matches the directory structure."""
parsed = sqlparse.parse(self.content)[0]
tokens = [
t
for t in parsed.tokens
if not (t.is_whitespace or isinstance(t, sqlparse.sql.Comment))
]
is_view_statement = (
" ".join(tokens[0].normalized.split()) == "CREATE OR REPLACE"
and tokens[1].normalized == "VIEW"
)
if is_view_statement:
target_view = str(tokens[2]).strip().split()[0]
sql = sqlparse.format(self.content, strip_comments=True).strip()
if view_statement_match := re.match(
r"CREATE(?:\s+OR\s+REPLACE)?(?:\s+MATERIALIZED)?\s+VIEW(?:\s+IF\s+NOT\s+EXISTS)?"
r"\s+(?P<view_id>(?:(?:`?[\w-]+`?\.)?`?\w+`?\.)?`?\w+`?)",
sql,
re.IGNORECASE,
):
target_view = view_statement_match["view_id"].replace("`", "")
try:
[project_id, dataset_id, view_id] = target_view.replace("`", "").split(
"."
)
[project_id, dataset_id, view_id] = target_view.split(".")
if not (
self.name == view_id
and self.dataset == dataset_id

Просмотреть файл

@ -195,7 +195,7 @@ dry_run:
- sql/moz-fx-data-shared-prod/org_mozilla_firefox_beta_derived/experiment_events_live_v1/init.sql
- sql/moz-fx-data-shared-prod/telemetry_derived/experiment_enrollment_cumulative_population_estimate_v1/view.sql
- sql/moz-fx-data-shared-prod/telemetry/experiment_enrollment_cumulative_population_estimate/view.sql
- sql/moz-fx-data-shared-prod/**/event_monitoring_live_v1/init.sql
- sql/moz-fx-data-shared-prod/**/event_monitoring_live_v1/view.sql
- sql/moz-fx-data-shared-prod/monitoring/event_monitoring_live/view.sql
# Already exists (and lacks an "OR REPLACE" clause)
- sql/moz-fx-data-shared-prod/org_mozilla_firefox_derived/clients_first_seen_v1/init.sql

Просмотреть файл

@ -26,7 +26,7 @@ class EventMonitoringLive(GleanTable):
def __init__(self) -> None:
"""Initialize materialized view generation."""
self.no_init = False
self.no_init = True
self.per_app_id_enabled = True
self.per_app_enabled = False
self.across_apps_enabled = True
@ -38,9 +38,10 @@ class EventMonitoringLive(GleanTable):
def generate_per_app_id(
self, project_id, baseline_table, output_dir=None, use_cloud_function=True, app_info=[]
):
"""Generate per-app_id views."""
tables = table_names_from_baseline(baseline_table, include_project_id=False)
init_filename = f"{self.target_table_id}.init.sql"
view_filename = f"{self.target_table_id}.view.sql"
metadata_filename = f"{self.target_table_id}.metadata.yaml"
table = tables[f"{self.prefix}"]
@ -68,23 +69,21 @@ class EventMonitoringLive(GleanTable):
Artifact = namedtuple("Artifact", "table_id basename sql")
artifacts = []
if not self.no_init:
init_sql = render(
init_filename, template_folder=PATH / "templates", **render_kwargs
)
metadata = render(
metadata_filename,
template_folder=PATH / "templates",
format=False,
**render_kwargs,
)
artifacts.append(Artifact(table, "metadata.yaml", metadata))
view_sql = render(
view_filename, template_folder=PATH / "templates", **render_kwargs
)
metadata = render(
metadata_filename,
template_folder=PATH / "templates",
format=False,
**render_kwargs,
)
artifacts.append(Artifact(table, "metadata.yaml", metadata))
skip_existing_artifact = self.skip_existing(output_dir, project_id)
if output_dir:
if not self.no_init:
artifacts.append(Artifact(table, "init.sql", init_sql))
artifacts.append(Artifact(table, "view.sql", view_sql))
for artifact in artifacts:
destination = (

Просмотреть файл

@ -110,7 +110,8 @@ def write_view_if_not_exists(target_project: str, sql_dir: Path, schema: SchemaF
from sql_generators.stable_views import VIEW_METADATA_TEMPLATE, VIEW_QUERY_TEMPLATE
VIEW_CREATE_REGEX = re.compile(
r"CREATE OR REPLACE VIEW\n\s*[^\s]+\s*\nAS", re.IGNORECASE
r"CREATE(?:\s+OR\s+REPLACE)?(?:\s+MATERIALIZED)?\s+VIEW(?:\s+IF\s+NOT\s+EXISTS)?\s+[^\s]+\s+AS",
re.IGNORECASE,
)
SKIP_VIEW_SCHEMA = {