Define `event_monitoring_live_v1` views in `view.sql` files (#4576)
* Define `event_monitoring_live_v1` views in `view.sql` files so that they get automatically deployed by the `bqetl_artifact_deployment.publish_views` Airflow task.
* Support materialized views in view naming validation.
* Handle `IF NOT EXISTS` in view naming validation.
* Use a regular expression to extract the view ID in view naming validation. This simplifies the logic and avoids a sqlparse bug where it doesn't recognize the `MATERIALIZED` keyword.
* Update other view regular expressions to allow for materialized views.
This commit is contained in:
Родитель
ff6f08a3e1
Коммит
2c4cc5eefe
|
@ -26,7 +26,8 @@ from bigquery_etl.util.common import render
|
|||
|
||||
# Regex matching CREATE VIEW statement so it can be removed to get the view query
|
||||
CREATE_VIEW_PATTERN = re.compile(
|
||||
r"CREATE\s+OR\s+REPLACE\s+VIEW\s+[^\s]+\s+AS", re.IGNORECASE
|
||||
r"CREATE(?:\s+OR\s+REPLACE)?(?:\s+MATERIALIZED)?\s+VIEW(?:\s+IF\s+NOT\s+EXISTS)?\s+[^\s]+\s+AS",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
|
@ -184,22 +185,16 @@ class View:
|
|||
|
||||
def _valid_view_naming(self):
|
||||
"""Validate that the created view naming matches the directory structure."""
|
||||
parsed = sqlparse.parse(self.content)[0]
|
||||
tokens = [
|
||||
t
|
||||
for t in parsed.tokens
|
||||
if not (t.is_whitespace or isinstance(t, sqlparse.sql.Comment))
|
||||
]
|
||||
is_view_statement = (
|
||||
" ".join(tokens[0].normalized.split()) == "CREATE OR REPLACE"
|
||||
and tokens[1].normalized == "VIEW"
|
||||
)
|
||||
if is_view_statement:
|
||||
target_view = str(tokens[2]).strip().split()[0]
|
||||
sql = sqlparse.format(self.content, strip_comments=True).strip()
|
||||
if view_statement_match := re.match(
|
||||
r"CREATE(?:\s+OR\s+REPLACE)?(?:\s+MATERIALIZED)?\s+VIEW(?:\s+IF\s+NOT\s+EXISTS)?"
|
||||
r"\s+(?P<view_id>(?:(?:`?[\w-]+`?\.)?`?\w+`?\.)?`?\w+`?)",
|
||||
sql,
|
||||
re.IGNORECASE,
|
||||
):
|
||||
target_view = view_statement_match["view_id"].replace("`", "")
|
||||
try:
|
||||
[project_id, dataset_id, view_id] = target_view.replace("`", "").split(
|
||||
"."
|
||||
)
|
||||
[project_id, dataset_id, view_id] = target_view.split(".")
|
||||
if not (
|
||||
self.name == view_id
|
||||
and self.dataset == dataset_id
|
||||
|
|
|
@ -195,7 +195,7 @@ dry_run:
|
|||
- sql/moz-fx-data-shared-prod/org_mozilla_firefox_beta_derived/experiment_events_live_v1/init.sql
|
||||
- sql/moz-fx-data-shared-prod/telemetry_derived/experiment_enrollment_cumulative_population_estimate_v1/view.sql
|
||||
- sql/moz-fx-data-shared-prod/telemetry/experiment_enrollment_cumulative_population_estimate/view.sql
|
||||
- sql/moz-fx-data-shared-prod/**/event_monitoring_live_v1/init.sql
|
||||
- sql/moz-fx-data-shared-prod/**/event_monitoring_live_v1/view.sql
|
||||
- sql/moz-fx-data-shared-prod/monitoring/event_monitoring_live/view.sql
|
||||
# Already exists (and lacks an "OR REPLACE" clause)
|
||||
- sql/moz-fx-data-shared-prod/org_mozilla_firefox_derived/clients_first_seen_v1/init.sql
|
||||
|
|
|
@ -26,7 +26,7 @@ class EventMonitoringLive(GleanTable):
|
|||
|
||||
def __init__(self) -> None:
|
||||
"""Initialize materialized view generation."""
|
||||
self.no_init = False
|
||||
self.no_init = True
|
||||
self.per_app_id_enabled = True
|
||||
self.per_app_enabled = False
|
||||
self.across_apps_enabled = True
|
||||
|
@ -38,9 +38,10 @@ class EventMonitoringLive(GleanTable):
|
|||
def generate_per_app_id(
|
||||
self, project_id, baseline_table, output_dir=None, use_cloud_function=True, app_info=[]
|
||||
):
|
||||
"""Generate per-app_id views."""
|
||||
tables = table_names_from_baseline(baseline_table, include_project_id=False)
|
||||
|
||||
init_filename = f"{self.target_table_id}.init.sql"
|
||||
view_filename = f"{self.target_table_id}.view.sql"
|
||||
metadata_filename = f"{self.target_table_id}.metadata.yaml"
|
||||
|
||||
table = tables[f"{self.prefix}"]
|
||||
|
@ -68,23 +69,21 @@ class EventMonitoringLive(GleanTable):
|
|||
Artifact = namedtuple("Artifact", "table_id basename sql")
|
||||
artifacts = []
|
||||
|
||||
if not self.no_init:
|
||||
init_sql = render(
|
||||
init_filename, template_folder=PATH / "templates", **render_kwargs
|
||||
)
|
||||
metadata = render(
|
||||
metadata_filename,
|
||||
template_folder=PATH / "templates",
|
||||
format=False,
|
||||
**render_kwargs,
|
||||
)
|
||||
artifacts.append(Artifact(table, "metadata.yaml", metadata))
|
||||
view_sql = render(
|
||||
view_filename, template_folder=PATH / "templates", **render_kwargs
|
||||
)
|
||||
metadata = render(
|
||||
metadata_filename,
|
||||
template_folder=PATH / "templates",
|
||||
format=False,
|
||||
**render_kwargs,
|
||||
)
|
||||
artifacts.append(Artifact(table, "metadata.yaml", metadata))
|
||||
|
||||
skip_existing_artifact = self.skip_existing(output_dir, project_id)
|
||||
|
||||
if output_dir:
|
||||
if not self.no_init:
|
||||
artifacts.append(Artifact(table, "init.sql", init_sql))
|
||||
artifacts.append(Artifact(table, "view.sql", view_sql))
|
||||
|
||||
for artifact in artifacts:
|
||||
destination = (
|
||||
|
|
|
@ -110,7 +110,8 @@ def write_view_if_not_exists(target_project: str, sql_dir: Path, schema: SchemaF
|
|||
from sql_generators.stable_views import VIEW_METADATA_TEMPLATE, VIEW_QUERY_TEMPLATE
|
||||
|
||||
VIEW_CREATE_REGEX = re.compile(
|
||||
r"CREATE OR REPLACE VIEW\n\s*[^\s]+\s*\nAS", re.IGNORECASE
|
||||
r"CREATE(?:\s+OR\s+REPLACE)?(?:\s+MATERIALIZED)?\s+VIEW(?:\s+IF\s+NOT\s+EXISTS)?\s+[^\s]+\s+AS",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
SKIP_VIEW_SCHEMA = {
|
||||
|
|
Загрузка…
Ссылка в новой задаче