Generate events_unnested views

This commit is contained in:
Anna Scholtz 2021-05-13 14:32:13 -07:00
Родитель 15de5cc6d8
Коммит 9fe639172f
8 изменённых файлов: 59 добавлений и 2 удалений

Просмотреть файл

@ -12,6 +12,7 @@ from ..glean_usage import (
baseline_clients_daily,
baseline_clients_first_seen,
baseline_clients_last_seen,
events_unnested,
)
from ..glean_usage.common import list_baseline_tables, get_app_info
@ -109,3 +110,14 @@ def generate(project_id, output_dir, parallelism, exclude, only, app_name):
partial(table.generate_per_app, project_id, output_dir=output_dir),
app_info,
)
# generate per-app events_unnested views
with ThreadPool(parallelism) as pool:
pool.map(
partial(
events_unnested.EventsUnnestedTable().generate_per_app,
project_id,
output_dir=output_dir,
),
app_info,
)

Просмотреть файл

@ -10,6 +10,7 @@ class BaselineClientsDailyTable(GleanTable):
def __init__(self):
"""Initialize baseline_clients_daily table."""
GleanTable.__init__(self)
self.target_table_id = BASELINE_DAILY_TABLE_ID
self.prefix = PREFIX
self.custom_render_kwargs = {}

Просмотреть файл

@ -11,6 +11,7 @@ class BaselineClientsFirstSeenTable(GleanTable):
def __init__(self):
"""Initialize baseline_clients_first_seen table."""
GleanTable.__init__(self)
self.target_table_id = TARGET_TABLE_ID
self.prefix = PREFIX
self.no_init = False

Просмотреть файл

@ -12,6 +12,7 @@ class BaselineClientsLastSeenTable(GleanTable):
def __init__(self):
"""Initialize baseline_clients_last_seen table."""
GleanTable.__init__(self)
self.target_table_id = TARGET_TABLE_ID
self.prefix = PREFIX
self.custom_render_kwargs = dict(

Просмотреть файл

@ -167,6 +167,7 @@ class GleanTable:
self.prefix = ""
self.custom_render_kwargs = {}
self.no_init = True
self.cross_channel_template = "cross_channel.view.sql"
def generate_per_app_id(self, project_id, baseline_table, output_dir=None):
"""Generate the baseline table query per app_id."""
@ -233,9 +234,10 @@ class GleanTable:
target_view=f"{target_dataset}.{target_view_name}",
datasets=datasets,
table=target_view_name,
app_name=app_info[0]["app_name"],
)
sql = render("cross_channel.view.sql", **render_kwargs)
sql = render(self.cross_channel_template, **render_kwargs)
view = f"{project_id}.{target_dataset}.{target_view_name}"
if output_dir:

Просмотреть файл

@ -0,0 +1,24 @@
"""Generate unnested events queries for Glean apps."""
import logging
from bigquery_etl.glean_usage.common import GleanTable
TARGET_TABLE_ID = "events_unnested_v1"
PREFIX = "events_unnested"
class EventsUnnestedTable(GleanTable):
    """Represents generated events_unnested table.

    Configures the target table id, the file prefix and the cross-channel
    view template used for events_unnested; per-app_id generation is not
    implemented for this table.
    """

    def __init__(self):
        """Initialize events_unnested table."""
        # Run the base initializer first, as the sibling table classes do,
        # so every attribute GleanTable defines has its default before the
        # events_unnested-specific values below override it.
        GleanTable.__init__(self)
        self.target_table_id = TARGET_TABLE_ID
        self.prefix = PREFIX
        self.custom_render_kwargs = {}
        self.no_init = False
        # Dedicated template for this table instead of the base default.
        self.cross_channel_template = "cross_channel_events_unnested.view.sql"

    def generate_per_app_id(self, project_id, baseline_table, output_dir=None):
        """Skip per-app_id generation, which is not implemented for this table.

        The parameters are accepted only to keep the GleanTable interface;
        they are ignored and nothing is generated. An info message is logged
        instead.
        """
        logging.info("generate_per_app_id() not implemented for EventsUnnestedTable")

Просмотреть файл

@ -7,7 +7,7 @@ AS
UNION ALL
{% endif -%}
{% if app_name == "fenix" -%}
SELECT * REPLACE(mozfun.norm.fenix_app_info("{{ dataset }}", app_build_id).channel AS normalized_channel)
SELECT * REPLACE(mozfun.norm.fenix_app_info("{{ dataset }}", app_build).channel AS normalized_channel)
{% else -%}
SELECT * REPLACE("{{ channel }}" AS normalized_channel)
{% endif -%}

Просмотреть файл

@ -0,0 +1,16 @@
-- Generated via ./bqetl glean_usage generate
{#
  Cross-channel events_unnested view template.

  Inputs read by this template: project_id, target_view, app_name and
  datasets (an iterable of (dataset, channel) pairs).

  Creates or replaces the view `project_id.target_view` as the UNION ALL of
  one SELECT per (dataset, channel) pair, each reading that dataset's
  `events` table.

  The Jinja comments in this file end with a whitespace-trimming marker so
  that they add nothing to the rendered SQL.
-#}
CREATE OR REPLACE VIEW
`{{ project_id }}.{{ target_view }}`
AS
{# Emit one SELECT per (dataset, channel); every SELECT after the first is preceded by UNION ALL. -#}
{% for (dataset, channel) in datasets -%}
{% if not loop.first -%}
UNION ALL
{% endif -%}
{# fenix: normalized_channel is taken from the `channel` field of mozfun.norm.fenix_app_info(dataset, client_info.app_build). -#}
{% if app_name == "fenix" -%}
SELECT e.* EXCEPT (events) REPLACE(mozfun.norm.fenix_app_info("{{ dataset }}", client_info.app_build).channel AS normalized_channel), event.*
{% else -%}
{# Any other app: normalized_channel is replaced by the literal channel string paired with this dataset. -#}
SELECT e.* EXCEPT (events) REPLACE("{{ channel }}" AS normalized_channel), event.*
{% endif -%}
{# The `events` array column is dropped from e.* and each of its elements is exposed through event.*; the LEFT JOIN keeps rows whose array yields no elements. -#}
FROM `{{ project_id }}.{{ dataset }}.events` e
LEFT JOIN UNNEST(e.events) AS event
{% endfor %}