diff --git a/bigquery_etl/cli/glean_usage.py b/bigquery_etl/cli/glean_usage.py
index a0bbe70afc..2a822db180 100644
--- a/bigquery_etl/cli/glean_usage.py
+++ b/bigquery_etl/cli/glean_usage.py
@@ -12,6 +12,7 @@ from ..glean_usage import (
     baseline_clients_daily,
     baseline_clients_first_seen,
     baseline_clients_last_seen,
+    events_unnested,
 )
 from ..glean_usage.common import list_baseline_tables, get_app_info
 
@@ -109,3 +110,14 @@ def generate(project_id, output_dir, parallelism, exclude, only, app_name):
                 partial(table.generate_per_app, project_id, output_dir=output_dir),
                 app_info,
             )
+
+    # generate per-app events_unnested views
+    with ThreadPool(parallelism) as pool:
+        pool.map(
+            partial(
+                events_unnested.EventsUnnestedTable().generate_per_app,
+                project_id,
+                output_dir=output_dir,
+            ),
+            app_info,
+        )
diff --git a/bigquery_etl/glean_usage/baseline_clients_daily.py b/bigquery_etl/glean_usage/baseline_clients_daily.py
index 3921532732..0fc85bbeb9 100644
--- a/bigquery_etl/glean_usage/baseline_clients_daily.py
+++ b/bigquery_etl/glean_usage/baseline_clients_daily.py
@@ -10,6 +10,7 @@ class BaselineClientsDailyTable(GleanTable):
 
     def __init__(self):
         """Initialize baseline_clients_daily table."""
+        GleanTable.__init__(self)
         self.target_table_id = BASELINE_DAILY_TABLE_ID
         self.prefix = PREFIX
         self.custom_render_kwargs = {}
diff --git a/bigquery_etl/glean_usage/baseline_clients_first_seen.py b/bigquery_etl/glean_usage/baseline_clients_first_seen.py
index f93e8d3e04..5856cb0558 100644
--- a/bigquery_etl/glean_usage/baseline_clients_first_seen.py
+++ b/bigquery_etl/glean_usage/baseline_clients_first_seen.py
@@ -11,6 +11,7 @@ class BaselineClientsFirstSeenTable(GleanTable):
 
     def __init__(self):
         """Initialize baseline_clients_first_seen table."""
+        GleanTable.__init__(self)
         self.target_table_id = TARGET_TABLE_ID
         self.prefix = PREFIX
         self.no_init = False
diff --git a/bigquery_etl/glean_usage/baseline_clients_last_seen.py b/bigquery_etl/glean_usage/baseline_clients_last_seen.py
index ab9232be47..2d4167d559 100644
--- a/bigquery_etl/glean_usage/baseline_clients_last_seen.py
+++ b/bigquery_etl/glean_usage/baseline_clients_last_seen.py
@@ -12,6 +12,7 @@ class BaselineClientsLastSeenTable(GleanTable):
 
     def __init__(self):
         """Initialize baseline_clients_last_seen table."""
+        GleanTable.__init__(self)
         self.target_table_id = TARGET_TABLE_ID
         self.prefix = PREFIX
         self.custom_render_kwargs = dict(
diff --git a/bigquery_etl/glean_usage/common.py b/bigquery_etl/glean_usage/common.py
index 3354d134ec..77d93cc742 100644
--- a/bigquery_etl/glean_usage/common.py
+++ b/bigquery_etl/glean_usage/common.py
@@ -167,6 +167,7 @@ class GleanTable:
         self.prefix = ""
         self.custom_render_kwargs = {}
         self.no_init = True
+        self.cross_channel_template = "cross_channel.view.sql"
 
     def generate_per_app_id(self, project_id, baseline_table, output_dir=None):
         """Generate the baseline table query per app_id."""
@@ -233,9 +234,10 @@ class GleanTable:
             target_view=f"{target_dataset}.{target_view_name}",
             datasets=datasets,
             table=target_view_name,
+            app_name=app_info[0]["app_name"],
         )
 
-        sql = render("cross_channel.view.sql", **render_kwargs)
+        sql = render(self.cross_channel_template, **render_kwargs)
         view = f"{project_id}.{target_dataset}.{target_view_name}"
 
         if output_dir:
diff --git a/bigquery_etl/glean_usage/events_unnested.py b/bigquery_etl/glean_usage/events_unnested.py
new file mode 100644
index 0000000000..d9b180e454
--- /dev/null
+++ b/bigquery_etl/glean_usage/events_unnested.py
@@ -0,0 +1,26 @@
+"""Generate unnested events queries for Glean apps."""
+
+import logging
+
+from bigquery_etl.glean_usage.common import GleanTable
+
+TARGET_TABLE_ID = "events_unnested_v1"
+PREFIX = "events_unnested"
+
+
+class EventsUnnestedTable(GleanTable):
+    """Represents generated events_unnested table."""
+
+    def __init__(self):
+        """Initialize events_unnested table."""
+        # Take the base-class defaults first, as the sibling tables do.
+        GleanTable.__init__(self)
+        self.target_table_id = TARGET_TABLE_ID
+        self.prefix = PREFIX
+        self.custom_render_kwargs = {}
+        self.no_init = False
+        self.cross_channel_template = "cross_channel_events_unnested.view.sql"
+
+    def generate_per_app_id(self, project_id, baseline_table, output_dir=None):
+        """No-op: events_unnested is only generated per app, not per app_id."""
+        logging.info("generate_per_app_id() not implemented for EventsUnnestedTable")
diff --git a/bigquery_etl/glean_usage/templates/cross_channel.view.sql b/bigquery_etl/glean_usage/templates/cross_channel.view.sql
index 1731a03305..7a6ecae617 100644
--- a/bigquery_etl/glean_usage/templates/cross_channel.view.sql
+++ b/bigquery_etl/glean_usage/templates/cross_channel.view.sql
@@ -7,7 +7,7 @@ AS
 UNION ALL
 {% endif -%}
 {% if app_name == "fenix" -%}
-SELECT * REPLACE(mozfun.norm.fenix_app_info("{{ dataset }}", app_build_id).channel AS normalized_channel)
+SELECT * REPLACE(mozfun.norm.fenix_app_info("{{ dataset }}", app_build).channel AS normalized_channel)
 {% else -%}
 SELECT * REPLACE("{{ channel }}" AS normalized_channel)
 {% endif -%}
diff --git a/bigquery_etl/glean_usage/templates/cross_channel_events_unnested.view.sql b/bigquery_etl/glean_usage/templates/cross_channel_events_unnested.view.sql
new file mode 100644
index 0000000000..5191262266
--- /dev/null
+++ b/bigquery_etl/glean_usage/templates/cross_channel_events_unnested.view.sql
@@ -0,0 +1,16 @@
+-- Generated via ./bqetl glean_usage generate
+CREATE OR REPLACE VIEW
+  `{{ project_id }}.{{ target_view }}`
+AS
+{% for (dataset, channel) in datasets -%}
+{% if not loop.first -%}
+UNION ALL
+{% endif -%}
+{% if app_name == "fenix" -%}
+SELECT e.* EXCEPT (events) REPLACE(mozfun.norm.fenix_app_info("{{ dataset }}", client_info.app_build).channel AS normalized_channel), event.*
+{% else -%}
+SELECT e.* EXCEPT (events) REPLACE("{{ channel }}" AS normalized_channel), event.*
+{% endif -%}
+FROM `{{ project_id }}.{{ dataset }}.events` e
+LEFT JOIN UNNEST(e.events) AS event
+{% endfor %}