Generate clients_last_seen queries

This commit is contained in:
Anna Scholtz 2021-10-05 11:36:16 -07:00
Родитель f2b63b50a4
Коммит 5669881de3
15 изменённых файлов: 229 добавлений и 107 удалений

Просмотреть файл

@ -15,19 +15,21 @@ from ..glean_usage import (
baseline_clients_last_seen,
events_unnested,
clients_daily_metrics,
clients_last_seen_metrics
clients_last_seen_metrics,
clients_last_seen_joined,
)
from ..glean_usage.common import list_baseline_tables, get_app_info
# list of methods for generating queries
GLEAN_TABLES = [
# glean_app_ping_views.GleanAppPingViews(),
# baseline_clients_daily.BaselineClientsDailyTable(),
# baseline_clients_first_seen.BaselineClientsFirstSeenTable(),
# baseline_clients_last_seen.BaselineClientsLastSeenTable(),
# events_unnested.EventsUnnestedTable(),
glean_app_ping_views.GleanAppPingViews(),
baseline_clients_daily.BaselineClientsDailyTable(),
baseline_clients_first_seen.BaselineClientsFirstSeenTable(),
baseline_clients_last_seen.BaselineClientsLastSeenTable(),
events_unnested.EventsUnnestedTable(),
clients_daily_metrics.ClientsDailyMetrics(),
clients_last_seen_metrics.ClientsLastSeenMetrics(),
clients_last_seen_joined.ClientsLastSeenJoined(),
]

Просмотреть файл

@ -2,6 +2,7 @@
import os
from pathlib import Path
import yaml
from bigquery_etl.glean_usage.common import GleanTable
@ -9,6 +10,7 @@ from bigquery_etl.glean_usage.common import GleanTable
TARGET_TABLE_ID = "clients_daily_metrics_v1"
PREFIX = "clients_daily_metrics"
class ClientsDailyMetrics(GleanTable):
"""Represents generated clients_daily_metrics table."""
@ -20,6 +22,9 @@ class ClientsDailyMetrics(GleanTable):
self.per_app_id_enabled = False
self.cross_channel_template = None
with open(Path(os.path.dirname(__file__)) / "templates" / "metrics_templating.yaml", "r") as f:
with open(
Path(os.path.dirname(__file__)) / "templates" / "metrics_templating.yaml",
"r",
) as f:
metrics_config = yaml.safe_load(f) or {}
self.custom_render_kwargs = {"metrics": metrics_config}

Просмотреть файл

@ -0,0 +1,18 @@
"""Generate and run clients_last_seen_joined queries for Glean apps."""
from bigquery_etl.glean_usage.common import GleanTable
TARGET_TABLE_ID = "clients_last_seen_joined_v1"
PREFIX = "clients_last_seen_joined"
class ClientsLastSeenJoined(GleanTable):
    """Table generator for the clients_last_seen_joined queries."""

    def __init__(self):
        """Run the base GleanTable setup, then apply this table's settings."""
        super().__init__()
        # Settings specific to clients_last_seen_joined; the target table id
        # comes from the module-level constant.
        self.cross_channel_template = None
        self.per_app_id_enabled = False
        self.no_init = True
        self.target_table_id = TARGET_TABLE_ID

Просмотреть файл

@ -2,6 +2,7 @@
import os
from pathlib import Path
import yaml
from bigquery_etl.glean_usage.common import GleanTable
@ -9,6 +10,7 @@ from bigquery_etl.glean_usage.common import GleanTable
TARGET_TABLE_ID = "clients_last_seen_metrics_v1"
PREFIX = "clients_last_seen_metrics"
class ClientsLastSeenMetrics(GleanTable):
"""Represents generated clients_last_seen_metrics table."""
@ -20,6 +22,9 @@ class ClientsLastSeenMetrics(GleanTable):
self.per_app_id_enabled = False
self.cross_channel_template = None
with open(Path(os.path.dirname(__file__)) / "templates" / "metrics_templating.yaml", "r") as f:
with open(
Path(os.path.dirname(__file__)) / "templates" / "metrics_templating.yaml",
"r",
) as f:
metrics_config = yaml.safe_load(f) or {}
self.custom_render_kwargs = {"metrics": metrics_config}

Просмотреть файл

@ -3,16 +3,15 @@
import logging
import os
import re
import requests
from jinja2 import TemplateNotFound
from pathlib import Path
from jinja2 import Environment, PackageLoader
import requests
from jinja2 import Environment, PackageLoader, TemplateNotFound
from bigquery_etl.dryrun import DryRun
from bigquery_etl.util import standard_args # noqa E402
from bigquery_etl.util.common import render, write_sql
from bigquery_etl.util.bigquery_id import sql_table_id # noqa E402
from bigquery_etl.util.common import render, write_sql
from bigquery_etl.view import generate_stable_views
APP_LISTINGS_URL = "https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings"
@ -241,12 +240,17 @@ class GleanTable:
query_filename = f"{target_view_name}.query.sql"
query_sql = render(query_filename, **render_kwargs)
view_sql = render(f"{target_view_name}.view.sql", **render_kwargs)
metadata = render(f"{self.target_table_id[:-3]}.metadata.yaml", **render_kwargs)
metadata = render(
f"{self.target_table_id[:-3]}.metadata.yaml", **render_kwargs
)
table = f"{project_id}.{target_dataset}_derived.{self.target_table_id}"
view = f"{project_id}.{target_dataset}.{target_view_name}"
if not (referenced_table_exists(query_sql)):
logging.info("Skipping query for table which doesn't exist:" f" {self.target_table_id}")
logging.info(
"Skipping query for table which doesn't exist:"
f" {self.target_table_id}"
)
return
if output_dir:

Просмотреть файл

@ -4,10 +4,11 @@ Generate app (as opposed to channel) specific views for Glean ping tables.
At the moment we only do this for the release channel.
"""
import os
from bigquery_etl.glean_usage.common import GleanTable
from mozilla_schema_generator.glean_ping import GleanPing
from bigquery_etl.util.common import get_table_dir, write_sql
from mozilla_schema_generator.glean_ping import GleanPing
from bigquery_etl.glean_usage.common import GleanTable
from bigquery_etl.util.common import get_table_dir, write_sql
VIEW_QUERY_TEMPLATE = """\
-- Generated by Generated via bigquery_etl.glean_usage.GleanAppPingViews

Просмотреть файл

@ -0,0 +1,10 @@
---
# Generated via bigquery_etl.glean_usage
friendly_name: 'Clients Last Seen Joined'
description: >
Joins the baseline and metrics views using a LEFT JOIN,
such that a given client will not appear in the result if
it is only represented in the metrics-based table.
owners:
- ascholtz@mozilla.com

Просмотреть файл

@ -0,0 +1,23 @@
WITH baseline AS (
  -- Baseline rows for the requested submission date.
  SELECT
    *
  FROM
    `{{ project_id }}.{{ app_name }}.baseline_clients_last_seen`
  WHERE
    submission_date = @submission_date
),
metrics AS (
  -- Metrics rows are read from the day after the baseline date.
  -- NOTE(review): presumably this offset allows for metrics pings arriving
  -- later than baseline pings; confirm the intended delay.
  SELECT
    *
  FROM
    `{{ project_id }}.{{ app_name }}.clients_last_seen_metrics`
  WHERE
    submission_date = DATE_ADD(@submission_date, INTERVAL 1 DAY)
)
-- LEFT JOIN keeps every baseline client; a client that only appears in the
-- metrics CTE is not part of the result. The baseline submission_date is
-- the one exposed in the output.
SELECT
  baseline.submission_date,
  * EXCEPT(submission_date)
FROM
  baseline
LEFT JOIN
  metrics
USING
  (client_id, sample_id)

Просмотреть файл

@ -0,0 +1,7 @@
CREATE OR REPLACE VIEW
`{{ project_id }}.{{ target_view }}`
AS
SELECT
*
FROM
`{{ project_id }}.{{ target_table }}`

Просмотреть файл

@ -1,11 +1,5 @@
friendly_name: 'Clients Daily Metrics'
description: >
Daily per-client aggregates on top of metrics pings
owners:
- ascholtz@mozilla.com
labels:
application: fenix
schedule: daily
incremental: true
scheduling:
dag_name: bqetl_org_mozilla_fenix_derived
---
# Generated via bigquery_etl.glean_usage
friendly_name: 'Clients Daily Metrics'
description: >
  Daily per-client aggregates on top of metrics pings
owners:
  - ascholtz@mozilla.com

Просмотреть файл

@ -1,33 +1,83 @@
WITH fenix_unioned AS (
SELECT
*
FROM
org_mozilla_firefox.metrics
WHERE
DATE(submission_timestamp) = @submission_date
UNION ALL
SELECT
*
FROM
org_mozilla_firefox_beta.metrics
WHERE
DATE(submission_timestamp) = @submission_date
UNION ALL
SELECT
*
FROM
org_mozilla_fennec_aurora.metrics
WHERE
DATE(submission_timestamp) = @submission_date
)
SELECT
DATE(submission_timestamp) AS submission_date,
client_info.client_id AS client_id,
sample_id,
COUNT(*) AS n_metrics_ping,
1 AS days_sent_metrics_ping_bits,
SUM(metrics.counter.events_normal_and_private_uri_count) AS uri_count,
LOGICAL_OR(metrics.boolean.metrics_default_browser) AS is_default_browser
LOGICAL_OR(metrics.boolean.metrics_default_browser) AS is_default_browser,
FROM
fenix_unioned
`org_mozilla_firefox.metrics` m
WHERE
DATE(submission_timestamp) = @submission_date
GROUP BY
submission_date,
client_id,
sample_id
UNION ALL
SELECT
DATE(submission_timestamp) AS submission_date,
client_info.client_id AS client_id,
sample_id,
COUNT(*) AS n_metrics_ping,
1 AS days_sent_metrics_ping_bits,
SUM(metrics.counter.events_normal_and_private_uri_count) AS uri_count,
LOGICAL_OR(metrics.boolean.metrics_default_browser) AS is_default_browser,
FROM
`org_mozilla_firefox_beta.metrics` m
WHERE
DATE(submission_timestamp) = @submission_date
GROUP BY
submission_date,
client_id,
sample_id
UNION ALL
SELECT
DATE(submission_timestamp) AS submission_date,
client_info.client_id AS client_id,
sample_id,
COUNT(*) AS n_metrics_ping,
1 AS days_sent_metrics_ping_bits,
SUM(metrics.counter.events_normal_and_private_uri_count) AS uri_count,
LOGICAL_OR(metrics.boolean.metrics_default_browser) AS is_default_browser,
FROM
`org_mozilla_fenix.metrics` m
WHERE
DATE(submission_timestamp) = @submission_date
GROUP BY
submission_date,
client_id,
sample_id
UNION ALL
SELECT
DATE(submission_timestamp) AS submission_date,
client_info.client_id AS client_id,
sample_id,
COUNT(*) AS n_metrics_ping,
1 AS days_sent_metrics_ping_bits,
SUM(metrics.counter.events_normal_and_private_uri_count) AS uri_count,
LOGICAL_OR(metrics.boolean.metrics_default_browser) AS is_default_browser,
FROM
`org_mozilla_fenix_nightly.metrics` m
WHERE
DATE(submission_timestamp) = @submission_date
GROUP BY
submission_date,
client_id,
sample_id
UNION ALL
SELECT
DATE(submission_timestamp) AS submission_date,
client_info.client_id AS client_id,
sample_id,
COUNT(*) AS n_metrics_ping,
1 AS days_sent_metrics_ping_bits,
SUM(metrics.counter.events_normal_and_private_uri_count) AS uri_count,
LOGICAL_OR(metrics.boolean.metrics_default_browser) AS is_default_browser,
FROM
`org_mozilla_fennec_aurora.metrics` m
WHERE
DATE(submission_timestamp) = @submission_date
GROUP BY
submission_date,
client_id,

Просмотреть файл

@ -1,11 +1,5 @@
friendly_name: 'Clients Daily Metrics'
description: >
Daily per-client aggregates on top of metrics pings
owners:
- ascholtz@mozilla.com
labels:
application: firefox_ios
schedule: daily
incremental: true
scheduling:
dag_name: bqetl_firefox_ios
---
# Generated via bigquery_etl.glean_usage
friendly_name: 'Clients Daily Metrics'
description: >
  Daily per-client aggregates on top of metrics pings
owners:
  - ascholtz@mozilla.com

Просмотреть файл

@ -1,28 +1,49 @@
WITH ios_unioned AS (
SELECT
*
FROM
org_mozilla_ios_firefox.metrics
WHERE
DATE(submission_timestamp) = @submission_date
UNION ALL
SELECT
*
FROM
org_mozilla_ios_firefoxbeta.metrics
WHERE
DATE(submission_timestamp) = @submission_date
)
SELECT
DATE(submission_timestamp) AS submission_date,
client_info.client_id AS client_id,
sample_id,
-- no URI count on fx ios - there is cumulative tab count.
SUM(CAST(NULL AS int64)) AS uri_count,
-- https://dictionary.telemetry.mozilla.org/apps/firefox_ios/metrics/app_opened_as_default_browser
LOGICAL_OR(metrics.counter.app_opened_as_default_browser > 0) AS is_default_browser
COUNT(*) AS n_metrics_ping,
1 AS days_sent_metrics_ping_bits,
-- no URI count on fx ios - there is cumulative tab count.
SUM(CAST(NULL AS int64)) AS uri_count,
LOGICAL_OR(metrics.counter.app_opened_as_default_browser > 0) AS is_default_browser,
FROM
ios_unioned
`org_mozilla_ios_firefox.metrics` m
WHERE
DATE(submission_timestamp) = @submission_date
GROUP BY
submission_date,
client_id,
sample_id
UNION ALL
SELECT
DATE(submission_timestamp) AS submission_date,
client_info.client_id AS client_id,
sample_id,
COUNT(*) AS n_metrics_ping,
1 AS days_sent_metrics_ping_bits,
-- no URI count on fx ios - there is cumulative tab count.
SUM(CAST(NULL AS int64)) AS uri_count,
LOGICAL_OR(metrics.counter.app_opened_as_default_browser > 0) AS is_default_browser,
FROM
`org_mozilla_ios_firefoxbeta.metrics` m
WHERE
DATE(submission_timestamp) = @submission_date
GROUP BY
submission_date,
client_id,
sample_id
UNION ALL
SELECT
DATE(submission_timestamp) AS submission_date,
client_info.client_id AS client_id,
sample_id,
COUNT(*) AS n_metrics_ping,
1 AS days_sent_metrics_ping_bits,
-- no URI count on fx ios - there is cumulative tab count.
SUM(CAST(NULL AS int64)) AS uri_count,
LOGICAL_OR(metrics.counter.app_opened_as_default_browser > 0) AS is_default_browser,
FROM
`org_mozilla_ios_fennec.metrics` m
WHERE
DATE(submission_timestamp) = @submission_date
GROUP BY
submission_date,
client_id,

Просмотреть файл

@ -1,11 +1,5 @@
friendly_name: 'Clients Daily Metrics'
description: >
Daily per-client aggregates on top of metrics pings
owners:
- ascholtz@mozilla.com
labels:
application: fenix
schedule: daily
incremental: true
scheduling:
dag_name: bqetl_focus_ios
---
# Generated via bigquery_etl.glean_usage
friendly_name: 'Clients Daily Metrics'
description: >
  Daily per-client aggregates on top of metrics pings
owners:
  - ascholtz@mozilla.com

Просмотреть файл

@ -1,21 +1,15 @@
WITH ios_focus_unioned AS (
SELECT
*
FROM
org_mozilla_ios_focus.metrics
WHERE
DATE(submission_timestamp) = @submission_date
)
SELECT
DATE(submission_timestamp) AS submission_date,
client_info.client_id AS client_id,
sample_id,
-- no URI count on fx ios - there is cumulative tab count.
SUM(CAST(NULL AS int64)) AS uri_count,
-- no default browser setting on focus ios
LOGICAL_OR(CAST(NULL AS boolean)) AS is_default_browser
COUNT(*) AS n_metrics_ping,
1 AS days_sent_metrics_ping_bits,
-- URI count is always NULL here (no source metric is selected for it).
SUM(CAST(NULL AS int64)) AS uri_count,
-- no default browser setting on focus ios
LOGICAL_OR(CAST(NULL AS boolean)) AS is_default_browser,
FROM
ios_focus_unioned
`org_mozilla_ios_focus.metrics` m
WHERE
DATE(submission_timestamp) = @submission_date
GROUP BY
submission_date,
client_id,