Fenix baseline_daily and clients_last_seen tables
As announced in: https://mail.mozilla.org/pipermail/fx-data-dev/2020-April/000339.html
This commit is contained in:
Родитель
f6035861f3
Коммит
568d0a2b6c
|
@ -2,6 +2,14 @@
|
|||
|
||||
This document records interesting code that we've deleted for the sake of discoverability for the future.
|
||||
|
||||
## 2020-04 Fenix baseline_daily and clients_last_seen
|
||||
|
||||
- [Removal PR](https://github.com/mozilla/bigquery-etl/pull/925)
|
||||
|
||||
We are now using dynamically generated queries for generic Glean
|
||||
ETL on top of baseline pings, so we have deprecated previous versions
|
||||
of daily and last_seen tables.
|
||||
|
||||
## Smoot Usage v1
|
||||
|
||||
- [Removal PR](https://github.com/mozilla/bigquery-etl/pull/460)
|
||||
|
|
|
@ -1,21 +0,0 @@
|
|||
-- User-facing view over the Fenix clients_last_seen_v1 table. On top of every
-- column of the underlying table it exposes "days since" offsets decoded from
-- the bit-pattern columns, plus a derived date_last_seen.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.org_mozilla_fenix.clients_last_seen` AS
WITH decoded AS (
  SELECT
    -- UDFs cannot be used in a view, so the body of udf.bitpos(bits) is pasted
    -- literally: `bits & -bits` keeps only the lowest set bit, and the base-2
    -- log of that value is the bit's position.
    CAST(SAFE.LOG(days_seen_bits & -days_seen_bits, 2) AS INT64) AS days_since_seen,
    CAST(
      SAFE.LOG(days_created_profile_bits & -days_created_profile_bits, 2) AS INT64
    ) AS days_since_created_profile,
    *
  FROM
    `moz-fx-data-shared-prod.org_mozilla_fenix_derived.clients_last_seen_v1`
)
SELECT
  -- date_last_seen is kept for compatibility with existing queries.
  DATE_SUB(submission_date, INTERVAL days_since_seen DAY) AS date_last_seen,
  *
FROM
  decoded
|
|
@ -1,104 +0,0 @@
|
|||
-- Daily per-client aggregate of Fenix baseline and metrics pings; emits one row
-- per (client_id, submission_date).
--
-- Parameter:
--   @submission_date (DATE) - the day to process. Passing NULL reprocesses all
--   dates.
WITH unioned AS (
  -- Stack both ping types into a single relation. Each row carries its own
  -- payload column (baseline_metrics or metrics) and NULL for the other one.
  SELECT
    submission_timestamp,
    document_id,
    client_info,
    sample_id,
    metadata,
    normalized_channel,
    metrics AS baseline_metrics,
    NULL AS metrics
  FROM
    org_mozilla_fenix_stable.baseline_v1
  UNION ALL
  SELECT
    submission_timestamp,
    document_id,
    client_info,
    sample_id,
    metadata,
    normalized_channel,
    NULL AS baseline_metrics,
    metrics
  FROM
    org_mozilla_fenix_stable.metrics_v1
),
--
base AS (
  -- Derive the grouping keys and drop pings that carry no client_id.
  SELECT
    DATE(submission_timestamp) AS submission_date,
    LOWER(client_info.client_id) AS client_id,
    *
  FROM
    unioned
  WHERE
    client_info.client_id IS NOT NULL
),
--
windowed AS (
  -- Every aggregate is computed over the client's full day (window w1), so all
  -- rows of a partition hold identical values; _n lets the outer query keep
  -- exactly one of them.
  SELECT
    submission_date,
    client_id,
    sample_id,
    ROW_NUMBER() OVER w1_unframed AS _n,
    'Fenix' AS app_name,
    --
    -- Take the earliest first_run_date if ambiguous. Only the first 10
    -- characters (the %F date part) of the raw string are parsed.
    MIN(SAFE.PARSE_DATE('%F', SUBSTR(client_info.first_run_date, 1, 10))) OVER w1 AS first_run_date,
    --
    -- Sums over distinct baseline pings.
    SUM(
      udf.glean_timespan_seconds(baseline_metrics.timespan.glean_baseline_duration)
    ) OVER w1 AS durations,
    --
    -- For all other dimensions, we use the mode of observed values in the day.
    udf.mode_last(ARRAY_AGG(client_info.os) OVER w1) AS os,
    udf.mode_last(ARRAY_AGG(client_info.os_version) OVER w1) AS os_version,
    udf.mode_last(ARRAY_AGG(baseline_metrics.string.glean_baseline_locale) OVER w1) AS locale,
    udf.json_mode_last(
      ARRAY_AGG(udf.geo_struct(metadata.geo.country, metadata.geo.city, NULL, NULL)) OVER w1
    ).* EXCEPT (geo_subdivision1, geo_subdivision2),
    udf.mode_last(ARRAY_AGG(client_info.device_manufacturer) OVER w1) AS device_manufacturer,
    udf.mode_last(ARRAY_AGG(client_info.device_model) OVER w1) AS device_model,
    udf.mode_last(ARRAY_AGG(client_info.app_build) OVER w1) AS app_build,
    udf.mode_last(ARRAY_AGG(normalized_channel) OVER w1) AS normalized_channel,
    udf.mode_last(ARRAY_AGG(client_info.architecture) OVER w1) AS architecture,
    udf.mode_last(
      ARRAY_AGG(metrics.string.search_default_engine_name) OVER w1
    ) AS search_default_engine_name,
    -- Reads the *_code metric; this column previously aggregated the *_name
    -- metric by mistake and so duplicated search_default_engine_name.
    udf.mode_last(
      ARRAY_AGG(metrics.string.search_default_engine_code) OVER w1
    ) AS search_default_engine_code,
    udf.mode_last(ARRAY_AGG(metrics.boolean.metrics_default_browser) OVER w1) AS default_browser,
    udf.mode_last(ARRAY_AGG(client_info.app_display_version) OVER w1) AS app_display_version
  FROM
    base
  WHERE
    -- Reprocess all dates by running this query with --parameter=submission_date:DATE:NULL
    (@submission_date IS NULL OR @submission_date = submission_date)
  WINDOW
    w1 AS (
      PARTITION BY
        client_id,
        submission_date
      ORDER BY
        submission_timestamp
      ROWS BETWEEN
        UNBOUNDED PRECEDING
        AND UNBOUNDED FOLLOWING
    ),
    -- We must provide a modified window for ROW_NUMBER which cannot accept a frame clause.
    w1_unframed AS (
      PARTITION BY
        client_id,
        submission_date
      ORDER BY
        submission_timestamp
    )
)
SELECT
  * EXCEPT (_n)
FROM
  windowed
WHERE
  _n = 1
|
|
@ -1,25 +0,0 @@
|
|||
-- Creates the empty Fenix clients_last_seen_v1 table. Its schema is the two
-- bit-pattern columns followed by the columns of clients_daily_v1.
CREATE TABLE
  `moz-fx-data-shared-prod.org_mozilla_fenix_derived.clients_last_seen_v1`
PARTITION BY submission_date
CLUSTER BY app_name, os, sample_id
OPTIONS (require_partition_filter = TRUE)
AS
SELECT
  CAST(NULL AS DATE) AS submission_date,
  0 AS days_seen_bits,
  0 AS days_created_profile_bits,
  -- The wildcard is deliberately last: when clients_daily gains new columns
  -- they can be appended to this schema in the same order, which may be
  -- required for the daily join query between the two tables to validate.
  * EXCEPT (submission_date)
FROM
  `moz-fx-data-shared-prod.org_mozilla_fenix_derived.clients_daily_v1`
WHERE
  -- Constant-false filter: output an empty table and read no input rows.
  FALSE
|
|
@ -1,49 +0,0 @@
|
|||
-- Daily incremental step for the Fenix clients_last_seen_v1 table: merges the
-- clients_daily_v1 rows for @submission_date with the previous day's
-- clients_last_seen_v1 rows.
--
-- Parameter:
--   @submission_date (DATE) - the day being produced.
WITH today AS (
  SELECT
    -- Activity history for each usage criterion over the past 28 days is held
    -- in a single 64-bit integer whose rightmost bit stands for the current
    -- day. Every client present in today's daily table gets that bit set.
    CAST(TRUE AS INT64) AS days_seen_bits,
    udf.days_since_created_profile_as_28_bits(
      DATE_DIFF(submission_date, first_run_date, DAY)
    ) AS days_created_profile_bits,
    * EXCEPT (submission_date)
  FROM clients_daily_v1
  WHERE submission_date = @submission_date
),
--
yesterday AS (
  SELECT * EXCEPT (submission_date)
  FROM clients_last_seen_v1
  WHERE
    submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY)
    -- Skip rows from yesterday that have now fallen outside the 28-day window.
    AND udf.shift_28_bits_one_day(days_seen_bits) > 0
)
--
SELECT
  @submission_date AS submission_date,
  -- Dimensions come from today's row when the client was seen today and from
  -- yesterday's row otherwise; the bit patterns always combine both days.
  IF(today.client_id IS NOT NULL, today, yesterday).* REPLACE (
    udf.combine_adjacent_days_28_bits(
      yesterday.days_seen_bits,
      today.days_seen_bits
    ) AS days_seen_bits,
    udf.coalesce_adjacent_days_28_bits(
      yesterday.days_created_profile_bits,
      today.days_created_profile_bits
    ) AS days_created_profile_bits
  )
FROM today
FULL JOIN yesterday
USING (client_id)
|
|
@ -1,7 +0,0 @@
|
|||
-- User-facing view: a straight pass-through of every column of the derived
-- baseline_daily_v1 table.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.org_mozilla_firefox.baseline_daily` AS
SELECT *
FROM `moz-fx-data-shared-prod.org_mozilla_firefox_derived.baseline_daily_v1`
|
|
@ -1,17 +0,0 @@
|
|||
-- User-facing view over the derived clients_last_seen_v1 table. It adds
-- "days since" columns computed from the baseline bit patterns and flattens the
-- nested baseline and metrics structs into top-level columns.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.org_mozilla_firefox.clients_last_seen` AS
SELECT
  `moz-fx-data-shared-prod.udf.pos_of_trailing_set_bit`(baseline.days_seen_bits) AS days_since_seen,
  `moz-fx-data-shared-prod.udf.pos_of_trailing_set_bit`(
    baseline.days_seen_session_start_bits
  ) AS days_since_seen_session_start,
  `moz-fx-data-shared-prod.udf.pos_of_trailing_set_bit`(
    baseline.days_seen_session_end_bits
  ) AS days_since_seen_session_end,
  DATE_DIFF(submission_date, baseline.first_run_date, DAY) AS days_since_created_profile,
  -- Top-level columns first, then the fields of each struct.
  * EXCEPT (baseline, metrics),
  baseline.*,
  metrics.*
FROM `moz-fx-data-shared-prod.org_mozilla_firefox_derived.clients_last_seen_v1`
|
|
@ -1,7 +0,0 @@
|
|||
-- User-facing view: a straight pass-through of every column of the derived
-- metrics_daily_v1 table.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.org_mozilla_firefox.metrics_daily` AS
SELECT *
FROM `moz-fx-data-shared-prod.org_mozilla_firefox_derived.metrics_daily_v1`
|
|
@ -1,10 +0,0 @@
|
|||
friendly_name: Baseline Daily
|
||||
description: >
|
||||
A daily aggregate of baseline pings from each Firefox for Android (Fenix)
|
||||
client across all release channels, partitioned by day
|
||||
owners:
|
||||
- jklukas@mozilla.com
|
||||
labels:
|
||||
application: fenix
|
||||
schedule: daily
|
||||
incremental: true
|
|
@ -1,169 +0,0 @@
|
|||
/*

Daily aggregation of baseline pings only, taken across the various apps that
correspond to the different channels of "Fenix", the new Firefox for Android.
Emits one row per (sample_id, client_id, submission_date).

Parameter:
  @submission_date (DATE) - day to process; NULL reprocesses every date.

*/
-- The per-app schemas differ slightly, so the baseline views cannot be unioned
-- blindly. This helper pulls the wanted set of fields out of a ping before the
-- union.
CREATE TEMP FUNCTION extract_fields(baseline ANY TYPE) AS (
  (
    SELECT AS STRUCT
      baseline.submission_timestamp,
      DATE(baseline.submission_timestamp) AS submission_date,
      LOWER(baseline.client_info.client_id) AS client_id,
      baseline.sample_id,
      -- Only the first 10 characters (the %F date part) are parsed.
      SAFE.PARSE_DATE('%F', SUBSTR(baseline.client_info.first_run_date, 1, 10)) AS first_run_date,
      baseline.ping_info.parsed_end_time,
      udf.glean_timespan_seconds(baseline.metrics.timespan.glean_baseline_duration) AS duration,
      baseline.client_info.android_sdk_version,
      baseline.client_info.app_build,
      baseline.client_info.app_channel,
      baseline.client_info.app_display_version,
      baseline.client_info.architecture,
      baseline.client_info.device_manufacturer,
      baseline.client_info.device_model,
      baseline.client_info.telemetry_sdk_build,
      baseline.client_info.locale,
      baseline.metadata.geo.city,
      baseline.metadata.geo.country,
      baseline.metrics.string.glean_baseline_locale,
      baseline.normalized_os,
      baseline.normalized_os_version
  )
);

WITH all_channels AS (
  -- One branch per app; the channel label is assigned from the source dataset.
  SELECT extract_fields(baseline).*, 'release' AS normalized_channel
  FROM org_mozilla_firefox.baseline AS baseline
  UNION ALL
  SELECT extract_fields(baseline).*, 'beta' AS normalized_channel
  FROM org_mozilla_firefox_beta.baseline AS baseline
  UNION ALL
  SELECT extract_fields(baseline).*, 'nightly' AS normalized_channel
  FROM org_mozilla_fenix.baseline AS baseline
  -- These final two apps should be retired in February.
  UNION ALL
  SELECT extract_fields(baseline).*, 'aurora nightly' AS normalized_channel
  FROM org_mozilla_fennec_aurora.baseline AS baseline
  UNION ALL
  SELECT extract_fields(baseline).*, 'preview nightly' AS normalized_channel
  FROM org_mozilla_fenix_nightly.baseline AS baseline
),
--
with_session_dates AS (
  SELECT
    *,
    -- Session start is the ping end time minus the session duration. For the
    -- reasoning behind this calculation, see the Glean docs:
    -- https://mozilla.github.io/glean/book/user/pings/baseline.html#contents
    DATE(TIMESTAMP_SUB(parsed_end_time, INTERVAL duration SECOND)) AS session_start_date,
    DATE(parsed_end_time) AS session_end_date
  FROM all_channels
),
--
with_offsets AS (
  -- Express the session dates as whole-day offsets back from submission_date.
  SELECT
    *,
    DATE_DIFF(submission_date, session_start_date, DAY) AS session_start_date_offset,
    DATE_DIFF(submission_date, session_end_date, DAY) AS session_end_date_offset
  FROM with_offset_source
),
--
per_client_day AS (
  -- Every aggregate spans the client's whole day, so all rows in a partition
  -- carry the same values; _row_num lets the final SELECT keep just one.
  SELECT
    submission_date,
    client_id,
    sample_id,
    ROW_NUMBER() OVER day_ordered AS _row_num,
    --
    -- When first_run_date is ambiguous, the earliest value wins.
    MIN(first_run_date) OVER day_full AS first_run_date,
    --
    -- Sums over distinct baseline pings; durations outside 0..100000 seconds
    -- count as 0.
    SUM(IF(duration BETWEEN 0 AND 100000, duration, 0)) OVER day_full AS durations,
    --
    -- Bit patterns capturing activity dates relative to the submission date.
    -- Offsets outside 0..27 become NULL and so set no bit.
    BIT_OR(
      1 << IF(session_start_date_offset BETWEEN 0 AND 27, session_start_date_offset, NULL)
    ) OVER day_full AS days_seen_session_start_bits,
    BIT_OR(
      1 << IF(session_end_date_offset BETWEEN 0 AND 27, session_end_date_offset, NULL)
    ) OVER day_full AS days_seen_session_end_bits,
    --
    -- All remaining dimensions take the mode of the values observed in the day.
    udf.mode_last(ARRAY_AGG(normalized_channel) OVER day_full) AS normalized_channel,
    udf.mode_last(ARRAY_AGG(android_sdk_version) OVER day_full) AS android_sdk_version,
    udf.mode_last(ARRAY_AGG(app_build) OVER day_full) AS app_build,
    udf.mode_last(ARRAY_AGG(app_channel) OVER day_full) AS app_channel,
    udf.mode_last(ARRAY_AGG(app_display_version) OVER day_full) AS app_display_version,
    udf.mode_last(ARRAY_AGG(architecture) OVER day_full) AS architecture,
    udf.mode_last(ARRAY_AGG(device_manufacturer) OVER day_full) AS device_manufacturer,
    udf.mode_last(ARRAY_AGG(device_model) OVER day_full) AS device_model,
    udf.mode_last(ARRAY_AGG(telemetry_sdk_build) OVER day_full) AS telemetry_sdk_build,
    udf.mode_last(ARRAY_AGG(locale) OVER day_full) AS locale,
    udf.mode_last(ARRAY_AGG(city) OVER day_full) AS city,
    udf.mode_last(ARRAY_AGG(country) OVER day_full) AS country,
    udf.mode_last(ARRAY_AGG(glean_baseline_locale) OVER day_full) AS glean_baseline_locale,
    udf.mode_last(ARRAY_AGG(normalized_os) OVER day_full) AS os,
    udf.mode_last(ARRAY_AGG(normalized_os_version) OVER day_full) AS os_version
  FROM with_offsets
  WHERE
    -- Reprocess all dates by running this query with --parameter=submission_date:DATE:NULL
    (@submission_date IS NULL OR @submission_date = submission_date)
  WINDOW
    day_full AS (
      PARTITION BY sample_id, client_id, submission_date
      ORDER BY submission_timestamp
      ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ),
    -- ROW_NUMBER cannot accept a frame clause, so it gets its own window
    -- without one.
    day_ordered AS (
      PARTITION BY sample_id, client_id, submission_date
      ORDER BY submission_timestamp
    )
),
--
-- Alias kept separate so that with_offsets reads from a single named source.
with_offset_source AS (
  SELECT * FROM with_session_dates
)
--
SELECT
  wnd.submission_date,
  cfs.fenix_first_seen_date,
  cfs.fennec_first_seen_date,
  wnd.* EXCEPT (_row_num, submission_date)
FROM per_client_day AS wnd
-- The cost of joining in the first_seen dates is paid here so that further
-- views on top of baseline_daily can identify returning users cheaply.
LEFT JOIN clients_first_seen_v1 AS cfs
USING (client_id)
WHERE _row_num = 1
|
|
@ -1,101 +0,0 @@
|
|||
-- See query.sql for more explanation of what's going on here.
--
-- Builds the clients_first_seen_v1 table in one pass over all available
-- baseline and migration pings.
CREATE TABLE
  `moz-fx-data-shared-prod.org_mozilla_firefox_derived.clients_first_seen_v1`
PARTITION BY fenix_first_seen_date
AS
WITH pings AS (
  -- Baseline pings carry no fennec client id, so it is a typed NULL there.
  SELECT client_info.client_id, submission_timestamp, CAST(NULL AS STRING) AS fennec_id
  FROM org_mozilla_firefox.baseline
  UNION ALL
  SELECT client_info.client_id, submission_timestamp, CAST(NULL AS STRING) AS fennec_id
  FROM org_mozilla_firefox_beta.baseline
  UNION ALL
  SELECT client_info.client_id, submission_timestamp, CAST(NULL AS STRING) AS fennec_id
  FROM org_mozilla_fennec_aurora.baseline
  UNION ALL
  SELECT client_info.client_id, submission_timestamp, CAST(NULL AS STRING) AS fennec_id
  FROM org_mozilla_fenix.baseline
  UNION ALL
  SELECT client_info.client_id, submission_timestamp, CAST(NULL AS STRING) AS fennec_id
  FROM org_mozilla_fenix_nightly.baseline
  -- Migration pings supply the fennec client id.
  UNION ALL
  SELECT
    client_info.client_id,
    submission_timestamp,
    metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
  FROM org_mozilla_firefox.migration
  UNION ALL
  SELECT
    client_info.client_id,
    submission_timestamp,
    metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
  FROM org_mozilla_firefox_beta.migration
  UNION ALL
  SELECT
    client_info.client_id,
    submission_timestamp,
    metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
  FROM org_mozilla_fennec_aurora.migration
  UNION ALL
  SELECT
    client_info.client_id,
    submission_timestamp,
    metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
  FROM org_mozilla_fenix.migration
  UNION ALL
  SELECT
    client_info.client_id,
    submission_timestamp,
    metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
  FROM org_mozilla_fenix_nightly.migration
),
--
first_seen AS (
  -- One row per client: the date of its earliest ping and the mode of the
  -- lowercased fennec ids it reported.
  SELECT
    client_id,
    DATE(MIN(submission_timestamp)) AS fenix_first_seen_date,
    udf.mode_last(ARRAY_AGG(LOWER(fennec_id))) AS fennec_id
  FROM pings
  WHERE submission_timestamp > '2010-01-01'
  GROUP BY client_id
)
--
SELECT
  first_seen.client_id,
  first_seen.fenix_first_seen_date,
  core_clients_first_seen.first_seen_date AS fennec_first_seen_date
FROM first_seen
LEFT JOIN telemetry_derived.core_clients_first_seen_v1 AS core_clients_first_seen
  ON (first_seen.fennec_id = core_clients_first_seen.client_id)
|
|
@ -1,11 +0,0 @@
|
|||
friendly_name: Clients First Seen
|
||||
description: >
|
||||
A brief summary of the first date each Firefox for Android (Fenix) client
|
||||
sent a ping to the telemetry infrastructure. Each client will appear exactly
|
||||
once in this table over all time.
|
||||
owners:
|
||||
- jklukas@mozilla.com
|
||||
labels:
|
||||
application: fenix
|
||||
schedule: daily
|
||||
incremental: true
|
|
@ -1,107 +0,0 @@
|
|||
/*

This clients_first_seen table is incrementally populated every day such that
each client only appears once in the entire table. For each day's query, we
scan the entire history of the table to exclude clients we've already recorded
as seen.

This table is convenient for retention calculations where we want to be able to
tell the difference between a lapsed user returning and a truly new user.

Parameter:
  @submission_date (DATE) - the day whose pings are processed.

*/
-- We union over baseline pings and migration pings so that we can pull the
-- user's fennec user ID from the migration ping if they are being upgraded
-- from fennec.
WITH base AS (
  SELECT
    client_info.client_id,
    submission_timestamp,
    CAST(NULL AS STRING) AS fennec_id
  FROM
    org_mozilla_firefox.baseline
  UNION ALL
  SELECT
    client_info.client_id,
    submission_timestamp,
    CAST(NULL AS STRING) AS fennec_id
  FROM
    org_mozilla_firefox_beta.baseline
  UNION ALL
  SELECT
    client_info.client_id,
    submission_timestamp,
    CAST(NULL AS STRING) AS fennec_id
  FROM
    org_mozilla_fennec_aurora.baseline
  UNION ALL
  SELECT
    client_info.client_id,
    submission_timestamp,
    CAST(NULL AS STRING) AS fennec_id
  FROM
    org_mozilla_fenix.baseline
  UNION ALL
  SELECT
    client_info.client_id,
    submission_timestamp,
    CAST(NULL AS STRING) AS fennec_id
  FROM
    org_mozilla_fenix_nightly.baseline
  UNION ALL
  SELECT
    client_info.client_id,
    submission_timestamp,
    metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
  FROM
    org_mozilla_firefox.migration
  UNION ALL
  SELECT
    client_info.client_id,
    submission_timestamp,
    metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
  FROM
    org_mozilla_firefox_beta.migration
  UNION ALL
  SELECT
    client_info.client_id,
    submission_timestamp,
    metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
  FROM
    org_mozilla_fennec_aurora.migration
  UNION ALL
  SELECT
    client_info.client_id,
    submission_timestamp,
    metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
  FROM
    org_mozilla_fenix.migration
  UNION ALL
  SELECT
    client_info.client_id,
    submission_timestamp,
    metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
  FROM
    org_mozilla_fenix_nightly.migration
),
per_client AS (
  -- One row per client that sent a ping on @submission_date.
  SELECT
    client_id,
    DATE(MIN(submission_timestamp)) AS fenix_first_seen_date,
    udf.mode_last(ARRAY_AGG(LOWER(fennec_id))) AS fennec_id,
  FROM
    base
  WHERE
    DATE(submission_timestamp) = @submission_date
  GROUP BY
    client_id
),
previously_seen AS (
  -- Clients already recorded on an earlier day. Only partitions strictly
  -- before @submission_date are read, so re-running a day reproduces the same
  -- output instead of excluding the rows that day wrote itself. DISTINCT keeps
  -- the anti-join below from multiplying rows.
  SELECT DISTINCT
    client_id
  FROM
    clients_first_seen_v1
  WHERE
    fenix_first_seen_date < @submission_date
)
SELECT
  per_client.client_id,
  per_client.fenix_first_seen_date,
  core_clients_first_seen.first_seen_date AS fennec_first_seen_date,
FROM
  per_client
LEFT JOIN
  previously_seen
ON
  (per_client.client_id = previously_seen.client_id)
LEFT JOIN
  telemetry_derived.core_clients_first_seen_v1 AS core_clients_first_seen
ON
  (per_client.fennec_id = core_clients_first_seen.client_id)
WHERE
  -- Anti-join: keep only clients with no earlier first-seen record, so each
  -- client appears once in the entire table.
  previously_seen.client_id IS NULL
|
|
@ -1,34 +0,0 @@
|
|||
-- Creates the empty clients_last_seen_v1 table. Key columns sit at the top
-- level; the remaining baseline_daily_v1 and metrics_daily_v1 columns are
-- nested in the `baseline` and `metrics` structs.
CREATE TABLE
  `moz-fx-data-shared-prod.org_mozilla_firefox_derived.clients_last_seen_v1`
PARTITION BY submission_date
CLUSTER BY normalized_channel, sample_id
OPTIONS (require_partition_filter = TRUE)
AS
SELECT
  CAST(NULL AS DATE) AS submission_date,
  client_id,
  sample_id,
  baseline.normalized_channel,
  -- The wildcards are deliberately last inside each struct: when clients_daily
  -- gains new columns they can be appended to this schema in the same order,
  -- which may be required for the daily join query between the two tables to
  -- validate.
  (
    SELECT AS STRUCT
      0 AS days_seen_bits,
      baseline.* EXCEPT (submission_date, client_id, sample_id, normalized_channel)
  ) AS baseline,
  (SELECT AS STRUCT metrics.* EXCEPT (submission_date, client_id, sample_id)) AS metrics
FROM `moz-fx-data-shared-prod.org_mozilla_firefox_derived.baseline_daily_v1` AS baseline
LEFT JOIN `moz-fx-data-shared-prod.org_mozilla_firefox_derived.metrics_daily_v1` AS metrics
USING (client_id, sample_id)
WHERE
  -- Constant-false filter: output an empty table and read no input rows.
  FALSE
|
|
@ -1,11 +0,0 @@
|
|||
friendly_name: Clients Last Seen
|
||||
description: >
|
||||
A rolling view of aggregated activity over the past 28 days
|
||||
of each Firefox for Android (Fenix) client across all release channels,
|
||||
partitioned by day
|
||||
owners:
|
||||
- jklukas@mozilla.com
|
||||
labels:
|
||||
application: fenix
|
||||
schedule: daily
|
||||
incremental: true
|
|
@ -1,85 +0,0 @@
|
|||
/*

This query carries information about each client over a 28-day window,
encoding various forms of usage over those 28 days as bit patterns.

For Glean, whether a user counts as active is decided from baseline pings
alone. Some dimensions based on metrics pings are recorded here as well, but
the baseline info and the metrics info are kept in separate structs so that a
metrics ping can only refresh dimension values and can never mark a user as
active for a given day.

Parameter:
  @submission_date (DATE) - the day being produced.

*/
WITH previous_day AS (
  -- Yesterday's state of this table.
  SELECT * EXCEPT (submission_date)
  FROM clients_last_seen_v1
  WHERE submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY)
),
--
current_day AS (
  -- Today's baseline and metrics aggregates, shaped like a row of this table.
  SELECT
    client_id,
    sample_id,
    baseline_today.normalized_channel,
    baseline_today.client_id IS NOT NULL AS baseline_received_today,
    metrics_today.client_id IS NOT NULL AS metrics_received_today,
    (
      SELECT AS STRUCT
        -- Activity history for each usage criterion over the past 28 days is
        -- held in a single 64-bit integer whose rightmost bit stands for the
        -- current day; it is set only when a baseline row exists for today.
        CAST(baseline_today.client_id IS NOT NULL AS INT64) AS days_seen_bits,
        baseline_today.* EXCEPT (submission_date, client_id, sample_id, normalized_channel)
    ) AS baseline,
    (SELECT AS STRUCT metrics_today.* EXCEPT (submission_date, client_id, sample_id)) AS metrics
  FROM baseline_daily_v1 AS baseline_today
  FULL JOIN metrics_daily_v1 AS metrics_today
  USING (submission_date, client_id, sample_id)
  WHERE submission_date = @submission_date
),
--
merged AS (
  SELECT
    @submission_date AS submission_date,
    client_id,
    sample_id,
    COALESCE(current_day.normalized_channel, previous_day.normalized_channel) AS normalized_channel,
    (
      SELECT AS STRUCT
        -- Baseline dimensions come from today when a baseline ping arrived and
        -- from yesterday otherwise; the bit patterns always combine both days.
        IF(baseline_received_today, current_day.baseline, previous_day.baseline).* REPLACE (
          udf.combine_adjacent_days_28_bits(
            previous_day.baseline.days_seen_bits,
            current_day.baseline.days_seen_bits
          ) AS days_seen_bits,
          udf.combine_adjacent_days_28_bits(
            previous_day.baseline.days_seen_session_start_bits,
            current_day.baseline.days_seen_session_start_bits
          ) AS days_seen_session_start_bits,
          udf.combine_adjacent_days_28_bits(
            previous_day.baseline.days_seen_session_end_bits,
            current_day.baseline.days_seen_session_end_bits
          ) AS days_seen_session_end_bits
        )
    ) AS baseline,
    IF(metrics_received_today, current_day.metrics, previous_day.metrics) AS metrics
  FROM current_day
  FULL JOIN previous_day
  USING (client_id, sample_id)
)
SELECT *
FROM merged
WHERE
  -- Keep only clients whose combined days_seen_bits still has a bit set.
  baseline.days_seen_bits > 0
|
|
@ -1,10 +0,0 @@
|
|||
friendly_name: Metrics Daily
|
||||
description: >
|
||||
A daily aggregate of metrics pings from each Firefox for Android (Fenix)
|
||||
client across all release channels, partitioned by day
|
||||
owners:
|
||||
- jklukas@mozilla.com
|
||||
labels:
|
||||
application: fenix
|
||||
schedule: daily
|
||||
incremental: true
|
|
@ -1,90 +0,0 @@
|
|||
/*

Very similar to baseline_daily, but considers only metrics pings. Emits one
row per (sample_id, client_id, submission_date).

Parameter:
  @submission_date (DATE) - day to process; NULL reprocesses every date.

*/
-- Pulls the wanted set of fields out of a metrics ping before the per-app
-- tables are unioned.
CREATE TEMP FUNCTION extract_fields(metrics ANY TYPE) AS (
  (
    SELECT AS STRUCT
      metrics.submission_timestamp,
      DATE(metrics.submission_timestamp) AS submission_date,
      LOWER(metrics.client_info.client_id) AS client_id,
      metrics.sample_id,
      metrics.metrics.string.search_default_engine_name,
      metrics.metrics.string.search_default_engine_code,
      metrics.metrics.boolean.metrics_default_browser
  )
);

WITH all_apps AS (
  SELECT extract_fields(metrics).*
  FROM org_mozilla_firefox.metrics AS metrics
  UNION ALL
  SELECT extract_fields(metrics).*
  FROM org_mozilla_firefox_beta.metrics AS metrics
  UNION ALL
  SELECT extract_fields(metrics).*
  FROM org_mozilla_fennec_aurora.metrics AS metrics
  UNION ALL
  SELECT extract_fields(metrics).*
  FROM org_mozilla_fenix.metrics AS metrics
  UNION ALL
  SELECT extract_fields(metrics).*
  FROM org_mozilla_fenix_nightly.metrics AS metrics
),
--
per_client_day AS (
  -- Every aggregate spans the client's whole day, so all rows in a partition
  -- carry the same values; _row_num lets the final SELECT keep just one.
  SELECT
    submission_date,
    client_id,
    sample_id,
    ROW_NUMBER() OVER day_ordered AS _row_num,
    --
    -- Each dimension takes the mode of the values observed in the day.
    udf.mode_last(ARRAY_AGG(search_default_engine_name) OVER day_full) AS search_default_engine_name,
    udf.mode_last(ARRAY_AGG(search_default_engine_code) OVER day_full) AS search_default_engine_code,
    udf.mode_last(ARRAY_AGG(metrics_default_browser) OVER day_full) AS metrics_default_browser
  FROM all_apps
  WHERE
    -- Reprocess all dates by running this query with --parameter=submission_date:DATE:NULL
    (@submission_date IS NULL OR @submission_date = submission_date)
  WINDOW
    day_full AS (
      PARTITION BY sample_id, client_id, submission_date
      ORDER BY submission_timestamp
      ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ),
    -- ROW_NUMBER cannot accept a frame clause, so it gets its own window
    -- without one.
    day_ordered AS (
      PARTITION BY sample_id, client_id, submission_date
      ORDER BY submission_timestamp
    )
)
--
SELECT * EXCEPT (_row_num)
FROM per_client_day
WHERE _row_num = 1
|
Загрузка…
Ссылка в новой задаче