Fenix baseline_daily and clients_last_seen tables

As announced in:

https://mail.mozilla.org/pipermail/fx-data-dev/2020-April/000339.html
This commit is contained in:
Jeff Klukas 2020-04-22 12:48:18 -04:00
Родитель f6035861f3
Коммит 568d0a2b6c
18 изменённых файлов: 8 добавлений и 858 удалений

Просмотреть файл

@ -2,6 +2,14 @@
This document records interesting code that we've deleted for the sake of discoverability for the future.
## 2020-04 Fenix baseline_daily and clients_last_seen
- [Removal PR](https://github.com/mozilla/bigquery-etl/pull/925)
We are now using dynamically generated queries for generic Glean
ETL on top of baseline pings, so we have deprecated previous versions
of daily and last_seen tables.
## Smoot Usage v1
- [Removal PR](https://github.com/mozilla/bigquery-etl/pull/460)

Просмотреть файл

@ -1,21 +0,0 @@
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.org_mozilla_fenix.clients_last_seen`
AS
WITH with_days_since AS (
SELECT
-- We cannot use UDFs in a view, so we paste the body of udf.bitpos(bits) literally here.
CAST(SAFE.LOG(days_seen_bits & -days_seen_bits, 2) AS INT64) AS days_since_seen,
CAST(
SAFE.LOG(days_created_profile_bits & -days_created_profile_bits, 2) AS INT64
) AS days_since_created_profile,
*
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_derived.clients_last_seen_v1`
)
--
SELECT
-- Include date_last_seen for compatibility with existing queries.
DATE_SUB(submission_date, INTERVAL days_since_seen DAY) AS date_last_seen,
*
FROM
with_days_since

Просмотреть файл

@ -1,104 +0,0 @@
WITH unioned AS (
SELECT
submission_timestamp,
document_id,
client_info,
sample_id,
metadata,
normalized_channel,
metrics AS baseline_metrics,
NULL AS metrics
FROM
org_mozilla_fenix_stable.baseline_v1
UNION ALL
SELECT
submission_timestamp,
document_id,
client_info,
sample_id,
metadata,
normalized_channel,
NULL AS baseline_metrics,
metrics
FROM
org_mozilla_fenix_stable.metrics_v1
),
--
base AS (
SELECT
DATE(submission_timestamp) AS submission_date,
LOWER(client_info.client_id) AS client_id,
*
FROM
unioned
WHERE
client_info.client_id IS NOT NULL
),
--
windowed AS (
SELECT
submission_date,
client_id,
sample_id,
ROW_NUMBER() OVER w1_unframed AS _n,
'Fenix' AS app_name,
--
-- Take the earliest first_run_date if ambiguous.
MIN(SAFE.PARSE_DATE('%F', SUBSTR(client_info.first_run_date, 1, 10))) OVER w1 AS first_run_date,
--
-- Sums over distinct baseline pings.
SUM(
udf.glean_timespan_seconds(baseline_metrics.timespan.glean_baseline_duration)
) OVER w1 AS durations,
--
-- For all other dimensions, we use the mode of observed values in the day.
udf.mode_last(ARRAY_AGG(client_info.os) OVER w1) AS os,
udf.mode_last(ARRAY_AGG(client_info.os_version) OVER w1) AS os_version,
udf.mode_last(ARRAY_AGG(baseline_metrics.string.glean_baseline_locale) OVER w1) AS locale,
udf.json_mode_last(
ARRAY_AGG(udf.geo_struct(metadata.geo.country, metadata.geo.city, NULL, NULL)) OVER w1
).* EXCEPT (geo_subdivision1, geo_subdivision2),
udf.mode_last(ARRAY_AGG(client_info.device_manufacturer) OVER w1) AS device_manufacturer,
udf.mode_last(ARRAY_AGG(client_info.device_model) OVER w1) AS device_model,
udf.mode_last(ARRAY_AGG(client_info.app_build) OVER w1) AS app_build,
udf.mode_last(ARRAY_AGG(normalized_channel) OVER w1) AS normalized_channel,
udf.mode_last(ARRAY_AGG(client_info.architecture) OVER w1) AS architecture,
udf.mode_last(
ARRAY_AGG(metrics.string.search_default_engine_name) OVER w1
) AS search_default_engine_name,
udf.mode_last(
ARRAY_AGG(metrics.string.search_default_engine_name) OVER w1
) AS search_default_engine_code,
udf.mode_last(ARRAY_AGG(metrics.boolean.metrics_default_browser) OVER w1) AS default_browser,
udf.mode_last(ARRAY_AGG(client_info.app_display_version) OVER w1) AS app_display_version
FROM
base
WHERE
-- Reprocess all dates by running this query with --parameter=submission_date:DATE:NULL
(@submission_date IS NULL OR @submission_date = submission_date)
WINDOW
w1 AS (
PARTITION BY
client_id,
submission_date
ORDER BY
submission_timestamp
ROWS BETWEEN
UNBOUNDED PRECEDING
AND UNBOUNDED FOLLOWING
),
-- We must provide a modified window for ROW_NUMBER which cannot accept a frame clause.
w1_unframed AS (
PARTITION BY
client_id,
submission_date
ORDER BY
submission_timestamp
)
)
SELECT
* EXCEPT (_n)
FROM
windowed
WHERE
_n = 1

Просмотреть файл

@ -1,25 +0,0 @@
CREATE TABLE
`moz-fx-data-shared-prod.org_mozilla_fenix_derived.clients_last_seen_v1`
PARTITION BY
submission_date
CLUSTER BY
app_name,
os,
sample_id
OPTIONS
(require_partition_filter = TRUE)
AS
SELECT
CAST(NULL AS DATE) AS submission_date,
0 AS days_seen_bits,
0 AS days_created_profile_bits,
-- We make sure to delay * until the end so that as new columns are added
-- to clients_daily, we can add those columns in the same order to the end
-- of this schema, which may be necessary for the daily join query between
-- the two tables to validate.
* EXCEPT (submission_date)
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_derived.clients_daily_v1`
WHERE
-- Output empty table and read no input rows
FALSE

Просмотреть файл

@ -1,49 +0,0 @@
WITH _current AS (
SELECT
-- In this raw table, we capture the history of activity over the past
-- 28 days for each usage criterion as a single 64-bit integer. The
-- rightmost bit represents whether the user was active in the current day.
CAST(TRUE AS INT64) AS days_seen_bits,
udf.days_since_created_profile_as_28_bits(
DATE_DIFF(submission_date, first_run_date, DAY)
) AS days_created_profile_bits,
* EXCEPT (submission_date)
FROM
clients_daily_v1
WHERE
submission_date = @submission_date
),
--
_previous AS (
SELECT
* EXCEPT (submission_date)
FROM
clients_last_seen_v1 AS cls
WHERE
submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY)
-- Filter out rows from yesterday that have now fallen outside the 28-day window.
AND udf.shift_28_bits_one_day(days_seen_bits) > 0
)
--
SELECT
@submission_date AS submission_date,
IF(
_current.client_id IS NOT NULL,
_current,
_previous
).* REPLACE ( --
udf.combine_adjacent_days_28_bits(
_previous.days_seen_bits,
_current.days_seen_bits
) AS days_seen_bits,
udf.coalesce_adjacent_days_28_bits(
_previous.days_created_profile_bits,
_current.days_created_profile_bits
) AS days_created_profile_bits
)
FROM
_current
FULL JOIN
_previous
USING
(client_id)

Просмотреть файл

@ -1,7 +0,0 @@
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.org_mozilla_firefox.baseline_daily`
AS
SELECT
*
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_derived.baseline_daily_v1`

Просмотреть файл

@ -1,17 +0,0 @@
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.org_mozilla_firefox.clients_last_seen`
AS
SELECT
`moz-fx-data-shared-prod.udf.pos_of_trailing_set_bit`(baseline.days_seen_bits) AS days_since_seen,
`moz-fx-data-shared-prod.udf.pos_of_trailing_set_bit`(
baseline.days_seen_session_start_bits
) AS days_since_seen_session_start,
`moz-fx-data-shared-prod.udf.pos_of_trailing_set_bit`(
baseline.days_seen_session_end_bits
) AS days_since_seen_session_end,
DATE_DIFF(submission_date, baseline.first_run_date, DAY) AS days_since_created_profile,
* EXCEPT (baseline, metrics),
baseline.*,
metrics.*
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_derived.clients_last_seen_v1`

Просмотреть файл

@ -1,7 +0,0 @@
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.org_mozilla_firefox.metrics_daily`
AS
SELECT
*
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_derived.metrics_daily_v1`

Просмотреть файл

@ -1,10 +0,0 @@
friendly_name: Baseline Daily
description: >
A daily aggregate of baseline pings from each Firefox for Android (Fenix)
client across all release channels, partitioned by day
owners:
- jklukas@mozilla.com
labels:
application: fenix
schedule: daily
incremental: true

Просмотреть файл

@ -1,169 +0,0 @@
/*
This is a daily aggregation of just baseline pings from across the various
apps that correspond to the different channels of "Fenix", the new Firefox for Android.
*/
-- The schemas for the different apps differ slightly such that we can't
-- blindly union over the baseline views; instead, we extract out the set of
-- fields we want via a UDF before we union.
CREATE TEMP FUNCTION extract_fields(baseline ANY TYPE) AS (
(
SELECT AS STRUCT
baseline.submission_timestamp,
DATE(baseline.submission_timestamp) AS submission_date,
LOWER(baseline.client_info.client_id) AS client_id,
baseline.sample_id,
SAFE.PARSE_DATE('%F', SUBSTR(baseline.client_info.first_run_date, 1, 10)) AS first_run_date,
baseline.ping_info.parsed_end_time,
udf.glean_timespan_seconds(baseline.metrics.timespan.glean_baseline_duration) AS duration,
baseline.client_info.android_sdk_version,
baseline.client_info.app_build,
baseline.client_info.app_channel,
baseline.client_info.app_display_version,
baseline.client_info.architecture,
baseline.client_info.device_manufacturer,
baseline.client_info.device_model,
baseline.client_info.telemetry_sdk_build,
baseline.client_info.locale,
baseline.metadata.geo.city,
baseline.metadata.geo.country,
baseline.metrics.string.glean_baseline_locale,
baseline.normalized_os,
baseline.normalized_os_version,
)
);
WITH unioned AS (
SELECT
extract_fields(baseline).*,
'release' AS normalized_channel
FROM
org_mozilla_firefox.baseline AS baseline
UNION ALL
SELECT
extract_fields(baseline).*,
'beta' AS normalized_channel
FROM
org_mozilla_firefox_beta.baseline AS baseline
UNION ALL
SELECT
extract_fields(baseline).*,
'nightly' AS normalized_channel
FROM
org_mozilla_fenix.baseline AS baseline
-- These final two apps should be retired in February.
UNION ALL
SELECT
extract_fields(baseline).*,
'aurora nightly' AS normalized_channel
FROM
org_mozilla_fennec_aurora.baseline AS baseline
UNION ALL
SELECT
extract_fields(baseline).*,
'preview nightly' AS normalized_channel
FROM
org_mozilla_fenix_nightly.baseline AS baseline
),
--
with_dates AS (
SELECT
*,
-- For explanation of session start time calculation, see Glean docs:
-- https://mozilla.github.io/glean/book/user/pings/baseline.html#contents
DATE(TIMESTAMP_SUB(parsed_end_time, INTERVAL duration SECOND)) AS session_start_date,
DATE(parsed_end_time) AS session_end_date,
FROM
unioned
),
--
base AS (
SELECT
*,
DATE_DIFF(submission_date, session_start_date, DAY) AS session_start_date_offset,
DATE_DIFF(submission_date, session_end_date, DAY) AS session_end_date_offset,
FROM
with_dates
),
--
windowed AS (
SELECT
submission_date,
client_id,
sample_id,
ROW_NUMBER() OVER w1_unframed AS _n,
--
-- Take the earliest first_run_date if ambiguous.
MIN(first_run_date) OVER w1 AS first_run_date,
--
-- Sums over distinct baseline pings.
SUM(IF(duration BETWEEN 0 AND 100000, duration, 0)) OVER w1 AS durations,
--
-- Bit patterns capturing activity dates relative to the submission date.
BIT_OR(
1 << IF(session_start_date_offset BETWEEN 0 AND 27, session_start_date_offset, NULL)
) OVER w1 AS days_seen_session_start_bits,
BIT_OR(
1 << IF(session_end_date_offset BETWEEN 0 AND 27, session_end_date_offset, NULL)
) OVER w1 AS days_seen_session_end_bits,
--
-- For all other dimensions, we use the mode of observed values in the day.
udf.mode_last(ARRAY_AGG(normalized_channel) OVER w1) AS normalized_channel,
udf.mode_last(ARRAY_AGG(android_sdk_version) OVER w1) AS android_sdk_version,
udf.mode_last(ARRAY_AGG(app_build) OVER w1) AS app_build,
udf.mode_last(ARRAY_AGG(app_channel) OVER w1) AS app_channel,
udf.mode_last(ARRAY_AGG(app_display_version) OVER w1) AS app_display_version,
udf.mode_last(ARRAY_AGG(architecture) OVER w1) AS architecture,
udf.mode_last(ARRAY_AGG(device_manufacturer) OVER w1) AS device_manufacturer,
udf.mode_last(ARRAY_AGG(device_model) OVER w1) AS device_model,
udf.mode_last(ARRAY_AGG(telemetry_sdk_build) OVER w1) AS telemetry_sdk_build,
udf.mode_last(ARRAY_AGG(locale) OVER w1) AS locale,
udf.mode_last(ARRAY_AGG(city) OVER w1) AS city,
udf.mode_last(ARRAY_AGG(country) OVER w1) AS country,
udf.mode_last(ARRAY_AGG(glean_baseline_locale) OVER w1) AS glean_baseline_locale,
udf.mode_last(ARRAY_AGG(normalized_os) OVER w1) AS os,
udf.mode_last(ARRAY_AGG(normalized_os_version) OVER w1) AS os_version,
FROM
base
WHERE
-- Reprocess all dates by running this query with --parameter=submission_date:DATE:NULL
(@submission_date IS NULL OR @submission_date = submission_date)
WINDOW
w1 AS (
PARTITION BY
sample_id,
client_id,
submission_date
ORDER BY
submission_timestamp
ROWS BETWEEN
UNBOUNDED PRECEDING
AND UNBOUNDED FOLLOWING
),
-- We must provide a modified window for ROW_NUMBER which cannot accept a frame clause.
w1_unframed AS (
PARTITION BY
sample_id,
client_id,
submission_date
ORDER BY
submission_timestamp
)
)
--
SELECT
wnd.submission_date,
cfs.fenix_first_seen_date,
cfs.fennec_first_seen_date,
wnd.* EXCEPT (_n, submission_date)
FROM
windowed AS wnd
-- We incur the expense of joining in first_seen dates here so that we can
-- identify returning users cheaply in further views on top of baseline_daily.
LEFT JOIN
clients_first_seen_v1 AS cfs
USING
(client_id)
WHERE
_n = 1

Просмотреть файл

@ -1,101 +0,0 @@
-- See query.sql for more explanation of what's going on here.
CREATE TABLE
`moz-fx-data-shared-prod.org_mozilla_firefox_derived.clients_first_seen_v1`
PARTITION BY
(fenix_first_seen_date)
AS
WITH base AS (
SELECT
client_info.client_id,
submission_timestamp,
CAST(NULL AS STRING) AS fennec_id
FROM
org_mozilla_firefox.baseline
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
CAST(NULL AS STRING) AS fennec_id
FROM
org_mozilla_firefox_beta.baseline
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
CAST(NULL AS STRING) AS fennec_id
FROM
org_mozilla_fennec_aurora.baseline
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
CAST(NULL AS STRING) AS fennec_id
FROM
org_mozilla_fenix.baseline
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
CAST(NULL AS STRING) AS fennec_id
FROM
org_mozilla_fenix_nightly.baseline
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
FROM
org_mozilla_firefox.migration
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
FROM
org_mozilla_firefox_beta.migration
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
FROM
org_mozilla_fennec_aurora.migration
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
FROM
org_mozilla_fenix.migration
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
FROM
org_mozilla_fenix_nightly.migration
),
--
per_client_id AS (
SELECT
client_id,
DATE(MIN(submission_timestamp)) AS fenix_first_seen_date,
udf.mode_last(ARRAY_AGG(LOWER(fennec_id))) AS fennec_id,
FROM
base
WHERE
submission_timestamp > '2010-01-01'
GROUP BY
client_id
)
--
SELECT
per_client_id.client_id,
per_client_id.fenix_first_seen_date,
core_clients_first_seen.first_seen_date AS fennec_first_seen_date,
FROM
per_client_id
LEFT JOIN
telemetry_derived.core_clients_first_seen_v1 AS core_clients_first_seen
ON
(fennec_id = core_clients_first_seen.client_id)

Просмотреть файл

@ -1,11 +0,0 @@
friendly_name: Clients First Seen
description: >
A brief summary of the first date each Firefox for Android (Fenix) client
sent a ping to the telemetry infrastructure. Each client will appear exactly
once in this table over all time.
owners:
- jklukas@mozilla.com
labels:
application: fenix
schedule: daily
incremental: true

Просмотреть файл

@ -1,107 +0,0 @@
/*
This clients_first_seen table is incrementally populated every day such that
each client only appears once in the entire table. For each day's query, we
scan the entire history of the table to exclude clients we've already recorded
as seen.
This table is convenient for retention calculations where we want to be able to
tell the difference between a lapsed user returning and a truly new user.
*/
-- We union over baseline pings and migration pings so that we can pull the
-- user's fennec user ID from the migration ping if they are being upgraded
-- from fennec.
WITH base AS (
SELECT
client_info.client_id,
submission_timestamp,
CAST(NULL AS STRING) AS fennec_id
FROM
org_mozilla_firefox.baseline
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
CAST(NULL AS STRING) AS fennec_id
FROM
org_mozilla_firefox_beta.baseline
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
CAST(NULL AS STRING) AS fennec_id
FROM
org_mozilla_fennec_aurora.baseline
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
CAST(NULL AS STRING) AS fennec_id
FROM
org_mozilla_fenix.baseline
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
CAST(NULL AS STRING) AS fennec_id
FROM
org_mozilla_fenix_nightly.baseline
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
FROM
org_mozilla_firefox.migration
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
FROM
org_mozilla_firefox_beta.migration
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
FROM
org_mozilla_fennec_aurora.migration
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
FROM
org_mozilla_fenix.migration
UNION ALL
SELECT
client_info.client_id,
submission_timestamp,
metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id
FROM
org_mozilla_fenix_nightly.migration
),
per_client AS (
SELECT
client_id,
DATE(MIN(submission_timestamp)) AS fenix_first_seen_date,
udf.mode_last(ARRAY_AGG(LOWER(fennec_id))) AS fennec_id,
FROM
base
WHERE
DATE(submission_timestamp) = @submission_date
GROUP BY
client_id
)
SELECT
per_client.client_id,
per_client.fenix_first_seen_date,
core_clients_first_seen.first_seen_date AS fennec_first_seen_date,
FROM
per_client
LEFT JOIN
telemetry_derived.core_clients_first_seen_v1 AS core_clients_first_seen
ON
(fennec_id = core_clients_first_seen.client_id)

Просмотреть файл

@ -1,34 +0,0 @@
CREATE TABLE
`moz-fx-data-shared-prod.org_mozilla_firefox_derived.clients_last_seen_v1`
PARTITION BY
submission_date
CLUSTER BY
normalized_channel,
sample_id
OPTIONS
(require_partition_filter = TRUE)
AS
SELECT
CAST(NULL AS DATE) AS submission_date,
client_id,
sample_id,
baseline.normalized_channel,
-- We make sure to delay * until the end so that as new columns are added
-- to clients_daily, we can add those columns in the same order to the end
-- of this schema, which may be necessary for the daily join query between
-- the two tables to validate.
(
SELECT AS STRUCT
0 AS days_seen_bits,
baseline.* EXCEPT (submission_date, client_id, sample_id, normalized_channel)
) AS baseline,
(SELECT AS STRUCT metrics.* EXCEPT (submission_date, client_id, sample_id)) AS metrics,
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_derived.baseline_daily_v1` AS baseline
LEFT JOIN
`moz-fx-data-shared-prod.org_mozilla_firefox_derived.metrics_daily_v1` AS metrics
USING
(client_id, sample_id)
WHERE
-- Output empty table and read no input rows
FALSE

Просмотреть файл

@ -1,11 +0,0 @@
friendly_name: Clients Last Seen
description: >
A rolling view of aggregated activity over the past 28 days
of each Firefox for Android (Fenix) client across all release channels,
partitioned by day
owners:
- jklukas@mozilla.com
labels:
application: fenix
schedule: daily
incremental: true

Просмотреть файл

@ -1,85 +0,0 @@
/*
This query carries information about each client over a 28-day window,
encoding various forms of usage over those 28 days as bit patterns.
For Glean, the concept of a user being active or not is determined only
from baseline pings; we also record some dimensions here based on metrics
pings, but we are careful to keep the baseline info separate from the
metrics info so that metrics pings only have the effect of providing
updated dimension values, but never have the effect of marking a user as
active for a given day.
*/
WITH cls_yesterday AS (
SELECT
* EXCEPT (submission_date)
FROM
clients_last_seen_v1 AS cls
WHERE
submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY)
),
--
cls_today AS (
SELECT
client_id,
sample_id,
baseline_today.normalized_channel,
baseline_today.client_id IS NOT NULL AS baseline_received_today,
metrics_today.client_id IS NOT NULL AS metrics_received_today,
(
SELECT AS STRUCT
-- In this raw table, we capture the history of activity over the past
-- 28 days for each usage criterion as a single 64-bit integer. The
-- rightmost bit represents whether the user was active in the current day.
CAST(baseline_today.client_id IS NOT NULL AS INT64) AS days_seen_bits,
baseline_today.* EXCEPT (submission_date, client_id, sample_id, normalized_channel)
) AS baseline,
(SELECT AS STRUCT metrics_today.* EXCEPT (submission_date, client_id, sample_id)) AS metrics,
FROM
baseline_daily_v1 AS baseline_today
FULL JOIN
metrics_daily_v1 AS metrics_today
USING
(submission_date, client_id, sample_id)
WHERE
submission_date = @submission_date
),
--
adjacent_days_combined AS (
SELECT
@submission_date AS submission_date,
client_id,
sample_id,
COALESCE(cls_today.normalized_channel, cls_yesterday.normalized_channel) AS normalized_channel,
(
SELECT AS STRUCT
IF(baseline_received_today, cls_today.baseline, cls_yesterday.baseline).* REPLACE (
udf.combine_adjacent_days_28_bits(
cls_yesterday.baseline.days_seen_bits,
cls_today.baseline.days_seen_bits
) AS days_seen_bits,
udf.combine_adjacent_days_28_bits(
cls_yesterday.baseline.days_seen_session_start_bits,
cls_today.baseline.days_seen_session_start_bits
) AS days_seen_session_start_bits,
udf.combine_adjacent_days_28_bits(
cls_yesterday.baseline.days_seen_session_end_bits,
cls_today.baseline.days_seen_session_end_bits
) AS days_seen_session_end_bits
)
) AS baseline,
IF(metrics_received_today, cls_today.metrics, cls_yesterday.metrics) AS metrics,
FROM
cls_today
FULL JOIN
cls_yesterday
USING
(client_id, sample_id)
)
SELECT
*
FROM
adjacent_days_combined
WHERE
baseline.days_seen_bits > 0

Просмотреть файл

@ -1,10 +0,0 @@
friendly_name: Metrics Daily
description: >
A daily aggregate of metrics pings from each Firefox for Android (Fenix)
client across all release channels, partitioned by day
owners:
- jklukas@mozilla.com
labels:
application: fenix
schedule: daily
incremental: true

Просмотреть файл

@ -1,90 +0,0 @@
/*
Very similar to baseline_daily, but considers only metrics pings.
*/
CREATE TEMP FUNCTION extract_fields(metrics ANY TYPE) AS (
(
SELECT AS STRUCT
metrics.submission_timestamp,
DATE(metrics.submission_timestamp) AS submission_date,
LOWER(metrics.client_info.client_id) AS client_id,
metrics.sample_id,
metrics.metrics.string.search_default_engine_name,
metrics.metrics.string.search_default_engine_code,
metrics.metrics.boolean.metrics_default_browser,
)
);
WITH base AS (
SELECT
extract_fields(metrics).*,
FROM
org_mozilla_firefox.metrics AS metrics
UNION ALL
SELECT
extract_fields(metrics).*,
FROM
org_mozilla_firefox_beta.metrics AS metrics
UNION ALL
SELECT
extract_fields(metrics).*,
FROM
org_mozilla_fennec_aurora.metrics AS metrics
UNION ALL
SELECT
extract_fields(metrics).*,
FROM
org_mozilla_fenix.metrics AS metrics
UNION ALL
SELECT
extract_fields(metrics).*,
FROM
org_mozilla_fenix_nightly.metrics AS metrics
),
--
windowed AS (
SELECT
submission_date,
client_id,
sample_id,
ROW_NUMBER() OVER w1_unframed AS _n,
--
-- For all other dimensions, we use the mode of observed values in the day.
udf.mode_last(ARRAY_AGG(search_default_engine_name) OVER w1) AS search_default_engine_name,
udf.mode_last(ARRAY_AGG(search_default_engine_code) OVER w1) AS search_default_engine_code,
udf.mode_last(ARRAY_AGG(metrics_default_browser) OVER w1) AS metrics_default_browser,
FROM
base
WHERE
-- Reprocess all dates by running this query with --parameter=submission_date:DATE:NULL
(@submission_date IS NULL OR @submission_date = submission_date)
WINDOW
w1 AS (
PARTITION BY
sample_id,
client_id,
submission_date
ORDER BY
submission_timestamp
ROWS BETWEEN
UNBOUNDED PRECEDING
AND UNBOUNDED FOLLOWING
),
-- We must provide a modified window for ROW_NUMBER which cannot accept a frame clause.
w1_unframed AS (
PARTITION BY
sample_id,
client_id,
submission_date
ORDER BY
submission_timestamp
)
)
--
SELECT
* EXCEPT (_n)
FROM
windowed
WHERE
_n = 1