Bug 1643683 Add Fenix DAU query for AMO stats (#1056)

* Bug 1643683 Add Fenix DAU query for AMO stats

Co-authored-by: Anna Scholtz <anna@scholtzan.net>
This commit is contained in:
Jeff Klukas 2020-07-02 15:18:48 -04:00 коммит произвёл GitHub
Родитель e8efd15766
Коммит 9fb4c4c90d
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
6 изменённых файлов: 181 добавлений и 3 удалений

Просмотреть файл

@ -56,6 +56,18 @@ with DAG(
dag=dag,
)
# Task for the Fenix per-client addons query: writes to the
# `fenix_addons_by_client_v1` table in dataset `amo_prod` of project
# `moz-fx-data-shared-prod`.
amo_prod__fenix_addons_by_client__v1 = bigquery_etl_query(
task_id="amo_prod__fenix_addons_by_client__v1",
destination_table="fenix_addons_by_client_v1",
dataset_id="amo_prod",
project_id="moz-fx-data-shared-prod",
owner="jklukas@mozilla.com",
email=["jklukas@mozilla.com"],
# Presumably the name of the query parameter that receives the run date
# (the query filters on @submission_date) -- confirm against bigquery_etl_query.
date_partition_parameter="submission_date",
# Each run is independent of the previous run's outcome.
depends_on_past=False,
dag=dag,
)
amo_prod__amo_stats_dau__v2 = bigquery_etl_query(
task_id="amo_prod__amo_stats_dau__v2",
destination_table="amo_stats_dau_v2",
@ -96,8 +108,23 @@ with DAG(
wait_for_telemetry_derived__clients_daily__v6
)
# Sensor on task `copy_deduplicate_all` of the external DAG `copy_deduplicate`;
# the Fenix per-client task below is made to depend on it.
wait_for_copy_deduplicate_copy_deduplicate_all = ExternalTaskSensor(
task_id="wait_for_copy_deduplicate_copy_deduplicate_all",
external_dag_id="copy_deduplicate",
external_task_id="copy_deduplicate_all",
# NOTE(review): presumably fails fast if the external DAG/task does not
# exist rather than waiting forever -- confirm against the Airflow version in use.
check_existence=True,
mode="reschedule",
dag=dag,
)
# The Fenix per-client query runs only after the sensor above succeeds.
amo_prod__fenix_addons_by_client__v1.set_upstream(
wait_for_copy_deduplicate_copy_deduplicate_all
)
# The DAU stats task needs both per-client inputs (desktop and Fenix).
amo_prod__amo_stats_dau__v2.set_upstream(amo_prod__desktop_addons_by_client__v1)
amo_prod__amo_stats_dau__v2.set_upstream(amo_prod__fenix_addons_by_client__v1)
wait_for_main_summary_copy_deduplicate_main_ping = ExternalTaskSensor(
task_id="wait_for_main_summary_copy_deduplicate_main_ping",
external_dag_id="main_summary",

Просмотреть файл

@ -26,6 +26,13 @@ SELECT
FROM
UNNEST(dau_by_app_version)
) AS dau_by_app_version,
ARRAY(
SELECT AS STRUCT
IFNULL(key, 'Unknown') AS key,
value
FROM
UNNEST(dau_by_fenix_build)
) AS dau_by_fenix_build,
ARRAY(
SELECT AS STRUCT
IFNULL(key, 'Unknown') AS key,

Просмотреть файл

@ -26,6 +26,13 @@ SELECT
FROM
UNNEST(dau_by_app_version)
) AS dau_by_app_version,
ARRAY(
SELECT AS STRUCT
IFNULL(key, 'Unknown') AS key,
value
FROM
UNNEST(dau_by_fenix_build)
) AS dau_by_fenix_build,
ARRAY(
SELECT AS STRUCT
IFNULL(key, 'Unknown') AS key,

Просмотреть файл

@ -8,13 +8,26 @@ and provides all the information needed to populate the various
*/
--
WITH unnested AS (
WITH unioned AS (
SELECT
dd.* EXCEPT (addons),
*,
'Desktop' AS app
FROM
amo_prod.desktop_addons_by_client_v1
UNION ALL
SELECT
*,
'Fenix' AS app
FROM
amo_prod.fenix_addons_by_client_v1
),
unnested AS (
SELECT
unioned.* EXCEPT (addons),
addon.id AS addon_id,
addon.version AS addon_version,
FROM
amo_prod.desktop_addons_by_client_v1 AS dd
unioned
CROSS JOIN
UNNEST(addons) AS addon
WHERE
@ -58,6 +71,33 @@ per_app_version AS (
COUNT(DISTINCT client_id) AS value
FROM
unnested
WHERE
app = 'Desktop'
GROUP BY
submission_date,
addon_id,
key
)
GROUP BY
submission_date,
addon_id
),
per_fenix_build AS (
SELECT
submission_date,
addon_id,
array_agg(STRUCT(key, value) ORDER BY value DESC) AS dau_by_fenix_build
FROM
(
SELECT
submission_date,
addon_id,
app_version AS key,
COUNT(DISTINCT client_id) AS value
FROM
unnested
WHERE
app = 'Fenix'
GROUP BY
submission_date,
addon_id,
@ -161,6 +201,10 @@ JOIN
per_app_version
USING
(submission_date, addon_id)
JOIN
per_fenix_build
USING
(submission_date, addon_id)
JOIN
per_locale
USING

Просмотреть файл

@ -0,0 +1,16 @@
# Metadata for the Fenix addons-by-client query: ownership, labels, and the
# DAG/task this query is scheduled in and depends on.
friendly_name: Fenix addons by client
description: >-
  Clients_daily-like table on top of the various Firefox for Android channels
  that records only the dimensions and addon info necessary to power the daily
  amo_stats_dau_v2 query.
owners:
  - jklukas@mozilla.com
labels:
  application: amo
  incremental: true
  schedule: daily
scheduling:
  dag_name: bqetl_amo_stats
  # Upstream task (in another DAG) that must finish before this query runs.
  depends_on:
    - task_id: copy_deduplicate_all
      dag_name: copy_deduplicate

Просмотреть файл

@ -0,0 +1,77 @@
-- Projects one metrics-ping row `m` down to the handful of fields this query
-- needs, returned as a STRUCT so callers can expand it with `.*`. Taking
-- ANY TYPE lets the same projection be applied to each per-channel metrics
-- table that is unioned below.
CREATE TEMP FUNCTION get_fields(m ANY TYPE) AS (
  STRUCT(
    m.submission_timestamp AS submission_timestamp,
    m.client_info.client_id AS client_id,
    m.sample_id AS sample_id,
    m.metrics.string_list.addons_enabled_addons AS addons_enabled_addons,
    m.client_info.app_build AS app_build,
    m.normalized_country_code AS normalized_country_code,
    m.client_info.locale AS locale,
    m.normalized_os AS normalized_os
  )
);
-- Produces one row per (submission_date, sample_id, client_id) for the day
-- given by @submission_date, combining the metrics tables of all listed
-- Firefox for Android datasets. Each row carries the client's de-duplicated
-- list of enabled addons plus a single representative value for build,
-- country, locale and OS.
--
-- Same projection applied to every per-channel metrics table.
WITH unioned AS (
  SELECT
    get_fields(m1).*
  FROM
    org_mozilla_fenix.metrics AS m1
  UNION ALL
  SELECT
    get_fields(m2).*
  FROM
    org_mozilla_fenix_nightly.metrics AS m2
  UNION ALL
  SELECT
    get_fields(m3).*
  FROM
    org_mozilla_fennec_aurora.metrics AS m3
  UNION ALL
  SELECT
    get_fields(m4).*
  FROM
    org_mozilla_firefox.metrics AS m4
  UNION ALL
  SELECT
    get_fields(m5).*
  FROM
    org_mozilla_firefox_beta.metrics AS m5
),
-- Collapse all of a client's pings for the day into a single row.
per_client AS (
  SELECT
    DATE(submission_timestamp) AS submission_date,
    client_id,
    sample_id,
    -- Concatenate the addon lists of every ping; duplicates are removed in
    -- the final SELECT.
    array_concat_agg(addons_enabled_addons) AS addons,
    -- The arrays handed to udf.mode_last are ordered by submission_timestamp
    -- so that the input is deterministic: ARRAY_AGG without ORDER BY makes no
    -- guarantee about element order. udf.mode_last is defined outside this
    -- file; presumably it breaks ties in favor of the last occurrence, which
    -- is what makes the ordering matter -- confirm against its definition.
    --
    -- app_build is the Fenix equivalent of app_version; we use app_version as the name for
    -- compatibility with the desktop table schema.
    udf.mode_last(array_agg(app_build ORDER BY submission_timestamp)) AS app_version,
    udf.mode_last(array_agg(normalized_country_code ORDER BY submission_timestamp)) AS country,
    udf.mode_last(array_agg(locale ORDER BY submission_timestamp)) AS locale,
    udf.mode_last(array_agg(normalized_os ORDER BY submission_timestamp)) AS app_os,
  FROM
    unioned
  WHERE
    DATE(submission_timestamp) = @submission_date
    AND client_id IS NOT NULL
  GROUP BY
    submission_date,
    sample_id,
    client_id
)
SELECT
  * EXCEPT (addons),
  -- Rebuild `addons` as an array of (addon, version) structs with one entry
  -- per distinct addon string; GROUP BY performs the de-duplication.
  -- NOTE(review): the first struct field is named `addon`; confirm that any
  -- consumer combining this table with others matches fields by position.
  ARRAY(
    SELECT AS STRUCT
      addon,
      -- As of 2020-07-01, the metrics ping from Fenix contains no data about
      -- the version of installed addons, so we inject null and replace with
      -- an appropriate placeholder value when we get to the app-facing view.
      CAST(NULL AS STRING) AS version,
    FROM
      UNNEST(addons) AS addon
    GROUP BY
      addon
  ) AS addons
FROM
  per_client