ADPRODUCTS-367: sponsored_tiles_clients_daily (#3203)

* sponsored_tiles_clients_daily

Client-level code for desktop + mobile. There is an issue with the UNION ALL of the desktop and mobile tables because the structure of the experiments field differs between the devices.

* Delete .pre-commit-config.yaml

* fix: experiments

* initial commit for sponsored tiles clients daily

* Add submission_date

* Clean up dag imports

* Revert import change, add dag to main summary

* Nan's edits

Co-authored-by: Rebecca BurWei <rburwei@mozilla.com>
Co-authored-by: Perry McManis <pmcmanis@mozilla.com>
Co-authored-by: Wil Stuckey <wstuckey@mozilla.com>
Co-authored-by: Curtis Morales <cmorales@mozilla.com>
This commit is contained in:
skahmann3 2022-10-31 07:50:11 -07:00 коммит произвёл GitHub
Родитель 1a02360a5b
Коммит 5c6772a5b6
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
7 изменённых файлов: 515 добавлений и 0 удалений

Просмотреть файл

@ -838,3 +838,22 @@ bqetl_domain_meta:
tags:
- impact/tier_2
- repo/bigquery-etl
# DAG entry for the daily sponsored tiles client-level job.
bqetl_sponsored_tiles_clients_daily:
# Default arguments for the DAG's tasks.
default_args:
depends_on_past: false
# Addresses notified on failure and on retry (both flags are enabled below).
email:
- telemetry-alerts@mozilla.com
- skahmann@mozilla.com
email_on_failure: true
email_on_retry: true
end_date: null
owner: skahmann@mozilla.com
# Two retries, with a 30 minute delay between attempts.
retries: 2
retry_delay: 30m
start_date: '2022-09-13'
description: daily run of sponsored tiles related fields
# Cron expression: every day at 04:00 (presumably UTC; confirm against the Airflow deployment).
schedule_interval: 0 4 * * *
tags:
- impact/tier_3
- repo/bigquery-etl

Просмотреть файл

@ -232,6 +232,13 @@ with DAG(
execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=82800)).isoformat() }}",
)
# Marker pointing at the wait_for_telemetry_derived__clients_daily_joined__v1 sensor
# of the bqetl_sponsored_tiles_clients_daily DAG.
# timedelta(days=-1, seconds=79200) equals -2 hours, so the templated execution_date
# resolves to this DAG's execution_date plus 2 hours.
ExternalTaskMarker(
task_id="bqetl_sponsored_tiles_clients_daily__wait_for_telemetry_derived__clients_daily_joined__v1",
external_dag_id="bqetl_sponsored_tiles_clients_daily",
external_task_id="wait_for_telemetry_derived__clients_daily_joined__v1",
execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=79200)).isoformat() }}",
)
ExternalTaskMarker(
task_id="bqetl_devtools__wait_for_telemetry_derived__clients_daily_joined__v1",
external_dag_id="bqetl_devtools",

Просмотреть файл

@ -0,0 +1,86 @@
# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py
from airflow import DAG
from airflow.sensors.external_task import ExternalTaskMarker
from airflow.sensors.external_task import ExternalTaskSensor
from airflow.utils.task_group import TaskGroup
import datetime
from utils.constants import ALLOWED_STATES, FAILED_STATES
from utils.gcp import bigquery_etl_query, gke_command
docs = """
### bqetl_sponsored_tiles_clients_daily
Built from bigquery-etl repo, [`dags/bqetl_sponsored_tiles_clients_daily.py`](https://github.com/mozilla/bigquery-etl/blob/main/dags/bqetl_sponsored_tiles_clients_daily.py)
#### Description
daily run of sponsored tiles related fields
#### Owner
skahmann@mozilla.com
"""
# Default task arguments passed to the DAG constructor below.
default_args = {
"owner": "skahmann@mozilla.com",
"start_date": datetime.datetime(2022, 9, 13, 0, 0),
"end_date": None,
"email": ["telemetry-alerts@mozilla.com", "skahmann@mozilla.com"],
"depends_on_past": False,
# 1800 seconds = 30 minutes between retry attempts.
"retry_delay": datetime.timedelta(seconds=1800),
"email_on_failure": True,
"email_on_retry": True,
"retries": 2,
}
# Tags passed to the DAG constructor below.
tags = ["impact/tier_3", "repo/bigquery-etl"]
# DAG with one query task and two external-task sensors that the query task waits on.
# The schedule is the cron expression "0 4 * * *".
with DAG(
"bqetl_sponsored_tiles_clients_daily",
default_args=default_args,
schedule_interval="0 4 * * *",
doc_md=docs,
tags=tags,
) as dag:
# Query task for telemetry_derived.sponsored_tiles_clients_daily_v1.
# Both the destination partition decorator and the submission_date query parameter
# are templated to ds minus one day; date_partition_parameter is None because the
# date is supplied through `parameters` instead.
sponsored_tiles_clients_daily_v1 = bigquery_etl_query(
task_id="sponsored_tiles_clients_daily_v1",
destination_table='sponsored_tiles_clients_daily_v1${{ macros.ds_format(macros.ds_add(ds, -1), "%Y-%m-%d", "%Y%m%d") }}',
dataset_id="telemetry_derived",
project_id="moz-fx-data-shared-prod",
owner="skahmann@mozilla.com",
email=["skahmann@mozilla.com", "telemetry-alerts@mozilla.com"],
date_partition_parameter=None,
depends_on_past=False,
parameters=["submission_date:DATE:{{macros.ds_add(ds, -1)}}"],
)
# Sensor on task clients_last_seen_joined of DAG copy_deduplicate.
# execution_delta is 10800 seconds (3 hours).
wait_for_clients_last_seen_joined = ExternalTaskSensor(
task_id="wait_for_clients_last_seen_joined",
external_dag_id="copy_deduplicate",
external_task_id="clients_last_seen_joined",
execution_delta=datetime.timedelta(seconds=10800),
check_existence=True,
mode="reschedule",
allowed_states=ALLOWED_STATES,
failed_states=FAILED_STATES,
pool="DATA_ENG_EXTERNALTASKSENSOR",
)
sponsored_tiles_clients_daily_v1.set_upstream(wait_for_clients_last_seen_joined)
# Sensor on task telemetry_derived__clients_daily_joined__v1 of DAG bqetl_main_summary.
# execution_delta is 7200 seconds (2 hours).
wait_for_telemetry_derived__clients_daily_joined__v1 = ExternalTaskSensor(
task_id="wait_for_telemetry_derived__clients_daily_joined__v1",
external_dag_id="bqetl_main_summary",
external_task_id="telemetry_derived__clients_daily_joined__v1",
execution_delta=datetime.timedelta(seconds=7200),
check_existence=True,
mode="reschedule",
allowed_states=ALLOWED_STATES,
failed_states=FAILED_STATES,
pool="DATA_ENG_EXTERNALTASKSENSOR",
)
sponsored_tiles_clients_daily_v1.set_upstream(
wait_for_telemetry_derived__clients_daily_joined__v1
)

Просмотреть файл

@ -0,0 +1,7 @@
-- View in the telemetry dataset over telemetry_derived.sponsored_tiles_clients_daily_v1.
-- SELECT * passes every column of the derived table through unchanged.
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.telemetry.sponsored_tiles_clients_daily`
AS
SELECT
*
FROM
`moz-fx-data-shared-prod.telemetry_derived.sponsored_tiles_clients_daily_v1`

Просмотреть файл

@ -0,0 +1,21 @@
# Metadata for the sponsored tiles clients daily table.
friendly_name: Sponsored Tiles Clients Daily
description: |-
daily client-level aggregates of Sponsored Tiles-related fields
owners:
- skahmann@mozilla.com
labels:
incremental: true
# Airflow DAG and task that run the query.
# NOTE(review): the generated DAG task in this commit writes the partition for ds minus
# one day and passes submission_date as an explicit parameter; no matching partition
# offset is declared here. Confirm whether a scheduling option is missing.
scheduling:
dag_name: bqetl_sponsored_tiles_clients_daily
task_name: sponsored_tiles_clients_daily_v1
# Table layout: day-partitioned on submission_date, partition filter required,
# no partition expiration.
bigquery:
time_partitioning:
field: submission_date
type: day
require_partition_filter: true
expiration_days: null
# Clustered on normalized_channel, then sample_id.
clustering:
fields:
- normalized_channel
- sample_id
references: {}

Просмотреть файл

@ -0,0 +1,300 @@
------ DESKTOP SPONSORED TILES
-- newtab_unnested: one row per event contained in the newtab pings whose
-- submission_timestamp falls on @submission_date.
WITH newtab_unnested AS (
  SELECT AS STRUCT
    newtab_ping.client_info.client_id,
    DATE(newtab_ping.submission_timestamp) AS submission_date,
    newtab_ping.sample_id,
    newtab_event.name,
    newtab_event.category,
    newtab_event.extra
  FROM
    `mozdata.firefox_desktop.newtab` AS newtab_ping
  CROSS JOIN
    UNNEST(newtab_ping.events) AS newtab_event
  WHERE
    DATE(newtab_ping.submission_timestamp) = @submission_date
),
-- desktop_events_1: per-client counts of sponsored top-site clicks and impressions
-- taken from the unnested newtab events.
desktop_events_1 AS (
  SELECT
    client_id,
    submission_date,
    COUNTIF(
      category = "topsites"
      AND name = "click"
      AND mozfun.map.get_key(extra, "is_sponsored") = "true"
    ) AS sponsored_tiles_click_count,
    COUNTIF(
      category = "topsites"
      AND name = "impression"
      AND mozfun.map.get_key(extra, "is_sponsored") = "true"
    ) AS sponsored_tiles_impression_count
  FROM
    newtab_unnested
  WHERE
    submission_date = @submission_date
  GROUP BY
    client_id,
    submission_date
),
-- desktop_events_2: per-client counts of sponsored tile dismissals (BLOCK events)
-- and disables (PREF_CHANGED events) from the activity stream events table.
desktop_events_2 AS (
  SELECT
    client_id,
    DATE(submission_timestamp) AS submission_date,
    -- dismissal: a BLOCK event on TOP_SITES whose value mentions "spoc"
    COUNTIF(
      source = 'TOP_SITES'
      AND event = 'BLOCK'
      AND value LIKE '%spoc%'
    ) AS sponsored_tiles_dismissal_count,
    -- disable: the SPONSORED_TOP_SITES preference changed to a value containing "false"
    COUNTIF(
      source = 'SPONSORED_TOP_SITES'
      AND event = 'PREF_CHANGED'
      AND value LIKE '%false%'
    ) AS sponsored_tiles_disable_count
  FROM
    `mozdata.activity_stream.events`
  WHERE
    DATE(submission_timestamp) = @submission_date
  GROUP BY
    client_id,
    DATE(submission_timestamp)
),
-- desktop_clients: client attributes from clients_daily, restricted to the
-- countries and browser versions listed in the filter below.
desktop_clients AS (
  SELECT
    os,
    submission_date,
    client_id,
    browser_version_info,
    experiments,
    country,
    locale,
    normalized_channel,
    normalized_os_version,
    profile_age_in_days,
    sample_id
  FROM
    `moz-fx-data-shared-prod.telemetry.clients_daily`
  WHERE
    submission_date = @submission_date
    -- Desktop Sponsored Tiles is only available for the following clients:
    AND country IN ("AU", "BR", "CA", "DE", "ES", "FR", "GB", "IN", "IT", "JP", "MX", "US")
    -- major version 92 or later, excluding the version strings for 92.0 itself
    AND browser_version_info.major_version >= 92
    AND browser_version_info.version NOT IN ('92', '92.', '92.0', '92.0.0')
),
------ iOS SPONSORED TILES METRICS
-- ios_events: per-client counts of contile clicks, contile impressions and
-- sponsoredTiles preference switches to 'false' from the iOS unnested events.
-- NOTE: the disable alias below is spelled "disables" (plural), unlike the
-- sponsored_tiles_disable_count alias used by the desktop and Android CTEs.
ios_events AS (
  SELECT
    client_info.client_id,
    DATE(submission_timestamp) AS submission_date,
    COUNTIF(
      event_name = 'contile_click'
      AND event_category LIKE 'top_site%'
    ) AS sponsored_tiles_click_count,
    COUNTIF(
      event_name = 'contile_impression'
      AND event_category LIKE 'top_site%'
    ) AS sponsored_tiles_impression_count,
    COUNTIF(
      event_category = 'preferences'
      AND event_name = "changed"
      AND mozfun.map.get_key(event_extra, 'preference') = 'sponsoredTiles'
      AND mozfun.map.get_key(event_extra, 'changed_to') = 'false'
    ) AS sponsored_tiles_disables_count
  FROM
    `mozdata.firefox_ios.events_unnested`
  WHERE
    DATE(submission_timestamp) = @submission_date
  -- positional grouping: 1 = client_id, 2 = submission_date
  GROUP BY
    1,
    2
),
-- ios_clients: client attributes for iOS, taken from baseline pings received on
-- @submission_date and restricted to the countries/versions in the filter below.
-- Emits exactly one row per client.
ios_clients AS (
  SELECT
    DATE(submission_timestamp) AS submission_date,
    client_info.client_id,
    `mozfun.norm.browser_version_info`(client_info.app_display_version) AS browser_version_info,
    mozfun.glean.legacy_compatible_experiments(ping_info.experiments) AS experiments,
    normalized_country_code AS country,
    client_info.locale,
    normalized_channel,
    normalized_os_version,
    sample_id
  FROM
    `moz-fx-data-shared-prod.firefox_ios.baseline`
  WHERE
    DATE(submission_timestamp) = @submission_date
    -- iOS Sponsored Tiles is only available for the following clients:
    AND normalized_country_code IN UNNEST(["US"])
    AND `mozfun.norm.browser_version_info`(client_info.app_display_version).major_version >= 101
  -- The baseline table is read at ping granularity, so a client with more than one
  -- qualifying ping on the day would otherwise produce several rows and multiply
  -- its event counts in the downstream join. Keep the most recent ping only.
  QUALIFY
    ROW_NUMBER() OVER (
      PARTITION BY
        client_info.client_id
      ORDER BY
        submission_timestamp DESC
    ) = 1
),
-- android_events: per-client counts of contile clicks, contile impressions and
-- contile preference toggles to 'false' from the Fenix unnested events.
android_events AS (
  SELECT
    client_info.client_id,
    DATE(submission_timestamp) AS submission_date,
    COUNTIF(
      event_name = 'contile_click'
      AND event_category = 'top_sites'
    ) AS sponsored_tiles_click_count,
    COUNTIF(
      event_name = 'contile_impression'
      AND event_category = 'top_sites'
    ) AS sponsored_tiles_impression_count,
    COUNTIF(
      event_category = 'customize_home'
      AND event_name = "preference_toggled"
      AND mozfun.map.get_key(event_extra, 'preference_key') = 'contile'
      AND mozfun.map.get_key(event_extra, 'enabled') = 'false'
    ) AS sponsored_tiles_disable_count
  FROM
    `mozdata.fenix.events_unnested`
  WHERE
    DATE(submission_timestamp) = @submission_date
  -- positional grouping: 1 = client_id, 2 = submission_date
  GROUP BY
    1,
    2
),
-- android_metrics: the customize_home_contile boolean reported in Fenix metrics
-- pings on @submission_date, exposed as sponsored_tiles_enabled_at_startup.
-- Emits at most one row per client.
android_metrics AS (
  SELECT
    client_info.client_id,
    DATE(submission_timestamp) AS submission_date,
    metrics.boolean.customize_home_contile AS sponsored_tiles_enabled_at_startup
  FROM
    `mozdata.fenix.metrics`
  WHERE
    metrics.boolean.customize_home_contile IS NOT NULL
    AND DATE(submission_timestamp) = @submission_date
  -- This CTE is LEFT JOINed on (submission_date, client_id); without de-duplication a
  -- client with more than one metrics ping on the day would duplicate its output row.
  -- Keep the value from the most recent ping.
  QUALIFY
    ROW_NUMBER() OVER (
      PARTITION BY
        client_info.client_id
      ORDER BY
        submission_timestamp DESC
    ) = 1
),
-- android_clients: client attributes for Android, taken from Fenix baseline pings
-- received on @submission_date and restricted to the countries/versions in the
-- filter below. Emits exactly one row per client.
-- profile_age_in_days is not selected here; the final SELECT takes it from
-- fenix.clients_last_seen_joined.
android_clients AS (
  SELECT
    DATE(submission_timestamp) AS submission_date,
    client_info.client_id,
    `mozfun.norm.browser_version_info`(client_info.app_display_version) AS browser_version_info,
    mozfun.glean.legacy_compatible_experiments(ping_info.experiments) AS experiments,
    normalized_country_code AS country,
    client_info.locale,
    normalized_channel,
    normalized_os_version,
    sample_id
  FROM
    `moz-fx-data-shared-prod.fenix.baseline`
  WHERE
    DATE(submission_timestamp) = @submission_date
    -- Android Sponsored Tiles is only available for the following clients:
    AND normalized_country_code IN UNNEST(["US"])
    AND `mozfun.norm.browser_version_info`(client_info.app_display_version).major_version >= 100
  -- The baseline table is read at ping granularity, so a client with more than one
  -- qualifying ping on the day would otherwise produce several rows and multiply
  -- its event counts in the downstream join. Keep the most recent ping only.
  QUALIFY
    ROW_NUMBER() OVER (
      PARTITION BY
        client_info.client_id
      ORDER BY
        submission_timestamp DESC
    ) = 1
)
-- Merge the per-client measures with the per-client attributes, one branch per platform.
-- UNION ALL matches columns by position, so every branch lists the same 17 columns
-- in the same order; measures a platform does not report are emitted as NULL.
-- desktop
SELECT
  @submission_date AS submission_date,
  "desktop" AS device,
  os,
  client_id,
  browser_version_info,
  experiments,
  country,
  locale,
  normalized_channel,
  normalized_os_version,
  profile_age_in_days,
  sample_id,
  sponsored_tiles_click_count,
  sponsored_tiles_impression_count,
  sponsored_tiles_dismissal_count,
  sponsored_tiles_disable_count,
  NULL AS sponsored_tiles_enabled_at_startup
FROM
  desktop_events_1
-- FULL JOIN keeps clients that appear in only one of the two desktop event sources.
FULL JOIN
  desktop_events_2
USING
  (submission_date, client_id)
-- INNER JOIN restricts the result to clients passing the desktop_clients filter.
INNER JOIN
  desktop_clients
USING
  (submission_date, client_id)
UNION ALL
-- ios
SELECT
  @submission_date AS submission_date,
  "mobile" AS device,
  "iOS" AS os,
  client_id,
  browser_version_info,
  experiments,
  country,
  locale,
  normalized_channel,
  normalized_os_version,
  profile_age_in_days,
  sample_id,
  sponsored_tiles_click_count,
  sponsored_tiles_impression_count,
  NULL AS sponsored_tiles_dismissal_count,
  -- ios_events computes this measure under the plural alias sponsored_tiles_disables_count;
  -- it was previously discarded and replaced by NULL.
  sponsored_tiles_disables_count AS sponsored_tiles_disable_count,
  NULL AS sponsored_tiles_enabled_at_startup
FROM
  ios_events
INNER JOIN
  ios_clients
USING
  (submission_date, client_id)
-- profile age comes from clients_last_seen_joined; NULL when the client has no row there.
LEFT JOIN
  (
    SELECT
      submission_date,
      client_id,
      days_since_created_profile AS profile_age_in_days
    FROM
      `moz-fx-data-shared-prod.firefox_ios.clients_last_seen_joined`
    WHERE
      submission_date = @submission_date
  ) profile_age_data
USING
  (submission_date, client_id)
UNION ALL
-- android
SELECT
  @submission_date AS submission_date,
  "mobile" AS device,
  "Android" AS os,
  client_id,
  browser_version_info,
  experiments,
  country,
  locale,
  normalized_channel,
  normalized_os_version,
  profile_age_in_days,
  sample_id,
  sponsored_tiles_click_count,
  sponsored_tiles_impression_count,
  NULL AS sponsored_tiles_dismissal_count,
  -- computed by android_events; it was previously discarded and replaced by NULL.
  sponsored_tiles_disable_count,
  sponsored_tiles_enabled_at_startup
FROM
  android_events
INNER JOIN
  android_clients
USING
  (submission_date, client_id)
-- profile age comes from clients_last_seen_joined; NULL when the client has no row there.
LEFT JOIN
  (
    SELECT
      submission_date,
      client_id,
      days_since_created_profile AS profile_age_in_days
    FROM
      `moz-fx-data-shared-prod.fenix.clients_last_seen_joined`
    WHERE
      submission_date = @submission_date
  ) profile_age_data
USING
  (submission_date, client_id)
-- startup preference state; NULL when the client reported no value on the day.
LEFT JOIN
  android_metrics
USING
  (submission_date, client_id)

Просмотреть файл

@ -0,0 +1,75 @@
# Table schema. Column order matches the SELECT list of the query that fills this table.
fields:
- mode: NULLABLE
name: submission_date
type: DATE
# "desktop" or "mobile" (literal values set by the query).
- mode: NULLABLE
name: device
type: STRING
# For mobile rows the query sets the literals "iOS" or "Android".
- mode: NULLABLE
name: os
type: STRING
- mode: NULLABLE
name: client_id
type: STRING
# Parsed browser version record.
- fields:
- mode: NULLABLE
name: version
type: STRING
- mode: NULLABLE
name: major_version
type: NUMERIC
- mode: NULLABLE
name: minor_version
type: NUMERIC
- mode: NULLABLE
name: patch_revision
type: NUMERIC
- mode: NULLABLE
name: is_major_release
type: BOOLEAN
mode: NULLABLE
name: browser_version_info
type: RECORD
# Repeated key/value records describing experiments.
- fields:
- mode: NULLABLE
name: key
type: STRING
- mode: NULLABLE
name: value
type: STRING
mode: REPEATED
name: experiments
type: RECORD
- mode: NULLABLE
name: country
type: STRING
- mode: NULLABLE
name: locale
type: STRING
- mode: NULLABLE
name: normalized_channel
type: STRING
- mode: NULLABLE
name: normalized_os_version
type: STRING
- mode: NULLABLE
name: profile_age_in_days
type: INTEGER
- mode: NULLABLE
name: sample_id
type: INTEGER
# Per-client event counts.
- mode: NULLABLE
name: sponsored_tiles_click_count
type: INTEGER
- mode: NULLABLE
name: sponsored_tiles_impression_count
type: INTEGER
- mode: NULLABLE
name: sponsored_tiles_dismissal_count
type: INTEGER
- mode: NULLABLE
name: sponsored_tiles_disable_count
type: INTEGER
# The query fills this only in its Android branch; the other branches emit NULL.
- mode: NULLABLE
name: sponsored_tiles_enabled_at_startup
type: BOOLEAN