DENG-1352 - Migrate contextual services ETL to desktop glean pings (#4474)

This commit is contained in:
Daniel Thorn 2023-12-07 16:12:07 -08:00 коммит произвёл GitHub
Родитель e8f3f759d5
Коммит 25c18112b5
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
16 изменённых файлов: 511 добавлений и 0 удалений

Просмотреть файл

@ -1,4 +1,20 @@
WITH combined AS (
-- Top Sites clicks and impressions read from the Glean `top_sites` ping.
-- This arm is combined with others via UNION ALL, so the column order below
-- must stay aligned with the remaining arms.
SELECT
  metrics.uuid.top_sites_context_id AS context_id,
  DATE(submission_timestamp) AS submission_date,
  'desktop' AS form_factor,
  normalized_country_code AS country,
  -- Lower-case the advertiser, then rewrite any occurrence of "o=45:a" to "yandex".
  REPLACE(LOWER(metrics.string.top_sites_advertiser), "o=45:a", "yandex") AS advertiser,
  -- Keep only the first space-separated token of the user-agent OS string.
  SPLIT(metadata.user_agent.os, ' ')[SAFE_OFFSET(0)] AS normalized_os,
  client_info.app_channel AS release_channel,
  metrics.quantity.top_sites_position AS position,
  -- Rows without a reporting URL are labelled 'remote settings'; all others 'contile'.
  CASE
    WHEN metrics.url.top_sites_reporting_url IS NULL
      THEN 'remote settings'
    ELSE 'contile'
  END AS provider,
  -- The WHERE clause admits only two ping types, so anything that is not a
  -- click is an impression.
  CASE
    WHEN metrics.string.top_sites_ping_type = "topsites-click"
      THEN "click"
    ELSE "impression"
  END AS event_type,
FROM
  `moz-fx-data-shared-prod.firefox_desktop.top_sites`
WHERE
  metrics.string.top_sites_ping_type IN ("topsites-click", "topsites-impression")
UNION ALL
SELECT
context_id,
DATE(submission_timestamp) AS submission_date,
@ -12,6 +28,10 @@ WITH combined AS (
'impression' AS event_type,
FROM
`moz-fx-data-shared-prod.contextual_services.topsites_impression`
WHERE
-- For firefox 116+ use firefox_desktop.top_sites instead
-- https://bugzilla.mozilla.org/show_bug.cgi?id=1836283
SAFE_CAST(metadata.user_agent.version AS INT64) < 116
UNION ALL
SELECT
context_id,
@ -26,6 +46,10 @@ WITH combined AS (
'click' AS event_type,
FROM
`moz-fx-data-shared-prod.contextual_services.topsites_click`
WHERE
-- For firefox 116+ use firefox_desktop.top_sites instead
-- https://bugzilla.mozilla.org/show_bug.cgi?id=1836283
SAFE_CAST(metadata.user_agent.version AS INT64) < 116
UNION ALL
SELECT
metrics.uuid.top_sites_context_id AS context_id,

Просмотреть файл

@ -1,4 +1,33 @@
WITH combined AS (
-- Quick Suggest clicks and impressions read from the Glean `quick_suggest` ping.
-- This arm is combined with others via UNION ALL, so the column order below
-- must stay aligned with the remaining arms.
SELECT
  metrics.uuid.quick_suggest_context_id AS context_id,
  DATE(submission_timestamp) AS submission_date,
  'desktop' AS form_factor,
  normalized_country_code AS country,
  LOWER(metrics.string.quick_suggest_advertiser) AS advertiser,
  -- Keep only the first space-separated token of the user-agent OS string.
  SPLIT(metadata.user_agent.os, ' ')[SAFE_OFFSET(0)] AS normalized_os,
  client_info.app_channel AS release_channel,
  metrics.quantity.quick_suggest_position AS position,
  -- A NULL or empty request id is labelled 'remote settings'; any other value 'merino'.
  CASE
    WHEN NULLIF(metrics.string.quick_suggest_request_id, "") IS NULL
      THEN 'remote settings'
    ELSE 'merino'
  END AS provider,
  metrics.string.quick_suggest_match_type AS match_type,
  -- A missing flag is reported as FALSE rather than NULL.
  COALESCE(metrics.boolean.quick_suggest_improve_suggest_experience, FALSE) AS suggest_data_sharing_enabled,
  -- The WHERE clause admits only two ping types, so anything that is not a
  -- click is an impression.
  CASE
    WHEN metrics.string.quick_suggest_ping_type = "quicksuggest-click"
      THEN "click"
    ELSE "impression"
  END AS event_type,
FROM
  `moz-fx-data-shared-prod.firefox_desktop.quick_suggest`
WHERE
  metrics.string.quick_suggest_ping_type IN ("quicksuggest-click", "quicksuggest-impression")
UNION ALL
SELECT
context_id,
DATE(submission_timestamp) AS submission_date,
@ -20,6 +49,10 @@ WITH combined AS (
'impression' AS event_type,
FROM
`moz-fx-data-shared-prod.contextual_services.quicksuggest_impression`
WHERE
-- For firefox 116+ use firefox_desktop.quick_suggest instead
-- https://bugzilla.mozilla.org/show_bug.cgi?id=1836283
SAFE_CAST(metadata.user_agent.version AS INT64) < 116
UNION ALL
SELECT
context_id,
@ -42,6 +75,10 @@ WITH combined AS (
'click' AS event_type,
FROM
`moz-fx-data-shared-prod.contextual_services.quicksuggest_click`
WHERE
-- For firefox 116+ use firefox_desktop.quick_suggest instead
-- https://bugzilla.mozilla.org/show_bug.cgi?id=1836283
SAFE_CAST(metadata.user_agent.version AS INT64) < 116
),
with_event_count AS (
SELECT

Просмотреть файл

@ -1,4 +1,32 @@
WITH combined AS (
-- Quick Suggest rows from the Glean `quick_suggest` ping, tagged with
-- source = 'suggest'. This arm is combined with others via UNION ALL, so the
-- column order below must stay aligned with the remaining arms.
SELECT
  metrics.uuid.quick_suggest_context_id AS context_id,
  DATE(submission_timestamp) AS submission_date,
  'suggest' AS source,
  -- The WHERE clause admits only two ping types, so anything that is not a
  -- click is an impression.
  CASE
    WHEN metrics.string.quick_suggest_ping_type = "quicksuggest-click"
      THEN "click"
    ELSE "impression"
  END AS event_type,
  'desktop' AS form_factor,
  normalized_country_code AS country,
  metadata.geo.subdivision1 AS subdivision1,
  metrics.string.quick_suggest_advertiser AS advertiser,
  client_info.app_channel AS release_channel,
  metrics.quantity.quick_suggest_position AS position,
  -- A NULL or empty request id is labelled 'remote settings'; any other value 'merino'.
  CASE
    WHEN NULLIF(metrics.string.quick_suggest_request_id, "") IS NULL
      THEN 'remote settings'
    ELSE 'merino'
  END AS provider,
  metrics.string.quick_suggest_match_type AS match_type,
  -- Keep only the first space-separated token of the user-agent OS string.
  SPLIT(metadata.user_agent.os, ' ')[SAFE_OFFSET(0)] AS normalized_os,
  -- Passed through unchanged; a NULL flag stays NULL in this query.
  metrics.boolean.quick_suggest_improve_suggest_experience AS suggest_data_sharing_enabled,
FROM
  firefox_desktop.quick_suggest
WHERE
  metrics.string.quick_suggest_ping_type IN ("quicksuggest-click", "quicksuggest-impression")
UNION ALL
SELECT
context_id,
DATE(submission_timestamp) AS submission_date,
@ -25,6 +53,10 @@ WITH combined AS (
) AS suggest_data_sharing_enabled,
FROM
contextual_services.quicksuggest_impression
WHERE
-- For firefox 116+ use firefox_desktop.quick_suggest instead
-- https://bugzilla.mozilla.org/show_bug.cgi?id=1836283
SAFE_CAST(metadata.user_agent.version AS INT64) < 116
UNION ALL
SELECT
context_id,
@ -52,6 +84,36 @@ WITH combined AS (
) AS suggest_data_sharing_enabled,
FROM
contextual_services.quicksuggest_click
WHERE
-- For firefox 116+ use firefox_desktop.quick_suggest instead
-- https://bugzilla.mozilla.org/show_bug.cgi?id=1836283
SAFE_CAST(metadata.user_agent.version AS INT64) < 116
UNION ALL
-- Top Sites rows from the Glean `top_sites` ping, tagged with
-- source = 'topsites'. This arm is combined with others via UNION ALL, so the
-- column order below must stay aligned with the remaining arms.
SELECT
  metrics.uuid.top_sites_context_id AS context_id,
  DATE(submission_timestamp) AS submission_date,
  'topsites' AS source,
  -- The WHERE clause admits only two ping types, so anything that is not a
  -- click is an impression.
  CASE
    WHEN metrics.string.top_sites_ping_type = "topsites-click"
      THEN "click"
    ELSE "impression"
  END AS event_type,
  'desktop' AS form_factor,
  normalized_country_code AS country,
  metadata.geo.subdivision1 AS subdivision1,
  metrics.string.top_sites_advertiser AS advertiser,
  client_info.app_channel AS release_channel,
  metrics.quantity.top_sites_position AS position,
  -- Rows without a reporting URL are labelled 'remote settings'; all others 'contile'.
  CASE
    WHEN metrics.url.top_sites_reporting_url IS NULL
      THEN 'remote settings'
    ELSE 'contile'
  END AS provider,
  -- `match_type` is only available for `quicksuggest_*` tables
  NULL AS match_type,
  -- Keep only the first space-separated token of the user-agent OS string.
  SPLIT(metadata.user_agent.os, ' ')[SAFE_OFFSET(0)] AS normalized_os,
  -- 'suggest_data_sharing_enabled' is only available for `quicksuggest_*` tables
  NULL AS suggest_data_sharing_enabled,
FROM
  firefox_desktop.top_sites
WHERE
  metrics.string.top_sites_ping_type IN ("topsites-click", "topsites-impression")
UNION ALL
SELECT
context_id,
@ -76,6 +138,10 @@ WITH combined AS (
NULL AS suggest_data_sharing_enabled,
FROM
contextual_services.topsites_impression
WHERE
-- For firefox 116+ use firefox_desktop.top_sites instead
-- https://bugzilla.mozilla.org/show_bug.cgi?id=1836283
SAFE_CAST(metadata.user_agent.version AS INT64) < 116
UNION ALL
SELECT
context_id,
@ -100,6 +166,10 @@ WITH combined AS (
NULL AS suggest_data_sharing_enabled,
FROM
contextual_services.topsites_click
WHERE
-- For firefox 116+ use firefox_desktop.top_sites instead
-- https://bugzilla.mozilla.org/show_bug.cgi?id=1836283
SAFE_CAST(metadata.user_agent.version AS INT64) < 116
UNION ALL
SELECT
metrics.uuid.top_sites_context_id AS context_id,

Просмотреть файл

@ -9,6 +9,25 @@ WITH parsed AS (
AND error_type = 'SendRequest'
),
ping_data AS (
-- Distinct Quick Suggest interactions from the Glean `quick_suggest` ping for
-- the @submission_date query parameter. Only rows that carry a reporting URL
-- and whose advertiser is not "wikipedia" are kept.
SELECT DISTINCT
  metrics.uuid.quick_suggest_context_id AS context_id,
  -- The WHERE clause admits only two ping types, so anything that is not a
  -- click is an impression.
  CASE
    WHEN metrics.string.quick_suggest_ping_type = "quicksuggest-click"
      THEN "click"
    ELSE "impression"
  END AS interaction_type,
  metadata.geo.country AS country_code,
  metadata.geo.subdivision1 AS region_code,
  metadata.user_agent.os AS os_family,
  metadata.user_agent.version AS product_version,
FROM
  `moz-fx-data-shared-prod.firefox_desktop.quick_suggest`
WHERE
  DATE(submission_timestamp) = @submission_date
  -- A NULL advertiser also fails this comparison and is therefore excluded.
  AND metrics.string.quick_suggest_advertiser != "wikipedia"
  AND metrics.url.quick_suggest_reporting_url IS NOT NULL
  AND metrics.string.quick_suggest_ping_type IN ("quicksuggest-click", "quicksuggest-impression")
UNION ALL
SELECT DISTINCT
context_id,
"impression" AS interaction_type,
@ -22,6 +41,9 @@ ping_data AS (
DATE(submission_timestamp) = @submission_date
AND advertiser != "wikipedia"
AND reporting_url IS NOT NULL
-- For firefox 116+ use firefox_desktop.quick_suggest instead
-- https://bugzilla.mozilla.org/show_bug.cgi?id=1836283
AND SAFE_CAST(metadata.user_agent.version AS INT64) < 116
UNION ALL
SELECT DISTINCT
context_id,
@ -36,6 +58,9 @@ ping_data AS (
DATE(submission_timestamp) = @submission_date
AND advertiser != "wikipedia"
AND reporting_url IS NOT NULL
-- For firefox 116+ use firefox_desktop.quick_suggest instead
-- https://bugzilla.mozilla.org/show_bug.cgi?id=1836283
AND SAFE_CAST(metadata.user_agent.version AS INT64) < 116
),
quicksuggest AS (
SELECT

Просмотреть файл

@ -6,6 +6,7 @@
subdivision1: AZ
user_agent:
os: "Windows 11"
version: "116.0.0"
normalized_country_code: US
advertiser: ad1
release_channel: release

Просмотреть файл

@ -6,6 +6,7 @@
subdivision1: AZ
user_agent:
os: "Windows 11"
version: "116.0.0"
normalized_country_code: US
advertiser: ad1
release_channel: release

Просмотреть файл

@ -6,6 +6,7 @@
subdivision1: AZ
user_agent:
os: "Windows 11"
version: "116.0.0"
normalized_country_code: US
advertiser: ad1
release_channel: release

Просмотреть файл

@ -7,6 +7,7 @@
subdivision1: AZ
user_agent:
os: "Windows 11"
version: "116.0.0"
normalized_country_code: US
advertiser: ad1
release_channel: release

Просмотреть файл

@ -0,0 +1,29 @@
---
# Two rows shaped like the Glean `quick_suggest` ping: one click and one
# impression that differ only in `quick_suggest_ping_type`.
# NOTE(review): presumably test input for a query reading
# firefox_desktop.quick_suggest; confirm against the file's location.

# Base row: a Quick Suggest click for context id "a".
- &click
  submission_timestamp: "2030-01-01 01:00:00"
  client_info:
    app_channel: release
  metadata:
    geo:
      subdivision1: AZ
    user_agent:
      os: "Windows 11"
  metrics: &click_metrics
    boolean:
      quick_suggest_improve_suggest_experience: true
    quantity:
      quick_suggest_position: 1
    string: &click_strings
      quick_suggest_advertiser: ad1
      quick_suggest_match_type: firefox-suggest
      quick_suggest_ping_type: quicksuggest-click
      quick_suggest_request_id: HASH123
    uuid:
      quick_suggest_context_id: a
  normalized_country_code: US
# Same row as an impression. YAML merge keys are shallow, so `metrics` and
# `string` are re-declared and merged level by level to override one leaf.
- <<: *click
  metrics:
    <<: *click_metrics
    string:
      <<: *click_strings
      quick_suggest_ping_type: quicksuggest-impression

Просмотреть файл

@ -0,0 +1,40 @@
---
# Rows shaped like the Glean `top_sites` ping: one click followed by four
# impressions (two identical, one for context id "b", one for advertiser "ad2").
# NOTE(review): presumably test input for a query reading
# firefox_desktop.top_sites; confirm against the file's location.

# Base row: a Top Sites click for context id "a", advertiser "ad1".
- &click
  submission_timestamp: "2030-01-01 01:00:00"
  client_info:
    app_channel: release
  metadata:
    geo:
      subdivision1: AZ
    user_agent:
      os: "Windows 11"
  metrics: &click_metrics
    quantity:
      top_sites_position: 1
    string:
      top_sites_advertiser: ad1
      top_sites_ping_type: topsites-click
    url:
      top_sites_reporting_url: "http://foo.com"
    uuid:
      top_sites_context_id: a
  normalized_country_code: US
# Base impression. YAML merge keys are shallow, so the whole `string` mapping
# is restated to change the ping type.
- &impression
  <<: *click
  metrics: &impression_metrics
    <<: *click_metrics
    string:
      top_sites_advertiser: ad1
      top_sites_ping_type: topsites-impression
# Exact duplicate of the base impression.
- <<: *impression
# Impression from a different context id.
- <<: *impression
  metrics:
    <<: *impression_metrics
    uuid:
      top_sites_context_id: b
# Impression for a different advertiser.
- <<: *impression
  metrics:
    <<: *impression_metrics
    string:
      top_sites_advertiser: ad2
      top_sites_ping_type: topsites-impression

Просмотреть файл

@ -7,6 +7,7 @@
subdivision1: AZ
user_agent:
os: "Windows 11"
version: "116.0.0"
normalized_country_code: US
advertiser: ad1
release_channel: release

Просмотреть файл

@ -7,6 +7,7 @@
subdivision1: AZ
user_agent:
os: "Windows 11"
version: "116.0.0"
normalized_country_code: US
advertiser: ad1
release_channel: release

Просмотреть файл

@ -7,6 +7,7 @@
subdivision1: AZ
user_agent:
os: "Windows 11"
version: "116.0.0"
normalized_country_code: US
advertiser: ad1
release_channel: release

Просмотреть файл

@ -7,6 +7,7 @@
subdivision1: AZ
user_agent:
os: "Windows 11"
version: "116.0.0"
normalized_country_code: US
advertiser: ad1
release_channel: release

Просмотреть файл

@ -0,0 +1,140 @@
---
- &click
submission_timestamp: "2030-01-01 01:00:00"
client_info:
app_channel: release
metadata:
geo:
subdivision1: AZ
user_agent:
os: "Windows 11"
metrics: &click_metrics
boolean:
quick_suggest_improve_suggest_experience: true
quantity:
quick_suggest_position: 1
string: &click_strings
quick_suggest_advertiser: ad1
quick_suggest_match_type: firefox-suggest
quick_suggest_ping_type: quicksuggest-click
quick_suggest_request_id: HASH123
uuid:
quick_suggest_context_id: a
normalized_country_code: US
- <<: *click
metrics:
<<: *click_metrics
uuid:
quick_suggest_context_id: b
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- &impression
<<: *click
metrics: &impression_metrics
<<: *click_metrics
string:
<<: *click_strings
quick_suggest_ping_type: quicksuggest-impression
- <<: *impression
metrics:
<<: *impression_metrics
uuid:
quick_suggest_context_id: b
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression

Просмотреть файл

@ -0,0 +1,138 @@
---
- &click
submission_timestamp: "2030-01-01 01:00:00"
client_info:
app_channel: release
metadata:
geo:
subdivision1: AZ
user_agent:
os: "Windows 11"
metrics: &click_metrics
quantity:
top_sites_position: 1
string:
top_sites_advertiser: ad1
top_sites_ping_type: topsites-click
url:
top_sites_reporting_url: "http://foo.com"
uuid:
top_sites_context_id: a
normalized_country_code: US
- <<: *click
metrics:
<<: *click_metrics
uuid:
top_sites_context_id: b
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- *click
- &impression
<<: *click
metrics: &impression_metrics
<<: *click_metrics
string:
top_sites_advertiser: ad1
top_sites_ping_type: topsites-impression
- <<: *impression
metrics:
<<: *impression_metrics
uuid:
top_sites_context_id: b
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression
- *impression