Bug 1673979 - Add Search dashboard queries (#1619)

* Add search dashboard desktop search aggregates by userstate table and schedule daily

* Add desktop_search_aggregates_for_searchreport

* Add mobile_search_aggregates_for_searchreport

Co-authored-by: Sunah Suh <ssuh@mozilla.com>
This commit is contained in:
XuanL 2020-12-14 15:27:14 -08:00 коммит произвёл GitHub
Родитель 210f869441
Коммит 7f14e799b4
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
14 изменённых файлов: 416 добавлений и 0 удалений

Просмотреть файл

@ -287,6 +287,20 @@ bqetl_marketing_fetch:
retries: 2
retry_delay: 30m
bqetl_search_dashboard:
default_args:
depends_on_past: false
email:
- telemetry-alerts@mozilla.com
- ssuh@mozilla.com
email_on_failure: true
email_on_retry: true
owner: ssuh@mozilla.com
retries: 2
retry_delay: 30m
start_date: '2020-12-14'
schedule_interval: 0 4 * * *
bqetl_desktop_platform:
schedule_interval: 0 3 * * *
default_args:

Просмотреть файл

@ -0,0 +1,104 @@
# Generated via https://github.com/mozilla/bigquery-etl/blob/master/bigquery_etl/query_scheduling/generate_airflow_dags.py
from airflow import DAG
from airflow.operators.sensors import ExternalTaskSensor
import datetime
from utils.gcp import bigquery_etl_query, gke_command
# Defaults applied to every task in this DAG unless a task overrides them.
# Failures and retries both send mail; a task is retried twice, 30 minutes apart.
default_args = dict(
    owner="ssuh@mozilla.com",
    start_date=datetime.datetime(2020, 12, 14),
    email=["telemetry-alerts@mozilla.com", "ssuh@mozilla.com"],
    depends_on_past=False,
    retry_delay=datetime.timedelta(minutes=30),
    email_on_failure=True,
    email_on_retry=True,
    retries=2,
)
# Search dashboard DAG: three BigQuery aggregation tasks, each gated on a single
# task in another DAG. The cron expression "0 4 * * *" fires once a day at 04:00.
# NOTE: the file header states this module is generated by
# generate_airflow_dags.py, so hand edits may be overwritten on regeneration.
with DAG(
    "bqetl_search_dashboard",
    default_args=default_args,
    schedule_interval="0 4 * * *",
) as dag:
    # ------------------------------------------------------------------
    # BigQuery tasks: each writes one table in search_derived, using
    # submission_date as the date partition parameter.
    # ------------------------------------------------------------------
    search_derived__desktop_search_aggregates_by_userstate__v1 = bigquery_etl_query(
        task_id="search_derived__desktop_search_aggregates_by_userstate__v1",
        project_id="moz-fx-data-shared-prod",
        dataset_id="search_derived",
        destination_table="desktop_search_aggregates_by_userstate_v1",
        date_partition_parameter="submission_date",
        owner="xluo@mozilla.com",
        email=[
            "ssuh@mozilla.com",
            "telemetry-alerts@mozilla.com",
            "xluo@mozilla.com",
        ],
        depends_on_past=False,
        dag=dag,
    )

    search_derived__desktop_search_aggregates_for_searchreport__v1 = bigquery_etl_query(
        task_id="search_derived__desktop_search_aggregates_for_searchreport__v1",
        project_id="moz-fx-data-shared-prod",
        dataset_id="search_derived",
        destination_table="desktop_search_aggregates_for_searchreport_v1",
        date_partition_parameter="submission_date",
        owner="xluo@mozilla.com",
        email=[
            "ssuh@mozilla.com",
            "telemetry-alerts@mozilla.com",
            "xluo@mozilla.com",
        ],
        depends_on_past=False,
        dag=dag,
    )

    search_derived__mobile_search_aggregates_for_searchreport__v1 = bigquery_etl_query(
        task_id="search_derived__mobile_search_aggregates_for_searchreport__v1",
        project_id="moz-fx-data-shared-prod",
        dataset_id="search_derived",
        destination_table="mobile_search_aggregates_for_searchreport_v1",
        date_partition_parameter="submission_date",
        owner="mmccorquodale@mozilla.com",
        email=[
            "mmccorquodale@mozilla.com",
            "ssuh@mozilla.com",
            "telemetry-alerts@mozilla.com",
            "xluo@mozilla.com",
        ],
        depends_on_past=False,
        dag=dag,
    )

    # ------------------------------------------------------------------
    # Upstream sensors: each waits on one task in another DAG.
    # execution_delta is the offset between this DAG's execution date and
    # the external run being watched (presumably the upstream DAGs are
    # scheduled one or two hours earlier; confirm against their schedules).
    # No dag= argument is passed; the sensors are created inside the
    # `with DAG` block.
    # ------------------------------------------------------------------
    wait_for_telemetry_derived__clients_last_seen__v1 = ExternalTaskSensor(
        task_id="wait_for_telemetry_derived__clients_last_seen__v1",
        external_dag_id="bqetl_main_summary",
        external_task_id="telemetry_derived__clients_last_seen__v1",
        execution_delta=datetime.timedelta(hours=2),
        check_existence=True,
        mode="reschedule",
        pool="DATA_ENG_EXTERNALTASKSENSOR",
    )

    wait_for_search_derived__search_aggregates__v8 = ExternalTaskSensor(
        task_id="wait_for_search_derived__search_aggregates__v8",
        external_dag_id="bqetl_search",
        external_task_id="search_derived__search_aggregates__v8",
        execution_delta=datetime.timedelta(hours=1),
        check_existence=True,
        mode="reschedule",
        pool="DATA_ENG_EXTERNALTASKSENSOR",
    )

    wait_for_search_derived__mobile_search_clients_daily__v1 = ExternalTaskSensor(
        task_id="wait_for_search_derived__mobile_search_clients_daily__v1",
        external_dag_id="bqetl_mobile_search",
        external_task_id="search_derived__mobile_search_clients_daily__v1",
        execution_delta=datetime.timedelta(hours=2),
        check_existence=True,
        mode="reschedule",
        pool="DATA_ENG_EXTERNALTASKSENSOR",
    )

    # ------------------------------------------------------------------
    # Wiring: every aggregation task runs only after its sensor succeeds.
    # ------------------------------------------------------------------
    (
        wait_for_telemetry_derived__clients_last_seen__v1
        >> search_derived__desktop_search_aggregates_by_userstate__v1
    )
    (
        wait_for_search_derived__search_aggregates__v8
        >> search_derived__desktop_search_aggregates_for_searchreport__v1
    )
    (
        wait_for_search_derived__mobile_search_clients_daily__v1
        >> search_derived__mobile_search_aggregates_for_searchreport__v1
    )

Просмотреть файл

@ -0,0 +1,7 @@
-- View in the `search` dataset that exposes every column of the versioned
-- search_derived.desktop_search_aggregates_by_userstate_v1 table unchanged.
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.search.desktop_search_aggregates_by_userstate`
AS
SELECT
*
FROM
`moz-fx-data-shared-prod.search_derived.desktop_search_aggregates_by_userstate_v1`

Просмотреть файл

@ -0,0 +1,7 @@
-- View in the `search` dataset that exposes every column of the versioned
-- search_derived.desktop_search_aggregates_for_searchreport_v1 table unchanged.
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.search.desktop_search_aggregates_for_searchreport`
AS
SELECT
*
FROM
`moz-fx-data-shared-prod.search_derived.desktop_search_aggregates_for_searchreport_v1`

Просмотреть файл

@ -0,0 +1,7 @@
-- View in the `search` dataset that exposes every column of the versioned
-- search_derived.mobile_search_aggregates_for_searchreport_v1 table unchanged.
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.search.mobile_search_aggregates_for_searchreport`
AS
SELECT
*
FROM
`moz-fx-data-shared-prod.search_derived.mobile_search_aggregates_for_searchreport_v1`

Просмотреть файл

@ -0,0 +1,19 @@
-- Creates the desktop search aggregates by user state table if it is missing.
-- The table is partitioned by submission_date and clustered by geo, then
-- user_state. Column order must match the SELECT list of the query that
-- populates the table.
CREATE TABLE IF NOT EXISTS
`moz-fx-data-shared-prod.search_derived.desktop_search_aggregates_by_userstate_v1`(
submission_date DATE,
-- Country bucket produced by the populating query.
geo STRING,
-- User-state bucket produced by the populating query.
user_state STRING,
-- Client counts per (submission_date, geo, user_state) group.
client_count INT64,
search_client_count INT64,
-- Summed search metrics per group.
sap INT64,
search_with_ads INT64,
ad_clicks INT64,
tagged_follow_on INT64,
tagged_sap INT64,
organic INT64
)
PARTITION BY
submission_date
CLUSTER BY
geo,
user_state

Просмотреть файл

@ -0,0 +1,15 @@
description: >-
This query creates a table based on clients_last_seen containing
search metrics aggregated by user state, geo and submission_date,
together with the number of clients that searched.
Originally created for the search report dashboard.
friendly_name: Desktop Search Aggregates By Userstate
labels:
incremental: true
schedule: daily
owners:
- xluo@mozilla.com
- ssuh@mozilla.com
scheduling:
dag_name: bqetl_search_dashboard
depends_on_past: false

Просмотреть файл

@ -0,0 +1,42 @@
-- Query for search_derived.desktop_search_aggregates_by_userstate_v1
--
-- Produces one row per (submission_date, geo, user_state) for the requested
-- @submission_date, restricted to rows with days_since_seen = 0
-- (presumably clients active on that day; confirm against clients_last_seen).
SELECT
  submission_date,
  -- The five listed countries are reported individually; every other
  -- country (including NULL) falls into the 'non-Tier1' bucket.
  CASE
    WHEN country IN ('US', 'DE', 'FR', 'GB', 'CA')
      THEN country
    ELSE 'non-Tier1'
  END AS geo,
  -- Branches are evaluated in order, so a client flagged as both regular and
  -- new/resurrected is reported as 'regular'.
  CASE
    WHEN is_regular_user_v3
      THEN 'regular'
    WHEN is_new_or_resurrected_v3
      THEN 'new_or_resurrected'
    ELSE 'irregular' -- originally labelled 'other'; renamed to 'irregular' on request
  END AS user_state,
  -- COUNT(client_id) counts rows with a non-NULL client_id.
  COUNT(client_id) AS client_count,
  -- Clients with at least one search; a NULL search_count_all is not counted.
  COUNTIF(search_count_all > 0) AS search_client_count,
  SUM(search_count_all) AS sap,
  SUM(search_with_ads_count_all) AS search_with_ads,
  SUM(ad_clicks_count_all) AS ad_clicks,
  SUM(search_count_tagged_follow_on) AS tagged_follow_on,
  SUM(search_count_tagged_sap) AS tagged_sap,
  SUM(search_count_organic) AS organic
FROM
  -- Fully qualified so the query does not depend on the job's default project.
  `moz-fx-data-shared-prod.telemetry.clients_last_seen`
WHERE
  submission_date = @submission_date
  AND days_since_seen = 0
GROUP BY
  -- Positions refer to submission_date, geo and user_state in the SELECT list.
  1,
  2,
  3;

Просмотреть файл

@ -0,0 +1,23 @@
-- Creates the desktop search aggregates table for the search report if it is
-- missing. The table is partitioned by submission_date and clustered by geo,
-- locale, engine and app_version (os is a column but not a clustering key).
-- Column order must match the SELECT list of the query that populates the table.
CREATE TABLE IF NOT EXISTS
`moz-fx-data-shared-prod.search_derived.desktop_search_aggregates_for_searchreport_v1`(
submission_date DATE,
-- Aggregation dimensions.
geo STRING,
locale STRING,
engine STRING,
os STRING,
app_version STRING,
-- Summed client count; see the populating query for its double-counting caveat.
dcc INT64,
-- Summed search metrics per group.
sap INT64,
tagged_sap INT64,
tagged_follow_on INT64,
search_with_ads INT64,
ad_click INT64,
organic INT64
)
PARTITION BY
submission_date
CLUSTER BY
geo,
locale,
engine,
app_version

Просмотреть файл

@ -0,0 +1,15 @@
description: >-
This query creates a table based on search_aggregates containing
search metrics aggregated by geo, locale, os, engine, app_version
and submission_date.
Originally created for the search report dashboard.
friendly_name: Desktop Search Aggregates For Search Report
labels:
incremental: true
schedule: daily
owners:
- xluo@mozilla.com
- ssuh@mozilla.com
scheduling:
dag_name: bqetl_search_dashboard
depends_on_past: false

Просмотреть файл

@ -0,0 +1,60 @@
-- Query for search_derived.desktop_search_aggregates_for_searchreport_v1
--
-- Rolls search_aggregates up to one row per
-- (submission_date, geo, locale, engine, os, app_version) for the requested
-- @submission_date.
SELECT
  submission_date,
  -- The listed countries are reported individually; everything else
  -- (including NULL) is bucketed as 'others'.
  CASE
    WHEN country IN (
        'US',
        'DE',
        'FR',
        'GB',
        'CA',
        'BR',
        'RU',
        'PL',
        'CN',
        'IN',
        'IT',
        'ES',
        'ID',
        'KE',
        'JP'
      )
      THEN country
    ELSE 'others'
  END AS geo,
  -- Language prefix: the first two characters of the locale. SUBSTR positions
  -- are 1-based, so the start position is written as 1 rather than 0.
  CASE
    WHEN SUBSTR(locale, 1, 2) IN ('en', 'de', 'es', 'fr', 'ru', 'zh', 'pt', 'pl', 'ja', 'it')
      THEN SUBSTR(locale, 1, 2)
    ELSE 'others'
  END AS locale,
  normalized_engine AS engine,
  mozfun.norm.os(os) AS os,
  -- Major version: the text before the first '.' in app_version.
  SPLIT(app_version, '.')[OFFSET(0)] AS app_version,
  -- Be careful of double counting: a client_id that used more than one engine
  -- on the same day contributes to client_count once per engine.
  SUM(client_count) AS dcc,
  SUM(sap) AS sap,
  SUM(tagged_sap) AS tagged_sap,
  SUM(tagged_follow_on) AS tagged_follow_on,
  SUM(search_with_ads) AS search_with_ads,
  SUM(ad_click) AS ad_click,
  SUM(organic) AS organic
FROM
  `moz-fx-data-shared-prod.search.search_aggregates`
WHERE
  submission_date = @submission_date
GROUP BY
  -- Positional on purpose: the aliases locale, os and app_version shadow
  -- source columns of the same name, so grouping by name would be ambiguous.
  1,
  2,
  3,
  4,
  5,
  6

Просмотреть файл

@ -0,0 +1,21 @@
-- Creates the mobile search aggregates table for the search report if it is
-- missing. The table is partitioned by submission_date and clustered by
-- country, product and normalized_engine. Column order must match the SELECT
-- list of the query that populates the table.
CREATE TABLE IF NOT EXISTS
`moz-fx-data-shared-prod.search_derived.mobile_search_aggregates_for_searchreport_v1`(
submission_date DATE,
-- Aggregation dimensions.
country STRING,
product STRING,
normalized_engine STRING,
-- Client counts per group.
clients INT64,
search_clients INT64,
-- Summed search metrics per group.
sap INT64,
tagged_sap INT64,
tagged_follow_on INT64,
ad_click INT64,
search_with_ads INT64,
organic INT64
)
PARTITION BY
submission_date
CLUSTER BY
country,
product,
normalized_engine

Просмотреть файл

@ -0,0 +1,16 @@
description: >-
This query creates a table based on mobile_search_clients_daily
containing search metrics aggregated by country, product,
engine, and submission_date.
Originally created for the search report dashboard.
friendly_name: Mobile Search Aggregates For Search Report
labels:
incremental: true
schedule: daily
owners:
- mmccorquodale@mozilla.com
- xluo@mozilla.com
- ssuh@mozilla.com
scheduling:
dag_name: bqetl_search_dashboard
depends_on_past: false

Просмотреть файл

@ -0,0 +1,66 @@
-- Query for search_derived.mobile_search_aggregates_for_searchreport_v1
--
-- Produces one row per (submission_date, country, product, normalized_engine)
-- for the requested @submission_date, limited to the four app names listed in
-- the WHERE clause.
SELECT
  submission_date,
  country,
  -- Map (app_name, os) to a product label. Fenix and Firefox Preview keep
  -- their app name; Fennec and Focus are split by operating system. A Fennec
  -- or Focus row whose os is neither 'Android' nor 'iOS' becomes 'Other'.
  CASE
    WHEN app_name IN ('Fenix', 'Firefox Preview')
      THEN app_name
    WHEN app_name = 'Fennec'
      AND os = 'Android'
      THEN 'Fennec'
    WHEN app_name = 'Fennec'
      AND os = 'iOS'
      THEN 'Firefox iOS'
    WHEN app_name = 'Focus'
      AND os = 'Android'
      THEN 'Focus Android'
    WHEN app_name = 'Focus'
      AND os = 'iOS'
      THEN 'Focus iOS'
    ELSE 'Other'
  END AS product,
  normalized_engine,
  COUNT(DISTINCT client_id) AS clients,
  -- Distinct clients with at least one search of any counted kind. IF yields
  -- NULL for non-searching rows, and COUNT(DISTINCT ...) ignores NULLs.
  COUNT(
    DISTINCT IF(sap > 0 OR tagged_sap > 0 OR tagged_follow_on > 0, client_id, NULL)
  ) AS search_clients,
  SUM(sap) AS sap,
  SUM(tagged_sap) AS tagged_sap,
  SUM(tagged_follow_on) AS tagged_follow_on,
  SUM(ad_click) AS ad_click,
  SUM(search_with_ads) AS search_with_ads,
  SUM(organic) AS organic
FROM
  -- Fully qualified so the query does not depend on the job's default project.
  `moz-fx-data-shared-prod.search.mobile_search_clients_daily`
WHERE
  -- The IN predicate already rejects NULL app_name values, so no separate
  -- IS NOT NULL check is needed.
  app_name IN ('Fenix', 'Fennec', 'Firefox Preview', 'Focus')
  AND submission_date = @submission_date
GROUP BY
  -- Positions refer to submission_date, country, product and normalized_engine.
  1,
  2,
  3,
  4