Add desktop mobile monthly search agg (#3152)

* add desktop+mobile monthly search aggregates

* apply feedback

* generate dag via dags.yaml

* update dag to reference empty tables

* correct yamllint issues
This commit is contained in:
Alekhya 2022-08-18 07:47:22 -04:00 коммит произвёл GitHub
Родитель 9ddfc506e6
Коммит 0e51590f91
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
6 изменённых файлов: 200 добавлений и 0 удалений

Просмотреть файл

@ -786,3 +786,22 @@ bqetl_newtab:
schedule_interval: daily
tags:
- impact/tier_1
# DAG producing the monthly desktop + mobile search client aggregates.
bqetl_desktop_mobile_search_monthly:
  default_args:
    depends_on_past: false
    # Recipients of failure and retry notifications (both are enabled below).
    email:
      - telemetry-alerts@mozilla.com
      - akommasani@mozilla.com
    email_on_failure: true
    email_on_retry: true
    end_date: null
    owner: akommasani@mozilla.com
    # A failed task is retried twice, 30 minutes apart.
    retries: 2
    retry_delay: 30m
    start_date: '2019-01-01'
  description: Generate monthly client data from daily search table
  # Cron expression: 05:00 on the 2nd day of every month.
  schedule_interval: "0 5 2 * *"
  tags:
    - impact/tier_1
    - repo/bigquery-etl

Просмотреть файл

@ -0,0 +1,56 @@
# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py
from airflow import DAG
from airflow.sensors.external_task import ExternalTaskMarker
from airflow.sensors.external_task import ExternalTaskSensor
from airflow.utils.task_group import TaskGroup
import datetime
from utils.constants import ALLOWED_STATES, FAILED_STATES
from utils.gcp import bigquery_etl_query, gke_command
# Markdown description of this DAG; handed to the DAG constructor as `doc_md`.
docs = """
### bqetl_desktop_mobile_search_monthly
Built from bigquery-etl repo, [`dags/bqetl_desktop_mobile_search_monthly.py`](https://github.com/mozilla/bigquery-etl/blob/main/dags/bqetl_desktop_mobile_search_monthly.py)
#### Description
Generate monthly client data from daily search table
#### Owner
akommasani@mozilla.com
"""

# Defaults handed to the DAG constructor as `default_args`.
default_args = {
    "owner": "akommasani@mozilla.com",
    "start_date": datetime.datetime(2019, 1, 1, 0, 0),
    "end_date": None,
    "email": ["telemetry-alerts@mozilla.com", "akommasani@mozilla.com"],
    "depends_on_past": False,
    # 30 minutes between retry attempts.
    "retry_delay": datetime.timedelta(minutes=30),
    "email_on_failure": True,
    "email_on_retry": True,
    "retries": 2,
}

# Labels attached to the DAG.
tags = ["impact/tier_1", "repo/bigquery-etl"]
# Build the DAG object first, then register its single task inside the
# DAG context. The cron expression fires at 05:00 on the 2nd of each month.
dag = DAG(
    "bqetl_desktop_mobile_search_monthly",
    schedule_interval="0 5 2 * *",
    default_args=default_args,
    tags=tags,
    doc_md=docs,
)

with dag:
    # Runs the query for search_derived.desktop_mobile_search_clients_monthly_v1
    # in project moz-fx-data-shared-prod.
    # NOTE(review): the task passes "submission_date" as the date partition
    # parameter, while the table metadata declares month partitioning on
    # `submission_month` -- confirm bigquery_etl_query targets the intended
    # partition for a monthly table.
    search_derived__desktop_mobile_search_clients_monthly__v1 = bigquery_etl_query(
        task_id="search_derived__desktop_mobile_search_clients_monthly__v1",
        project_id="moz-fx-data-shared-prod",
        dataset_id="search_derived",
        destination_table="desktop_mobile_search_clients_monthly_v1",
        date_partition_parameter="submission_date",
        depends_on_past=False,
        owner="akommasani@mozilla.com",
        email=["akommasani@mozilla.com", "telemetry-alerts@mozilla.com"],
    )

Просмотреть файл

@ -0,0 +1,7 @@
-- View over the derived monthly desktop + mobile search clients table.
-- All columns are passed through unchanged, so the view keeps mirroring
-- the underlying table's schema.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.search.desktop_mobile_search_clients_monthly`
AS
SELECT
  *
FROM
  `moz-fx-data-shared-prod.search_derived.desktop_mobile_search_clients_monthly_v1`

Просмотреть файл

@ -0,0 +1,18 @@
# Metadata for the monthly desktop + mobile search aggregate table.
friendly_name: Desktop and Mobile Search Monthly Data
description: |-
  Daily search clients aggregated monthly, across unique sets of dimensions.
  This table will be populated on 2nd of every month for the previous month
owners:
  - akommasani@mozilla.com
labels:
  incremental: true
  schedule: monthly
scheduling:
  # Name of the DAG this query is scheduled in.
  dag_name: bqetl_desktop_mobile_search_monthly
  # Explicitly empty: no referenced tables are declared for this task.
  referenced_tables: []
bigquery:
  time_partitioning:
    # Month-granularity partitions keyed on the submission_month column;
    # queries against the table must filter on the partition column.
    field: submission_month
    type: month
    require_partition_filter: true
  clustering: null

Просмотреть файл

@ -0,0 +1,60 @@
-- Query for search_derived.desktop_mobile_search_clients_monthly_v1
-- For more information on writing queries see:
-- https://docs.telemetry.mozilla.org/cookbooks/bigquery/querying.html
--
-- Rolls daily per-client search rows up to one row per client, month and
-- dimension set (engine, app name, os, country), separately for mobile and
-- desktop, and stacks the two results with UNION ALL.
-- Both halves read the full calendar month that precedes @submission_date:
-- from the first day through the last day of that month, inclusive.
--
-- Mobile clients.
SELECT
  client_id,
  DATE_TRUNC(submission_date, MONTH) AS submission_month,
  "mobile" AS device,
  normalized_engine,
  normalized_app_name,
  os,
  country,
  -- Number of distinct days in the month on which the client has a row.
  COUNT(DISTINCT submission_date) AS days_of_use,
  -- SUM over all-NULL inputs yields NULL; report 0 instead.
  COALESCE(SUM(sap), 0) AS searches,
  COALESCE(SUM(search_with_ads), 0) AS search_with_ads,
  COALESCE(SUM(ad_click), 0) AS ad_click,
  COALESCE(SUM(tagged_follow_on), 0) AS tagged_follow_on,
  COALESCE(SUM(tagged_sap), 0) AS tagged_sap,
FROM
  search.mobile_search_clients_engines_sources_daily
WHERE
  submission_date
  BETWEEN DATE_TRUNC(DATE_SUB(@submission_date, INTERVAL 1 MONTH), MONTH)
  AND LAST_DAY(DATE_SUB(@submission_date, INTERVAL 1 MONTH), MONTH)
GROUP BY
  client_id,
  submission_month,
  device,
  normalized_engine,
  normalized_app_name,
  os,
  country
UNION ALL
-- Desktop clients; the app name is a fixed label for every desktop row.
SELECT
  client_id,
  DATE_TRUNC(submission_date, MONTH) AS submission_month,
  "desktop" AS device,
  normalized_engine,
  "Firefox Desktop" AS normalized_app_name,
  os,
  country,
  COUNT(DISTINCT submission_date) AS days_of_use,
  COALESCE(SUM(sap), 0) AS searches,
  COALESCE(SUM(search_with_ads), 0) AS search_with_ads,
  COALESCE(SUM(ad_click), 0) AS ad_click,
  COALESCE(SUM(tagged_follow_on), 0) AS tagged_follow_on,
  COALESCE(SUM(tagged_sap), 0) AS tagged_sap,
FROM
  search.search_clients_engines_sources_daily
WHERE
  submission_date
  BETWEEN DATE_TRUNC(DATE_SUB(@submission_date, INTERVAL 1 MONTH), MONTH)
  AND LAST_DAY(DATE_SUB(@submission_date, INTERVAL 1 MONTH), MONTH)
GROUP BY
  client_id,
  submission_month,
  device,
  normalized_engine,
  normalized_app_name,
  os,
  country

Просмотреть файл

@ -0,0 +1,40 @@
# Column schema of the monthly desktop + mobile search clients table.
# Every column is NULLABLE.
fields:
  # Grouping dimensions.
  - name: client_id
    type: STRING
    mode: NULLABLE
  - name: submission_month
    type: DATE
    mode: NULLABLE
  - name: device
    type: STRING
    mode: NULLABLE
  - name: normalized_engine
    type: STRING
    mode: NULLABLE
  - name: normalized_app_name
    type: STRING
    mode: NULLABLE
  - name: os
    type: STRING
    mode: NULLABLE
  - name: country
    type: STRING
    mode: NULLABLE
  # Monthly aggregates.
  - name: days_of_use
    type: INTEGER
    mode: NULLABLE
  - name: searches
    type: INTEGER
    mode: NULLABLE
  - name: search_with_ads
    type: INTEGER
    mode: NULLABLE
  - name: ad_click
    type: INTEGER
    mode: NULLABLE
  - name: tagged_follow_on
    type: INTEGER
    mode: NULLABLE
  - name: tagged_sap
    type: INTEGER
    mode: NULLABLE