Add desktop mobile monthly search agg (#3152)
* add desktop+mobile monthly search aggregates * apply feedback * generate dag via dags.yaml * update dag to reference empty tables * correct yamllint issues
This commit is contained in:
Родитель
9ddfc506e6
Коммит
0e51590f91
19
dags.yaml
19
dags.yaml
|
@ -786,3 +786,22 @@ bqetl_newtab:
|
|||
schedule_interval: daily
|
||||
tags:
|
||||
- impact/tier_1
|
||||
|
||||
bqetl_desktop_mobile_search_monthly:
|
||||
default_args:
|
||||
depends_on_past: false
|
||||
email:
|
||||
- telemetry-alerts@mozilla.com
|
||||
- akommasani@mozilla.com
|
||||
email_on_failure: true
|
||||
email_on_retry: true
|
||||
end_date: null
|
||||
owner: akommasani@mozilla.com
|
||||
retries: 2
|
||||
retry_delay: 30m
|
||||
start_date: '2019-01-01'
|
||||
description: Generate monthly client data from daily search table
|
||||
schedule_interval: "0 5 2 * *"
|
||||
tags:
|
||||
- impact/tier_1
|
||||
- repo/bigquery-etl
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.sensors.external_task import ExternalTaskMarker
|
||||
from airflow.sensors.external_task import ExternalTaskSensor
|
||||
from airflow.utils.task_group import TaskGroup
|
||||
import datetime
|
||||
from utils.constants import ALLOWED_STATES, FAILED_STATES
|
||||
from utils.gcp import bigquery_etl_query, gke_command
|
||||
|
||||
docs = """
|
||||
### bqetl_desktop_mobile_search_monthly
|
||||
|
||||
Built from bigquery-etl repo, [`dags/bqetl_desktop_mobile_search_monthly.py`](https://github.com/mozilla/bigquery-etl/blob/main/dags/bqetl_desktop_mobile_search_monthly.py)
|
||||
|
||||
#### Description
|
||||
|
||||
Generate monthly client data from daily search table
|
||||
#### Owner
|
||||
|
||||
akommasani@mozilla.com
|
||||
"""
|
||||
|
||||
|
||||
default_args = {
|
||||
"owner": "akommasani@mozilla.com",
|
||||
"start_date": datetime.datetime(2019, 1, 1, 0, 0),
|
||||
"end_date": None,
|
||||
"email": ["telemetry-alerts@mozilla.com", "akommasani@mozilla.com"],
|
||||
"depends_on_past": False,
|
||||
"retry_delay": datetime.timedelta(seconds=1800),
|
||||
"email_on_failure": True,
|
||||
"email_on_retry": True,
|
||||
"retries": 2,
|
||||
}
|
||||
|
||||
tags = ["impact/tier_1", "repo/bigquery-etl"]
|
||||
|
||||
with DAG(
|
||||
"bqetl_desktop_mobile_search_monthly",
|
||||
default_args=default_args,
|
||||
schedule_interval="0 5 2 * *",
|
||||
doc_md=docs,
|
||||
tags=tags,
|
||||
) as dag:
|
||||
|
||||
search_derived__desktop_mobile_search_clients_monthly__v1 = bigquery_etl_query(
|
||||
task_id="search_derived__desktop_mobile_search_clients_monthly__v1",
|
||||
destination_table="desktop_mobile_search_clients_monthly_v1",
|
||||
dataset_id="search_derived",
|
||||
project_id="moz-fx-data-shared-prod",
|
||||
owner="akommasani@mozilla.com",
|
||||
email=["akommasani@mozilla.com", "telemetry-alerts@mozilla.com"],
|
||||
date_partition_parameter="submission_date",
|
||||
depends_on_past=False,
|
||||
)
|
|
@ -0,0 +1,7 @@
|
|||
CREATE OR REPLACE VIEW
|
||||
`moz-fx-data-shared-prod.search.desktop_mobile_search_clients_monthly`
|
||||
AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`moz-fx-data-shared-prod.search_derived.desktop_mobile_search_clients_monthly_v1`
|
|
@ -0,0 +1,18 @@
|
|||
friendly_name: Desktop and Mobile Search Monthly Data
|
||||
description: |-
|
||||
Daily search clients aggregated monthly, across unique sets of dimensions.
|
||||
This table will be populated on the 2nd of every month with data for the previous month.
|
||||
owners:
|
||||
- akommasani@mozilla.com
|
||||
labels:
|
||||
incremental: true
|
||||
schedule: monthly
|
||||
scheduling:
|
||||
dag_name: bqetl_desktop_mobile_search_monthly
|
||||
referenced_tables: []
|
||||
bigquery:
|
||||
time_partitioning:
|
||||
field: submission_month
|
||||
type: month
|
||||
require_partition_filter: true
|
||||
clustering: null
|
|
@ -0,0 +1,60 @@
|
|||
-- Query for search_derived.desktop_mobile_search_clients_monthly_v1
|
||||
-- For more information on writing queries see:
|
||||
-- https://docs.telemetry.mozilla.org/cookbooks/bigquery/querying.html
|
||||
SELECT
|
||||
client_id,
|
||||
DATE_TRUNC(submission_date, MONTH) AS submission_month,
|
||||
"mobile" AS device,
|
||||
normalized_engine,
|
||||
normalized_app_name,
|
||||
os,
|
||||
country,
|
||||
COUNT(DISTINCT submission_date) AS days_of_use,
|
||||
COALESCE(SUM(sap), 0) AS searches,
|
||||
COALESCE(SUM(search_with_ads), 0) AS search_with_ads,
|
||||
COALESCE(SUM(ad_click), 0) AS ad_click,
|
||||
COALESCE(SUM(tagged_follow_on), 0) AS tagged_follow_on,
|
||||
COALESCE(SUM(tagged_sap), 0) AS tagged_sap,
|
||||
FROM
|
||||
search.mobile_search_clients_engines_sources_daily
|
||||
WHERE
|
||||
submission_date
|
||||
BETWEEN date_trunc(date_sub(@submission_date, INTERVAL 1 month), month)
|
||||
AND last_day(date_sub(@submission_date, INTERVAL 1 month), month)
|
||||
GROUP BY
|
||||
client_id,
|
||||
submission_month,
|
||||
device,
|
||||
normalized_engine,
|
||||
normalized_app_name,
|
||||
os,
|
||||
country
|
||||
UNION ALL
|
||||
SELECT
|
||||
client_id,
|
||||
DATE_TRUNC(submission_date, MONTH) AS submission_month,
|
||||
"desktop" AS device,
|
||||
normalized_engine,
|
||||
'Firefox Desktop' AS normalized_app_name,
|
||||
os,
|
||||
country,
|
||||
count(DISTINCT submission_date) AS days_of_use,
|
||||
COALESCE(SUM(sap), 0) AS searches,
|
||||
COALESCE(SUM(search_with_ads), 0) AS search_with_ads,
|
||||
COALESCE(SUM(ad_click), 0) AS ad_click,
|
||||
COALESCE(SUM(tagged_follow_on), 0) AS tagged_follow_on,
|
||||
COALESCE(SUM(tagged_sap), 0) AS tagged_sap,
|
||||
FROM
|
||||
search.search_clients_engines_sources_daily
|
||||
WHERE
|
||||
submission_date
|
||||
BETWEEN date_trunc(date_sub(@submission_date, INTERVAL 1 month), month)
|
||||
AND last_day(date_sub(@submission_date, INTERVAL 1 month), month)
|
||||
GROUP BY
|
||||
client_id,
|
||||
submission_month,
|
||||
device,
|
||||
normalized_engine,
|
||||
normalized_app_name,
|
||||
os,
|
||||
country
|
|
@ -0,0 +1,40 @@
|
|||
fields:
|
||||
- mode: NULLABLE
|
||||
name: client_id
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: submission_month
|
||||
type: DATE
|
||||
- mode: NULLABLE
|
||||
name: device
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: normalized_engine
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: normalized_app_name
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: os
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: country
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: days_of_use
|
||||
type: INTEGER
|
||||
- mode: NULLABLE
|
||||
name: searches
|
||||
type: INTEGER
|
||||
- mode: NULLABLE
|
||||
name: search_with_ads
|
||||
type: INTEGER
|
||||
- mode: NULLABLE
|
||||
name: ad_click
|
||||
type: INTEGER
|
||||
- mode: NULLABLE
|
||||
name: tagged_follow_on
|
||||
type: INTEGER
|
||||
- mode: NULLABLE
|
||||
name: tagged_sap
|
||||
type: INTEGER
|
Загрузка…
Ссылка в новой задаче