[RS-1278] Desktop tiles forecasting inputs table (#5929)
* Create derived table containing data inputs into desktop tiles forecasts. * Create derived table containing data inputs into desktop tiles forecasts. * Query adjustments and better data descriptions * Making sure table descriptions are identical. * Correcting partitioning field * updated to avoid manual lookback and removed session columns * Update sql/moz-fx-data-shared-prod/ads_derived/desktop_tiles_forecast_inputs_v1/metadata.yaml Co-authored-by: Curtis Morales <cmorales@mozilla.com> * Apply suggestions from code review Co-authored-by: Curtis Morales <cmorales@mozilla.com> * move max date filter * add dag for job * column name change _3 to _1to3 * remove monthly run and add the new table to bqetl_ads * Apply suggestions from code review Co-authored-by: Curtis Morales <cmorales@mozilla.com> * fix name change in schema --------- Co-authored-by: Jared Snyder <jsnyder@mozilla.com> Co-authored-by: Jared Snyder <jaredssnyder@gmail.com> Co-authored-by: Curtis Morales <cmorales@mozilla.com>
This commit is contained in:
Родитель
a26ce8b19e
Коммит
bcc0644719
|
@ -0,0 +1,13 @@
|
|||
friendly_name: Tiles Revenue Forecast Inputs
|
||||
description: |-
|
||||
Monthly inputs, calculated from Fx telemetry, into the desktop tiles revenue forecast.
|
||||
Note that the forecast methodology was officially switched over from Activity Stream sessions to visits in June 2024.
|
||||
Also, note that PingCentre (and thus sessions telemetry) was deprecated in Feb 2024.
|
||||
Geo markets as of 2024/07/16: 'US','DE','FR','AU','CA','IT','ES','MX','BR','IN','GB','JP'.
|
||||
owners:
|
||||
- sbetancourt@mozilla.com
|
||||
- cmorales@mozilla.com
|
||||
workgroup_access:
|
||||
- role: roles/bigquery.dataViewer
|
||||
members:
|
||||
- workgroup:mozilla-confidential
|
|
@ -0,0 +1,7 @@
|
|||
-- User-facing view: exposes every column of the derived
-- desktop_tiles_forecast_inputs_v1 table unchanged.
CREATE OR REPLACE VIEW `moz-fx-data-shared-prod.ads.desktop_tiles_forecast_inputs` AS
SELECT *
FROM `moz-fx-data-shared-prod.ads_derived.desktop_tiles_forecast_inputs_v1`
|
|
@ -0,0 +1,26 @@
|
|||
friendly_name: Tiles Revenue Forecast Inputs
|
||||
description: |-
|
||||
Monthly inputs, calculated from Fx telemetry, into the desktop tiles revenue forecast.
|
||||
Note that the forecast methodology was officially switched over from Activity Stream sessions to visits in June 2024.
|
||||
Also, note that PingCentre (and thus sessions telemetry) was deprecated in Feb 2024.
|
||||
Geo markets as of 2024/07/16: 'US','DE','FR','AU','CA','IT','ES','MX','BR','IN','GB','JP'.
|
||||
owners:
|
||||
- sbetancourt@mozilla.com
|
||||
- cmorales@mozilla.com
|
||||
labels:
|
||||
incremental: true
|
||||
schedule: daily
|
||||
change_controlled: true
|
||||
scheduling:
|
||||
dag_name: bqetl_ads
|
||||
bigquery:
|
||||
time_partitioning:
|
||||
field: submission_month
|
||||
type: day
|
||||
expiration_days: null
|
||||
clustering: null
|
||||
references: {}
|
||||
workgroup_access:
|
||||
- role: roles/bigquery.dataViewer
|
||||
members:
|
||||
- workgroup:mozilla-confidential
|
|
@ -0,0 +1,108 @@
|
|||
-- Monthly inputs for the desktop tiles revenue forecast: one output row per
-- (submission_month, country) for the listed geo markets.
--
-- Date window used by every CTE:
--   * initial run (is_init): from 2023-11-01 up to, but excluding, the first
--     day of the month containing @submission_date;
--   * otherwise: only the calendar month preceding @submission_date.
WITH cs_impressions AS (
  -- Desktop sponsored-topsites impression events per country and month.
  SELECT
    country AS country_code,
    DATE_TRUNC(submission_date, MONTH) AS submission_month,
    -- Impressions whose position is <= 2 (reported as "positions 1 and 2").
    SUM(IF(position <= 2, event_count, 0)) AS sponsored_impressions_1and2,
    -- Impressions at any position.
    SUM(event_count) AS sponsored_impressions_all
  FROM
    `moz-fx-data-shared-prod.contextual_services.event_aggregates`
  WHERE
    event_type = 'impression'
    AND form_factor = 'desktop'
    AND source = 'topsites'
    AND (
      {% if is_init() %}
        submission_date >= DATE_TRUNC(PARSE_DATE('%Y-%m-%d', '2023-11-01'), MONTH)
      {% else %}
        submission_date >= DATE_TRUNC(DATE_SUB(@submission_date, INTERVAL 1 MONTH), MONTH)
      {% endif %}
    )
    -- Half-open upper bound: the month of @submission_date is incomplete, so it is excluded.
    AND submission_date < DATE_TRUNC(@submission_date, MONTH)
    AND country IN ('US', 'DE', 'FR', 'AU', 'CA', 'IT', 'ES', 'MX', 'BR', 'IN', 'GB', 'JP')
  GROUP BY
    country_code,
    submission_month
),
-- Total users are derived from unified_metrics because the DAU forecast does
-- not account for New Tab activity data.
users_table AS (
  SELECT
    country AS country_code,
    DATE_TRUNC(submission_date, MONTH) AS submission_month,
    -- NOTE(review): COUNT(client_id) is not DISTINCT, so a client contributes
    -- once per row that passes the filters below. Confirm that this
    -- (rather than distinct monthly users) is what the forecast expects.
    COUNT(client_id) AS total_user_count
  FROM
    `mozdata.telemetry.unified_metrics`
  WHERE
    -- Presumably keeps only clients active on submission_date itself
    -- (offset 0, 1 day) -- TODO confirm against the mozfun.bits28 docs.
    `mozfun`.bits28.active_in_range(days_seen_bits, 0, 1)
    AND (
      {% if is_init() %}
        submission_date >= DATE_TRUNC(PARSE_DATE('%Y-%m-%d', '2023-11-01'), MONTH)
      {% else %}
        submission_date >= DATE_TRUNC(DATE_SUB(@submission_date, INTERVAL 1 MONTH), MONTH)
      {% endif %}
    )
    AND submission_date < DATE_TRUNC(@submission_date, MONTH)
    AND country IN ('US', 'DE', 'FR', 'AU', 'CA', 'IT', 'ES', 'MX', 'BR', 'IN', 'GB', 'JP')
    AND normalized_app_name = 'Firefox Desktop'
  GROUP BY
    submission_month,
    country_code
),
-- New Tab visits with sponsored topsites enabled. Total inventory is taken as
-- 2x visits (positions 1 and 2) or 3x visits (positions 1 to 3) while
-- addressable inventory for eligible users is being sorted out.
nt_visits AS (
  SELECT
    DATE_TRUNC(submission_date, MONTH) AS submission_month,
    n.country_code,
    APPROX_COUNT_DISTINCT(newtab_visit_id) AS newtab_visits,
    APPROX_COUNT_DISTINCT(client_id) AS newtab_clients,
    2 * APPROX_COUNT_DISTINCT(newtab_visit_id) AS visits_total_inventory_1and2,
    3 * APPROX_COUNT_DISTINCT(newtab_visit_id) AS visits_total_inventory_1to3,
    -- Not selected by the final query; kept so the CTE output is unchanged.
    SUM(t.sponsored_topsite_tile_impressions) AS sponsored_impressions
  FROM
    `moz-fx-data-shared-prod.telemetry.newtab_visits` AS n
  -- Explicit form of the original comma join. A visit whose
  -- topsite_tile_interactions array is empty yields no rows here, so it is
  -- left out of the visit and client counts.
  -- NOTE(review): confirm that excluding such visits is intended.
  CROSS JOIN
    UNNEST(topsite_tile_interactions) AS t
  WHERE
    n.topsites_enabled
    AND n.topsites_sponsored_enabled
    AND (
      {% if is_init() %}
        submission_date >= DATE_TRUNC(PARSE_DATE('%Y-%m-%d', '2023-11-01'), MONTH)
      {% else %}
        submission_date >= DATE_TRUNC(DATE_SUB(@submission_date, INTERVAL 1 MONTH), MONTH)
      {% endif %}
    )
    AND submission_date < DATE_TRUNC(@submission_date, MONTH)
    AND n.country_code IN ('US', 'DE', 'FR', 'AU', 'CA', 'IT', 'ES', 'MX', 'BR', 'IN', 'GB', 'JP')
  GROUP BY
    submission_month,
    country_code
)
-- nt_visits drives the output; impressions and user counts are LEFT JOINed,
-- so their columns are NULL for a (month, country) with no matching rows.
SELECT
  n.submission_month,
  n.country_code AS country,
  u.total_user_count AS user_count,
  c.sponsored_impressions_1and2 AS impression_count_1and2,
  c.sponsored_impressions_all,
  n.newtab_visits AS visit_count,
  n.newtab_clients AS clients,
  n.visits_total_inventory_1and2 AS total_inventory_1and2,
  -- SAFE_DIVIDE returns NULL instead of failing the job when inventory is 0.
  ROUND(
    SAFE_DIVIDE(c.sponsored_impressions_1and2, n.visits_total_inventory_1and2),
    3
  ) AS fill_rate,
  n.visits_total_inventory_1to3,
  ROUND(
    SAFE_DIVIDE(c.sponsored_impressions_all, n.visits_total_inventory_1to3),
    3
  ) AS visits_total_fill_rate_1to3
FROM
  nt_visits AS n
LEFT JOIN
  cs_impressions AS c
  ON c.country_code = n.country_code
  AND c.submission_month = n.submission_month
LEFT JOIN
  users_table AS u
  ON u.country_code = n.country_code
  AND u.submission_month = n.submission_month
ORDER BY
  country,
  submission_month
|
|
@ -0,0 +1,49 @@
|
|||
fields:
|
||||
- mode: NULLABLE
|
||||
name: submission_month
|
||||
type: DATE
|
||||
description: Data aggregation month
|
||||
- mode: NULLABLE
|
||||
name: country
|
||||
type: STRING
|
||||
description: Two-letter code corresponding to country where sponsored tiles are available. Please see `static.country_codes_v1`
|
||||
- mode: NULLABLE
|
||||
name: user_count
|
||||
type: INTEGER
|
||||
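description: Count of client_id rows for active Firefox Desktop clients in submission_month (not de-duplicated, so a client is counted once per qualifying row). Calculated from Unified Metrics.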
|
||||
- mode: NULLABLE
|
||||
name: impression_count_1and2
|
||||
type: INTEGER
|
||||
description: Count of tile impressions in positions 1 and 2 for submission_month. Calculated from contextual-services.
|
||||
- mode: NULLABLE
|
||||
name: sponsored_impressions_all
|
||||
type: INTEGER
|
||||
description: Count of tile impressions across all three positions for submission_month. Calculated from contextual-services.
|
||||
- mode: NULLABLE
|
||||
name: visit_count
|
||||
type: INTEGER
|
||||
description: Approximate count of distinct newtab visits with topsites and sponsored topsites enabled for submission_month. Calculated from latest Newtab data.
|
||||
- mode: NULLABLE
|
||||
name: clients
|
||||
type: INTEGER
|
||||
description: Approximate count of distinct clients opening the Newtab page with topsites and sponsored topsites enabled for submission_month. Calculated from Newtab.
|
||||
- mode: NULLABLE
|
||||
name: total_inventory_1and2
|
||||
type: INTEGER
|
||||
description: Estimated sponsored tile inventory for positions 1 and 2 for submission_month, computed as 2 x visit_count. Calculated from latest Newtab data.
|
||||
- mode: NULLABLE
|
||||
name: fill_rate
|
||||
type: FLOAT
|
||||
description: |-
|
||||
Sponsored tile fill rate for positions 1 and 2 for submission_month, computed as impression_count_1and2 / total_inventory_1and2 and rounded to 3 decimal places.
|
||||
Calculated from Newtab and contextual-services data.
|
||||
- mode: NULLABLE
|
||||
name: visits_total_inventory_1to3
|
||||
type: INTEGER
|
||||
description: Estimated sponsored tile inventory for positions 1 to 3 for submission_month, computed as 3 x visit_count. Calculated from latest Newtab data.
|
||||
- mode: NULLABLE
|
||||
name: visits_total_fill_rate_1to3
|
||||
type: FLOAT
|
||||
description: |-
|
||||
Sponsored tile fill rate for positions 1 to 3 for submission_month, computed as sponsored_impressions_all / visits_total_inventory_1to3 and rounded to 3 decimal places.
|
||||
Calculated from latest Newtab and contextual-services data.
|
Загрузка…
Ссылка в новой задаче