[RS-1278] Desktop tiles forecasting inputs table (#5929)

* Create derived table containing data inputs into desktop tiles forecasts.

* Create derived table containing data inputs into desktop tiles forecasts.

* Query adjustments and better data descriptions

* Making sure table descriptions are identical.

* Correcting partitioning field

* updated to avoid manual lookback and removed session columns

* Update sql/moz-fx-data-shared-prod/ads_derived/desktop_tiles_forecast_inputs_v1/metadata.yaml

Co-authored-by: Curtis Morales <cmorales@mozilla.com>

* Apply suggestions from code review

Co-authored-by: Curtis Morales <cmorales@mozilla.com>

* move max date filter

* add dag for job

* column name change _3 to _1to3

* remove monthly run and add the new table to bqetl_ads

* Apply suggestions from code review

Co-authored-by: Curtis Morales <cmorales@mozilla.com>

* fix name change in schema

---------

Co-authored-by: Jared Snyder <jsnyder@mozilla.com>
Co-authored-by: Jared Snyder <jaredssnyder@gmail.com>
Co-authored-by: Curtis Morales <cmorales@mozilla.com>
This commit is contained in:
Sergio E. Betancourt 2024-09-11 21:34:03 -04:00 коммит произвёл GitHub
Родитель a26ce8b19e
Коммит bcc0644719
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
5 изменённых файлов: 203 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,13 @@
friendly_name: Tiles Revenue Forecast Inputs
description: |-
Monthly inputs, calculated from Fx telemetry, into the desktop tiles revenue forecast.
Note that the forecast methodology was officially switched over from Activity Stream sessions to visits in June 2024.
Also, note that PingCentre (and thus sessions telemetry) was deprecated in Feb 2024.
Geo markets as of 2024/07/16: 'US','DE','FR','AU','CA','IT','ES','MX','BR','IN','GB','JP'.
owners:
- sbetancourt@mozilla.com
- cmorales@mozilla.com
workgroup_access:
- role: roles/bigquery.dataViewer
members:
- workgroup:mozilla-confidential

Просмотреть файл

@ -0,0 +1,7 @@
-- User-facing view: exposes every column of the derived forecast-inputs table
-- unchanged, so consumers do not need to reference the versioned table directly.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.ads.desktop_tiles_forecast_inputs`
AS
SELECT
  *
FROM
  `moz-fx-data-shared-prod.ads_derived.desktop_tiles_forecast_inputs_v1`

Просмотреть файл

@ -0,0 +1,26 @@
friendly_name: Tiles Revenue Forecast Inputs
description: |-
Monthly inputs, calculated from Fx telemetry, into the desktop tiles revenue forecast.
Note that the forecast methodology was officially switched over from Activity Stream sessions to visits in June 2024.
Also, note that PingCentre (and thus sessions telemetry) was deprecated in Feb 2024.
Geo markets as of 2024/07/16: 'US','DE','FR','AU','CA','IT','ES','MX','BR','IN','GB','JP'.
owners:
- sbetancourt@mozilla.com
- cmorales@mozilla.com
labels:
incremental: true
schedule: daily
change_controlled: true
scheduling:
dag_name: bqetl_ads
bigquery:
time_partitioning:
field: submission_month
type: day
expiration_days: null
clustering: null
references: {}
workgroup_access:
- role: roles/bigquery.dataViewer
members:
- workgroup:mozilla-confidential

Просмотреть файл

@ -0,0 +1,108 @@
-- Sponsored top-sites impressions on desktop, aggregated per market and
-- calendar month from the contextual-services event aggregates.
WITH cs_impressions AS (
  SELECT
    DATE_TRUNC(submission_date, MONTH) AS submission_month,
    country AS country_code,
    -- Impressions served in tile positions 1 and 2 only.
    SUM(CASE WHEN position <= 2 THEN event_count ELSE 0 END) AS sponsored_impressions_1and2,
    -- Impressions with no restriction on tile position.
    SUM(event_count) AS sponsored_impressions_all
  FROM
    `moz-fx-data-shared-prod.contextual_services.event_aggregates`
  WHERE
    source = 'topsites'
    AND form_factor = 'desktop'
    AND event_type = 'impression'
    AND country IN ('US', 'DE', 'FR', 'AU', 'CA', 'IT', 'ES', 'MX', 'BR', 'IN', 'GB', 'JP')
    -- Lower bound (inclusive): 2023-11-01 when is_init() is true, otherwise the
    -- first day of the month preceding @submission_date.
    AND (
      {% if is_init() %}
        submission_date >= DATE '2023-11-01'
      {% else %}
        submission_date >= DATE_TRUNC(DATE_SUB(@submission_date, INTERVAL 1 MONTH), MONTH)
      {% endif %}
    )
    -- Upper bound (exclusive): first day of @submission_date's month, so only
    -- days before the current month are aggregated.
    AND submission_date < DATE_TRUNC(@submission_date, MONTH)
  GROUP BY
    submission_month,
    country_code
)
/*
  Deriving total users from unified_metrics given how the DAU forecast doesn't
  account for NT activity data.
*/
,
users_table AS (
  SELECT
    country AS country_code,
    DATE_TRUNC(submission_date, MONTH) AS submission_month,
    -- NOTE(review): COUNT(client_id) counts matching rows, not distinct clients.
    -- If the source holds one row per client per day, a client active on several
    -- days of the month is counted once per day. Confirm that this client-day
    -- total (rather than distinct monthly users) is the intended forecast input.
    COUNT(client_id) AS total_user_count
  FROM
    -- Read from the same project as the other CTEs in this query.
    `moz-fx-data-shared-prod.telemetry.unified_metrics`
  WHERE
    -- Presumably keeps only clients active on submission_date itself
    -- (offset 0, window of 1 day) -- verify against the mozfun definition.
    `mozfun`.bits28.active_in_range(days_seen_bits, 0, 1)
    AND normalized_app_name = 'Firefox Desktop'
    AND country IN ('US', 'DE', 'FR', 'AU', 'CA', 'IT', 'ES', 'MX', 'BR', 'IN', 'GB', 'JP')
    -- Lower bound (inclusive): 2023-11-01 when is_init() is true, otherwise the
    -- first day of the month preceding @submission_date.
    AND (
      {% if is_init() %}
        submission_date >= DATE '2023-11-01'
      {% else %}
        submission_date >= DATE_TRUNC(DATE_SUB(@submission_date, INTERVAL 1 MONTH), MONTH)
      {% endif %}
    )
    -- Upper bound (exclusive): first day of @submission_date's month.
    AND submission_date < DATE_TRUNC(@submission_date, MONTH)
  GROUP BY
    submission_month,
    country_code
)
/*
  Using 2x visits as total inventory (3x for positions 1 to 3) while we sort out
  addressable inventory for eligible users.
*/
,
nt_visits AS (
  SELECT
    n.country_code,
    DATE_TRUNC(submission_date, MONTH) AS submission_month,
    APPROX_COUNT_DISTINCT(newtab_visit_id) AS newtab_visits,
    APPROX_COUNT_DISTINCT(client_id) AS newtab_clients,
    -- Inventory is modelled as a fixed number of sponsored slots per visit.
    APPROX_COUNT_DISTINCT(newtab_visit_id) * 2 AS visits_total_inventory_1and2,
    APPROX_COUNT_DISTINCT(newtab_visit_id) * 3 AS visits_total_inventory_1to3,
    SUM(t.sponsored_topsite_tile_impressions) AS sponsored_impressions
  FROM
    `moz-fx-data-shared-prod.telemetry.newtab_visits` AS n
  -- One output row per element of the interactions array; visits whose array is
  -- empty or NULL yield no rows and therefore do not contribute to the counts.
  CROSS JOIN
    UNNEST(n.topsite_tile_interactions) AS t
  WHERE
    n.topsites_sponsored_enabled
    AND n.topsites_enabled
    AND n.country_code IN ('US', 'DE', 'FR', 'AU', 'CA', 'IT', 'ES', 'MX', 'BR', 'IN', 'GB', 'JP')
    -- Lower bound (inclusive): 2023-11-01 when is_init() is true, otherwise the
    -- first day of the month preceding @submission_date.
    AND (
      {% if is_init() %}
        submission_date >= DATE '2023-11-01'
      {% else %}
        submission_date >= DATE_TRUNC(DATE_SUB(@submission_date, INTERVAL 1 MONTH), MONTH)
      {% endif %}
    )
    -- Upper bound (exclusive): first day of @submission_date's month.
    AND submission_date < DATE_TRUNC(@submission_date, MONTH)
  GROUP BY
    country_code,
    submission_month
)
-- Final output: one row per (submission_month, country) present in nt_visits.
-- Impression and user metrics are attached with LEFT JOINs, so they are NULL
-- when the corresponding CTE has no row for that month/country.
SELECT
  n.submission_month,
  n.country_code AS country,
  u.total_user_count AS user_count,
  c.sponsored_impressions_1and2 AS impression_count_1and2,
  c.sponsored_impressions_all,
  n.newtab_visits AS visit_count,
  n.newtab_clients AS clients,
  n.visits_total_inventory_1and2 AS total_inventory_1and2,
  -- Fill rate = impressions / inventory. SAFE_DIVIDE returns NULL instead of
  -- raising a division-by-zero error (which would fail the whole job) if the
  -- inventory for a month/country is 0.
  ROUND(
    SAFE_DIVIDE(c.sponsored_impressions_1and2, n.visits_total_inventory_1and2),
    3
  ) AS fill_rate,
  n.visits_total_inventory_1to3,
  ROUND(
    SAFE_DIVIDE(c.sponsored_impressions_all, n.visits_total_inventory_1to3),
    3
  ) AS visits_total_fill_rate_1to3
FROM
  nt_visits AS n
LEFT JOIN
  cs_impressions AS c
  ON c.country_code = n.country_code
  AND c.submission_month = n.submission_month
LEFT JOIN
  users_table AS u
  ON u.country_code = n.country_code
  AND u.submission_month = n.submission_month
ORDER BY
  country,
  submission_month

Просмотреть файл

@ -0,0 +1,49 @@
fields:
- mode: NULLABLE
name: submission_month
type: DATE
description: Data aggregation month
- mode: NULLABLE
name: country
type: STRING
description: Two-letter code corresponding to country where sponsored tiles are available. Please see `static.country_codes_v1`
- mode: NULLABLE
name: user_count
type: INTEGER
description: Count of desktop users for submission_month. Calculated from Unified Metrics.
- mode: NULLABLE
name: impression_count_1and2
type: INTEGER
description: Count of tile impressions in positions 1 and 2 for submission_month. Calculated from contextual-services.
- mode: NULLABLE
name: sponsored_impressions_all
type: INTEGER
description: Count of tile impressions across all three positions for submission_month. Calculated from contextual-services.
- mode: NULLABLE
name: visit_count
type: INTEGER
description: Approximate count of distinct newtab visits for submission_month (computed with APPROX_COUNT_DISTINCT). Calculated from latest Newtab.
- mode: NULLABLE
name: clients
type: INTEGER
description: Approximate count of distinct clients opening the Newtab page for submission_month (computed with APPROX_COUNT_DISTINCT). Calculated from Newtab.
- mode: NULLABLE
name: total_inventory_1and2
type: INTEGER
description: Count of sponsored tile inventory for positions 1 and 2 for submission_month, estimated as 2 x visit_count. Calculated from latest Newtab data.
- mode: NULLABLE
name: fill_rate
type: FLOAT
description: |-
Float of sponsored tile fill rate for positions 1 and 2 for submission_month. Impressions / Inventory.
Calculated from Newtab and contextual-services data.
- mode: NULLABLE
name: visits_total_inventory_1to3
type: INTEGER
description: Count of sponsored tile inventory for positions 1 to 3 for submission_month, estimated as 3 x visit_count. Calculated from latest Newtab data.
- mode: NULLABLE
name: visits_total_fill_rate_1to3
type: FLOAT
description: |-
Float of calculated fill rate (positions 1 to 3). Impressions / Inventory.
Calculated from latest Newtab and contextual-services data.