DENG-4201 create www_site_hits_v1 & firefox_whatsnew_summary_v1 in shared-prod (#5879)

* DENG-4201 create www_site_hits_v1 in shared-prod

* DENG-4201 add firefox_whatsnew_summary_v1 to shared prod

* DENG-4201 fix schema.yaml
This commit is contained in:
Katie Windau 2024-07-02 17:14:24 -05:00 коммит произвёл GitHub
Родитель af4355fe03
Коммит 54341c597f
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
6 изменённых файлов: 301 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,20 @@
# Table metadata for the Firefox "what's new" page hits/bounces summary.
# NOTE(review): leading indentation is not visible in this view, so the nesting
# described in the comments below is inferred from key order — confirm against
# the checked-in file.
description: This table aggregates the traffic to Firefox "what's new" pages and the number of bounces
friendly_name: Firefox "what's new" hits and bounces
# Labels attached to the table: incremental, scheduled daily.
labels:
incremental: true
schedule: daily
owners:
- rbaffourawuah@mozilla.com
# Name of the DAG this query is scheduled in.
scheduling:
dag_name: bqetl_google_analytics_derived
# Presumably nested under `bigquery`: day partitioning on the `date` column,
# with no partition filter required of readers.
bigquery:
time_partitioning:
type: day
field: date
require_partition_filter: false
# Clustering columns; these are also the string dimensions the query groups by.
clustering:
fields:
- country
- locale
- version
# The table is flagged as deprecated.
deprecated: true

Просмотреть файл

@ -0,0 +1,42 @@
-- Daily summary of traffic to Firefox "what's new" pages: distinct visits and
-- distinct bouncing visits per country, locale and Firefox version.
WITH whatsnew_page_hits AS (
  -- One row per PAGE hit on a "what's new" page for the processed date.
  SELECT
    date,
    visit_identifier,
    -- The first path level looks like "/{locale}/"; strip the slashes.
    TRIM(page_path_level1, '/') AS locale,
    page_level_2 AS version,
    mozfun.norm.browser_version_info(page_level_2) AS version_info,
    country,
    -- A hit counts as a bounce when it is the visit's first interaction and the
    -- visit is flagged as bounced; a NULL comparison result becomes FALSE.
    COALESCE(hit_number = first_interaction AND bounces = 1, FALSE) AS is_bounce
  FROM
    `moz-fx-data-shared-prod.mozilla_org_derived.www_site_hits_v1`
  WHERE
    date = @submission_date
    AND hit_type = 'PAGE'
    -- Match page paths like "/{locale}/firefox/{version}/whatsnew/..."
    AND page_level_1 = 'firefox'
    AND page_level_3 = 'whatsnew'
    -- Version regular expression is adapted from
    -- https://github.com/mozilla/bedrock/blob/main/bedrock/releasenotes/__init__.py
    AND REGEXP_CONTAINS(page_level_2, r'^\d{1,3}(\.\d{1,3}){1,3}((a|b(eta)?)\d*)?(pre\d*)?(esr)?$')
)
SELECT
  date,
  country,
  locale,
  version,
  version_info.major_version,
  version_info.minor_version,
  version_info.patch_revision,
  version_info.is_major_release,
  COUNT(DISTINCT visit_identifier) AS visits,
  -- Distinct visits that have at least one hit flagged as a bounce.
  COUNT(DISTINCT IF(is_bounce, visit_identifier, NULL)) AS bounces
FROM
  whatsnew_page_hits
GROUP BY
  date,
  country,
  locale,
  version,
  version_info.major_version,
  version_info.minor_version,
  version_info.patch_revision,
  version_info.is_major_release

Просмотреть файл

@ -0,0 +1,31 @@
# Column schema: one entry per output column, in the order the summary query
# selects them (dimensions first, then the visits/bounces counts).
fields:
- mode: NULLABLE
name: date
type: DATE
- mode: NULLABLE
name: country
type: STRING
- mode: NULLABLE
name: locale
type: STRING
- mode: NULLABLE
name: version
type: STRING
- mode: NULLABLE
name: major_version
type: NUMERIC
- mode: NULLABLE
name: minor_version
type: NUMERIC
- mode: NULLABLE
name: patch_revision
type: NUMERIC
- mode: NULLABLE
name: is_major_release
type: BOOLEAN
- mode: NULLABLE
name: visits
type: INTEGER
- mode: NULLABLE
name: bounces
type: INTEGER

Просмотреть файл

@ -0,0 +1,21 @@
# Table metadata for the normalized www.mozilla.org hits table.
# NOTE(review): leading indentation is not visible in this view, so the nesting
# described in the comments below is inferred from key order — confirm against
# the checked-in file.
friendly_name: WWW Site Hits
description: |-
Normalized individual hits for www.mozilla.org
owners:
- ascholtz@mozilla.com
# Labels attached to the table; `dag` repeats the scheduling DAG name below.
labels:
incremental: true
schedule: daily
dag: bqetl_google_analytics_derived
owner1: ascholtz
# Name of the DAG this query is scheduled in.
scheduling:
dag_name: bqetl_google_analytics_derived
# Presumably nested under `bigquery`: day partitioning on the `date` column,
# no partition filter required, no partition expiration, no clustering.
bigquery:
time_partitioning:
type: day
field: date
require_partition_filter: false
expiration_days: null
clustering: null
references: {}
# The table is flagged as deprecated.
deprecated: true

Просмотреть файл

@ -0,0 +1,75 @@
-- Normalized individual hits for www.mozilla.org: one output row per hit of
-- every session in the export table for @submission_date.
WITH session_hits AS (
  -- Flatten each session's `hits` array and rename fields to snake_case.
  SELECT
    PARSE_DATE('%Y%m%d', date) AS date,
    CONCAT(CAST(fullVisitorId AS STRING), CAST(visitId AS STRING)) AS visit_identifier,
    fullVisitorId AS full_visitor_id,
    visitStartTime AS visit_start_time,
    hit.page.pagePath AS page_path,
    hit.page.pagePathLevel1 AS page_path_level1,
    -- Path segments with the query string removed; split here so dashboards
    -- can filter on individual page levels.
    SPLIT(SPLIT(hit.page.pagePath, '?')[OFFSET(0)], '/') AS split_page_path,
    hit.type AS hit_type,
    hit.isExit AS is_exit,
    hit.isEntrance AS is_entrance,
    hit.hitNumber AS hit_number,
    hit.eventInfo.eventCategory AS event_category,
    hit.eventInfo.eventLabel AS event_label,
    hit.eventInfo.eventAction AS event_action,
    device.deviceCategory AS device_category,
    device.operatingSystem AS operating_system,
    device.language,
    device.browser,
    -- Keep only the part of the browser version before the first dot.
    SPLIT(device.browserVersion, '.')[OFFSET(0)] AS browser_version,
    geoNetwork.country,
    trafficSource.source,
    trafficSource.medium,
    trafficSource.campaign,
    trafficSource.adContent AS ad_content,
    totals.visits,
    totals.bounces,
    -- NOTE(review): presumably converts milliseconds to seconds — confirm.
    hit.time / 1000 AS hit_time,
    -- Lowest hit number within the visit; hits whose isInteraction is NULL
    -- contribute 0 to the minimum.
    MIN(
      CASE
        WHEN hit.isInteraction IS NULL
          THEN 0
        ELSE hit.hitNumber
      END
    ) OVER visit_window AS first_interaction,
    -- Greatest hit time within the visit; hits whose isInteraction is NULL
    -- contribute 0 to the maximum.
    MAX(
      CASE
        WHEN hit.isInteraction IS NULL
          THEN 0
        ELSE hit.time / 1000
      END
    ) OVER visit_window AS last_interaction,
    -- 1/0 flags derived from whether isEntrance / isExit are set at all.
    CASE
      WHEN hit.isEntrance IS NULL
        THEN 0
      ELSE 1
    END AS entrances,
    CASE
      WHEN hit.isExit IS NULL
        THEN 0
      ELSE 1
    END AS exits,
    -- Category + action + label; NULL action/label become empty strings, while
    -- a NULL category makes the whole event_id NULL.
    CONCAT(
      hit.eventInfo.eventCategory,
      COALESCE(hit.eventInfo.eventAction, ''),
      COALESCE(hit.eventInfo.eventLabel, '')
    ) AS event_id
  FROM
    `moz-fx-data-marketing-prod.65789850.ga_sessions_*`
  CROSS JOIN
    UNNEST(hits) AS hit
  WHERE
    _TABLE_SUFFIX = FORMAT_DATE('%Y%m%d', @submission_date)
  WINDOW
    -- All hits that belong to the same visitor and visit start time.
    visit_window AS (
      PARTITION BY
        fullVisitorId,
        visitStartTime
    )
),
hits_with_page_levels AS (
  -- Replace the split path array with one column per page level. For a path
  -- shaped like "/{locale}/firefox/...", offset 0 is the empty string before
  -- the leading slash and offset 1 is the locale, so levels start at offset 2.
  SELECT
    * EXCEPT (split_page_path),
    split_page_path[SAFE_OFFSET(2)] AS page_level_1,
    split_page_path[SAFE_OFFSET(3)] AS page_level_2,
    split_page_path[SAFE_OFFSET(4)] AS page_level_3,
    split_page_path[SAFE_OFFSET(5)] AS page_level_4,
    split_page_path[SAFE_OFFSET(6)] AS page_level_5
  FROM
    session_hits
)
SELECT
  *,
  -- Page name without locale and query string
  CASE
    WHEN page_level_2 IS NULL
      THEN CONCAT('/', page_level_1, '/')
    ELSE ARRAY_TO_STRING(
        ['', page_level_1, page_level_2, page_level_3, page_level_4, page_level_5],
        '/'
      )
  END AS page_name
FROM
  hits_with_page_levels

Просмотреть файл

@ -0,0 +1,112 @@
# Column schema: one entry per output column of the site-hits query, in the
# order the query emits them (hit/session fields, then page_level_1..5, then
# page_name).
fields:
- name: date
type: DATE
mode: NULLABLE
- name: visit_identifier
type: STRING
mode: NULLABLE
- name: full_visitor_id
type: STRING
mode: NULLABLE
- name: visit_start_time
type: INTEGER
mode: NULLABLE
- name: page_path
type: STRING
mode: NULLABLE
- name: page_path_level1
type: STRING
mode: NULLABLE
- name: hit_type
type: STRING
mode: NULLABLE
- name: is_exit
type: BOOLEAN
mode: NULLABLE
- name: is_entrance
type: BOOLEAN
mode: NULLABLE
- name: hit_number
type: INTEGER
mode: NULLABLE
- name: event_category
type: STRING
mode: NULLABLE
- name: event_label
type: STRING
mode: NULLABLE
- name: event_action
type: STRING
mode: NULLABLE
- name: device_category
type: STRING
mode: NULLABLE
- name: operating_system
type: STRING
mode: NULLABLE
- name: language
type: STRING
mode: NULLABLE
- name: browser
type: STRING
mode: NULLABLE
- name: browser_version
type: STRING
mode: NULLABLE
- name: country
type: STRING
mode: NULLABLE
- name: source
type: STRING
mode: NULLABLE
- name: medium
type: STRING
mode: NULLABLE
- name: campaign
type: STRING
mode: NULLABLE
- name: ad_content
type: STRING
mode: NULLABLE
- name: visits
type: INTEGER
mode: NULLABLE
- name: bounces
type: INTEGER
mode: NULLABLE
- name: hit_time
type: FLOAT
mode: NULLABLE
- name: first_interaction
type: INTEGER
mode: NULLABLE
- name: last_interaction
type: FLOAT
mode: NULLABLE
- name: entrances
type: INTEGER
mode: NULLABLE
- name: exits
type: INTEGER
mode: NULLABLE
- name: event_id
type: STRING
mode: NULLABLE
- name: page_level_1
type: STRING
mode: NULLABLE
- name: page_level_2
type: STRING
mode: NULLABLE
- name: page_level_3
type: STRING
mode: NULLABLE
- name: page_level_4
type: STRING
mode: NULLABLE
- name: page_level_5
type: STRING
mode: NULLABLE
- name: page_name
type: STRING
mode: NULLABLE