DENG-4201 create www_site_hits_v1 & firefox_whatsnew_summary_v1 in shared-prod (#5879)
* DENG-4201 create www_site_hits_v1 in shared-prod
* DENG-4201 add firefox_whatsnew_summary_v1 to shared-prod
* DENG-4201 fix schema.yaml
This commit is contained in:
Родитель
af4355fe03
Коммит
54341c597f
|
@ -0,0 +1,20 @@
|
|||
# Table metadata for the Firefox "what's new" hits-and-bounces summary.
# Nesting below was reconstructed from the flattened key order; keys and
# values are unchanged.
description: This table aggregates the traffic to Firefox "what's new" pages and the number of bounces
friendly_name: Firefox "what's new" hits and bounces
labels:
  incremental: true
  schedule: daily
owners:
- rbaffourawuah@mozilla.com
scheduling:
  dag_name: bqetl_google_analytics_derived
bigquery:
  # Partitioned by day on the `date` column; queries are not forced to filter on it.
  time_partitioning:
    type: day
    field: date
    require_partition_filter: false
  clustering:
    fields:
    - country
    - locale
    - version
deprecated: true
|
|
@ -0,0 +1,42 @@
|
|||
-- Summarises, for the date given by @submission_date, the distinct visits and
-- distinct bounced visits on Firefox "what's new" pages, broken down by
-- country, locale and Firefox version.
WITH whatsnew_page_hits AS (
  -- Page hits whose path looks like "/{locale}/firefox/{version}/whatsnew/...".
  SELECT
    date,
    visit_identifier,
    TRIM(page_path_level1, '/') AS locale,
    page_level_2 AS version,
    mozfun.norm.browser_version_info(page_level_2) AS version_info,
    country,
    -- TRUE only when this hit is the visit's first interaction and the visit
    -- has bounces = 1; a NULL comparison result is treated as FALSE.
    COALESCE(hit_number = first_interaction AND bounces = 1, FALSE) AS is_bounce
  FROM
    `moz-fx-data-shared-prod.mozilla_org_derived.www_site_hits_v1`
  WHERE
    date = @submission_date
    AND hit_type = 'PAGE'
    AND page_level_1 = 'firefox'
    -- Version regular expression is adapted from
    -- https://github.com/mozilla/bedrock/blob/main/bedrock/releasenotes/__init__.py
    AND REGEXP_CONTAINS(page_level_2, r'^\d{1,3}(\.\d{1,3}){1,3}((a|b(eta)?)\d*)?(pre\d*)?(esr)?$')
    AND page_level_3 = 'whatsnew'
)
SELECT
  date,
  country,
  locale,
  version,
  version_info.major_version,
  version_info.minor_version,
  version_info.patch_revision,
  version_info.is_major_release,
  COUNT(DISTINCT visit_identifier) AS visits,
  -- Only visits with at least one bounce-flagged hit contribute an identifier here.
  COUNT(DISTINCT IF(is_bounce, visit_identifier, NULL)) AS bounces
FROM
  whatsnew_page_hits
GROUP BY
  date,
  country,
  locale,
  version,
  version_info.major_version,
  version_info.minor_version,
  version_info.patch_revision,
  version_info.is_major_release
|
|
@ -0,0 +1,31 @@
|
|||
# Column schema; names line up with the select list of the "what's new"
# summary query (dimensions first, then the visits/bounces counts).
# List structure was reconstructed from the flattened key order.
fields:
- mode: NULLABLE
  name: date
  type: DATE
- mode: NULLABLE
  name: country
  type: STRING
- mode: NULLABLE
  name: locale
  type: STRING
- mode: NULLABLE
  name: version
  type: STRING
- mode: NULLABLE
  name: major_version
  type: NUMERIC
- mode: NULLABLE
  name: minor_version
  type: NUMERIC
- mode: NULLABLE
  name: patch_revision
  type: NUMERIC
- mode: NULLABLE
  name: is_major_release
  type: BOOLEAN
- mode: NULLABLE
  name: visits
  type: INTEGER
- mode: NULLABLE
  name: bounces
  type: INTEGER
|
|
@ -0,0 +1,21 @@
|
|||
# Table metadata for the normalized www.mozilla.org hits table.
# Nesting below was reconstructed from the flattened key order; keys and
# values are unchanged.
friendly_name: WWW Site Hits
description: |-
  Normalized individual hits for www.mozilla.org
owners:
- ascholtz@mozilla.com
labels:
  incremental: true
  schedule: daily
  dag: bqetl_google_analytics_derived
  owner1: ascholtz
scheduling:
  dag_name: bqetl_google_analytics_derived
bigquery:
  # Partitioned by day on the `date` column, with no partition expiration
  # and no clustering.
  time_partitioning:
    type: day
    field: date
    require_partition_filter: false
    expiration_days: null
  clustering: null
references: {}
deprecated: true
|
|
@ -0,0 +1,75 @@
|
|||
-- Produces one row per hit from the ga_sessions_* table whose suffix matches
-- @submission_date, with session-level attributes repeated on every hit and
-- the page path broken out into levels.
WITH flattened_hits AS (
  SELECT
    PARSE_DATE('%Y%m%d', date) AS date,
    -- Visitor id and visit id concatenated with no separator.
    CAST(fullVisitorId AS STRING) || CAST(visitId AS STRING) AS visit_identifier,
    fullVisitorId AS full_visitor_id,
    visitStartTime AS visit_start_time,
    hit.page.pagePath AS page_path,
    hit.page.pagePathLevel1 AS page_path_level1,
    -- Drop the query string, then split the path on '/' so that individual
    -- levels are easy to filter on in dashboards.
    SPLIT(SPLIT(hit.page.pagePath, '?')[OFFSET(0)], '/') AS split_page_path,
    hit.type AS hit_type,
    hit.isExit AS is_exit,
    hit.isEntrance AS is_entrance,
    hit.hitNumber AS hit_number,
    hit.eventInfo.eventCategory AS event_category,
    hit.eventInfo.eventLabel AS event_label,
    hit.eventInfo.eventAction AS event_action,
    device.deviceCategory AS device_category,
    device.operatingSystem AS operating_system,
    device.language,
    device.browser,
    -- Keep only the part of the browser version before the first '.'.
    SPLIT(device.browserVersion, '.')[OFFSET(0)] AS browser_version,
    geoNetwork.country,
    trafficSource.source,
    trafficSource.medium,
    trafficSource.campaign,
    trafficSource.adContent AS ad_content,
    totals.visits,
    totals.bounces,
    hit.time / 1000 AS hit_time,
    -- Smallest hit number in the visit among hits with a non-NULL
    -- isInteraction; hits with a NULL isInteraction contribute 0.
    -- NOTE(review): a single NULL isInteraction hit therefore forces this to 0,
    -- and isInteraction = FALSE is counted like TRUE - confirm this is intended.
    MIN(IF(hit.isInteraction IS NOT NULL, hit.hitNumber, 0)) OVER visit_window AS first_interaction,
    -- Largest hit.time / 1000 in the visit among hits with a non-NULL
    -- isInteraction (0 for the others).
    MAX(IF(hit.isInteraction IS NOT NULL, hit.time / 1000, 0)) OVER visit_window AS last_interaction,
    -- 1/0 flags derived from whether isEntrance / isExit are populated at all.
    CAST(hit.isEntrance IS NOT NULL AS INT64) AS entrances,
    CAST(hit.isExit IS NOT NULL AS INT64) AS exits,
    -- NULL when the event category is NULL; missing action/label become ''.
    CONCAT(
      hit.eventInfo.eventCategory,
      COALESCE(hit.eventInfo.eventAction, ''),
      COALESCE(hit.eventInfo.eventLabel, '')
    ) AS event_id
  FROM
    `moz-fx-data-marketing-prod.65789850.ga_sessions_*`
  CROSS JOIN
    UNNEST(hits) AS hit
  WHERE
    _TABLE_SUFFIX = FORMAT_DATE('%Y%m%d', @submission_date)
  WINDOW
    -- All hits of one visit: same visitor id and visit start time.
    visit_window AS (
      PARTITION BY
        fullVisitorId,
        visitStartTime
    )
),
hits_with_page_levels AS (
  SELECT
    * EXCEPT (split_page_path),
    -- Offsets 2 to 6 of the split path; offsets 0 and 1 are skipped
    -- (presumably the empty string before the leading '/' and the locale).
    -- SAFE_OFFSET yields NULL for levels the path does not have.
    split_page_path[SAFE_OFFSET(2)] AS page_level_1,
    split_page_path[SAFE_OFFSET(3)] AS page_level_2,
    split_page_path[SAFE_OFFSET(4)] AS page_level_3,
    split_page_path[SAFE_OFFSET(5)] AS page_level_4,
    split_page_path[SAFE_OFFSET(6)] AS page_level_5
  FROM
    flattened_hits
)
SELECT
  *,
  -- Page name without locale and query string
  CASE
    WHEN page_level_2 IS NULL
      THEN CONCAT('/', page_level_1, '/')
    ELSE ARRAY_TO_STRING(['', page_level_1, page_level_2, page_level_3, page_level_4, page_level_5], '/')
  END AS page_name
FROM
  hits_with_page_levels
|
|
@ -0,0 +1,112 @@
|
|||
# Column schema; field order follows the hit-level query output: the
# flattened hit columns, then page_level_1..5, then page_name.
# List structure was reconstructed from the flattened key order.
fields:
- name: date
  type: DATE
  mode: NULLABLE
- name: visit_identifier
  type: STRING
  mode: NULLABLE
- name: full_visitor_id
  type: STRING
  mode: NULLABLE
- name: visit_start_time
  type: INTEGER
  mode: NULLABLE
- name: page_path
  type: STRING
  mode: NULLABLE
- name: page_path_level1
  type: STRING
  mode: NULLABLE
- name: hit_type
  type: STRING
  mode: NULLABLE
- name: is_exit
  type: BOOLEAN
  mode: NULLABLE
- name: is_entrance
  type: BOOLEAN
  mode: NULLABLE
- name: hit_number
  type: INTEGER
  mode: NULLABLE
- name: event_category
  type: STRING
  mode: NULLABLE
- name: event_label
  type: STRING
  mode: NULLABLE
- name: event_action
  type: STRING
  mode: NULLABLE
- name: device_category
  type: STRING
  mode: NULLABLE
- name: operating_system
  type: STRING
  mode: NULLABLE
- name: language
  type: STRING
  mode: NULLABLE
- name: browser
  type: STRING
  mode: NULLABLE
- name: browser_version
  type: STRING
  mode: NULLABLE
- name: country
  type: STRING
  mode: NULLABLE
- name: source
  type: STRING
  mode: NULLABLE
- name: medium
  type: STRING
  mode: NULLABLE
- name: campaign
  type: STRING
  mode: NULLABLE
- name: ad_content
  type: STRING
  mode: NULLABLE
- name: visits
  type: INTEGER
  mode: NULLABLE
- name: bounces
  type: INTEGER
  mode: NULLABLE
- name: hit_time
  type: FLOAT
  mode: NULLABLE
- name: first_interaction
  type: INTEGER
  mode: NULLABLE
- name: last_interaction
  type: FLOAT
  mode: NULLABLE
- name: entrances
  type: INTEGER
  mode: NULLABLE
- name: exits
  type: INTEGER
  mode: NULLABLE
- name: event_id
  type: STRING
  mode: NULLABLE
- name: page_level_1
  type: STRING
  mode: NULLABLE
- name: page_level_2
  type: STRING
  mode: NULLABLE
- name: page_level_3
  type: STRING
  mode: NULLABLE
- name: page_level_4
  type: STRING
  mode: NULLABLE
- name: page_level_5
  type: STRING
  mode: NULLABLE
- name: page_name
  type: STRING
  mode: NULLABLE
|
Загрузка…
Ссылка в новой задаче