DENG-2492 Create new GA4 derived table: blogs_sessions_v2 (#5018)
* DENG-2492 initial commit for new table blogs_sessions_v2 * DENG-2492 wrap keywords with backticks
This commit is contained in:
Родитель
63a4d72197
Коммит
ee8de94705
|
@ -0,0 +1,7 @@
|
|||
-- View `ga.blogs_sessions`: exposes every column of the derived table
-- `ga_derived.blogs_sessions_v2` unchanged.
CREATE OR REPLACE VIEW `moz-fx-data-marketing-prod.ga.blogs_sessions` AS
SELECT
  *
FROM
  `moz-fx-data-marketing-prod.ga_derived.blogs_sessions_v2`
|
|
@ -0,0 +1,21 @@
|
|||
# Table metadata for blogs_sessions_v2.
friendly_name: Blogs Sessions V2
description: |-
  Intermediate table containing normalized sessions for blog.mozilla.org, sourced from Google Analytics 4 (GA4)
owners:
  - kwindau@mozilla.com
labels:
  incremental: true
  owner1: kwindau@mozilla.com
scheduling:
  dag_name: bqetl_google_analytics_derived_ga4
bigquery:
  # Daily partitions on `date`; a partition filter is required and no
  # partition expiration is configured.
  time_partitioning:
    type: day
    field: date
    require_partition_filter: true
    expiration_days: null
  clustering:
    fields:
      - country
references: {}
deprecated: false
|
|
@ -0,0 +1,205 @@
|
|||
-- Builds one row per blog.mozilla.org session for @submission_date from the GA4 event export.
-- The session's entrance page view supplies the device, geo and campaign attributes, and the
-- first two path segments of its landing URL are normalized into `blog` and `subblog`.
WITH all_page_views AS (
  -- Every page_view event of the day, with its page location and the `entrances` event param.
  SELECT
    PARSE_DATE('%Y%m%d', event_date) AS `date`,
    event_timestamp,
    -- Concatenation yields NULL when user_pseudo_id or the ga_session_id param is missing.
    user_pseudo_id || '-' || CAST(
      (
        SELECT
          `value`
        FROM
          UNNEST(event_params)
        WHERE
          key = 'ga_session_id'
        LIMIT
          1
      ).int_value AS STRING
    ) AS visit_identifier,
    device.category AS device_category,
    device.operating_system AS operating_system,
    device.web_info.browser AS browser,
    device.language AS `language`,
    geo.country AS country,
    collected_traffic_source.manual_source AS source,
    collected_traffic_source.manual_medium AS medium,
    collected_traffic_source.manual_campaign_name AS campaign,
    collected_traffic_source.manual_content AS content,
    (
      SELECT
        `value`
      FROM
        UNNEST(event_params)
      WHERE
        key = 'page_location'
      LIMIT
        1
    ).string_value AS page_location,
    (
      SELECT
        `value`
      FROM
        UNNEST(event_params)
      WHERE
        key = 'entrances'
      LIMIT
        1
    ).int_value AS is_entrance
  FROM
    `moz-fx-data-marketing-prod.analytics_314399816.events_*`
  WHERE
    _TABLE_SUFFIX = FORMAT_DATE('%Y%m%d', @submission_date)
    AND event_name = 'page_view'
),
-- Keep entrance page views only, and at most one (the earliest) per session: a session is
-- expected to have a single entrance, but more than one is occasionally present.
entrance_page_views_only AS (
  SELECT
    `date`,
    visit_identifier,
    device_category,
    operating_system,
    browser,
    `language`,
    country,
    source,
    medium,
    campaign,
    content,
    -- Drop the query string, then the scheme and host, leaving only the path.
    -- The dots are escaped so that only the literal blog.mozilla.org host is stripped.
    REGEXP_REPLACE(
      SPLIT(page_location, '?')[SAFE_OFFSET(0)],
      r'^https://blog\.mozilla\.org',
      ''
    ) AS page_path
  FROM
    all_page_views
  WHERE
    is_entrance = 1
    -- Rows without a session identifier would all land in one ROW_NUMBER partition and
    -- collapse into a single arbitrary row that counts zero sessions, so exclude them.
    AND visit_identifier IS NOT NULL
  QUALIFY
    ROW_NUMBER() OVER (PARTITION BY visit_identifier ORDER BY event_timestamp ASC) = 1
),
-- Split the landing path into its first two segments. The path starts with '/', so offset 0
-- is the empty string before it; offsets 1 and 2 are the raw blog and subblog.
staging AS (
  SELECT
    `date`,
    visit_identifier,
    device_category,
    operating_system,
    browser,
    `language`,
    country,
    source,
    medium,
    campaign,
    content,
    SPLIT(page_path, '/')[SAFE_OFFSET(1)] AS blog,
    SPLIT(page_path, '/')[SAFE_OFFSET(2)] AS subblog,
    -- Each row is exactly one session: visit_identifier is non-NULL and unique after the
    -- de-duplication above, so no aggregation is needed to count it.
    1 AS sessions
  FROM
    entrance_page_views_only
)
SELECT
  `date`,
  visit_identifier,
  device_category,
  operating_system,
  browser,
  `language`,
  country,
  source,
  medium,
  campaign,
  content,
  -- Inside both CASE expressions, `blog` and `subblog` are the raw path segments from staging.
  CASE
    WHEN blog LIKE 'press%'
      THEN 'press'
    WHEN blog = 'firefox'
      THEN 'The Firefox Frontier'
    WHEN blog = 'netPolicy'
      THEN 'Open Policy & Advocacy'
    WHEN LOWER(blog) = 'internetcitizen'
      THEN 'Internet Citizen'
    WHEN blog = 'futurereleases'
      THEN 'Future Releases'
    WHEN blog = 'careers'
      THEN 'Careers'
    WHEN blog = 'opendesign'
      THEN 'Open Design'
    WHEN blog = ''
      THEN 'Blog Home Page'
    WHEN LOWER(blog) IN (
        'blog',
        'addons',
        'security',
        -- Exact lowercase 'opendesign' is handled by an earlier branch; this entry only
        -- matches other casings of the segment.
        'opendesign',
        'nnethercote',
        'thunderbird',
        'community',
        'l10n',
        'theden',
        'webrtc',
        'berlin',
        'webdev',
        'services',
        'tanvi',
        'laguaridadefirefox',
        'ux',
        'fxtesteng',
        'foundation-archive',
        'nfroyd',
        'sumo',
        'javascript',
        'page',
        'data'
      )
      THEN LOWER(blog)
    ELSE 'other'
  END AS blog,
  -- Everything that is not a recognized localized sub-blog falls through to 'Main'.
  CASE
    WHEN blog = 'firefox'
      AND subblog IN ('ru', 'pt-br', 'pl', 'it', 'id', 'fr', 'es', 'de')
      THEN subblog
    -- Localized press blogs are their own first path segment, so they are reported as-is.
    WHEN blog IN (
        'press-de',
        'press-fr',
        'press-es',
        'press-uk',
        'press-pl',
        'press-it',
        'press-br',
        'press-nl'
      )
      THEN blog
    -- Case-insensitive, matching the 'Internet Citizen' branch of the blog mapping above.
    WHEN LOWER(blog) = 'internetcitizen'
      AND subblog IN ('de', 'fr')
      THEN subblog
    ELSE 'Main'
  END AS subblog,
  `sessions`
FROM
  staging
|
|
@ -0,0 +1,57 @@
|
|||
# Column schema for blogs_sessions_v2, in output column order.
fields:
  - name: date
    type: DATE
    mode: NULLABLE
    description: Date of the visit
  - name: visit_identifier
    type: STRING
    mode: NULLABLE
    description: Visit Identifier - Uniquely identifies a visit; concatenation of user_pseudo_id and ga_session_id
  - name: device_category
    type: STRING
    mode: NULLABLE
    description: Device Category - The device category the visitor used to visit the site
  - name: operating_system
    type: STRING
    mode: NULLABLE
    description: Operating System - The operating system the visitor used to visit the site
  - name: browser
    type: STRING
    mode: NULLABLE
    description: Browser - The browser the visiting device was using when it visited the site
  - name: language
    type: STRING
    mode: NULLABLE
    description: Language - The language the visiting device was using when it visited the site
  - name: country
    type: STRING
    mode: NULLABLE
    description: Country - The country from which events were reported, based on IP address
  - name: source
    type: STRING
    mode: NULLABLE
    description: Source - Referring partner domain
  - name: medium
    type: STRING
    mode: NULLABLE
    description: Medium - Category of the source, such as 'organic' for a search engine
  - name: campaign
    type: STRING
    mode: NULLABLE
    description: Campaign - Identifier for the marketing campaign
  - name: content
    type: STRING
    mode: NULLABLE
    description: Content - Indicates the particular link within a campaign
  - name: blog
    type: STRING
    mode: NULLABLE
    description: Blog
  - name: subblog
    type: STRING
    mode: NULLABLE
    description: Sub-Blog
  - name: sessions
    type: INT64
    mode: NULLABLE
    description: Number of Sessions
|
Загрузка…
Ссылка в новой задаче